From 0b6f994099372d2f74c646d917a85a770542c6a2 Mon Sep 17 00:00:00 2001 From: Miguel Pineda <110496466+ma-pineda@users.noreply.github.com> Date: Mon, 12 Aug 2024 09:43:28 -0600 Subject: [PATCH 01/50] Adding classical-ml and data-analytics presets (#255) Signed-off-by: Tyler Titsworth Signed-off-by: tylertitsworth Co-authored-by: Tyler Titsworth Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: jafraustro --- .github/workflows/container-ci.yaml | 9 +- .github/workflows/integration-test.yaml | 2 +- preset/classical-ml/Dockerfile | 128 ++--- preset/classical-ml/docker-compose.yaml | 82 ++- preset/classical-ml/requirements.txt | 2 +- preset/classical-ml/tests/scikit/kmeans.py | 1 + .../classical-ml/tests/scikit/test_scikit.sh | 4 +- preset/data-analytics/Dockerfile | 115 ++-- preset/data-analytics/docker-compose.yaml | 66 +-- preset/data-analytics/requirements.txt | 2 +- preset/deep-learning/Dockerfile | 527 ++++++++---------- preset/deep-learning/docker-compose.yaml | 201 +++---- preset/deep-learning/requirements.txt | 15 +- preset/deep-learning/tests.yaml | 33 +- preset/inference-optimization/Dockerfile | 39 +- .../docker-compose.yaml | 200 +++---- .../inference-optimization/requirements.txt | 19 +- preset/inference-optimization/tests.yaml | 45 +- 18 files changed, 709 insertions(+), 781 deletions(-) mode change 100644 => 100755 preset/deep-learning/tests.yaml diff --git a/.github/workflows/container-ci.yaml b/.github/workflows/container-ci.yaml index 08669ffa..6dbf8532 100644 --- a/.github/workflows/container-ci.yaml +++ b/.github/workflows/container-ci.yaml @@ -63,7 +63,7 @@ jobs: setup-build: outputs: matrix: ${{ steps.build-matrix.outputs.matrix }} - runs-on: ubuntu-latest # ${{ github.repository_owner == 'intel' && 'intel-ubuntu-latest' || 'ubuntu-latest' }} + runs-on: ${{ github.repository_owner == 'intel' && 'intel-ubuntu-latest' || 'ubuntu-latest' }} steps: - name: Harden Runner uses: 
step-security/harden-runner@0d381219ddf674d61a7572ddd19d7941e271515c # v2.9.0 @@ -79,13 +79,14 @@ jobs: build-containers: needs: [setup-build] env: ${{ matrix }} - runs-on: ubuntu-latest # ${{ github.repository_owner == 'intel' && 'intel-ubuntu-latest' || 'ubuntu-latest' }} + runs-on: ${{ github.repository_owner == 'intel' && 'intel-ubuntu-latest' || 'ubuntu-latest' }} strategy: matrix: ${{ fromJson(needs.setup-build.outputs.matrix) }} fail-fast: false outputs: group: ${{ steps.build-group.outputs.container-group }} steps: + - uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 if: ${{ !inputs.no_build }} - uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567 # v3.3.0 @@ -111,7 +112,7 @@ jobs: setup-scan: needs: [build-containers] if: ${{ github.event_name == 'pull_request' }} - runs-on: ubuntu-latest # ${{ github.repository_owner == 'intel' && 'intel-ubuntu-latest' || 'ubuntu-latest' }} + runs-on: ${{ github.repository_owner == 'intel' && 'intel-ubuntu-latest' || 'ubuntu-latest' }} outputs: matrix: ${{ steps.scan-matrix.outputs.matrix }} steps: @@ -164,7 +165,7 @@ jobs: #################################################################################################### setup-test: needs: [build-containers] - runs-on: ubuntu-latest # ${{ github.repository_owner == 'intel' && 'intel-ubuntu-latest' || 'ubuntu-latest' }} + runs-on: ${{ github.repository_owner == 'intel' && 'intel-ubuntu-latest' || 'ubuntu-latest' }} outputs: matrix: ${{ steps.test-matrix.outputs.matrix }} steps: diff --git a/.github/workflows/integration-test.yaml b/.github/workflows/integration-test.yaml index af6f4cc2..2a102efd 100644 --- a/.github/workflows/integration-test.yaml +++ b/.github/workflows/integration-test.yaml @@ -113,7 +113,7 @@ jobs: path: output.txt recreate: true status-check: - needs: [group-diff, pipeline-ci] + needs: [group-diff, pipeline-ci, merge-logs] 
runs-on: ${{ github.repository_owner == 'intel' && 'intel-ubuntu-latest' || 'ubuntu-latest' }} if: always() steps: diff --git a/preset/classical-ml/Dockerfile b/preset/classical-ml/Dockerfile index a9666e3a..bd6cebde 100644 --- a/preset/classical-ml/Dockerfile +++ b/preset/classical-ml/Dockerfile @@ -12,40 +12,35 @@ # See the License for the specific language governing permissions and # limitations under the License. + ARG BASE_IMAGE="ubuntu" ARG BASE_TAG="22.04" -FROM ${BASE_IMAGE}:${BASE_TAG} as classical-ml-base +FROM ${BASE_IMAGE}:${BASE_TAG} as classical-ml ENV DEBIAN_FRONTEND=noninteractive -# See http://bugs.python.org/issue19846 - ENV LANG=C.UTF-8 SHELL ["/bin/bash", "-c"] RUN apt-get update -y && \ apt-get install -y --no-install-recommends --fix-missing \ - bzip2 \ - ca-certificates \ - diffutils \ - gcc \ - git \ - gzip \ - make \ - patch \ - rsync \ - unzip \ - wget \ - xz-utils && \ + bzip2 \ + ca-certificates \ + diffutils \ + gcc \ + git \ + gzip \ + make \ + patch \ + rsync \ + unzip \ + wget \ + xz-utils && \ rm -rf /var/lib/apt/lists/* -FROM classical-ml-base as classical-ml-python - -# Setting up non-root directories RUN useradd --uid 1000 -d /home/dev -s /bin/bash -m dev -# Set a password for the user (Optional) RUN echo 'dev:password' | chpasswd USER dev WORKDIR /home/dev @@ -56,68 +51,69 @@ ARG PYTHON_VERSION ARG IDP_VERSION ARG INTEL_CHANNEL -RUN wget --progress=dot:giga --no-check-certificate https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-${MINIFORGE_VERSION}.sh -O miniforge.sh && \ +RUN wget --progress=dot:giga --no-check-certificate https://github.com/conda-forge/miniforge/releases/latest/download/${MINIFORGE_VERSION}.sh -O miniforge.sh && \ chmod +x miniforge.sh && \ ./miniforge.sh -b -p "${CONDA_ROOT}" && \ rm ./miniforge.sh && \ - ln -s "${CONDA_ROOT}" "${CONDA_ROOT}/../miniforge3" && \ + ln -s "${CONDA_ROOT}" "${CONDA_ROOT}/../miniforge" && \ export PATH="${CONDA_ROOT}/bin/:${PATH}" && \ - conda update -y 
conda && \ - conda config --add channels conda-forge && \ - conda config --add channels https://software.repos.intel.com/python/conda/ && \ conda init --all && \ conda install -y \ - 'jupyterlab>=4.1.8' \ - 'notebook>=7.1.3' \ - 'jupyterhub>=4.1.5' \ - 'jupyter-server-proxy>=4.1.2' \ - 'mako>=1.2.2' \ - 'pyjwt>=2.4.0' \ - 'cryptography>=42.0.5' \ - 'nodejs>=20.12.2' \ - 'aiohttp>=3.9.4' \ + 'colorama==0.4.6' \ + 'conda==24.5.0' \ + 'jupyterhub==5.1.0' \ + 'jupyter-server-proxy==4.3.0' \ + 'mamba==1.5.8' \ + 'networkx==3.3' \ + 'notebook==7.2.1' \ + 'pip==24.0' \ + 'python==3.10.14' \ 'idna>=3.7' \ - 'oauthlib>=3.2.2' \ - && \ - jupyter labextension disable "@jupyterlab/apputils-extension:announcements" && \ - conda clean -y --all + 'requests>=2.32.0' \ + 'setuptools>=70.0.0' \ + 'tqdm>=4.66.3' \ + 'urllib3>=2.2.2' \ + 'nodejs==22.5.1' \ + && \ + jupyter labextension disable "@jupyterlab/apputils-extension:announcements" \ + && \ + conda clean -y --all \ + && \ + conda config --add channels ${INTEL_CHANNEL} ENV PATH ${CONDA_ROOT}/condabin:${CONDA_ROOT}/bin/:${PATH} +RUN conda config --set pip_interop_enabled True ARG IDP_VERSION +ARG DAAL4PY_VERSION ARG DPNP_VERSION ARG XGBOOST_VERSION ARG MODIN_VERSION ARG NUMPY_VERSION ARG SKLEARNEX_VERSION -# Conda packages -RUN conda create -yn classical-ml -c ${INTEL_CHANNEL} -c conda-forge \ - dpnp=${DPNP_VERSION} \ - numpy=${NUMPY_VERSION} \ - python=${PYTHON_VERSION} \ - scikit-learn-intelex==${SKLEARNEX_VERSION} \ - xgboost=${XGBOOST_VERSION} \ - modin-ray=${MODIN_VERSION} \ - 'python-dotenv>=1.0.1' \ - 'tqdm>=4.66.2' \ - 'matplotlib-base>=3.4.3' \ - 'threadpoolctl>=3.3.0' \ - 'ipython>=8.18.1' \ - 'ipykernel>=6.29.3' \ - 'kernda>=0.3.0' \ - 'protobuf>=4.24' \ - 'pillow>=10.2.0' \ - 'tornado>=6.3.3' && \ +RUN conda create -yn classical-ml \ + "python=${PYTHON_VERSION}" \ + "daal4py=${DAAL4PY_VERSION}" \ + "dpnp=${DPNP_VERSION}" \ + 'ipykernel==6.29.5' \ + 'kernda==0.3.0' \ + 'matplotlib-base==3.8.4' \ + 
"modin-ray=${MODIN_VERSION}" \ + 'python-dotenv==1.0.1' \ + "scikit-learn-intelex=${SKLEARNEX_VERSION}" \ + 'tqdm==4.66.4' \ + "xgboost=${XGBOOST_VERSION}" \ + 'idna>=3.7' \ + 'requests>=2.32.0' \ + 'setuptools>=70.0.0' \ + 'tqdm>=4.66.3' \ + 'urllib3>=2.2.2' \ + && \ conda clean -y --all - - -# PyPI packages RUN conda run -n classical-ml python -m pip install --no-deps --no-cache-dir \ - 'dataset-librarian==1.0.4' \ - 'cloud-data-connector==1.0.3' - + 'dataset-librarian==1.0.4' ENV PYTHONSTARTUP=~/.patch_sklearn.py COPY base/.patch_sklearn.py ~/.patch_sklearn.py @@ -125,8 +121,6 @@ COPY base/.patch_sklearn.py ~/.patch_sklearn.py ENV PYTHONSTARTUP=/home/dev/.patch_sklearn.py COPY base/.patch_sklearn.py /home/dev/.patch_sklearn.py -FROM classical-ml-python as classical-ml-jupyter - EXPOSE 8888 RUN mkdir -p ~/jupyter/ && chmod -R a+rwx ~/jupyter/ && \ @@ -136,10 +130,10 @@ WORKDIR /home/dev COPY --chown=dev notebooks /home/dev/jupyter COPY --chown=dev tests /home/dev/sample-tests -RUN "${CONDA_ROOT}/envs/classical-ml/bin/python" -m ipykernel install --user --name classical-ml --display-name "Classical ML" && \ - "${CONDA_ROOT}/envs/classical-ml/bin/kernda" -o -y "$HOME/.local/share/jupyter/kernels/$(echo classical-ml | sed -e 's/\(.*\)/\L\1/')/kernel.json" && \ - "${CONDA_ROOT}/envs/classical-ml/bin/python" -m ipykernel.kernelspec --user && \ - conda clean -y --all +RUN KERNEL_DIR="${CONDA_ROOT}/share/jupyter/kernels/classical-ml" && \ + conda run -n classical-ml python -m ipykernel install --prefix "$CONDA_ROOT" --name classical-ml --display-name "Classical ML" && \ + conda run -n classical-ml kernda -o -y "$KERNEL_DIR/kernel.json" && \ + conda run -n base jupyter kernelspec list CMD ["bash", "-c", "source activate classical-ml && jupyter lab --notebook-dir=~/jupyter --port 8888 --ip 0.0.0.0 --no-browser --allow-root"] diff --git a/preset/classical-ml/docker-compose.yaml b/preset/classical-ml/docker-compose.yaml index a6e06fbd..c2dc9c1a 100644 --- 
a/preset/classical-ml/docker-compose.yaml +++ b/preset/classical-ml/docker-compose.yaml @@ -15,6 +15,7 @@ # -*- coding: utf-8 -*- # + version: '3' services: classical-ml: @@ -22,28 +23,30 @@ services: args: BASE_IMAGE: ${BASE_IMAGE:-ubuntu} BASE_TAG: ${BASE_TAG:-22.04} - DPNP_VERSION: ${NUMBA_DPEX_VERSION:-0.14.0} - IDP_VERSION: ${IDP_VERSION:-2024.1.0} + DAAL4PY_VERSION: ${DAAL4PY_VERSION:-2024.5.0} + DPNP_VERSION: ${DPNP_VERSION:-0.15.0} + IDP_VERSION: ${IDP_VERSION:-2024.2} INTEL_CHANNEL: ${INTEL_CHANNEL:-https://software.repos.intel.com/python/conda/} - MINIFORGE_VERSION: ${MINIFORGE_VERSION:-Linux-x86_64} - MODIN_VERSION: ${MODIN_VERSION:-0.26.1} - MPI_VERSION: ${MPI_VERSION:-2021.12.0} - NUMBA_DPEX_VERSION: ${NUMBA_DPEX_VERSION:-0.22.1} + MINIFORGE_VERSION: ${MINIFORGE_VERSION:-Miniforge3-Linux-x86_64} + MODIN_VERSION: ${MODIN_VERSION:-0.30.0} + MPI_VERSION: ${MPI_VERSION:-2021.13} + NUMBA_DPEX_VERSION: ${NUMBA_DPEX_VERSION:-0.23.0} NUMPY_VERSION: ${NUMPY_VERSION:-1.26.4} - PYTHON_VERSION: ${PYTHON_VERSION:-3.10} - SKLEARNEX_VERSION: ${SKLEARNEX_VERSION:-2024.2.0} + PYTHON_VERSION: ${PYTHON_VERSION:-3.9} + SKLEARNEX_VERSION: ${SKLEARNEX_VERSION:-2024.5.0} XGBOOST_VERSION: ${XGBOOST_VERSION:-2.0.3} http_proxy: ${http_proxy} https_proxy: ${https_proxy} no_proxy: '' context: . 
+ target: classical-ml labels: docs: classical_ml org.opencontainers.image.title: "Intel® AI Tools Selector Preset Containers - Classical ML" org.opencontainers.base.name: "ubuntu:22.04" org.opencontainers.image.name: "intel/classical-ml" - org.opencontainers.image.version: 2024.1.0-py${PYTHON_VERSION:-3.10} - dependency.python: ${PYTHON_VERSION:-3.10} + org.opencontainers.image.version: 2024.2.0-py${PYTHON_VERSION:-3.9} + dependency.python: ${PYTHON_VERSION:-3.9} dependency.python.pip: requirements.txt dependency.apt.bzip2: true dependency.apt.ca-certificates: true @@ -57,39 +60,26 @@ services: dependency.apt.unzip: true dependency.apt.wget: true dependency.apt.xz-utils: true - dependency.conda.jupyterlab: '>=4.1.8' - dependency.conda.notebook: '>=7.1.3' - dependency.conda.jupyterhub: '>=4.1.5' - dependency.conda.jupyter-server-proxy: '>=4.1.2' - dependency.conda.mako: '>=1.2.2' - dependency.conda.pyjwt: '>=2.4.0' - dependency.conda.cryptography: '>=42.0.5' - dependency.conda.nodejs: '>=20.12.2' - dependency.conda.aiohttp: '>=3.9.4' - dependency.conda.idna: '>=3.7' - dependency.conda.oauthlib: '>=3.2.2' - dependency.conda.dpnp: '>=0.14.0' - dependency.conda.numpy: '>=1.26.4' - dependency.conda.python: "=${PYTHON_VERSION:-3.10}" - dependency.conda.scikit-learn-intelex: '>=2024.2.0' - dependency.conda.xgboost: '>=2.0.3' - dependency.conda.modin-ray: '>=0.26.1' - dependency.conda.python-dotenv: '>=1.0.1' - dependency.conda.tqdm: '>=4.66.2' - dependency.conda.matplotlib-base: '>=3.4.3' - dependency.conda.dataset_librarian: '>=1.0.4' - dependency.conda.threadpoolctl: '>=3.3.0' - dependency.conda.ipython: '>=8.18.1' - dependency.conda.ipykernel: '>=6.29.3' - dependency.conda.kernda: '>=0.3.0' - dependency.conda.protobuf: '>=4.24' - dependency.conda.pillow: '>=10.2.0' - dependency.conda.tornado: '>=6.3.3' - target: classical-ml-jupyter - command: | - bash -c "conda run -n classical-ml python -c 'import sklearn; import xgboost; print(\"SciKit:\", sklearn.__version__, \" 
XGBoost:\",xgboost.__version__)' && \ - conda run -n classical-ml python -c 'import modin.pandas as pd, modin.config as cfg; cfg.Engine.put(\"Ray\"); df = pd.DataFrame([1]);print(df+1)'" - image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-classical-ml-2024.1.0-py${PYTHON_VERSION:-3.10} + dependency.conda.colorama: '==0.4.6' + dependency.conda.conda: '==24.5.0' + dependency.conda.daal4py: '=2024.5.0' + dependency.conda.dpnp: '=0.15.0' + dependency.conda.ipykernel: '==6.29.5' + dependency.conda.jupyterhub: '==5.1.0' + dependency.conda.jupyter-server-proxy: '==4.3.0' + dependency.conda.kernda: '==0.3.0' + dependency.conda.mamba: '==1.5.8' + dependency.conda.matplotlib-base: '==3.8.4' + dependency.conda.modin-ray: '=0.30.0' + dependency.conda.networkx: '==3.3' + dependency.conda.notebook: '==7.2.1' + dependency.conda.pip: '==24.0' + dependency.conda.python: '==3.10.14' + dependency.conda.python-dotenv: '==1.0.1' + dependency.conda.scikit-learn-intelex: '=2024.5.0' + dependency.conda.tqdm: '==4.66.4' + dependency.conda.xgboost: '=2.0.3' + image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-classical-ml-${RELEASE:-2024.2.0}-py${PYTHON_VERSION:-3.9} environment: http_proxy: ${http_proxy} https_proxy: ${https_proxy} @@ -97,3 +87,9 @@ services: shm_size: 12GB volumes: - /dev/dri/by-path:/dev/dri/by-path + command: > + bash -c " conda run -n classical-ml python -c 'import sklearn;import xgboost;print(\"SciKit:\", + sklearn.__version__, \" XGBoost:\", xgboost.__version__)' && + + conda run -n classical-ml python -c 'import modin.pandas as pd;import modin.config + as cfg;cfg.Engine.put(\"Ray\");df = pd.DataFrame([1]);print(df+1)' " diff --git a/preset/classical-ml/requirements.txt b/preset/classical-ml/requirements.txt index d231202d..8fe3dfff 100644 --- a/preset/classical-ml/requirements.txt +++ b/preset/classical-ml/requirements.txt @@ -1 +1 @@ -cloud-data-connector==1.0.3 +dataset-librarian==1.0.4 diff --git a/preset/classical-ml/tests/scikit/kmeans.py 
b/preset/classical-ml/tests/scikit/kmeans.py index 9120b7d0..c78acba7 100644 --- a/preset/classical-ml/tests/scikit/kmeans.py +++ b/preset/classical-ml/tests/scikit/kmeans.py @@ -62,6 +62,7 @@ data, labels = load_digits(return_X_y=True) (n_samples, n_features), n_digits = data.shape, np.unique(labels).size +data = np.array(data, dtype=np.float64) print(f"# digits: {n_digits}; # samples: {n_samples}; # features {n_features}") diff --git a/preset/classical-ml/tests/scikit/test_scikit.sh b/preset/classical-ml/tests/scikit/test_scikit.sh index a6b2f24e..9d16e938 100755 --- a/preset/classical-ml/tests/scikit/test_scikit.sh +++ b/preset/classical-ml/tests/scikit/test_scikit.sh @@ -14,8 +14,8 @@ # limitations under the License. set -xe + SCRIPT_DIR=$(dirname "$0") python "${SCRIPT_DIR}/kmeans.py" - -python "${SCRIPT_DIR}/kmeans.py" true +python "${SCRIPT_DIR}/kmeans.py" true # Enable intel opt diff --git a/preset/data-analytics/Dockerfile b/preset/data-analytics/Dockerfile index 37954c83..ffb56ceb 100644 --- a/preset/data-analytics/Dockerfile +++ b/preset/data-analytics/Dockerfile @@ -12,107 +12,100 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+ ARG BASE_IMAGE="ubuntu" ARG BASE_TAG="22.04" -FROM ${BASE_IMAGE}:${BASE_TAG} as data-analytics-base +FROM ${BASE_IMAGE}:${BASE_TAG} as data-analytics ENV DEBIAN_FRONTEND=noninteractive -# See http://bugs.python.org/issue19846 - ENV LANG=C.UTF-8 SHELL ["/bin/bash", "-c"] RUN apt-get update -y && \ apt-get install -y --no-install-recommends --fix-missing \ - bzip2 \ - ca-certificates \ - diffutils \ - gcc \ - git \ - gzip \ - make \ - patch \ - rsync \ - unzip \ - wget \ - xz-utils && \ + bzip2 \ + ca-certificates \ + diffutils \ + gcc \ + git \ + gzip \ + make \ + patch \ + rsync \ + unzip \ + wget \ + xz-utils && \ rm -rf /var/lib/apt/lists/* -FROM data-analytics-base as data-analytics-python - -# Setting up non-root directories RUN useradd --uid 1000 -d /home/dev -s /bin/bash -m dev -# Set a password for the user (Optional) RUN echo 'dev:password' | chpasswd USER dev WORKDIR /home/dev ENV CONDA_ROOT=/home/dev/conda - ARG MINIFORGE_VERSION ARG PYTHON_VERSION ARG IDP_VERSION ARG INTEL_CHANNEL -RUN wget --progress=dot:giga --no-check-certificate "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-${MINIFORGE_VERSION}.sh" -O miniforge.sh && \ +RUN wget --progress=dot:giga --no-check-certificate "https://github.com/conda-forge/miniforge/releases/latest/download/${MINIFORGE_VERSION}.sh" -O miniforge.sh && \ chmod +x miniforge.sh && \ ./miniforge.sh -b -p "${CONDA_ROOT}" && \ rm ./miniforge.sh && \ - ln -s "${CONDA_ROOT}" "${CONDA_ROOT}/../miniforge3" && \ + ln -s "${CONDA_ROOT}" "${CONDA_ROOT}/../miniforge" && \ export PATH="${CONDA_ROOT}/bin/:${PATH}" && \ - conda update -y conda && \ - conda config --add channels conda-forge && \ - conda config --add channels https://software.repos.intel.com/python/conda/ && \ conda init --all && \ conda install -y \ - 'jupyterlab>=4.1.8' \ - 'notebook>=7.1.3' \ - 'jupyterhub>=4.1.5' \ - 'jupyter-server-proxy>=4.1.2' \ - 'mako>=1.2.2' \ - 'pyjwt>=2.4.0' \ - 'cryptography>=42.0.5' \ - 'nodejs>=20.12.2' \ + 
'colorama==0.4.6' \ + 'conda==24.5.0' \ + 'jupyterhub==5.1.0' \ + 'jupyter-server-proxy==4.3.0' \ + 'mamba==1.5.8' \ + 'networkx==3.3' \ + 'notebook==7.2.1' \ + 'python==3.10.14' \ 'idna>=3.7' \ - 'tqdm>=4.66.2' \ - && \ - jupyter labextension disable "@jupyterlab/apputils-extension:announcements" && \ - conda clean -y --all + 'requests>=2.32.0' \ + 'setuptools>=70.0.0' \ + 'tqdm>=4.66.3' \ + 'urllib3>=2.2.2' \ + 'nodejs==22.5.1' \ + && \ + jupyter labextension disable "@jupyterlab/apputils-extension:announcements" \ + && \ + conda clean -y --all \ + && \ + conda config --add channels ${INTEL_CHANNEL} ENV PATH ${CONDA_ROOT}/condabin:${CONDA_ROOT}/bin/:${PATH} +RUN conda config --set pip_interop_enabled True ARG IDP_VERSION ARG DPNP_VERSION ARG MODIN_VERSION ARG NUMPY_VERSION -# data-analytics Env - conda packages -RUN conda create -yn data-analytics -c "${INTEL_CHANNEL}" -c conda-forge \ - dpnp="${DPNP_VERSION}" \ - numpy="${NUMPY_VERSION}" \ - python="${PYTHON_VERSION}" \ - modin-ray="${MODIN_VERSION}" \ - 'python-dotenv>=1.0.1' \ - 'tqdm>=4.66.2' \ - 'matplotlib-base>=3.4.3' \ - 'threadpoolctl>=3.3.0' \ - 'ipython>=8.18.1' \ - 'ipykernel>=6.29.3' \ - 'kernda>=0.3.0' \ - 'protobuf>=4.24.4' \ - 'pillow>=10.2.0' \ +RUN conda create -yn data-analytics \ + "python=${PYTHON_VERSION}" \ + "dpnp=${DPNP_VERSION}" \ + 'ipykernel==6.29.5' \ + 'kernda==0.3.0' \ + 'matplotlib-base==3.8.4' \ + "modin-ray=${MODIN_VERSION}" \ + 'python-dotenv==1.0.1' \ 'idna>=3.7' \ - 'tornado>=6.3.3' && \ + 'requests>=2.32.0' \ + 'setuptools>=70.0.0' \ + 'tqdm>=4.66.3' \ + 'urllib3>=2.2.2' \ + && \ conda clean -y --all RUN conda run -n data-analytics python -m pip install --no-deps --no-cache-dir \ - 'dataset-librarian==1.0.4' \ - 'cloud-data-connector==1.0.3' - -FROM data-analytics-python as data-analytics-jupyter + 'dataset-librarian==1.0.4' EXPOSE 8888 @@ -122,10 +115,10 @@ RUN mkdir -p ~/jupyter/ && chmod -R a+rwx ~/jupyter/ && \ COPY --chown=dev notebooks /home/dev/jupyter COPY --chown=dev 
tests /home/dev/sample-tests -RUN "${CONDA_ROOT}/envs/data-analytics/bin/python" -m ipykernel install --user --name data-analytics --display-name "Data Analytics" && \ - "${CONDA_ROOT}/envs/data-analytics/bin/kernda" -o -y "$HOME/.local/share/jupyter/kernels/$(echo data-analytics | sed -e 's/\(.*\)/\L\1/')/kernel.json" && \ - "${CONDA_ROOT}/envs/data-analytics/bin/python" -m ipykernel.kernelspec --user && \ - conda clean -y --all +RUN KERNEL_DIR="${CONDA_ROOT}/share/jupyter/kernels/data-analytics" && \ + conda run -n data-analytics python -m ipykernel install --prefix "$CONDA_ROOT" --name data-analytics --display-name "Data Analytics" && \ + conda run -n data-analytics kernda -o -y "$KERNEL_DIR/kernel.json" && \ + conda run -n base jupyter kernelspec list CMD ["bash", "-c", "source activate data-analytics && jupyter lab --notebook-dir=~/jupyter --port 8888 --ip 0.0.0.0 --no-browser --allow-root"] diff --git a/preset/data-analytics/docker-compose.yaml b/preset/data-analytics/docker-compose.yaml index 99b37f6d..9c00331e 100644 --- a/preset/data-analytics/docker-compose.yaml +++ b/preset/data-analytics/docker-compose.yaml @@ -15,6 +15,7 @@ # -*- coding: utf-8 -*- # + version: '3' services: data-analytics: @@ -22,26 +23,26 @@ services: args: BASE_IMAGE: ${BASE_IMAGE:-ubuntu} BASE_TAG: ${BASE_TAG:-22.04} - DPNP_VERSION: ${NUMBA_DPEX_VERSION:-0.14.0} - IDP_VERSION: ${IDP_VERSION:-2024.1.0} + DPNP_VERSION: ${DPNP_VERSION:-0.15.0} + IDP_VERSION: ${IDP_VERSION:-2024.2} INTEL_CHANNEL: ${INTEL_CHANNEL:-https://software.repos.intel.com/python/conda/} - MINIFORGE_VERSION: ${MINIFORGE_VERSION:-Linux-x86_64} - MODIN_VERSION: ${MODIN_VERSION:-0.26.1} - MPI_VERSION: ${MPI_VERSION:-2021.12.0} - NUMBA_DPEX_VERSION: ${NUMBA_DPEX_VERSION:-0.22.1} + MINIFORGE_VERSION: ${MINIFORGE_VERSION:-Miniforge3-Linux-x86_64} + MODIN_VERSION: ${MODIN_VERSION:-0.30.0} NUMPY_VERSION: ${NUMPY_VERSION:-1.26.4} - PYTHON_VERSION: ${PYTHON_VERSION:-3.10} + PYTHON_VERSION: ${PYTHON_VERSION:-3.9} + 
XGBOOST_VERSION: ${XGBOOST_VERSION:-2.0.3} http_proxy: ${http_proxy} https_proxy: ${https_proxy} no_proxy: '' context: . + target: data-analytics labels: docs: data_analytics org.opencontainers.image.title: "Intel® AI Tools Selector Preset Containers - Data Analytics" org.opencontainers.base.name: "ubuntu:22.04" org.opencontainers.image.name: "intel/data-analytics" - org.opencontainers.image.version: 2024.1.0-py${PYTHON_VERSION:-3.10} - dependency.python: ${PYTHON_VERSION:-3.10} + org.opencontainers.image.version: 2024.2.0-py${PYTHON_VERSION:-3.9} + dependency.python: ${PYTHON_VERSION:-3.9} dependency.python.pip: requirements.txt dependency.apt.bzip2: true dependency.apt.ca-certificates: true @@ -55,34 +56,21 @@ services: dependency.apt.unzip: true dependency.apt.wget: true dependency.apt.xz-utils: true - dependency.conda.jupyterlab: '>=4.1.8' - dependency.conda.notebook: '>=7.1.3' - dependency.conda.jupyterhub: '>=4.1.5' - dependency.conda.jupyter-server-proxy: '>=4.1.2' - dependency.conda.mako: '>=1.2.2' - dependency.conda.pyjwt: '>=2.4.0' - dependency.conda.cryptography: '>=42.0.5' - dependency.conda.nodejs: '>=20.12.2' - dependency.conda.idna: '>=3.7' - dependency.conda.tqdm: '>=4.66.2' - dependency.conda.dpnp: '>=0.14.0' - dependency.conda.numpy: '>=1.26.4' - dependency.conda.python: "=${PYTHON_VERSION:-3.10}" - dependency.conda.modin-ray: '>=0.26.1' - dependency.conda.python-dotenv: '>=1.0.1' - dependency.conda.matplotlib-base: '>=3.4.3' - dependency.conda.dataset_librarian: '>=1.0.4' - dependency.conda.threadpoolctl: '>=3.3.0' - dependency.conda.ipython: '>=8.18.1' - dependency.conda.ipykernel: '>=6.29.3' - dependency.conda.kernda: '>=0.3.0' - dependency.conda.protobuf: '>=4.24.4' - dependency.conda.pillow: '>=10.2.0' - dependency.conda.tornado: '>=6.3.3' - target: data-analytics-jupyter - command: > - bash -c "conda run -n data-analytics python -c 'import modin.pandas as pd, modin.config as cfg; cfg.Engine.put(\"Ray\"); df = pd.DataFrame([1]);print(df+1)'" 
- image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-data-analytics-2024.1.0-py${PYTHON_VERSION:-3.10} + dependency.conda.colorama: '==0.4.6' + dependency.conda.conda: '==24.5.0' + dependency.conda.dpnp: '=0.15.0' + dependency.conda.ipykernel: '==6.29.5' + dependency.conda.jupyterhub: '==5.1.0' + dependency.conda.jupyter-server-proxy: '==4.3.0' + dependency.conda.kernda: '==0.3.0' + dependency.conda.mamba: '==1.5.8' + dependency.conda.matplotlib-base: '==3.8.4' + dependency.conda.modin-ray: '=0.30.0' + dependency.conda.networkx: '==3.3' + dependency.conda.notebook: '==7.2.1' + dependency.conda.python: '==3.10.14' + dependency.conda.python-dotenv: '==1.0.1' + image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-data-analytics-${RELEASE:-2024.2.0}-py${PYTHON_VERSION:-3.9} environment: http_proxy: ${http_proxy} https_proxy: ${https_proxy} @@ -90,3 +78,7 @@ services: shm_size: 12GB volumes: - /dev/dri/by-path:/dev/dri/by-path + command: > + bash -c " conda run -n data-analytics python -c 'import modin.pandas as pd;import + modin.config as cfg;cfg.Engine.put(\"Ray\");df = pd.DataFrame([1]);print(df+1)' + " diff --git a/preset/data-analytics/requirements.txt b/preset/data-analytics/requirements.txt index d231202d..8fe3dfff 100644 --- a/preset/data-analytics/requirements.txt +++ b/preset/data-analytics/requirements.txt @@ -1 +1 @@ -cloud-data-connector==1.0.3 +dataset-librarian==1.0.4 diff --git a/preset/deep-learning/Dockerfile b/preset/deep-learning/Dockerfile index 05721e11..213606b8 100644 --- a/preset/deep-learning/Dockerfile +++ b/preset/deep-learning/Dockerfile @@ -12,158 +12,148 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+ ARG BASE_IMAGE=ubuntu ARG BASE_TAG=22.04 -FROM ${BASE_IMAGE}:${BASE_TAG} AS dgpu-base +FROM ${BASE_IMAGE}:${BASE_TAG} AS deep-learning-base -ENV DEBIAN_FRONTEND=noninteractive +SHELL ["/bin/bash", "-c"] -# See http://bugs.python.org/issue19846 +ENV DEBIAN_FRONTEND=noninteractive ENV LANG C.UTF-8 ARG PYTHON_VERSION EXPOSE 8080 -ENV LANG=C.UTF-8 - -SHELL ["/bin/bash", "-c"] - RUN apt-get update -y && \ apt-get install -y --no-install-recommends --fix-missing \ - apt-utils \ - build-essential \ - bzip2 \ - ca-certificates \ - clinfo \ - cmake \ - diffutils \ - g++ \ - gcc \ - git \ - gnupg2 \ - gpg-agent \ - gzip \ - make \ - numactl \ - patch \ - rsync \ - unzip \ - wget \ - sudo \ - xz-utils && \ + apt-utils \ + build-essential \ + bzip2 \ + ca-certificates \ + clinfo \ + cmake \ + diffutils \ + g++ \ + gcc \ + git \ + gnupg2 \ + gpg-agent \ + gzip \ + make \ + numactl \ + patch \ + rsync \ + unzip \ + wget \ + sudo \ + xz-utils \ + && \ rm -rf /var/lib/apt/lists/* -# GPU Drivers setup ARG DEVICE ARG ICD_VER ARG LEVEL_ZERO_GPU_VER ARG LEVEL_ZERO_VER ARG LEVEL_ZERO_DEV_VER - -# Public Drivers link RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \ - gpg --dearmor --output /usr/share/keyrings/intel-graphics.gpg -RUN echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy/lts/2350 unified" | \ + gpg --dearmor --output /usr/share/keyrings/intel-graphics.gpg && \ + echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy/lts/2350 unified" | \ tee /etc/apt/sources.list.d/intel-gpu-jammy.list RUN apt-get update && \ apt-get install -y --no-install-recommends --fix-missing \ - intel-opencl-icd="${ICD_VER}" \ - intel-level-zero-gpu="${LEVEL_ZERO_GPU_VER}" \ - level-zero="${LEVEL_ZERO_VER}" + intel-level-zero-gpu="${LEVEL_ZERO_GPU_VER}" \ + intel-opencl-icd="${ICD_VER}" \ + level-zero="${LEVEL_ZERO_VER}" RUN apt-get update && \ 
apt-get install -y --no-install-recommends --fix-missing \ - intel-media-va-driver-non-free \ - libmfx1 \ - libmfxgen1 \ - libvpl2 \ - libegl-mesa0 \ - libegl1-mesa \ - libegl1-mesa-dev \ - libgbm1 \ - libgl1-mesa-dev \ - libgl1-mesa-dri \ - libglapi-mesa \ - libgles2-mesa-dev \ - libglx-mesa0 \ - libigdgmm12 \ - libxatracker2 \ - mesa-va-drivers \ - mesa-vdpau-drivers \ - mesa-vulkan-drivers \ - va-driver-all \ - vainfo \ - hwinfo \ - clinfo + clinfo \ + hwinfo \ + intel-media-va-driver-non-free \ + libegl-mesa0 \ + libegl1-mesa \ + libegl1-mesa-dev \ + libgbm1 \ + libgl1-mesa-dev \ + libgl1-mesa-dri \ + libglapi-mesa \ + libgles2-mesa-dev \ + libglx-mesa0 \ + libigdgmm12 \ + libmfx1 \ + libmfxgen1 \ + libvpl2 \ + mesa-va-drivers \ + mesa-vdpau-drivers \ + mesa-vulkan-drivers \ + va-driver-all \ + vainfo RUN apt-get install -y --no-install-recommends --fix-missing \ - libigc-dev \ - intel-igc-cm \ - libigdfcl-dev \ - libigfxcmrt-dev \ - level-zero-dev="${LEVEL_ZERO_DEV_VER}" && \ - rm -rf /var/lib/apt/lists/* - -RUN rm /etc/apt/sources.list.d/*list + intel-igc-cm \ + libigc-dev \ + libigdfcl-dev \ + libigfxcmrt-dev \ + level-zero-dev="${LEVEL_ZERO_DEV_VER}" \ + && \ + rm -rf /var/lib/apt/lists/* /etc/apt/sources.list.d/*list -FROM dgpu-base as deep-learning-python - -# Setting up non-root directories RUN useradd --uid 1000 -d /home/dev -s /bin/bash dev RUN groupadd -g 109 render -## Add the user to the required groups RUN usermod -aG root,sudo,video,render dev -# Set a password for the user (Optional) RUN echo 'dev:password' | chpasswd USER dev WORKDIR /home/dev ENV CONDA_ROOT=/home/dev/conda - -# Miniforge Python Installation ARG MINIFORGE_VERSION ARG PYTHON_VERSION ARG IDP_VERSION ARG INTEL_CHANNEL -RUN wget --progress=dot:giga --no-check-certificate "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-${MINIFORGE_VERSION}.sh" -O miniforge.sh && \ +RUN wget --progress=dot:giga --no-check-certificate 
"https://github.com/conda-forge/miniforge/releases/latest/download/${MINIFORGE_VERSION}.sh" -O miniforge.sh && \ chmod +x miniforge.sh && \ ./miniforge.sh -b -p "${CONDA_ROOT}" && \ rm ./miniforge.sh && \ - ln -s "${CONDA_ROOT}" "${CONDA_ROOT}/../miniforge3" && \ + ln -s "${CONDA_ROOT}" "${CONDA_ROOT}/../miniforge" && \ export PATH="${CONDA_ROOT}/bin/:${PATH}" && \ - conda update -y conda && \ - conda config --add channels conda-forge && \ - conda config --add channels https://software.repos.intel.com/python/conda/ && \ conda init --all && \ - conda install -c conda-forge \ - 'jupyterlab>=4.1.8' \ - 'notebook>=7.1.3' \ - 'jupyterhub>=4.1.5' \ - 'jupyter-server-proxy>=4.1.2' \ - 'mako>=1.2.2' \ - 'pyjwt>=2.4.0' \ - 'cryptography>=42.0.5' \ - 'nodejs>=20.12.2' \ + conda install -y \ + 'colorama==0.4.6' \ + 'conda==24.5.0' \ + 'jupyter-server-proxy==4.3.0' \ + 'jupyterhub==5.1.0' \ + 'ld_impl_linux-64==2.40' \ + 'mamba==1.5.8' \ + 'networkx==3.3' \ + 'notebook==7.2.1' \ + 'python==3.10.14' \ + 'aiohttp>=3.9.4' \ + 'certifi>=2024.07.04' \ 'idna>=3.7' \ - 'tqdm>=4.66.2' \ - && \ - jupyter labextension disable "@jupyterlab/apputils-extension:announcements" && \ - conda clean -y --all + 'jinja2>=3.1.4' \ + 'requests>=2.32.0' \ + 'setuptools>=70.0.0' \ + 'tqdm>=4.66.3' \ + 'urllib3>=2.2.2' \ + 'zipp>=3.19.1' \ + 'nodejs==22.5.1' \ + && \ + jupyter labextension disable "@jupyterlab/apputils-extension:announcements" \ + && \ + conda clean -y --all \ + && \ + conda config --add channels ${INTEL_CHANNEL} ENV PATH ${CONDA_ROOT}/condabin:${CONDA_ROOT}/bin/:${PATH} +RUN conda config --set pip_interop_enabled True -RUN conda config --set pip_interop_enabled True # Improve interoperabilty among conda an pypi packages - - -# PyTorch Installation -ARG IDP_VERSION ARG DPNP_VERSION ARG NUMPY_VERSION - ARG TORCH_CPU_VERSION ARG ONECCL_CPU_VERSION ARG IPEX_CPU_VERSION @@ -171,120 +161,94 @@ ARG TORCHVISION_CPU_VERSION ARG TORCHAUDIO_CPU_VERSION ARG DEEPSPEED_VERSION -# PyTorch CPU Env - 
conda packages -RUN conda create -yn pytorch-cpu -c "${INTEL_CHANNEL}" -c conda-forge \ - dpnp="${DPNP_VERSION}" \ - numpy="${NUMPY_VERSION}" \ - python="${PYTHON_VERSION}" \ - intel-openmp="${IDP_VERSION}" \ - pytorch="${TORCH_CPU_VERSION}" \ - oneccl_bind_pt="${ONECCL_CPU_VERSION}" \ - intel-extension-for-pytorch="${IPEX_CPU_VERSION}" \ - torchvision="${TORCHVISION_CPU_VERSION}" \ - torchaudio="${TORCHAUDIO_CPU_VERSION}" \ - 'matplotlib-base>=3.4.3' \ - 'ipykernel>=6.29.3' \ - 'kernda>=0.3.0' \ - 'pillow>=10.2.0' \ - 'aiohttp>=3.9.0' \ - 'tornado>=6.3.3' \ - 'jinja2>=3.1.3' \ +RUN conda create -yn 'pytorch-cpu' \ + -c huggingface \ + "python=${PYTHON_VERSION}" \ + 'accelerate==0.32.1' \ + "dpnp=${DPNP_VERSION}" \ + "intel-extension-for-pytorch=${IPEX_CPU_VERSION}" \ + 'ipykernel==6.29.5' \ + 'kernda==0.3.0' \ + 'matplotlib-base>=3.8.4' \ + "oneccl_bind_pt=${ONECCL_CPU_VERSION}" \ + "pytorch=${TORCH_CPU_VERSION}" \ + 'tensorboardx==2.6.2.2' \ + "torchaudio=${TORCHAUDIO_CPU_VERSION}" \ + "torchvision=${TORCHVISION_CPU_VERSION}" \ + 'python-dotenv==1.0.1' \ + 'aiohttp>=3.9.4' \ + 'certifi>=2024.07.04' \ 'idna>=3.7' \ - 'onnx>=1.15.0' \ + 'jinja2>=3.1.4' \ + 'onnx>=1.16.0' \ + 'requests>=2.32.0' \ + 'tqdm>=4.66.3' \ + 'urllib3>=2.2.2' \ + 'zipp>=3.19.1' \ && \ conda clean -y --all -# PyPI packages -RUN conda run -n pytorch-cpu pip install --no-deps --no-cache-dir --ignore-installed \ - 'ninja>=1.11.1.1' \ - 'python-dotenv>=1.0.1' \ - 'tqdm>=4.66.2' \ - 'cloud-data-connector==1.0.3' \ - 'dataset-librarian==1.0.4' && \ - conda run -n pytorch-cpu pip install --no-cache-dir --ignore-installed \ - 'transformers>=4.40.2' \ - 'datasets>=2.19.1' \ - 'evaluate>=0.4.2' && \ - conda run -n pytorch-cpu pip install --no-cache-dir -U 'accelerate>=0.30.0' && \ - conda run -n pytorch-cpu pip install --no-cache-dir "git+https://github.com/huggingface/optimum-intel.git" && \ +RUN conda run -n 'pytorch-cpu' pip install --no-deps --no-cache-dir \ + 'dataset-librarian==1.0.4' \ + && \ + 
conda run -n 'pytorch-cpu' pip install --no-cache-dir \ + 'evaluate==0.4.2' \ + "git+https://github.com/huggingface/optimum-intel.git" \ + && \ conda clean -y --all - - -RUN conda run -n pytorch-cpu conda install 'protobuf=4.24' -c conda-forge --override --force-reinstall -y - -# PyTorch Installation ARG IDP_VERSION ARG DPNP_VERSION ARG NUMPY_VERSION - -ARG TORCH_GPU_VERSION -ARG ONECCL_GPU_VERSION -ARG IPEX_GPU_VERSION -ARG TORCHVISION_GPU_VERSION -ARG TORCHAUDIO_GPU_VERSION +ARG TORCH_XPU_VERSION +ARG ONECCL_XPU_VERSION +ARG IPEX_XPU_VERSION +ARG TORCHVISION_XPU_VERSION +ARG TORCHAUDIO_XPU_VERSION ARG IDEX_VERSION -ARG DEEPSPEED_VERSION -# PyTorch GPU Env - conda packages -RUN conda create -yn pytorch-gpu -c "${INTEL_CHANNEL}" -c conda-forge \ - dpnp="${DPNP_VERSION}" \ - dpcpp-cpp-rt="${IDP_VERSION}" \ - mkl-dpcpp="${IDP_VERSION}" \ - dpcpp_impl_linux-64="${IDP_VERSION}" \ - numpy="${NUMPY_VERSION}" \ - python="${PYTHON_VERSION}" \ - intel-openmp="${IDP_VERSION}" \ - python="${PYTHON_VERSION}" \ - pytorch="${TORCH_GPU_VERSION}" \ - oneccl_bind_pt="${ONECCL_GPU_VERSION}" \ - intel-extension-for-pytorch="${IPEX_GPU_VERSION}" \ - torchvision="${TORCHVISION_GPU_VERSION}" \ - torchaudio="${TORCHAUDIO_GPU_VERSION}" \ - 'tensorboardx>=2.6.2.2' \ - 'matplotlib-base>=3.4.3' \ - 'pandas>=2.2.2' \ - 'ipython>=8.18.1' \ - 'ipykernel>=6.29.3' \ - 'kernda>=0.3.0' \ - 'pillow>=10.2.0' \ - 'aiohttp>=3.9.0' \ - 'tornado>=6.3.3' \ - 'jinja2>=3.1.3' \ +RUN conda create -yn 'pytorch-gpu' \ + -c huggingface \ + "python=${PYTHON_VERSION}" \ + 'accelerate==0.32.1' \ + "dpnp=${DPNP_VERSION}" \ + "intel-extension-for-pytorch=${IPEX_XPU_VERSION}" \ + 'ipykernel==6.29.5' \ + 'kernda==0.3.0' \ + 'matplotlib-base>=3.8.4' \ + "oneccl_bind_pt=${ONECCL_XPU_VERSION}" \ + "pytorch=${TORCH_XPU_VERSION}" \ + 'tensorboardx==2.6.2.2' \ + "torchaudio=${TORCHAUDIO_XPU_VERSION}" \ + "torchvision=${TORCHVISION_XPU_VERSION}" \ + 'python-dotenv==1.0.1' \ + 'aiohttp>=3.9.4' \ + 'certifi>=2024.07.04' \ 
'idna>=3.7' \ - 'onnx>=1.15.0' \ - 'packaging=23.2' \ - 'setuptools=69.1.0' \ + 'jinja2>=3.1.4' \ + 'onnx>=1.16.0' \ + 'requests>=2.32.0' \ + 'tqdm>=4.66.3' \ + 'urllib3>=2.2.2' \ + 'zipp>=3.19.1' \ && \ conda clean -y --all -# PyPI packages -RUN conda run -n pytorch-gpu pip install --no-deps --no-cache-dir --ignore-installed \ - 'ninja>=1.11.1.1' \ - 'python-dotenv>=1.0.1' \ - 'tqdm>=4.66.2' \ - 'cloud-data-connector==1.0.3' \ - 'dataset-librarian==1.0.4' && \ - conda run -n pytorch-gpu pip install --no-cache-dir --ignore-installed \ - 'transformers>=4.40.2' \ - 'datasets>=2.19.1' \ - 'evaluate>=0.4.2' && \ - conda run -n pytorch-gpu pip install --no-cache-dir -U 'accelerate>=0.30.0' && \ - conda run -n pytorch-gpu pip install --no-cache-dir "git+https://github.com/huggingface/optimum-intel.git" && \ +RUN conda run -n 'pytorch-gpu' pip install --no-deps --no-cache-dir \ + 'dataset-librarian==1.0.4' \ + && \ + conda run -n 'pytorch-gpu' pip install --no-cache-dir \ + 'evaluate==0.4.2' \ + "git+https://github.com/huggingface/optimum-intel.git" \ + && \ conda clean -y --all - - -RUN conda run -n pytorch-gpu conda install 'protobuf=4.24' -c conda-forge --override --force-reinstall -y - - -# TensorFlow Installation ARG IDP_VERSION ARG DPNP_VERSION ARG NUMPY_VERSION - ARG TF_VERSION -ARG ITEX_VERSION +ARG ITEX_CPU_VERSION +ARG ITEX_XPU_VERSION ARG HOROVOD_VERSION ARG IMPI_VERSION @@ -293,149 +257,122 @@ ARG HOROVOD_WITHOUT_MXNET=1 ARG HOROVOD_WITHOUT_GLOO=1 ARG HOROVOD_WITH_MPI=1 - -# Tensorflow Env - conda packages -RUN conda create -yn tensorflow-cpu -c "${INTEL_CHANNEL}" -c conda-forge \ - dpnp="${DPNP_VERSION}" \ - dpcpp-cpp-rt="${IDP_VERSION}" \ - mkl-dpcpp="${IDP_VERSION}" \ - numpy="${NUMPY_VERSION}" \ - python="${PYTHON_VERSION}" \ - intel-extension-for-tensorflow="${ITEX_VERSION}=*cpu*" \ - intel-optimization-for-horovod="${INTEL_HOROVOD}" \ - tensorflow="${TF_VERSION}" \ - impi-devel="${IMPI_VERSION}" \ - 'matplotlib-base>=3.4.3' \ - 'ipython>=8.18.1' \ - 
'ipykernel>=6.29.3' \ - 'kernda>=0.3.0' \ - 'pillow>=10.2.0' \ - 'cryptography>=42.0.4' \ - 'werkzeug>=2.2.3' \ - 'aiohttp>=3.9.0' \ - 'tornado>=6.3.3' \ - 'pyjwt>=2.8.0' \ - 'oauthlib>=3.2.2' \ - 'idna>=3.7' \ +RUN conda create -yn 'tensorflow-cpu' \ + "python=${PYTHON_VERSION}" \ + "dpnp=${DPNP_VERSION}" \ + "intel-extension-for-tensorflow=${ITEX_CPU_VERSION}=*cpu*" \ + "intel-optimization-for-horovod=${HOROVOD_VERSION}" \ + 'ipykernel==6.29.5' \ + 'kernda==0.3.0' \ + 'matplotlib-base>=3.8.4' \ 'onnx>=1.14.1' \ + 'py-cpuinfo==9.0.0' \ + "tensorflow=${TF_VERSION}" \ + 'tensorflow-hub==0.16.1' \ + 'tqdm==4.66.4' \ + 'python-dotenv==1.0.1' \ + 'aiohttp>=3.9.4' \ + 'certifi>=2024.07.04' \ + 'idna>=3.7' \ + 'requests>=2.32.0' \ + 'urllib3>=2.2.2' \ + 'werkzeug>=3.0.3' \ + 'zipp>=3.19.1' \ && \ conda clean -y --all -# PyPI packages -RUN conda run -n tensorflow-cpu pip install --no-cache-dir --ignore-installed \ - 'py-cpuinfo>=9.0.0' \ - 'requests>=2.31.0' \ - 'cryptography>=42.0.7' -RUN conda run -n tensorflow-cpu pip install --no-deps --no-cache-dir --ignore-installed \ - 'tensorflow-hub>=0.16.1' \ - 'tqdm>=4.66.2' \ +RUN conda run -n 'tensorflow-cpu' pip install --no-deps --no-cache-dir \ 'dataset-librarian==1.0.4' \ - 'cloud-data-connector>=1.0.3' && \ + && \ conda clean -y --all -# Tensorflow Env - conda packages -RUN conda create -yn tensorflow-gpu -c "${INTEL_CHANNEL}" -c conda-forge \ - dpnp="${DPNP_VERSION}" \ - dpcpp-cpp-rt="${IDP_VERSION}" \ - mkl-dpcpp="${IDP_VERSION}" \ - numpy="${NUMPY_VERSION}" \ - python="${PYTHON_VERSION}" \ - intel-extension-for-tensorflow="${ITEX_VERSION}=*xpu*" \ - intel-optimization-for-horovod="${INTEL_HOROVOD}" \ - tensorflow="${TF_VERSION}" \ - impi-devel="${IMPI_VERSION}" \ - 'matplotlib-base>=3.4.3' \ - 'ipython>=8.18.1' \ - 'ipykernel>=6.29.3' \ - 'kernda>=0.3.0' \ - 'pillow>=10.2.0' \ - 'cryptography>=42.0.4' \ - 'werkzeug>=2.2.3' \ - 'aiohttp>=3.9.0' \ - 'tornado>=6.3.3' \ - 'pyjwt>=2.8.0' \ - 'oauthlib>=3.2.2' \ - 
'idna>=3.7' \ +RUN conda create -yn 'tensorflow-gpu' \ + "python=${PYTHON_VERSION}" \ + "dpnp=${DPNP_VERSION}" \ + "intel-extension-for-tensorflow=${ITEX_XPU_VERSION}=*xpu*" \ + "intel-optimization-for-horovod=${HOROVOD_VERSION}" \ + 'ipykernel==6.29.5' \ + 'kernda==0.3.0' \ + 'matplotlib-base>=3.8.4' \ 'onnx>=1.14.1' \ - 'packaging=23.2' \ - 'setuptools=69.1.0' \ + 'py-cpuinfo==9.0.0' \ + "tensorflow=${TF_VERSION}" \ + 'tensorflow-hub==0.16.1' \ + 'tqdm==4.66.4' \ + 'python-dotenv==1.0.1' \ + 'aiohttp>=3.9.4' \ + 'certifi>=2024.07.04' \ + 'idna>=3.7' \ + 'requests>=2.32.0' \ + 'urllib3>=2.2.2' \ + 'zipp>=3.19.1' \ && \ conda clean -y --all -# PyPI packages -RUN conda run -n tensorflow-gpu pip install --no-cache-dir --ignore-installed \ - 'py-cpuinfo>=9.0.0' \ - 'requests>=2.31.0' \ - 'cryptography>=42.0.7' -RUN conda run -n tensorflow-gpu pip install --no-deps --no-cache-dir --ignore-installed \ - 'tensorflow-hub>=0.16.1' \ - 'tqdm>=4.66.2' \ +RUN conda run -n 'tensorflow-gpu' pip install --no-deps --no-cache-dir \ 'dataset-librarian==1.0.4' \ - 'cloud-data-connector==1.0.3' && \ + && \ conda clean -y --all -FROM deep-learning-python as deep-learning-jupyter - -ARG KERNEL_NAME_TF_CPU="Intel TensorFlow cpu" -ARG KERNEL_NAME_TF_GPU="Intel TensorFlow gpu" -ARG KERNEL_NAME_PT_CPU="Intel PyTorch cpu" -ARG KERNEL_NAME_PT_GPU="Intel PyTorch gpu" - EXPOSE 8888 RUN mkdir -p ~/jupyter/ && chmod -R a+rwx ~/jupyter/ && \ mkdir ~/.local && chmod a+rwx ~/.local RUN \ - "${CONDA_ROOT}/envs/pytorch-cpu/bin/python" -m ipykernel install --user --name pytorch-cpu --display-name "${KERNEL_NAME_PT_CPU}" && \ - "${CONDA_ROOT}/envs/pytorch-cpu/bin/kernda" -o -y "$HOME/.local/share/jupyter/kernels/$(echo pytorch-cpu | sed -e 's/\(.*\)/\L\1/')/kernel.json" && \ - "${CONDA_ROOT}/envs/pytorch-gpu/bin/python" -m ipykernel install --user --name pytorch-gpu --display-name "${KERNEL_NAME_PT_GPU}" && \ - "${CONDA_ROOT}/envs/pytorch-gpu/bin/kernda" -o -y "$HOME/.local/share/jupyter/kernels/$(echo 
pytorch-gpu | sed -e 's/\(.*\)/\L\1/')/kernel.json" && \ - "${CONDA_ROOT}/envs/tensorflow-cpu/bin/python" -m ipykernel install --user --name tensorflow-cpu --display-name "${KERNEL_NAME_TF_CPU}" && \ - "${CONDA_ROOT}/envs/tensorflow-cpu/bin/kernda" -o -y "$HOME/.local/share/jupyter/kernels/$(echo tensorflow-cpu | sed -e 's/\(.*\)/\L\1/')/kernel.json" && \ - "${CONDA_ROOT}/envs/tensorflow-gpu/bin/python" -m ipykernel install --user --name tensorflow-gpu --display-name "${KERNEL_NAME_TF_GPU}" && \ - "${CONDA_ROOT}/envs/tensorflow-gpu/bin/kernda" -o -y "$HOME/.local/share/jupyter/kernels/$(echo tensorflow-gpu | sed -e 's/\(.*\)/\L\1/')/kernel.json" && \ - python -m ipykernel.kernelspec --user + ENVS_LIST=('pytorch-cpu' 'pytorch-gpu' 'tensorflow-cpu' 'tensorflow-gpu') && \ + KERNEL_NAMES=('Intel PyTorch CPU' 'Intel PyTorch GPU' 'Intel TensorFlow CPU' 'Intel TensorFlow GPU') && \ + for i in "${!ENVS_LIST[@]}"; do \ + CONDA_ENV="${ENVS_LIST[i]}" && \ + KERNEL_NAME="${KERNEL_NAMES[i]}" && \ + KERNEL_DIR="${CONDA_ROOT}/share/jupyter/kernels/$CONDA_ENV" && \ + conda run -n "$CONDA_ENV" python -m ipykernel install --prefix "$CONDA_ROOT" --name "$CONDA_ENV" --display-name "$KERNEL_NAME" && \ + conda run -n "$CONDA_ENV" kernda -o -y "$KERNEL_DIR/kernel.json" && \ + conda run -n base jupyter kernelspec list \ + ; done CMD ["bash", "-c", "jupyter lab --notebook-dir=~/jupyter --port 8888 --ip 0.0.0.0 --no-browser --allow-root"] -FROM deep-learning-jupyter as distributed-deep-learning +FROM deep-learning-base as deep-learning +SHELL ["/bin/bash", "-c"] USER root -# Install OpenMPI -RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ - libopenmpi-dev \ - openmpi-bin \ - openmpi-common +RUN apt-get update -y && \ + apt-get install -y --no-install-recommends --fix-missing \ + libopenmpi-dev \ + openmpi-bin \ + openmpi-common ENV OMPI_ALLOW_RUN_AS_ROOT=1 ENV OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 ENV OMPI_MCA_tl_tcp_if_exclude="lo,docker0" -# Install OpenSSH 
RUN apt-get install -y --no-install-recommends --fix-missing \ - openssh-client \ - openssh-server && \ - rm /etc/ssh/ssh_host_*_key \ - /etc/ssh/ssh_host_*_key.pub && \ - rm -rf /var/lib/apt/lists/* - -RUN mkdir -p /var/run/sshd && \ + openssh-client \ + openssh-server \ + && \ + rm -rf \ + /etc/ssh/ssh_host_*_key \ + /etc/ssh/ssh_host_*_key.pub \ + /var/lib/apt/lists/* \ + && \ + mkdir -p /var/run/sshd \ + && \ echo 'LoginGraceTime 0' >> /etc/ssh/sshd_config -# https://github.com/openucx/ucx/issues/4742#issuecomment-584059909 ENV UCX_TLS=ud,sm,self USER dev -RUN conda install -n pytorch-cpu -c "${INTEL_CHANNEL}" -c conda-forge \ - deepspeed="${DEEPSPEED_VERSION}" \ - 'tensorboardx>=2.6.2.2' - -RUN conda install -n pytorch-gpu -c "${INTEL_CHANNEL}" -c conda-forge \ - deepspeed="${DEEPSPEED_VERSION}" \ - 'tensorboardx>=2.6.2.2' +RUN ENVS_LIST=('pytorch-cpu' 'pytorch-gpu') && \ + for i in "${!ENVS_LIST[@]}"; do \ + CONDA_ENV="${ENVS_LIST[i]}" && \ + conda install -yn "$CONDA_ENV" \ + "deepspeed=${DEEPSPEED_VERSION}" \ + 'tensorboardx==2.6.2.2' \ + ; done && \ + conda clean -y --all COPY --chown=dev notebooks /home/dev/jupyter COPY --chown=dev tests /home/dev/sample-tests diff --git a/preset/deep-learning/docker-compose.yaml b/preset/deep-learning/docker-compose.yaml index 663e064c..023b6f82 100644 --- a/preset/deep-learning/docker-compose.yaml +++ b/preset/deep-learning/docker-compose.yaml @@ -15,6 +15,7 @@ # -*- coding: utf-8 -*- # + version: '3' services: dl-base: @@ -22,44 +23,42 @@ services: args: BASE_IMAGE: ${BASE_IMAGE:-ubuntu} BASE_TAG: ${BASE_TAG:-22.04} - DEEPSPEED_VERSION: ${DEEPSPEED_VERSION:-0.14.0} + DEEPSPEED_VERSION: ${DEEPSPEED_VERSION:-0.14.2} DEVICE: ${DEVICE:-flex} - DPNP_VERSION: ${NUMBA_DPEX_VERSION:-0.14.0} - HOROVOD_VERSION: ${HOROVOD_VERSION:-0.28.1.4} - ICD_VER: 23.43.27642.40-803~22.04 - IDP_VERSION: ${IDP_VERSION:-2024.1.0} - IMPI_VERSION: ${IMPI_VERSION:-2021.12} + DPNP_VERSION: ${DPNP_VERSION:-0.15.0} + HOROVOD_VERSION: 
${HOROVOD_VERSION:-0.28.1.5} + ICD_VER: 23.43.27642.52-803~22.04 + IDP_VERSION: ${IDP_VERSION:-2024.2} + IMPI_VERSION: ${IMPI_VERSION:-2021.13} INTEL_CHANNEL: ${INTEL_CHANNEL:-https://software.repos.intel.com/python/conda/} - IPEX_CPU_VERSION: ${IPEX_CPU_VERSION:-2.2.0=*cpu*} - IPEX_GPU_VERSION: ${IPEX_GPU_VERSION:-2.1.20=*xpu*} - ITEX_VERSION: ${ITEX_VERSION:-2.15} + IPEX_CPU_VERSION: ${IPEX_CPU_VERSION:-2.3.100} + IPEX_XPU_VERSION: ${IPEX_XPU_VERSION:-2.1.40} + ITEX_CPU_VERSION: ${ITEX_CPU_VERSION:-2.15.0} + ITEX_XPU_VERSION: ${ITEX_XPU_VERSION:-2.15.0.1} LEVEL_ZERO_DEV_VER: 1.14.0-744~22.04 - LEVEL_ZERO_GPU_VER: 1.3.27642.40-803~22.04 + LEVEL_ZERO_GPU_VER: 1.3.27642.52-803~22.04 LEVEL_ZERO_VER: 1.14.0-744~22.04 - MINIFORGE_VERSION: ${MINIFORGE_VERSION:-Linux-x86_64} - MPI_VERSION: ${MPI_VERSION:-2021.12.0} - NUMBA_DPEX_VERSION: ${NUMBA_DPEX_VERSION:-0.22.1} + MINIFORGE_VERSION: ${MINIFORGE_VERSION:-Miniforge3-Linux-x86_64} + MPI_VERSION: ${MPI_VERSION:-2021.13} + NUMBA_DPEX_VERSION: ${NUMBA_DPEX_VERSION:-0.23.0} NUMPY_VERSION: ${NUMPY_VERSION:-1.26.4} - ONECCL_CPU_VERSION: ${ONECCL_CPU_VERSION:-2.2.0=*cpu*} - ONECCL_GPU_VERSION: ${ONECCL_GPU_VERSION:-2.1.200=*xpu*} - PYTHON_VERSION: ${PYTHON_VERSION:-3.10} - TF_VERSION: ${TF_VERSION:-2.15} - TORCHAUDIO_CPU_VERSION: ${TORCHAUDIO_CPU_VERSION:-2.2.0=*cpu*} - TORCHAUDIO_GPU_VERSION: ${TORCHAUDIO_GPU_VERSION:-2.1.0=*xpu*} - TORCHVISION_CPU_VERSION: ${TORCHVISION_CPU_VERSION:-0.17=*cpu*} - TORCHVISION_GPU_VERSION: ${TORCHVISION_GPU_VERSION:-0.16.0=*xpu*} - TORCH_CPU_VERSION: ${TORCH_CPU_VERSION:-2.2.0=*cpu*} - TORCH_GPU_VERSION: ${TORCH_GPU_VERSION:-2.1.0=*xpu*} + ONECCL_CPU_VERSION: ${ONECCL_CPU_VERSION:-2.3.0} + ONECCL_XPU_VERSION: ${ONECCL_XPU_VERSION:-2.1.400} + PYTHON_VERSION: ${PYTHON_VERSION:-3.9} + TF_VERSION: ${TF_VERSION:-2.15.1} + TORCHAUDIO_CPU_VERSION: ${TORCHAUDIO_CPU_VERSION:-2.3.1} + TORCHAUDIO_XPU_VERSION: ${TORCHAUDIO_XPU_VERSION:-2.1.0} + TORCHVISION_CPU_VERSION: ${TORCHVISION_CPU_VERSION:-0.18.1} + 
TORCHVISION_XPU_VERSION: ${TORCHVISION_XPU_VERSION:-0.16.0} + TORCH_CPU_VERSION: ${TORCH_CPU_VERSION:-2.3.1} + TORCH_XPU_VERSION: ${TORCH_XPU_VERSION:-2.1.0} http_proxy: ${http_proxy} https_proxy: ${https_proxy} no_proxy: '' context: . labels: docs: false - target: deep-learning-jupyter - command: | - bash -c "conda run -n pytorch-cpu python -c 'import torch;print(torch.__version__);import intel_extension_for_pytorch as ipex;print(ipex.__version__);' && \ - conda run -n tensorflow-cpu python -c 'import tensorflow as tf; print(tf.__version__)'" + target: deep-learning-base environment: http_proxy: ${http_proxy} https_proxy: ${https_proxy} @@ -67,15 +66,24 @@ services: shm_size: 12GB volumes: - /dev/dri/by-path:/dev/dri/by-path + command: > + bash -c " conda run -n pytorch-cpu python -c 'import torch;print(torch.__version__);import + intel_extension_for_pytorch as ipex;print(ipex.__version__);' && + + conda run -n tensorflow-cpu python -c 'import tensorflow as tf;print(tf.__version__)' + " + + deep-learning: build: + target: deep-learning labels: docs: deep_learning org.opencontainers.image.title: "Intel® AI Tools Selector Preset Containers - Deep Learning" org.opencontainers.base.name: "ubuntu:22.04" org.opencontainers.image.name: "intel/deep-learning" - org.opencontainers.image.version: 2024.1.0-py${PYTHON_VERSION:-3.10} - dependency.python: ${PYTHON_VERSION:-3.10} + org.opencontainers.image.version: 2024.2.0-py${PYTHON_VERSION:-3.9} + dependency.python: ${PYTHON_VERSION:-3.9} dependency.python.pip: requirements.txt dependency.apt.apt-utils: true dependency.apt.build-essential: true @@ -92,11 +100,11 @@ services: dependency.apt.gzip: true dependency.apt.hwinfo: true dependency.apt.intel-igc-cm: true - dependency.apt.intel-level-zero-gpu: '=1.3.27642.40-803~22.04' + dependency.apt.intel-level-zero-gpu: true dependency.apt.intel-media-va-driver-non-free: true - dependency.apt.intel-opencl-icd: '=23.43.27642.40-803~22.04' - dependency.apt.level-zero: 
'=1.14.0-744~22.04' - dependency.apt.level-zero-dev: '=1.14.0-744~22.04' + dependency.apt.intel-opencl-icd: true + dependency.apt.level-zero: true + dependency.apt.level-zero-dev: true dependency.apt.libegl1-mesa: true dependency.apt.libegl1-mesa-dev: true dependency.apt.libegl-mesa0: true @@ -114,7 +122,6 @@ services: dependency.apt.libmfxgen1: true dependency.apt.libopenmpi-dev: true dependency.apt.libvpl2: true - dependency.apt.libxatracker2: true dependency.apt.make: true dependency.apt.mesa-va-drivers: true dependency.apt.mesa-vdpau-drivers: true @@ -132,69 +139,71 @@ services: dependency.apt.vainfo: true dependency.apt.wget: true dependency.apt.xz-utils: true - dependency.conda.jupyterlab: '>=4.1.8' - dependency.conda.aiohttp: '>=3.9.0' - dependency.conda.cryptography: '>=42.0.4' - dependency.conda.dataset_librarian: '>=1.0.4' - dependency.conda.deepspeed: '=0.14.0' - dependency.conda.dpcpp_impl_linux-64: '=2024.1.0' - dependency.conda.dpcpp-cpp-rt: '=2024.1.0' - dependency.conda.dpnp: '=0.14.0' - dependency.conda.idna: '>=3.7' - dependency.conda.impi-devel: '=2021.12' - dependency.conda.intel-extension-for-pytorch_cpu: '=2.2.0=*cpu*' - dependency.conda.intel-extension-for-pytorch_gpu: '=2.1.20=*xpu*' - dependency.conda.intel-extension-for-tensorflow_cpu: '=2.15=*cpu*' - dependency.conda.intel-extension-for-tensorflow_gpu: '=2.15=*xpu*' - dependency.conda.intel-openmp: '=2024.1.0' - dependency.conda.intel-optimization-for-horovod: '=0.28.1.4' - dependency.conda.ipykernel: '>=6.29.3' - dependency.conda.ipython: '>=8.18.1' - dependency.conda.jinja2: '>=3.1.3' - dependency.conda.jupyterhub: '>=4.1.5' - dependency.conda.jupyter-server-proxy: '>=4.1.2' - dependency.conda.kernda: '>=0.3.0' - dependency.conda.mako: '>=1.2.2' - dependency.conda.matplotlib-base: '>=3.4.3' - dependency.conda.mkl-dpcpp: '2024.1.0' - dependency.conda.nodejs: '>=20.12.2' - dependency.conda.notebook: '>=7.1.3' - dependency.conda.numpy: '=1.26.4' - dependency.conda.oauthlib: '>=3.2.2' - 
dependency.conda.oneccl_bind_pt_cpu: '=2.2.0=*cpu*' - dependency.conda.oneccl_bind_pt_gpu: '=2.1.200=*xpu*' + dependency.conda.accelerate: '==0.32.1' + dependency.conda.colorama: '==0.4.6' + dependency.conda.conda: '==24.5.0' + dependency.conda.dpnp: '=0.15.0' + dependency.conda.intel-extension-for-pytorch_cpu: '=2.3.100' + dependency.conda.intel-extension-for-pytorch_xpu: '=2.1.40' + dependency.conda.intel-extension-for-tensorflow_cpu: '=2.15.0=*cpu*' + dependency.conda.intel-extension-for-tensorflow_xpu: '=2.15.0.1=*xpu*' + dependency.conda.intel-optimization-for-horovod: '=0.28.1.5' + dependency.conda.ipykernel: '==6.29.5' + dependency.conda.jupyterhub: '==5.1.0' + dependency.conda.jupyter-server-proxy: '==4.3.0' + dependency.conda.kernda: '==0.3.0' + dependency.conda.ld_impl_linux-64: '==2.40' + dependency.conda.mamba: '==1.5.8' + dependency.conda.matplotlib-base: '>=3.8.4' + dependency.conda.mpi: '==1.0' + dependency.conda.mpich: '==4.2.2' + dependency.conda.networkx: '==3.3' + dependency.conda.notebook: '==7.2.1' + dependency.conda.oneccl_bind_pt_cpu: '=2.3.0' + dependency.conda.oneccl_bind_pt_xpu: '=2.1.400' dependency.conda.onnx: '>=1.14.1' - dependency.conda.packaging: '=23.2' - dependency.conda.pandas: '>=2.2.2' - dependency.conda.pillow: '>=10.2.0' - dependency.conda.protobuf: '=4.24' - dependency.conda.pyjwt: '>=2.4.0' - dependency.conda.python: "=${PYTHON_VERSION:-3.10}" - dependency.conda.pytorch_cpu: '=2.2.0=*cpu*' - dependency.conda.pytorch_gpu: '=2.1.0=*xpu*' - dependency.conda.setuptools: '=69.1.0' - dependency.conda.tensorboardx: '>=2.6.2.2' - dependency.conda.tensorflow: '=2.15' - dependency.conda.torchaudio_cpu: '=2.2.0=*cpu*' - dependency.conda.torchaudio_gpu: '=2.1.0=*xpu*' - dependency.conda.torchvision_cpu: '=0.17=*cpu*' - dependency.conda.torchvision_gpu: '=0.16.0=*xpu*' - dependency.conda.tornado: '>=6.3.3' - dependency.conda.tqdm: '>=4.66.2' - dependency.conda.werkzeug: '>=2.2.3' - target: distributed-deep-learning + 
dependency.conda.py-cpuinfo: '==9.0.0' + dependency.conda.python: '==3.10.14' + dependency.conda.pytorch_cpu: '=2.3.1' + dependency.conda.pytorch_xpu: '=2.1.0' + dependency.conda.tensorboardx: '==2.6.2.2' + dependency.conda.tensorflow: '=2.15.1' + dependency.conda.tensorflow-hub: '==0.16.1' + dependency.conda.torchaudio_cpu: '=2.3.1' + dependency.conda.torchaudio_xpu: '=2.1.0' + dependency.conda.torchvision_cpu: '=0.18.1' + dependency.conda.torchvision_xpu: '=0.16.0' + dependency.conda.tqdm: '==4.66.4' depends_on: - dl-base extends: dl-base - image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-deep-learning-2024.1.0-py${PYTHON_VERSION:-3.10} - command: | - bash -c "conda run -n pytorch-cpu python -c 'import torch;print(torch.__version__);import intel_extension_for_pytorch as ipex;print(ipex.__version__);' && \ - conda run -n pytorch-cpu bash -c 'mpirun --version' && \ - conda run -n pytorch-cpu python -c 'import oneccl_bindings_for_pytorch as oneccl;print(\"\\nOneCCL:\", oneccl.__version__)' && \ - conda run -n pytorch-gpu python -c 'import torch;print(torch.device(\"xpu\"));import intel_extension_for_pytorch as ipex;print(ipex.xpu.is_available());print(ipex.xpu.has_onemkl())' && \ - conda run -n pytorch-gpu bash -c 'mpirun --version' && \ - conda run -n pytorch-gpu python -c 'import oneccl_bindings_for_pytorch as oneccl;print(\"\\nOneCCL:\", oneccl.__version__)' && \ - conda run -n tensorflow-cpu python -c 'import tensorflow;print(tensorflow.__version__);import intel_extension_for_tensorflow as itex;print(itex.__version__)' && \ - conda run -n tensorflow-gpu python -c 'from tensorflow.python.client import device_lib; print(device_lib.list_local_devices())' && \ - conda run -n tensorflow-gpu bash -c 'horovodrun --check-build && mpirun --version' && \ - conda run -n tensorflow-gpu python -c 'import horovod.tensorflow as hvd;hvd.init();import horovod.tensorflow'" + image: 
${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-deep-learning-${RELEASE:-2024.2.0}-py${PYTHON_VERSION:-3.9} + command: > + bash -c " conda run -n pytorch-cpu python -c 'import torch;print(torch.__version__);import + intel_extension_for_pytorch as ipex;print(ipex.__version__);' && + + conda run -n pytorch-cpu bash -c 'mpirun --version' && + + conda run -n pytorch-cpu python -c 'import oneccl_bindings_for_pytorch as oneccl;print(\"\\nOneCCL:\", + oneccl.__version__)' && + + conda run -n pytorch-gpu python -c 'import torch;print(torch.device(\"xpu\"));import + intel_extension_for_pytorch as ipex;print(ipex.xpu.is_available());print(ipex.xpu.has_onemkl())' + && + + conda run -n pytorch-gpu bash -c 'mpirun --version' && + + conda run -n pytorch-gpu python -c 'import oneccl_bindings_for_pytorch as oneccl;print(\"\\nOneCCL:\", + oneccl.__version__)' && + + conda run -n tensorflow-cpu python -c 'import tensorflow;print(tensorflow.__version__);import + intel_extension_for_tensorflow as itex;print(itex.__version__)' && + + conda run -n tensorflow-gpu python -c 'from tensorflow.python.client import + device_lib;print(device_lib.list_local_devices())' && + + conda run -n tensorflow-gpu bash -c 'horovodrun --check-build && mpirun --version' + && + + conda run -n tensorflow-gpu python -c 'import horovod.tensorflow as hvd;hvd.init();import + horovod.tensorflow' " diff --git a/preset/deep-learning/requirements.txt b/preset/deep-learning/requirements.txt index 4122126b..db93ef0d 100644 --- a/preset/deep-learning/requirements.txt +++ b/preset/deep-learning/requirements.txt @@ -1,14 +1,3 @@ -accelerate>=0.30.0 -cloud-data-connector>=1.0.3 -cryptography>=42.0.7 -dataset-librarian>=1.0.4 -datasets>=2.19.1 -evaluate>=0.4.2 +dataset-librarian==1.0.4 +evaluate==0.4.2 git+https://github.com/huggingface/optimum-intel.git -ninja>=1.11.1.1 -py-cpuinfo>=9.0.0 -python-dotenv>=1.0.1 -requests>=2.31.0 -tensorflow-hub>=0.16.1 -tqdm>=4.66.2 -transformers>=4.40.2 diff --git 
a/preset/deep-learning/tests.yaml b/preset/deep-learning/tests.yaml old mode 100644 new mode 100755 index 0b0cdcae..399d4291 --- a/preset/deep-learning/tests.yaml +++ b/preset/deep-learning/tests.yaml @@ -12,39 +12,50 @@ # See the License for the specific language governing permissions and # limitations under the License. +--- deep-learning-ipex-${PYTHON_VERSION:-3.9}-cpu: cmd: conda run -n pytorch-cpu python -W ignore sample-tests/intel_extension_for_pytorch/test_ipex.py --device cpu --ipex - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-deep-learning-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: amr-registry.caas.intel.com/aiops/aikit-products-dev:b-${GITHUB_RUN_NUMBER:-0}-deep-learning-${RELEASE:-2024.2.0}-py${PYTHON_VERSION:-3.9} deep-learning-ipex-${PYTHON_VERSION:-3.9}-gpu: cmd: conda run -n pytorch-gpu python -W ignore sample-tests/intel_extension_for_pytorch/test_ipex.py --device xpu --ipex - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-deep-learning-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: amr-registry.caas.intel.com/aiops/aikit-products-dev:b-${GITHUB_RUN_NUMBER:-0}-deep-learning-${RELEASE:-2024.2.0}-py${PYTHON_VERSION:-3.9} + device: ["/dev/dri"] + deep-learning-ipex-notebook-${PYTHON_VERSION:-3.9}-cpu: cmd: papermill --log-output jupyter/ipex/ResNet50_Inference.ipynb -k pytorch-cpu - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-deep-learning-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: amr-registry.caas.intel.com/aiops/aikit-products-dev:b-${GITHUB_RUN_NUMBER:-0}-deep-learning-${RELEASE:-2024.2.0}-py${PYTHON_VERSION:-3.9} notebook: True deep-learning-ipex-notebook-${PYTHON_VERSION:-3.9}-gpu: cmd: papermill --log-output jupyter/ipex/ResNet50_Inference.ipynb -k pytorch-gpu - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-deep-learning-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: 
amr-registry.caas.intel.com/aiops/aikit-products-dev:b-${GITHUB_RUN_NUMBER:-0}-deep-learning-${RELEASE:-2024.2.0}-py${PYTHON_VERSION:-3.9} notebook: True + device: ["/dev/dri"] + deep-learning-ipex-quantization-notebook-${PYTHON_VERSION:-3.9}-cpu: cmd: papermill --log-output jupyter/ipex-quantization/IntelPytorch_Quantization.ipynb -k pytorch-cpu - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-deep-learning-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: amr-registry.caas.intel.com/aiops/aikit-products-dev:b-${GITHUB_RUN_NUMBER:-0}-deep-learning-${RELEASE:-2024.2.0}-py${PYTHON_VERSION:-3.9} notebook: True + deep-learning-itex-${PYTHON_VERSION:-3.9}-cpu: cmd: conda run -n tensorflow-cpu python -W ignore sample-tests/intel_extension_for_tensorflow/test_itex.py - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-deep-learning-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: amr-registry.caas.intel.com/aiops/aikit-products-dev:b-${GITHUB_RUN_NUMBER:-0}-deep-learning-${RELEASE:-2024.2.0}-py${PYTHON_VERSION:-3.9} deep-learning-itex-${PYTHON_VERSION:-3.9}-gpu: cmd: conda run -n tensorflow-gpu python -W ignore sample-tests/intel_extension_for_tensorflow/test_itex.py - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-deep-learning-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: amr-registry.caas.intel.com/aiops/aikit-products-dev:b-${GITHUB_RUN_NUMBER:-0}-deep-learning-${RELEASE:-2024.2.0}-py${PYTHON_VERSION:-3.9} + device: ["/dev/dri"] + deep-learning-tensorflow-dataset-librarian-${PYTHON_VERSION:-3.9}-cpu: cmd: conda run -n tensorflow-cpu bash -c 'yes | python -m dataset_librarian.dataset -n msmarco --download -d ~/msmarco' - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-deep-learning-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: amr-registry.caas.intel.com/aiops/aikit-products-dev:b-${GITHUB_RUN_NUMBER:-0}-deep-learning-${RELEASE:-2024.2.0}-py${PYTHON_VERSION:-3.9} deep-learning-tensorflow-dataset-librarian-${PYTHON_VERSION:-3.9}-gpu: 
cmd: conda run -n tensorflow-gpu bash -c 'yes | python -m dataset_librarian.dataset -n msmarco --download -d ~/msmarco' - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-deep-learning-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: amr-registry.caas.intel.com/aiops/aikit-products-dev:b-${GITHUB_RUN_NUMBER:-0}-deep-learning-${RELEASE:-2024.2.0}-py${PYTHON_VERSION:-3.9} + device: ["/dev/dri"] + deep-learning-torch-dataset-librarian-${PYTHON_VERSION:-3.9}-cpu: cmd: conda run -n pytorch-cpu bash -c 'yes | python -m dataset_librarian.dataset -n msmarco --download -d ~/msmarco' - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-deep-learning-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: amr-registry.caas.intel.com/aiops/aikit-products-dev:b-${GITHUB_RUN_NUMBER:-0}-deep-learning-${RELEASE:-2024.2.0}-py${PYTHON_VERSION:-3.9} deep-learning-torch-dataset-librarian-${PYTHON_VERSION:-3.9}-gpu: cmd: conda run -n pytorch-gpu bash -c 'yes | python -m dataset_librarian.dataset -n msmarco --download -d ~/msmarco' - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-deep-learning-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: amr-registry.caas.intel.com/aiops/aikit-products-dev:b-${GITHUB_RUN_NUMBER:-0}-deep-learning-${RELEASE:-2024.2.0}-py${PYTHON_VERSION:-3.9} + device: ["/dev/dri"] diff --git a/preset/inference-optimization/Dockerfile b/preset/inference-optimization/Dockerfile index 6689b437..a38e8266 100644 --- a/preset/inference-optimization/Dockerfile +++ b/preset/inference-optimization/Dockerfile @@ -12,35 +12,34 @@ # See the License for the specific language governing permissions and # limitations under the License. + ARG COMPOSE_PROJECT_NAME FROM ${COMPOSE_PROJECT_NAME}-dl-base as inference-optimization -ENV SIGOPT_PROJECT=. +SHELL ["/bin/bash", "-c"] +ENV SIGOPT_PROJECT=. 
ARG NEURAL_COMPRESSOR_VERSION ARG INTEL_CHANNEL - -RUN conda install -yn pytorch-cpu -c "${INTEL_CHANNEL}" -c conda-forge \ - neural-compressor="${NEURAL_COMPRESSOR_VERSION}" - -RUN conda install -yn pytorch-gpu -c "${INTEL_CHANNEL}" -c conda-forge \ - neural-compressor="${NEURAL_COMPRESSOR_VERSION}" - -RUN conda install -yn tensorflow-cpu -c "${INTEL_CHANNEL}" -c conda-forge \ - neural-compressor="${NEURAL_COMPRESSOR_VERSION}" - -RUN conda install -yn tensorflow-gpu -c "${INTEL_CHANNEL}" -c conda-forge \ - neural-compressor="${NEURAL_COMPRESSOR_VERSION}" - -RUN conda run -n tensorflow-cpu python -m pip install --no-deps --no-cache-dir \ - 'tf2onnx>=1.16.1' \ - 'onnxruntime>=1.17.3' && \ +RUN ENVS_LIST=('pytorch-cpu' 'pytorch-gpu' 'tensorflow-cpu' 'tensorflow-gpu') && \ + for i in "${!ENVS_LIST[@]}"; do \ + CONDA_ENV="${ENVS_LIST[i]}" && \ + conda install -yn "$CONDA_ENV" \ + "neural-compressor=${NEURAL_COMPRESSOR_VERSION}" \ + 'scikit-learn>=1.5.0' \ + ; \ + done && \ conda clean -y --all -RUN conda run -n tensorflow-gpu python -m pip install --no-deps --no-cache-dir \ - 'tf2onnx>=1.16.1' \ - 'onnxruntime>=1.17.3' && \ +RUN ENVS_LIST=('tensorflow-cpu' 'tensorflow-gpu') && \ + for i in "${!ENVS_LIST[@]}"; do \ + CONDA_ENV="${ENVS_LIST[i]}" && \ + conda run -n "$CONDA_ENV" python -m pip install --no-deps --no-cache-dir \ + 'tf2onnx==1.16.1' \ + 'onnxruntime==1.18.1' \ + ; \ + done && \ conda clean -y --all COPY --chown=dev notebooks /home/dev/jupyter diff --git a/preset/inference-optimization/docker-compose.yaml b/preset/inference-optimization/docker-compose.yaml index ac8ebc07..cf543bff 100644 --- a/preset/inference-optimization/docker-compose.yaml +++ b/preset/inference-optimization/docker-compose.yaml @@ -15,6 +15,7 @@ # -*- coding: utf-8 -*- # + version: '3' services: dl-base: @@ -22,42 +23,42 @@ services: args: BASE_IMAGE: ${BASE_IMAGE:-ubuntu} BASE_TAG: ${BASE_TAG:-22.04} - DEEPSPEED_VERSION: ${DEEPSPEED_VERSION:-0.14.0} DEVICE: ${DEVICE:-flex} - DPNP_VERSION: 
${NUMBA_DPEX_VERSION:-0.14.0} - HOROVOD_VERSION: ${HOROVOD_VERSION:-0.28.1.4} - ICD_VER: 23.43.27642.40-803~22.04 - IDP_VERSION: ${IDP_VERSION:-2024.1.0} - IMPI_VERSION: ${IMPI_VERSION:-2021.12} + DPNP_VERSION: ${DPNP_VERSION:-0.15.0} + HOROVOD_VERSION: ${HOROVOD_VERSION:-0.28.1.5} + ICD_VER: 23.43.27642.52-803~22.04 + IDP_VERSION: ${IDP_VERSION:-2024.2} + IMPI_VERSION: ${IMPI_VERSION:-2021.13} INTEL_CHANNEL: ${INTEL_CHANNEL:-https://software.repos.intel.com/python/conda/} - IPEX_CPU_VERSION: ${IPEX_CPU_VERSION:-2.2.0=*cpu*} - IPEX_GPU_VERSION: ${IPEX_GPU_VERSION:-2.1.20=*xpu*} - ITEX_VERSION: ${ITEX_VERSION:-2.15} + IPEX_CPU_VERSION: ${IPEX_CPU_VERSION:-2.3.100} + IPEX_XPU_VERSION: ${IPEX_XPU_VERSION:-2.1.40} + ITEX_CPU_VERSION: ${ITEX_CPU_VERSION:-2.15.0} + ITEX_XPU_VERSION: ${ITEX_XPU_VERSION:-2.15.0.1} LEVEL_ZERO_DEV_VER: 1.14.0-744~22.04 - LEVEL_ZERO_GPU_VER: 1.3.27642.40-803~22.04 + LEVEL_ZERO_GPU_VER: 1.3.27642.52-803~22.04 LEVEL_ZERO_VER: 1.14.0-744~22.04 - MINIFORGE_VERSION: ${MINIFORGE_VERSION:-Linux-x86_64} - MPI_VERSION: ${MPI_VERSION:-2021.12.0} - NEURAL_COMPRESSOR_VERSION: ${NEURAL_COMPRESSOR_VERSION:-2.4.1} - NUMBA_DPEX_VERSION: ${NUMBA_DPEX_VERSION:-0.22.1} + MINIFORGE_VERSION: ${MINIFORGE_VERSION:-Miniforge3-Linux-x86_64} + MPI_VERSION: ${MPI_VERSION:-2021.13} + NEURAL_COMPRESSOR_VERSION: ${NEURAL_COMPRESSOR_VERSION:-2.5.1} + NUMBA_DPEX_VERSION: ${NUMBA_DPEX_VERSION:-0.23.0} NUMPY_VERSION: ${NUMPY_VERSION:-1.26.4} - ONECCL_CPU_VERSION: ${ONECCL_CPU_VERSION:-2.2.0=*cpu*} - ONECCL_GPU_VERSION: ${ONECCL_GPU_VERSION:-2.1.200=*xpu*} - PYTHON_VERSION: ${PYTHON_VERSION:-3.10} - TF_VERSION: ${TF_VERSION:-2.15} - TORCHAUDIO_CPU_VERSION: ${TORCHAUDIO_CPU_VERSION:-2.2.0=*cpu*} - TORCHAUDIO_GPU_VERSION: ${TORCHAUDIO_GPU_VERSION:-2.1.0=*xpu*} - TORCHVISION_CPU_VERSION: ${TORCHVISION_CPU_VERSION:-0.17=*cpu*} - TORCHVISION_GPU_VERSION: ${TORCHVISION_GPU_VERSION:-0.16.0=*xpu*} - TORCH_CPU_VERSION: ${TORCH_CPU_VERSION:-2.2.0=*cpu*} - TORCH_GPU_VERSION: 
${TORCH_GPU_VERSION:-2.1.0=*xpu*} + ONECCL_CPU_VERSION: ${ONECCL_CPU_VERSION:-2.3.0} + ONECCL_XPU_VERSION: ${ONECCL_XPU_VERSION:-2.1.400} + PYTHON_VERSION: ${PYTHON_VERSION:-3.9} + TF_VERSION: ${TF_VERSION:-2.15.1} + TORCHAUDIO_CPU_VERSION: ${TORCHAUDIO_CPU_VERSION:-2.3.1} + TORCHAUDIO_XPU_VERSION: ${TORCHAUDIO_XPU_VERSION:-2.1.0} + TORCHVISION_CPU_VERSION: ${TORCHVISION_CPU_VERSION:-0.18.1} + TORCHVISION_XPU_VERSION: ${TORCHVISION_XPU_VERSION:-0.16.0} + TORCH_CPU_VERSION: ${TORCH_CPU_VERSION:-2.3.1} + TORCH_XPU_VERSION: ${TORCH_XPU_VERSION:-2.1.0} http_proxy: ${http_proxy} https_proxy: ${https_proxy} no_proxy: '' context: ../deep-learning labels: docs: false - target: deep-learning-jupyter + target: deep-learning-base environment: http_proxy: ${http_proxy} https_proxy: ${https_proxy} @@ -65,9 +66,12 @@ services: shm_size: 12GB volumes: - /dev/dri/by-path:/dev/dri/by-path - command: | - bash -c "conda run -n pytorch-cpu python -c 'import torch;print(torch.__version__);import intel_extension_for_pytorch as ipex;print(ipex.__version__);' && \ - conda run -n tensorflow-cpu python -c 'import tensorflow as tf; print(tf.__version__)'" + command: > + bash -c " conda run -n pytorch-cpu python -c 'import torch;print(torch.__version__);import + intel_extension_for_pytorch as ipex;print(ipex.__version__)' && + + conda run -n tensorflow-cpu python -c 'import tensorflow as tf;print(tf.__version__)' + " inference-optimization: @@ -75,13 +79,14 @@ services: args: COMPOSE_PROJECT_NAME: ${COMPOSE_PROJECT_NAME:-preset} context: . 
+ target: inference-optimization labels: docs: inference_optimization org.opencontainers.image.title: "Intel® AI Tools Selector Preset Containers - Inference Optimization" org.opencontainers.base.name: "intel/deep-learning" org.opencontainers.image.name: "intel/inference-optimization" - org.opencontainers.image.version: 2024.1.0-py${PYTHON_VERSION:-3.10} - dependency.python: ${PYTHON_VERSION:-3.10} + org.opencontainers.image.version: 2024.2.0-py${PYTHON_VERSION:-3.9} + dependency.python: ${PYTHON_VERSION:-3.9} dependency.python.pip: requirements.txt dependency.apt.apt-utils: true dependency.apt.build-essential: true @@ -98,11 +103,11 @@ services: dependency.apt.gzip: true dependency.apt.hwinfo: true dependency.apt.intel-igc-cm: true - dependency.apt.intel-level-zero-gpu: '1.3.27642.40-803~22.04' + dependency.apt.intel-level-zero-gpu: true dependency.apt.intel-media-va-driver-non-free: true - dependency.apt.intel-opencl-icd: '23.43.27642.40-803~22.04' - dependency.apt.level-zero: '1.14.0-744~22.04' - dependency.apt.level-zero-dev: '1.14.0-744~22.04' + dependency.apt.intel-opencl-icd: true + dependency.apt.level-zero: true + dependency.apt.level-zero-dev: true dependency.apt.libegl1-mesa: true dependency.apt.libegl1-mesa-dev: true dependency.apt.libegl-mesa0: true @@ -120,7 +125,6 @@ services: dependency.apt.libmfxgen1: true dependency.apt.libopenmpi-dev: true dependency.apt.libvpl2: true - dependency.apt.libxatracker2: true dependency.apt.make: true dependency.apt.mesa-va-drivers: true dependency.apt.mesa-vdpau-drivers: true @@ -138,68 +142,72 @@ services: dependency.apt.vainfo: true dependency.apt.wget: true dependency.apt.xz-utils: true - dependency.conda.jupyterlab: '>=4.1.8' - dependency.conda.aiohttp: '>=3.9.0' - dependency.conda.cryptography: '>=42.0.4' - dependency.conda.dataset_librarian: '>=1.0.4' - dependency.conda.deepspeed: '>=0.14.0' - dependency.conda.dpcpp_impl_linux-64: '>=2024.1.' - dependency.conda.dpcpp-cpp-rt: '>=2024.1.' 
- dependency.conda.dpnp: '>=0.14.0' - dependency.conda.idna: '>=3.7' - dependency.conda.impi-devel: '>=2021.12' - dependency.conda.intel-extension-for-pytorch_cpu: '>=2.2.0=*cpu*' - dependency.conda.intel-extension-for-pytorch_gpu: '>=2.1.20=*xpu*' - dependency.conda.intel-extension-for-tensorflow_cpu: '>=2.15=*cpu*' - dependency.conda.intel-extension-for-tensorflow_gpu: '>=2.15=*xpu*' - dependency.conda.intel-openmp: '>=2024.1.0' - dependency.conda.intel-optimization-for-horovod: '>=0.28.1.4' - dependency.conda.ipykernel: '>=6.29.3' - dependency.conda.ipython: '>=8.18.1' - dependency.conda.jinja2: '>=3.1.3' - dependency.conda.jupyterhub: '>=4.1.5' - dependency.conda.jupyter-server-proxy: '>=4.1.2' - dependency.conda.kernda: '>=0.3.0' - dependency.conda.mako: '>=1.2.2' - dependency.conda.matplotlib-base: '>=3.4.3' - dependency.conda.mkl-dpcpp: '>=2024.1.0' - dependency.conda.neural-compressor: '>=2.4.1' - dependency.conda.nodejs: '>=20.12.2' - dependency.conda.notebook: '>=7.1.3' - dependency.conda.numpy: '>=1.26.4' - dependency.conda.oauthlib: '>=3.2.2' - dependency.conda.oneccl_bind_pt_cpu: '>=2.2.0=*cpu*' - dependency.conda.oneccl_bind_pt_gpu: '>=2.1.200=*xpu*' + dependency.conda.accelerate: '==0.32.1' + dependency.conda.colorama: '==0.4.6' + dependency.conda.conda: '==24.5.0' + dependency.conda.dpnp: '=0.15.0' + dependency.conda.intel-extension-for-pytorch_cpu: '=2.3.100' + dependency.conda.intel-extension-for-pytorch_xpu: '=2.1.40' + dependency.conda.intel-extension-for-tensorflow_cpu: '=2.15.0=*cpu*' + dependency.conda.intel-extension-for-tensorflow_xpu: '=2.15.0.1=*xpu*' + dependency.conda.intel-optimization-for-horovod: '=0.28.1.5' + dependency.conda.ipykernel: '==6.29.5' + dependency.conda.jupyterhub: '==5.1.0' + dependency.conda.jupyter-server-proxy: '==4.3.0' + dependency.conda.kernda: '==0.3.0' + dependency.conda.ld_impl_linux-64: '==2.40' + dependency.conda.mamba: '==1.5.8' + dependency.conda.matplotlib-base: '>=3.8.4' + dependency.conda.mpi: '==1.0' + 
dependency.conda.mpich: '==4.2.2' + dependency.conda.networkx: '==3.3' + dependency.conda.neural-compressor: '=2.5.1' + dependency.conda.notebook: '==7.2.1' + dependency.conda.oneccl_bind_pt_cpu: '=2.3.0' + dependency.conda.oneccl_bind_pt_xpu: '=2.1.400' dependency.conda.onnx: '>=1.14.1' - dependency.conda.packaging: '>=23.2' - dependency.conda.pandas: '>=2.2.2' - dependency.conda.pillow: '>=10.2.0' - dependency.conda.protobuf: '>=4.24' - dependency.conda.pyjwt: '>=2.4.0' - dependency.conda.python: "=${PYTHON_VERSION:-3.10}" - dependency.conda.pytorch_cpu: '>=2.2.0=*cpu*' - dependency.conda.pytorch_gpu: '>=2.1.0=*xpu*' - dependency.conda.setuptools: '>=69.1.0' - dependency.conda.tensorboardx: '>=2.6.2.2' - dependency.conda.tensorflow: '>=2.15' - dependency.conda.torchaudio_cpu: '>=2.2.0=*cpu*' - dependency.conda.torchaudio_gpu: '>=2.1.0=*xpu*' - dependency.conda.torchvision_cpu: '>=0.17=*cpu*' - dependency.conda.torchvision_gpu: '>=0.16.0=*xpu*' - dependency.conda.tornado: '>=6.3.3' - dependency.conda.tqdm: '>=4.66.2' - dependency.conda.werkzeug: '>=2.2.3' - target: inference-optimization + dependency.conda.onnxruntime: '==1.18.1' + dependency.conda.py-cpuinfo: '==9.0.0' + dependency.conda.python: '==3.10.14' + dependency.conda.pytorch_cpu: '=2.3.1' + dependency.conda.pytorch_xpu: '=2.1.0' + dependency.conda.scikit-learn: '>=1.5.0' + dependency.conda.tensorboardx: '==2.6.2.2' + dependency.conda.tensorflow: '=2.15.1' + dependency.conda.tensorflow-hub: '==0.16.1' + dependency.conda.tf2onnx: '==1.16.1' + dependency.conda.torchaudio_cpu: '=2.3.1' + dependency.conda.torchaudio_xpu: '=2.1.0' + dependency.conda.torchvision_cpu: '=0.18.1' + dependency.conda.torchvision_xpu: '=0.16.0' + dependency.conda.tqdm: '==4.66.4' depends_on: - dl-base extends: dl-base - image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-2024.1.0-py${PYTHON_VERSION:-3.10} - command: | - bash -c "conda run -n pytorch-cpu python -c 'import intel_extension_for_pytorch as 
ipex;print(ipex.__version__);' && \ - conda run -n pytorch-cpu python -c 'import neural_compressor;print(\"Neural Compressor Version:\", neural_compressor.__version__)' && \ - conda run -n pytorch-gpu python -c 'import torch;print(torch.device(\"xpu\"));import intel_extension_for_pytorch as ipex;print(ipex.xpu.is_available());' && \ - conda run -n pytorch-gpu python -c 'import neural_compressor;print(\"Neural Compressor Version:\", neural_compressor.__version__)' && \ - conda run -n tensorflow-cpu python -c 'import intel_extension_for_tensorflow as itex;print(itex.__version__);' && \ - conda run -n tensorflow-cpu python -c 'import neural_compressor, tf2onnx; print(\"\\nNeural Compressor Version:\", neural_compressor.__version__, \"\\\nTensorFlow2ONNX Version:\", tf2onnx.__version__)' && \ - conda run -n tensorflow-gpu python -c 'from tensorflow.python.client import device_lib; print(device_lib.list_local_devices())' && \ - conda run -n tensorflow-gpu python -c 'import neural_compressor, tf2onnx; print(\"\\nNeural Compressor Version:\", neural_compressor.__version__, \"\\\nTensorFlow2ONNX Version:\", tf2onnx.__version__)'" + image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.2.0}-py${PYTHON_VERSION:-3.9} + command: > + bash -c "conda run -n pytorch-cpu python -c 'import intel_extension_for_pytorch + as ipex;print(ipex.__version__)' && + + conda run -n pytorch-cpu python -c 'import neural_compressor;print(\"Neural + Compressor Version:\", neural_compressor.__version__)' && + + conda run -n pytorch-gpu python -c 'import torch;print(torch.device(\"xpu\"));import + intel_extension_for_pytorch as ipex;print(ipex.xpu.is_available())' && + + conda run -n pytorch-gpu python -c 'import neural_compressor;print(\"Neural + Compressor Version:\", neural_compressor.__version__)' && + + conda run -n tensorflow-cpu python -c 'import intel_extension_for_tensorflow + as itex;print(itex.__version__)' && + + conda run -n tensorflow-cpu python -c 
'import neural_compressor, tf2onnx;print(\"\nNeural + Compressor Version:\", neural_compressor.__version__);print(\"\nTensorFlow2ONNX + Version:\", tf2onnx.__version__)' && + + conda run -n tensorflow-gpu python -c 'from tensorflow.python.client import + device_lib;print(device_lib.list_local_devices())' && + + conda run -n tensorflow-gpu python -c 'import neural_compressor, tf2onnx;print(\"\\nNeural + Compressor Version:\", neural_compressor.__version__);print(\"\\nTensorFlow2ONNX + Version:\", tf2onnx.__version__)' " diff --git a/preset/inference-optimization/requirements.txt b/preset/inference-optimization/requirements.txt index 15dad774..8f0091ac 100644 --- a/preset/inference-optimization/requirements.txt +++ b/preset/inference-optimization/requirements.txt @@ -1,16 +1,5 @@ -accelerate>=0.30.0 -cloud-data-connector>=1.0.3 -cryptography>=42.0.7 -dataset-librarian>=1.0.4 -datasets>=2.19.1 -evaluate>=0.4.2 +dataset-librarian==1.0.4 +evaluate==0.4.2 git+https://github.com/huggingface/optimum-intel.git -ninja>=1.11.1.1 -onnxruntime>=1.17.3 -py-cpuinfo>=9.0.0 -python-dotenv>=1.0.1 -requests>=2.31.0 -tensorflow-hub>=0.16.1 -tf2onnx>==1.16.1 -tqdm>=4.66.2 -transformers>=4.40.2 +tf2onnx==1.16.1 +onnxruntime==1.18.1 diff --git a/preset/inference-optimization/tests.yaml b/preset/inference-optimization/tests.yaml index 98731067..a906ecde 100644 --- a/preset/inference-optimization/tests.yaml +++ b/preset/inference-optimization/tests.yaml @@ -12,78 +12,87 @@ # See the License for the specific language governing permissions and # limitations under the License.
+--- inference-optimization-inc-ipex-quantization-notebook-${PYTHON_VERSION:-3.9}-cpu: cmd: papermill --log-output jupyter/inc-ipex-quantization/quantize_with_inc.ipynb result.ipynb -k pytorch-cpu --cwd jupyter/inc-ipex-quantization - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: amr-registry.caas.intel.com/aiops/aikit-products-dev:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.2.0}-py${PYTHON_VERSION:-3.9} notebook: True inference-optimization-inc-ipex-quantization-notebook-${PYTHON_VERSION:-3.9}-gpu: cmd: papermill --log-output jupyter/inc-ipex-quantization/quantize_with_inc.ipynb result.ipynb -k pytorch-gpu --cwd jupyter/inc-ipex-quantization - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: amr-registry.caas.intel.com/aiops/aikit-products-dev:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.2.0}-py${PYTHON_VERSION:-3.9} notebook: True + device: ["/dev/dri"] inference-optimization-inc-itex-notebook-${PYTHON_VERSION:-3.9}-cpu: cmd: papermill --log-output jupyter/inc-itex/inc_sample_tensorflow.ipynb result.ipynb -k tensorflow-cpu --cwd jupyter/inc-itex - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: amr-registry.caas.intel.com/aiops/aikit-products-dev:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.2.0}-py${PYTHON_VERSION:-3.9} notebook: True # Status: Commented due to out of resources error # inference-optimization-inc-itex-notebook-${PYTHON_VERSION:-3.9}-gpu: # cmd: papermill --log-output jupyter/inc-itex/inc_sample_tensorflow.ipynb result.ipynb -k tensorflow-gpu --cwd jupyter/inc-itex -# img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} +# img: 
amr-registry.caas.intel.com/aiops/aikit-products-dev:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.2.0}-py${PYTHON_VERSION:-3.9} # notebook: True inference-optimization-inc-tensorflow-${PYTHON_VERSION:-3.9}-cpu: cmd: conda run -n tensorflow-cpu sample-tests/neural_compressor/tensorflow/run.sh cpu - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: amr-registry.caas.intel.com/aiops/aikit-products-dev:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.2.0}-py${PYTHON_VERSION:-3.9} inference-optimization-inc-tensorflow-${PYTHON_VERSION:-3.9}-gpu: cmd: conda run -n tensorflow-gpu sample-tests/neural_compressor/tensorflow/run.sh gpu - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: amr-registry.caas.intel.com/aiops/aikit-products-dev:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.2.0}-py${PYTHON_VERSION:-3.9} + device: ["/dev/dri"] inference-optimization-inc-torch-${PYTHON_VERSION:-3.9}-cpu: cmd: conda run -n pytorch-cpu sample-tests/neural_compressor/torch/run.sh cpu - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: amr-registry.caas.intel.com/aiops/aikit-products-dev:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.2.0}-py${PYTHON_VERSION:-3.9} inference-optimization-ipex-${PYTHON_VERSION:-3.9}-cpu: cmd: conda run -n pytorch-cpu python -W ignore sample-tests/intel_extension_for_pytorch/test_ipex.py --device cpu --ipex - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: amr-registry.caas.intel.com/aiops/aikit-products-dev:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.2.0}-py${PYTHON_VERSION:-3.9} inference-optimization-ipex-${PYTHON_VERSION:-3.9}-gpu: cmd: conda run -n 
pytorch-gpu python -W ignore sample-tests/intel_extension_for_pytorch/test_ipex.py --device xpu --ipex - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: amr-registry.caas.intel.com/aiops/aikit-products-dev:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.2.0}-py${PYTHON_VERSION:-3.9} + device: ["/dev/dri"] inference-optimization-itex-${PYTHON_VERSION:-3.9}-cpu: cmd: conda run -n tensorflow-cpu python -W ignore sample-tests/intel_extension_for_tensorflow/test_itex.py - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: amr-registry.caas.intel.com/aiops/aikit-products-dev:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.2.0}-py${PYTHON_VERSION:-3.9} inference-optimization-itex-${PYTHON_VERSION:-3.9}-gpu: cmd: conda run -n tensorflow-gpu python -W ignore sample-tests/intel_extension_for_tensorflow/test_itex.py - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: amr-registry.caas.intel.com/aiops/aikit-products-dev:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.2.0}-py${PYTHON_VERSION:-3.9} + device: ["/dev/dri"] inference-optimization-itex-inference-notebook-${PYTHON_VERSION:-3.9}-cpu: cmd: papermill --log-output jupyter/itex-inference/tutorial_optimize_TensorFlow_pretrained_model.ipynb result.ipynb -k tensorflow-cpu --cwd jupyter/itex-inference - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: amr-registry.caas.intel.com/aiops/aikit-products-dev:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.2.0}-py${PYTHON_VERSION:-3.9} notebook: True # Need update from TensorFlow v1 to V2 # inference-optimization-itex-inference-notebook-${PYTHON_VERSION:-3.9}-gpu: # cmd: papermill --log-output 
jupyter/itex-inference/tutorial_optimize_TensorFlow_pretrained_model.ipynb result.ipynb -k tensorflow-gpu --cwd jupyter/itex-inference -# img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} +# img: amr-registry.caas.intel.com/aiops/aikit-products-dev:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.2.0}-py${PYTHON_VERSION:-3.9} # notebook: True inference-optimization-onnx-${PYTHON_VERSION:-3.9}-cpu: cmd: conda run -n tensorflow-cpu sample-tests/onnx/run.sh - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: amr-registry.caas.intel.com/aiops/aikit-products-dev:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.2.0}-py${PYTHON_VERSION:-3.9} inference-optimization-onnx-${PYTHON_VERSION:-3.9}-gpu: cmd: conda run -n tensorflow-gpu sample-tests/onnx/run.sh - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: amr-registry.caas.intel.com/aiops/aikit-products-dev:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.2.0}-py${PYTHON_VERSION:-3.9} + device: ["/dev/dri"] inference-optimization-tensorflow-dataset-librarian-${PYTHON_VERSION:-3.9}-cpu: cmd: conda run -n tensorflow-cpu bash -c 'yes | python -m dataset_librarian.dataset -n msmarco --download -d ~/msmarco' - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: amr-registry.caas.intel.com/aiops/aikit-products-dev:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.2.0}-py${PYTHON_VERSION:-3.9} inference-optimization-tensorflow-dataset-librarian-${PYTHON_VERSION:-3.9}-gpu: cmd: conda run -n tensorflow-gpu bash -c 'yes | python -m dataset_librarian.dataset -n msmarco --download -d ~/msmarco' + img:
amr-registry.caas.intel.com/aiops/aikit-products-dev:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.2.0}-py${PYTHON_VERSION:-3.9} + device: ["/dev/dri"] inference-optimization-torch-dataset-librarian-${PYTHON_VERSION:-3.9}-cpu: cmd: conda run -n pytorch-cpu bash -c 'yes | python -m dataset_librarian.dataset -n msmarco --download -d ~/msmarco' - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: amr-registry.caas.intel.com/aiops/aikit-products-dev:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.2.0}-py${PYTHON_VERSION:-3.9} inference-optimization-torch-dataset-librarian-${PYTHON_VERSION:-3.9}-gpu: cmd: conda run -n pytorch-gpu bash -c 'yes | python -m dataset_librarian.dataset -n msmarco --download -d ~/msmarco' - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: amr-registry.caas.intel.com/aiops/aikit-products-dev:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.2.0}-py${PYTHON_VERSION:-3.9} + device: ["/dev/dri"] From 58cc2a39570246d05a2fe03d77a5232b98194768 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 12 Aug 2024 15:57:45 +0000 Subject: [PATCH 02/50] Bump step-security/harden-runner from 2.9.0 to 2.9.1 (#302) --- .github/workflows/chart-ci.yaml | 2 +- .github/workflows/container-ci.yaml | 10 +++++----- .github/workflows/dependency-review.yaml | 2 +- .github/workflows/dockerhub-description.yml | 4 ++-- .github/workflows/docs.yaml | 2 +- .github/workflows/integration-test.yaml | 4 ++-- .github/workflows/lint.yaml | 2 +- .github/workflows/scorecard.yaml | 2 +- .github/workflows/security-report.yaml | 2 +- .github/workflows/test-runner-ci.yaml | 6 +++--- .github/workflows/weekly-test.yaml | 6 +++--- 11 files changed, 21 insertions(+), 21 deletions(-) diff --git a/.github/workflows/chart-ci.yaml 
b/.github/workflows/chart-ci.yaml index 24b5404e..916423b2 100644 --- a/.github/workflows/chart-ci.yaml +++ b/.github/workflows/chart-ci.yaml @@ -26,7 +26,7 @@ jobs: runs-on: kubectl steps: - name: Harden Runner - uses: step-security/harden-runner@0d381219ddf674d61a7572ddd19d7941e271515c # v2.9.0 + uses: step-security/harden-runner@5c7944e73c4c2a096b17a9cb74d65b6c2bbafbde # v2.9.1 with: egress-policy: audit - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 diff --git a/.github/workflows/container-ci.yaml b/.github/workflows/container-ci.yaml index 6dbf8532..3fb4f8a6 100644 --- a/.github/workflows/container-ci.yaml +++ b/.github/workflows/container-ci.yaml @@ -66,7 +66,7 @@ jobs: runs-on: ${{ github.repository_owner == 'intel' && 'intel-ubuntu-latest' || 'ubuntu-latest' }} steps: - name: Harden Runner - uses: step-security/harden-runner@0d381219ddf674d61a7572ddd19d7941e271515c # v2.9.0 + uses: step-security/harden-runner@5c7944e73c4c2a096b17a9cb74d65b6c2bbafbde # v2.9.1 with: egress-policy: audit - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 @@ -117,7 +117,7 @@ jobs: matrix: ${{ steps.scan-matrix.outputs.matrix }} steps: - name: Harden Runner - uses: step-security/harden-runner@0d381219ddf674d61a7572ddd19d7941e271515c # v2.9.0 + uses: step-security/harden-runner@5c7944e73c4c2a096b17a9cb74d65b6c2bbafbde # v2.9.1 with: egress-policy: audit - uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 @@ -136,7 +136,7 @@ jobs: fail-fast: false steps: - name: Harden Runner - uses: step-security/harden-runner@0d381219ddf674d61a7572ddd19d7941e271515c # v2.9.0 + uses: step-security/harden-runner@5c7944e73c4c2a096b17a9cb74d65b6c2bbafbde # v2.9.1 with: egress-policy: audit - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 @@ -170,7 +170,7 @@ jobs: matrix: ${{ steps.test-matrix.outputs.matrix }} steps: - name: Harden Runner - uses: 
step-security/harden-runner@0d381219ddf674d61a7572ddd19d7941e271515c # v2.9.0 + uses: step-security/harden-runner@5c7944e73c4c2a096b17a9cb74d65b6c2bbafbde # v2.9.1 with: egress-policy: audit - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 @@ -187,7 +187,7 @@ jobs: experimental: [true] fail-fast: false steps: - - uses: step-security/harden-runner@0d381219ddf674d61a7572ddd19d7941e271515c # v2.9.0 + - uses: step-security/harden-runner@5c7944e73c4c2a096b17a9cb74d65b6c2bbafbde # v2.9.1 with: egress-policy: audit - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 diff --git a/.github/workflows/dependency-review.yaml b/.github/workflows/dependency-review.yaml index 9feca423..cce8357f 100644 --- a/.github/workflows/dependency-review.yaml +++ b/.github/workflows/dependency-review.yaml @@ -34,7 +34,7 @@ jobs: pull-requests: write steps: - name: Harden Runner - uses: step-security/harden-runner@0d381219ddf674d61a7572ddd19d7941e271515c # v2.9.0 + uses: step-security/harden-runner@5c7944e73c4c2a096b17a9cb74d65b6c2bbafbde # v2.9.1 with: egress-policy: audit - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 diff --git a/.github/workflows/dockerhub-description.yml b/.github/workflows/dockerhub-description.yml index f3bbd9bf..201e8888 100644 --- a/.github/workflows/dockerhub-description.yml +++ b/.github/workflows/dockerhub-description.yml @@ -24,7 +24,7 @@ jobs: matrix: ${{ steps.set-matrix.outputs.matrix }} steps: - name: Harden Runner - uses: step-security/harden-runner@0d381219ddf674d61a7572ddd19d7941e271515c # v2.9.0 + uses: step-security/harden-runner@5c7944e73c4c2a096b17a9cb74d65b6c2bbafbde # v2.9.1 with: egress-policy: audit - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 @@ -39,7 +39,7 @@ jobs: fail-fast: false steps: - name: Harden Runner - uses: step-security/harden-runner@0d381219ddf674d61a7572ddd19d7941e271515c # v2.9.0 + uses: 
step-security/harden-runner@5c7944e73c4c2a096b17a9cb74d65b6c2bbafbde # v2.9.1 with: egress-policy: audit - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index 900afb5f..0b8742c6 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -32,7 +32,7 @@ jobs: pages: write steps: - name: Harden Runner - uses: step-security/harden-runner@0d381219ddf674d61a7572ddd19d7941e271515c # v2.9.0 + uses: step-security/harden-runner@5c7944e73c4c2a096b17a9cb74d65b6c2bbafbde # v2.9.1 with: egress-policy: audit - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 diff --git a/.github/workflows/integration-test.yaml b/.github/workflows/integration-test.yaml index 2a102efd..2a112c5b 100644 --- a/.github/workflows/integration-test.yaml +++ b/.github/workflows/integration-test.yaml @@ -26,7 +26,7 @@ jobs: groups: ${{ steps.group-list.outputs.FOLDERS }} steps: - name: Harden Runner - uses: step-security/harden-runner@0d381219ddf674d61a7572ddd19d7941e271515c # v2.9.0 + uses: step-security/harden-runner@5c7944e73c4c2a096b17a9cb74d65b6c2bbafbde # v2.9.1 with: egress-policy: audit - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 @@ -118,7 +118,7 @@ jobs: if: always() steps: - name: Harden Runner - uses: step-security/harden-runner@0d381219ddf674d61a7572ddd19d7941e271515c # v2.9.0 + uses: step-security/harden-runner@5c7944e73c4c2a096b17a9cb74d65b6c2bbafbde # v2.9.1 with: egress-policy: audit - run: exit 1 diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml index 0d170a62..2e550689 100644 --- a/.github/workflows/lint.yaml +++ b/.github/workflows/lint.yaml @@ -31,7 +31,7 @@ jobs: statuses: write steps: - name: Harden Runner - uses: step-security/harden-runner@0d381219ddf674d61a7572ddd19d7941e271515c # v2.9.0 + uses: step-security/harden-runner@5c7944e73c4c2a096b17a9cb74d65b6c2bbafbde # v2.9.1 with: egress-policy: 
audit - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 diff --git a/.github/workflows/scorecard.yaml b/.github/workflows/scorecard.yaml index 72abc0f8..d74f88a2 100644 --- a/.github/workflows/scorecard.yaml +++ b/.github/workflows/scorecard.yaml @@ -36,7 +36,7 @@ jobs: actions: read steps: - name: Harden Runner - uses: step-security/harden-runner@0d381219ddf674d61a7572ddd19d7941e271515c # v2.9.0 + uses: step-security/harden-runner@5c7944e73c4c2a096b17a9cb74d65b6c2bbafbde # v2.9.1 with: egress-policy: audit - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 diff --git a/.github/workflows/security-report.yaml b/.github/workflows/security-report.yaml index f1ccde65..444262e8 100644 --- a/.github/workflows/security-report.yaml +++ b/.github/workflows/security-report.yaml @@ -27,7 +27,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Harden Runner - uses: step-security/harden-runner@0d381219ddf674d61a7572ddd19d7941e271515c # v2.9.0 + uses: step-security/harden-runner@5c7944e73c4c2a096b17a9cb74d65b6c2bbafbde # v2.9.1 with: egress-policy: audit - uses: rsdmike/github-security-report-action@a149b24539044c92786ec39af8ba38c93496495d # v3.0.4 diff --git a/.github/workflows/test-runner-ci.yaml b/.github/workflows/test-runner-ci.yaml index 6ef0e617..448e0970 100644 --- a/.github/workflows/test-runner-ci.yaml +++ b/.github/workflows/test-runner-ci.yaml @@ -33,7 +33,7 @@ jobs: fail-fast: true steps: - name: Harden Runner - uses: step-security/harden-runner@0d381219ddf674d61a7572ddd19d7941e271515c # v2.9.0 + uses: step-security/harden-runner@5c7944e73c4c2a096b17a9cb74d65b6c2bbafbde # v2.9.1 with: egress-policy: audit - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 @@ -66,7 +66,7 @@ jobs: runs-on: ${{ github.repository_owner == 'intel' && 'intel-ubuntu-latest' || 'ubuntu-latest' }} steps: - name: Harden Runner - uses: step-security/harden-runner@0d381219ddf674d61a7572ddd19d7941e271515c # v2.9.0 + uses: 
step-security/harden-runner@5c7944e73c4c2a096b17a9cb74d65b6c2bbafbde # v2.9.1 with: egress-policy: audit - uses: coverallsapp/github-action@643bc377ffa44ace6394b2b5d0d3950076de9f63 # v2.3.0 @@ -76,7 +76,7 @@ jobs: runs-on: ${{ github.repository_owner == 'intel' && 'intel-ubuntu-latest' || 'ubuntu-latest' }} steps: - name: Harden Runner - uses: step-security/harden-runner@0d381219ddf674d61a7572ddd19d7941e271515c # v2.9.0 + uses: step-security/harden-runner@5c7944e73c4c2a096b17a9cb74d65b6c2bbafbde # v2.9.1 with: egress-policy: audit - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 diff --git a/.github/workflows/weekly-test.yaml b/.github/workflows/weekly-test.yaml index 41c8a1df..41189eed 100644 --- a/.github/workflows/weekly-test.yaml +++ b/.github/workflows/weekly-test.yaml @@ -25,7 +25,7 @@ jobs: groups: ${{ steps.group-list.outputs.FOLDERS }} steps: - name: Harden Runner - uses: step-security/harden-runner@0d381219ddf674d61a7572ddd19d7941e271515c # v2.9.0 + uses: step-security/harden-runner@5c7944e73c4c2a096b17a9cb74d65b6c2bbafbde # v2.9.1 with: egress-policy: audit - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 @@ -56,7 +56,7 @@ jobs: runs-on: kubectl steps: - name: Harden Runner - uses: step-security/harden-runner@0d381219ddf674d61a7572ddd19d7941e271515c # v2.9.0 + uses: step-security/harden-runner@5c7944e73c4c2a096b17a9cb74d65b6c2bbafbde # v2.9.1 with: egress-policy: audit - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 @@ -72,7 +72,7 @@ jobs: runs-on: ${{ github.repository_owner == 'intel' && 'intel-ubuntu-latest' || 'ubuntu-latest' }} steps: - name: Harden Runner - uses: step-security/harden-runner@0d381219ddf674d61a7572ddd19d7941e271515c # v2.9.0 + uses: step-security/harden-runner@5c7944e73c4c2a096b17a9cb74d65b6c2bbafbde # v2.9.1 with: egress-policy: audit - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 From 45d23db760230e34ab14536656de6af62985925a 
Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 12 Aug 2024 09:05:37 -0700 Subject: [PATCH 03/50] Bump the python group in /python with 3 updates (#297) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- python/requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/requirements.txt b/python/requirements.txt index 0418b164..fa2002ba 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -1,6 +1,6 @@ numpy==1.26.4 setuptools>=70.0.0 psutil==6.0.0 -mkl==2024.2.0 -mkl-include==2024.2.0 -intel-openmp==2024.2.0 +mkl==2024.2.1 +mkl-include==2024.2.1 +intel-openmp==2024.2.1 From 325d79ceaca189e55a7f279e065d287925ab3b1d Mon Sep 17 00:00:00 2001 From: Tyler Titsworth Date: Mon, 12 Aug 2024 09:16:59 -0700 Subject: [PATCH 04/50] Remove keyless sources from pytorch xpu (#293) Signed-off-by: Tyler Titsworth Co-authored-by: Srikanth Ramakrishna --- pytorch/Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pytorch/Dockerfile b/pytorch/Dockerfile index 809da9c8..0a68ac58 100644 --- a/pytorch/Dockerfile +++ b/pytorch/Dockerfile @@ -194,7 +194,8 @@ RUN apt-get update && \ WORKDIR / COPY xpu-requirements.txt . 
-RUN python -m pip install --no-cache-dir -r xpu-requirements.txt +RUN python -m pip install --no-cache-dir -r xpu-requirements.txt && \ + rm -rf xpu-requirements.txt /etc/apt/sources.list.d/intel-gpu-jammy.list /etc/apt/sources.list.d/oneAPI.list ENV LD_LIBRARY_PATH=/opt/intel/oneapi/redist/lib:$LD_LIBRARY_PATH From 31f296aa870fbc58c658e21d1e034f7cf18b297f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 12 Aug 2024 16:27:49 +0000 Subject: [PATCH 05/50] Bump github/codeql-action from 3.25.15 to 3.26.0 (#303) --- .github/workflows/container-ci.yaml | 2 +- .github/workflows/scorecard.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/container-ci.yaml b/.github/workflows/container-ci.yaml index 3fb4f8a6..20213f5b 100644 --- a/.github/workflows/container-ci.yaml +++ b/.github/workflows/container-ci.yaml @@ -155,7 +155,7 @@ jobs: - name: Cleanup if: always() run: docker rmi -f ${{ secrets.REGISTRY }}/${{ secrets.REPO }}:${{ matrix.container }} - - uses: github/codeql-action/upload-sarif@afb54ba388a7dca6ecae48f608c4ff05ff4cc77a # v3.25.15 + - uses: github/codeql-action/upload-sarif@eb055d739abdc2e8de2e5f4ba1a8b246daa779aa # v3.26.0 with: sarif_file: '${{ matrix.container }}-scan.sarif' category: '${{ matrix.container }}' diff --git a/.github/workflows/scorecard.yaml b/.github/workflows/scorecard.yaml index d74f88a2..fd31f769 100644 --- a/.github/workflows/scorecard.yaml +++ b/.github/workflows/scorecard.yaml @@ -53,6 +53,6 @@ jobs: name: SARIF file path: results.sarif retention-days: 5 - - uses: github/codeql-action/upload-sarif@afb54ba388a7dca6ecae48f608c4ff05ff4cc77a # v3.25.15 + - uses: github/codeql-action/upload-sarif@eb055d739abdc2e8de2e5f4ba1a8b246daa779aa # v3.26.0 with: sarif_file: results.sarif From 88325dfdb4d19586339b768b1feb78d54fb51e11 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 12 Aug 
2024 16:38:55 +0000 Subject: [PATCH 06/50] Bump actions/upload-artifact from 4.3.5 to 4.3.6 (#304) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/scorecard.yaml | 2 +- .github/workflows/security-report.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/scorecard.yaml b/.github/workflows/scorecard.yaml index fd31f769..086103d8 100644 --- a/.github/workflows/scorecard.yaml +++ b/.github/workflows/scorecard.yaml @@ -48,7 +48,7 @@ jobs: results_format: sarif repo_token: ${{ secrets.GITHUB_TOKEN }} publish_results: true - - uses: actions/upload-artifact@89ef406dd8d7e03cfd12d9e0a4a378f454709029 # v4.3.5 + - uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6 with: name: SARIF file path: results.sarif diff --git a/.github/workflows/security-report.yaml b/.github/workflows/security-report.yaml index 444262e8..a9d3b98b 100644 --- a/.github/workflows/security-report.yaml +++ b/.github/workflows/security-report.yaml @@ -35,7 +35,7 @@ jobs: sarifReportDir: ${{ github.workspace }} template: report token: ${{ secrets.GITHUB_TOKEN }} - - uses: actions/upload-artifact@89ef406dd8d7e03cfd12d9e0a4a378f454709029 # v4.3.5 + - uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6 with: name: Security Report Summary path: ./*.pdf From 9803461b3b2da4b3d302dc64680b5d45e61657a8 Mon Sep 17 00:00:00 2001 From: Sharvil Shah Date: Tue, 13 Aug 2024 11:57:33 -0700 Subject: [PATCH 07/50] Token Authentication enabled and tested for Torchserve workflow (#306) Signed-off-by: sharvil10 Signed-off-by: Tyler Titsworth Signed-off-by: dependabot[bot] Co-authored-by: Tyler Titsworth Co-authored-by: Srikanth Ramakrishna Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- pytorch/serving/config.properties | 1 + workflows/charts/torchserve/README.md | 3 ++- 
workflows/charts/torchserve/templates/NOTES.txt | 5 +++++ workflows/charts/torchserve/templates/deploy.yaml | 3 +++ workflows/charts/torchserve/values.yaml | 2 ++ 5 files changed, 13 insertions(+), 1 deletion(-) diff --git a/pytorch/serving/config.properties b/pytorch/serving/config.properties index 8f17094d..ecaec9e0 100644 --- a/pytorch/serving/config.properties +++ b/pytorch/serving/config.properties @@ -12,3 +12,4 @@ cpu_launcher_enable=true cpu_launcher_args=--use_logical_core disable_token_authorization=true enable_model_api=true +enable_envvars_config=true diff --git a/workflows/charts/torchserve/README.md b/workflows/charts/torchserve/README.md index b84a964c..b35cc7d4 100644 --- a/workflows/charts/torchserve/README.md +++ b/workflows/charts/torchserve/README.md @@ -18,6 +18,7 @@ For more information about how to use Intel Optimized TorchServe, check out the | deploy.resources.limits | object | `{"cpu":"4000m","memory":"1Gi"}` | Maximum resources per pod | | deploy.resources.requests | object | `{"cpu":"1000m","memory":"512Mi"}` | Minimum resources per pod | | deploy.storage.nfs | object | `{"enabled":false,"path":"nil","readOnly":true,"server":"nil","subPath":"nil"}` | Network File System (NFS) storage for models | +| deploy.tokens_disabled | bool | `false` | Set token authentication on or off. Checkout the latest [torchserve docs](https://github.com/pytorch/serve/blob/master/docs/token_authorization_api.md) for more details. | | fullnameOverride | string | `""` | Full qualified Domain Name | | nameOverride | string | `""` | Name of the serving service | | pvc.size | string | `"1Gi"` | Size of the storage | @@ -37,4 +38,4 @@ There are some additional steps that can be taken to prepare your service for yo - Integrate an [SSL Certificate](https://pytorch.org/serve/configuration.html#enable-ssl) in your model config file to serve models securely. 
---------------------------------------------- -Autogenerated from chart metadata using [helm-docs v1.13.1](https://github.com/norwoodj/helm-docs/releases/v1.13.1) +Autogenerated from chart metadata using [helm-docs v1.14.2](https://github.com/norwoodj/helm-docs/releases/v1.14.2) diff --git a/workflows/charts/torchserve/templates/NOTES.txt b/workflows/charts/torchserve/templates/NOTES.txt index 8796b205..7cf61fc4 100644 --- a/workflows/charts/torchserve/templates/NOTES.txt +++ b/workflows/charts/torchserve/templates/NOTES.txt @@ -14,3 +14,8 @@ echo "Visit http://127.0.0.1:8080 to use your application" kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT {{- end }} +{{- if eq false .Values.deploy.tokens_disabled }} +2. Display the tokens for accessing the APIs. For more details about token authentication checkout: https://github.com/pytorch/serve/blob/master/docs/token_authorization_api.md + export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "torchserve.name" . 
}},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}") + kubectl exec --namespace {{ .Release.Namespace }} $POD_NAME -- cat /home/model-server/key_file.json +{{- end }} diff --git a/workflows/charts/torchserve/templates/deploy.yaml b/workflows/charts/torchserve/templates/deploy.yaml index 544a2fb1..85f03142 100644 --- a/workflows/charts/torchserve/templates/deploy.yaml +++ b/workflows/charts/torchserve/templates/deploy.yaml @@ -47,6 +47,9 @@ spec: - configMapRef: name: {{ .Values.deploy.env.configMapName }} {{- end }} + env: + - name: TS_DISABLE_TOKEN_AUTHORIZATION + value: "{{ .Values.deploy.tokens_disabled }}" ports: - name: rest-1 containerPort: 8080 diff --git a/workflows/charts/torchserve/values.yaml b/workflows/charts/torchserve/values.yaml index e95efb15..f59e1c40 100644 --- a/workflows/charts/torchserve/values.yaml +++ b/workflows/charts/torchserve/values.yaml @@ -23,6 +23,8 @@ deploy: env: configMapName: intel-proxy-config enabled: true + # -- Set token authentication on or off. Checkout the latest [torchserve docs](https://github.com/pytorch/serve/blob/master/docs/token_authorization_api.md) for more details. 
+ tokens_disabled: true # -- Models to be loaded models: all # -- Model Server Configuration file location From 4dded501f37391f91397f4c4a589977e5bedb1a2 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 14 Aug 2024 11:34:05 -0700 Subject: [PATCH 08/50] Bump the pytorch group across 1 directory with 14 updates (#309) Signed-off-by: dependabot[bot] Signed-off-by: tylertitsworth Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: tylertitsworth --- pytorch/README.md | 31 +++++++++++++++++-------- pytorch/docker-compose.yaml | 36 ++++++++++++++--------------- pytorch/hf-genai-requirements.txt | 10 ++++---- pytorch/jupyter-requirements.txt | 2 +- pytorch/multinode/requirements.txt | 6 ++--- pytorch/requirements.txt | 10 ++++---- pytorch/serving/README.md | 6 ++--- pytorch/tests/tests.yaml | 20 ++++++++-------- pytorch/torchserve-requirements.txt | 6 ++--- pytorch/xpu-requirements.txt | 2 +- 10 files changed, 70 insertions(+), 59 deletions(-) diff --git a/pytorch/README.md b/pytorch/README.md index c2302f78..de9a45b3 100644 --- a/pytorch/README.md +++ b/pytorch/README.md @@ -66,7 +66,8 @@ The images below are built only with CPU optimizations (GPU acceleration support | Tag(s) | Pytorch | IPEX | Dockerfile | | -------------------------- | -------- | ------------ | --------------- | -| `2.3.0-pip-base`, `latest` | [v2.3.0] | [v2.3.0+cpu] | [v0.4.0-Beta] | +| `2.4.0-pip-base`, `latest` | [v2.4.0] | [v2.4.0+cpu] | [v0.4.0-Beta] | +| `2.3.0-pip-base` | [v2.3.0] | [v2.3.0+cpu] | [v0.4.0-Beta] | | `2.2.0-pip-base` | [v2.2.0] | [v2.2.0+cpu] | [v0.3.4] | | `2.1.0-pip-base` | [v2.1.0] | [v2.1.0+cpu] | [v0.2.3] | | `2.0.0-pip-base` | [v2.0.0] | [v2.0.0+cpu] | [v0.1.0] | @@ -83,6 +84,7 @@ The images below additionally include [Jupyter Notebook](https://jupyter.org/) s | Tag(s) | Pytorch | IPEX | Dockerfile | | ------------------- | -------- | ------------ | --------------- 
| +| `2.4.0-pip-jupyter` | [v2.4.0] | [v2.4.0+cpu] | [v0.4.0-Beta] | | `2.3.0-pip-jupyter` | [v2.3.0] | [v2.3.0+cpu] | [v0.4.0-Beta] | | `2.2.0-pip-jupyter` | [v2.2.0] | [v2.2.0+cpu] | [v0.3.4] | | `2.1.0-pip-jupyter` | [v2.1.0] | [v2.1.0+cpu] | [v0.2.3] | @@ -93,7 +95,7 @@ docker run -it --rm \ -p 8888:8888 \ -v $PWD/workspace:/workspace \ -w /workspace \ - intel/intel-extension-for-pytorch:2.3.0-pip-jupyter + intel/intel-extension-for-pytorch:2.4.0-pip-jupyter ``` After running the command above, copy the URL (something like `http://127.0.0.1:$PORT/?token=***`) into your browser to access the notebook server. @@ -104,6 +106,7 @@ The images below additionally include [Intel® oneAPI Collective Communications | Tag(s) | Pytorch | IPEX | oneCCL | INC | Dockerfile | | --------------------- | -------- | ------------ | -------------------- | --------- | -------------- | +| `2.4.0-pip-multinode` | [v2.4.0] | [v2.4.0+cpu] | [v2.4.0][ccl-v2.4.0] | [v3.0] | [v0.4.0-Beta] | | `2.3.0-pip-multinode` | [v2.3.0] | [v2.3.0+cpu] | [v2.3.0][ccl-v2.3.0] | [v2.6] | [v0.4.0-Beta] | | `2.2.0-pip-multinode` | [v2.2.2] | [v2.2.0+cpu] | [v2.2.0][ccl-v2.2.0] | [v2.6] | [v0.4.0-Beta] | | `2.1.100-pip-mulitnode` | [v2.1.2] | [v2.1.100+cpu] | [v2.1.0][ccl-v2.1.0] | [v2.6] | [v0.4.0-Beta] | @@ -186,7 +189,7 @@ To add these files correctly please follow the steps described below. -v $PWD/authorized_keys:/etc/ssh/authorized_keys \ -v $PWD/tests:/workspace/tests \ -w /workspace \ - intel/intel-extension-for-pytorch:2.3.0-pip-multinode \ + intel/intel-extension-for-pytorch:2.4.0-pip-multinode \ bash -c '/usr/sbin/sshd -D' ``` @@ -199,7 +202,7 @@ To add these files correctly please follow the steps described below. 
-v $PWD/tests:/workspace/tests \ -v $PWD/hostfile:/workspace/hostfile \ -w /workspace \ - intel/intel-extension-for-pytorch:2.3.0-pip-multinode \ + intel/intel-extension-for-pytorch:2.4.0-pip-multinode \ bash -c 'ipexrun cpu --nnodes 2 --nprocs-per-node 1 --master-addr 127.0.0.1 --master-port 3022 /workspace/tests/ipex-resnet50.py --ipex --device cpu --backend ccl' ``` @@ -227,7 +230,7 @@ Additionally, if you have a [DeepSpeed* configuration](https://www.deepspeed.ai/ -v $PWD/hostfile:/workspace/hostfile \ -v $PWD/ds_config.json:/workspace/ds_config.json \ -w /workspace \ - intel/intel-extension-for-pytorch:2.3.0-pip-multinode \ + intel/intel-extension-for-pytorch:2.4.0-pip-multinode \ bash -c 'deepspeed --launcher IMPI \ --master_addr 127.0.0.1 --master_port 3022 \ --deepspeed_config ds_config.json --hostfile /workspace/hostfile \ @@ -240,9 +243,9 @@ Additionally, if you have a [DeepSpeed* configuration](https://www.deepspeed.ai/ The image below is an extension of the IPEX Multi-Node Container designed to run Hugging Face Generative AI scripts. The container has the typical installations needed to run and fine tune PyTorch generative text models from Hugging Face. It can be used to run multinode jobs using the same instructions from the [IPEX Multi-Node container](#setup-and-run-ipex-multi-node-container). 
-| Tag(s) | Pytorch | IPEX | oneCCL | transformers | Dockerfile | -| --------------------- | -------- | ------------ | -------------------- | --------- | --------------- | -| `2.3.0-pip-multinode-hf-4.41.2-genai` | [v2.3.1](https://github.com/pytorch/pytorch/releases/tag/v2.3.1) | [v2.3.0+cpu] | [v2.3.0][ccl-v2.3.0] | [v4.41.2] | [v0.4.0-Beta] | +| Tag(s) | Pytorch | IPEX | oneCCL | HF Transformers | Dockerfile | +| ------------------------------------- | -------- | ------------ | -------------------- | --------------- | --------------- | +| `2.4.0-pip-multinode-hf-4.44.0-genai` | [v2.4.0] | [v2.4.0+cpu] | [v2.4.0][ccl-v2.4.0] | [v4.44.0] | [v0.4.0-Beta] | Below is an example that shows single node job with the existing [`finetune.py`](../workflows/charts/huggingface-llm/scripts/finetune.py) script. @@ -251,7 +254,7 @@ Below is an example that shows single node job with the existing [`finetune.py`] docker run -it \ -v $PWD/workflows/charts/huggingface-llm/scripts:/workspace/scripts \ -w /workspace/scripts \ - intel/intel-extension-for-pytorch:2.3.0-pip-multinode-hf-4.41.2-genai \ + intel/intel-extension-for-pytorch:2.4.0-pip-multinode-hf-4.44.0-genai \ bash -c 'python finetune.py ' ``` @@ -261,6 +264,7 @@ The images below are [TorchServe*] with CPU Optimizations: | Tag(s) | Pytorch | IPEX | Dockerfile | | ------------------- | -------- | ------------ | --------------- | +| `2.4.0-serving-cpu` | [v2.4.0] | [v2.4.0+cpu] | [v0.4.0-Beta] | | `2.3.0-serving-cpu` | [v2.3.0] | [v2.3.0+cpu] | [v0.4.0-Beta] | | `2.2.0-serving-cpu` | [v2.2.0] | [v2.2.0+cpu] | [v0.3.4] | @@ -272,6 +276,7 @@ The images below are built only with CPU optimizations (GPU acceleration support | Tag(s) | Pytorch | IPEX | Dockerfile | | ---------------- | -------- | ------------ | --------------- | +| `2.4.0-idp-base` | [v2.4.0] | [v2.4.0+cpu] | [v0.4.0-Beta] | | `2.3.0-idp-base` | [v2.3.0] | [v2.3.0+cpu] | [v0.4.0-Beta] | | `2.2.0-idp-base` | [v2.2.0] | [v2.2.0+cpu] | [v0.3.4] | | `2.1.0-idp-base` | 
[v2.1.0] | [v2.1.0+cpu] | [v0.2.3] | @@ -281,6 +286,7 @@ The images below additionally include [Jupyter Notebook](https://jupyter.org/) s | Tag(s) | Pytorch | IPEX | Dockerfile | | ------------------- | -------- | ------------ | --------------- | +| `2.4.0-idp-jupyter` | [v2.4.0] | [v2.4.0+cpu] | [v0.4.0-Beta] | | `2.3.0-idp-jupyter` | [v2.3.0] | [v2.3.0+cpu] | [v0.4.0-Beta] | | `2.2.0-idp-jupyter` | [v2.2.0] | [v2.2.0+cpu] | [v0.3.4] | | `2.1.0-idp-jupyter` | [v2.1.0] | [v2.1.0+cpu] | [v0.2.3] | @@ -290,6 +296,7 @@ The images below additionally include [Intel® oneAPI Collective Communications | Tag(s) | Pytorch | IPEX | oneCCL | INC | Dockerfile | | --------------------- | -------- | ------------ | -------------------- | --------- | --------------- | +| `2.4.0-idp-multinode` | [v2.4.0] | [v2.4.0+cpu] | [v2.4.0][ccl-v2.3.0] | [v3.0] | [v0.4.0-Beta] | | `2.3.0-idp-multinode` | [v2.3.0] | [v2.3.0+cpu] | [v2.3.0][ccl-v2.3.0] | [v2.6] | [v0.4.0-Beta] | | `2.2.0-idp-multinode` | [v2.2.0] | [v2.2.0+cpu] | [v2.2.0][ccl-v2.2.0] | [v2.4.1] | [v0.3.4] | | `2.1.0-idp-mulitnode` | [v2.1.0] | [v2.1.0+cpu] | [v2.1.0][ccl-v2.1.0] | [v2.3.1] | [v0.2.3] | @@ -380,6 +387,7 @@ It is the image user's responsibility to ensure that any use of The images below [v2.1.10+xpu]: https://github.com/intel/intel-extension-for-pytorch/releases/tag/v2.1.10%2Bxpu [v2.0.110+xpu]: https://github.com/intel/intel-extension-for-pytorch/releases/tag/v2.0.110%2Bxpu +[v2.4.0]: https://github.com/pytorch/pytorch/releases/tag/v2.4.0 [v2.3.0]: https://github.com/pytorch/pytorch/releases/tag/v2.3.0 [v2.2.2]: https://github.com/pytorch/pytorch/releases/tag/v2.2.2 [v2.2.0]: https://github.com/pytorch/pytorch/releases/tag/v2.2.0 @@ -388,11 +396,13 @@ It is the image user's responsibility to ensure that any use of The images below [v2.0.1]: https://github.com/pytorch/pytorch/releases/tag/v2.0.1 [v2.0.0]: https://github.com/pytorch/pytorch/releases/tag/v2.0.0 +[v3.0]: 
https://github.com/intel/neural-compressor/releases/tag/v3.0 [v2.6]: https://github.com/intel/neural-compressor/releases/tag/v2.6 [v2.4.1]: https://github.com/intel/neural-compressor/releases/tag/v2.4.1 [v2.3.1]: https://github.com/intel/neural-compressor/releases/tag/v2.3.1 [v2.1.1]: https://github.com/intel/neural-compressor/releases/tag/v2.1.1 +[v2.4.0+cpu]: https://github.com/intel/intel-extension-for-pytorch/releases/tag/v2.4.0%2Bcpu [v2.3.0+cpu]: https://github.com/intel/intel-extension-for-pytorch/releases/tag/v2.3.0%2Bcpu [v2.2.0+cpu]: https://github.com/intel/intel-extension-for-pytorch/releases/tag/v2.2.0%2Bcpu [v2.1.100+cpu]: https://github.com/intel/intel-extension-for-pytorch/releases/tag/v2.1.0%2Bcpu @@ -400,13 +410,14 @@ It is the image user's responsibility to ensure that any use of The images below [v2.0.100+cpu]: https://github.com/intel/intel-extension-for-pytorch/releases/tag/v2.0.0%2Bcpu [v2.0.0+cpu]: https://github.com/intel/intel-extension-for-pytorch/releases/tag/v2.0.0%2Bcpu +[ccl-v2.4.0]: https://github.com/intel/torch-ccl/releases/tag/v2.4.0%2Bcpu%2Brc0 [ccl-v2.3.0]: https://github.com/intel/torch-ccl/releases/tag/v2.3.0%2Bcpu [ccl-v2.2.0]: https://github.com/intel/torch-ccl/releases/tag/v2.2.0%2Bcpu [ccl-v2.1.0]: https://github.com/intel/torch-ccl/releases/tag/v2.1.0%2Bcpu [ccl-v2.0.0]: https://github.com/intel/torch-ccl/releases/tag/v2.1.0%2Bcpu -[v4.41.2]: https://github.com/huggingface/transformers/releases/tag/v4.41.2 +[v4.44.0]: https://github.com/huggingface/transformers/releases/tag/v4.44.0 [803]: https://dgpu-docs.intel.com/releases/LTS_803.29_20240131.html [736]: https://dgpu-docs.intel.com/releases/stable_736_25_20231031.html diff --git a/pytorch/docker-compose.yaml b/pytorch/docker-compose.yaml index 03f51ab4..75d2e8d2 100644 --- a/pytorch/docker-compose.yaml +++ b/pytorch/docker-compose.yaml @@ -25,16 +25,16 @@ services: BASE_IMAGE_NAME: ${BASE_IMAGE_NAME:-ubuntu} BASE_IMAGE_TAG: ${BASE_IMAGE_TAG:-22.04} GITHUB_RUN_NUMBER: 
${GITHUB_RUN_NUMBER:-0} - IPEX_VERSION: ${IPEX_VERSION:-2.3.0} + IPEX_VERSION: ${IPEX_VERSION:-2.4.0} MINIFORGE_VERSION: ${MINIFORGE_VERSION:-Linux-x86_64} NO_PROXY: '' PACKAGE_OPTION: ${PACKAGE_OPTION:-pip} PYTHON_VERSION: ${PYTHON_VERSION:-3.10} - PYTORCH_VERSION: ${PYTORCH_VERSION:-2.3.0+cpu} + PYTORCH_VERSION: ${PYTORCH_VERSION:-2.4.0+cpu} REGISTRY: ${REGISTRY} REPO: ${REPO} - TORCHAUDIO_VERSION: ${TORCHAUDIO_VERSION:-2.3.0+cpu} - TORCHVISION_VERSION: ${TORCHVISION_VERSION:-0.18.0+cpu} + TORCHAUDIO_VERSION: ${TORCHAUDIO_VERSION:-2.4.0} + TORCHVISION_VERSION: ${TORCHVISION_VERSION:-0.19.0} context: . labels: dependency.python: ${PYTHON_VERSION:-3.10} @@ -43,21 +43,21 @@ services: org.opencontainers.base.name: "intel/python:3.10-core" org.opencontainers.image.name: "intel/intel-optimized-pytorch" org.opencontainers.image.title: "Intel® Extension for PyTorch Base Image" - org.opencontainers.image.version: ${IPEX_VERSION:-2.2.0}-${PACKAGE_OPTION:-pip}-base + org.opencontainers.image.version: ${IPEX_VERSION:-2.4.0}-${PACKAGE_OPTION:-pip}-base target: ipex-base-${PACKAGE_OPTION:-pip} command: > sh -c "python -c 'import torch; import intel_extension_for_pytorch as ipex; print(\"torch:\", torch.__version__, \" ipex:\",ipex.__version__)'" depends_on: - ${PACKAGE_OPTION:-pip} - image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.3.0}-base + image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.4.0}-base pull_policy: always jupyter: build: labels: dependency.python.pip: jupyter-requirements.txt - org.opencontainers.base.name: "intel/intel-optimized-pytorch:${IPEX_VERSION:-2.2.0}-${PACKAGE_OPTION:-pip}-base" + org.opencontainers.base.name: "intel/intel-optimized-pytorch:${IPEX_VERSION:-2.4.0}-${PACKAGE_OPTION:-pip}-base" 
org.opencontainers.image.title: "Intel® Extension for PyTorch Jupyter Image" - org.opencontainers.image.version: ${IPEX_VERSION:-2.2.0}-${PACKAGE_OPTION:-pip}-jupyter + org.opencontainers.image.version: ${IPEX_VERSION:-2.4.0}-${PACKAGE_OPTION:-pip}-jupyter target: jupyter command: > bash -c "python -m jupyter --version" @@ -65,7 +65,7 @@ services: http_proxy: ${http_proxy} https_proxy: ${https_proxy} extends: ipex-base - image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.3.0}-jupyter + image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.4.0}-jupyter network_mode: host ports: - 8888:8888 @@ -79,9 +79,9 @@ services: dependency.pip.apt.virtualenv: true dependency.pip.deepspeed: 0.14.4 dependency.python.pip: multinode/requirements.txt - org.opencontainers.base.name: "intel/intel-optimized-pytorch:${IPEX_VERSION:-2.2.0}-${PACKAGE_OPTION:-pip}-base" + org.opencontainers.base.name: "intel/intel-optimized-pytorch:${IPEX_VERSION:-2.4.0}-${PACKAGE_OPTION:-pip}-base" org.opencontainers.image.title: "Intel® Extension for PyTorch MultiNode Image" - org.opencontainers.image.version: ${IPEX_VERSION:-2.2.0}-${PACKAGE_OPTION:-pip}-multinode + org.opencontainers.image.version: ${IPEX_VERSION:-2.4.0}-${PACKAGE_OPTION:-pip}-multinode target: multinode command: > bash -c "python -c 'import neural_compressor;import oneccl_bindings_for_pytorch as oneccl;import deepspeed; @@ -89,7 +89,7 @@ services: \"\\nOneCCL:\", oneccl.__version__, \"\\nDeepspeed:\", deepspeed.__version__)'" extends: ipex-base - image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.3.0}-oneccl-inc-${INC_VERSION:-2.6} + image: 
${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.4.0}-oneccl-inc-${INC_VERSION:-3.0} shm_size: 2gb xpu: build: @@ -177,7 +177,7 @@ services: docs: serving org.opencontainers.base.name: "intel/python:3.10-core" org.opencontainers.image.title: "Intel® Extension for PyTorch Serving Image" - org.opencontainers.image.version: ${IPEX_VERSION:-2.2.0}-serving-cpu + org.opencontainers.image.version: ${IPEX_VERSION:-2.4.0}-serving-cpu target: torchserve command: torchserve --version entrypoint: "" @@ -192,14 +192,14 @@ services: hf-genai: build: args: - HF_VERSION: ${HF_VERSION:-4.41.2} + HF_VERSION: ${HF_VERSION:-4.44.0} labels: dependency.python.pip: hf-genai-requirements.txt - org.opencontainers.base.name: "intel/intel-optimized-pytorch:${IPEX_VERSION:-2.3.0}-${PACKAGE_OPTION:-pip}-multinode" + org.opencontainers.base.name: "intel/intel-optimized-pytorch:${IPEX_VERSION:-2.4.0}-${PACKAGE_OPTION:-pip}-multinode" org.opencontainers.image.title: "Intel® Extension for PyTorch MultiNode Huggingface Generative AI Image" - org.opencontainers.image.version: ${IPEX_VERSION:-2.3.0}-${PACKAGE_OPTION:-pip}-multinode-hf-${HF_VERSION:-4.41.2}-genai" + org.opencontainers.image.version: ${IPEX_VERSION:-2.4.0}-${PACKAGE_OPTION:-pip}-multinode-hf-${HF_VERSION:-4.44.0}-genai" target: hf-genai extends: ipex-base - image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.3.0}-hf-${HF_VERSION:-4.41.2} + image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.4.0}-hf-${HF_VERSION:-4.44.0} command: > - bash -c "python-c 'import transformers; print(transformers.__version__)'" + bash -c "python -c 'import transformers; 
print(transformers.__version__)'" diff --git a/pytorch/hf-genai-requirements.txt b/pytorch/hf-genai-requirements.txt index df77695f..6671cbaf 100644 --- a/pytorch/hf-genai-requirements.txt +++ b/pytorch/hf-genai-requirements.txt @@ -1,13 +1,13 @@ -accelerate==0.32.1 -datasets==2.20.0 +accelerate==0.33.0 +datasets==2.21.0 einops==0.8.0 evaluate==0.4.2 onnxruntime-extensions==0.11.0 onnxruntime==1.18.1 -peft==0.11.1 -protobuf==5.27.2 +peft==0.12.0 +protobuf==5.27.3 py-cpuinfo==9.0.0 scikit-learn==1.5.1 SentencePiece==0.2.0 tokenizers==0.19.1 -transformers==4.42.4 +transformers==4.44.0 diff --git a/pytorch/jupyter-requirements.txt b/pytorch/jupyter-requirements.txt index b5ab6652..e95ad6e8 100644 --- a/pytorch/jupyter-requirements.txt +++ b/pytorch/jupyter-requirements.txt @@ -1,4 +1,4 @@ -jupyterlab==4.3.0a2 +jupyterlab==4.3.0b0 jupyterhub==5.1.0 notebook==7.3.0a1 jupyter-server-proxy>=4.1.2 diff --git a/pytorch/multinode/requirements.txt b/pytorch/multinode/requirements.txt index 53f579ca..c941708a 100644 --- a/pytorch/multinode/requirements.txt +++ b/pytorch/multinode/requirements.txt @@ -1,5 +1,5 @@ -neural-compressor==2.6 -oneccl_bind_pt==2.3.0+cpu ---extra-index-url https://developer.intel.com/ipex-whl-stable-cpu +neural-compressor==3.0 +oneccl_bind_pt==2.4.0+cpu +--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/ oneccl-devel>=2021.13.0 # required to build deepspeed ops mpi4py>=3.1.0 # required to build deepspeed ops diff --git a/pytorch/requirements.txt b/pytorch/requirements.txt index 6e20f9ea..33202d78 100644 --- a/pytorch/requirements.txt +++ b/pytorch/requirements.txt @@ -1,6 +1,6 @@ -torch==2.3.1 -torchvision==0.18.1 -torchaudio==2.3.1 +torch==2.4.0 +torchvision==0.19.0 +torchaudio==2.4.0 -f https://download.pytorch.org/whl/cpu/torch_stable.html -intel_extension_for_pytorch==2.3.100+cpu ---extra-index-url https://developer.intel.com/ipex-whl-stable-cpu +intel_extension_for_pytorch==2.4.0+cpu +--extra-index-url 
https://pytorch-extension.intel.com/release-whl/stable/cpu/us/ diff --git a/pytorch/serving/README.md b/pytorch/serving/README.md index 133c48f4..5e48251f 100644 --- a/pytorch/serving/README.md +++ b/pytorch/serving/README.md @@ -16,7 +16,7 @@ Follow the instructions found in the link above depending on whether you are int curl -O https://download.pytorch.org/models/squeezenet1_1-b8a52dc0.pth docker run --rm -it \ -v $PWD:/home/model-server \ - intel/intel-optimized-pytorch:2.2.0-serving-cpu \ + intel/intel-optimized-pytorch:2.4.0-serving-cpu \ torch-model-archiver --model-name squeezenet \ --version 1.0 \ --model-file model-archive/model.py \ @@ -34,7 +34,7 @@ Test Torchserve with the new archived model. The example below is for the squeez docker run -d --rm --name server \ -v $PWD:/home/model-server/model-store \ --net=host \ - intel/intel-optimized-pytorch:2.2.0-serving-cpu + intel/intel-optimized-pytorch:2.4.0-serving-cpu # Verify that the container has launched successfully docker logs server # Attempt to register the model and make an inference request @@ -87,7 +87,7 @@ As demonstrated in the above example, models must be registered before they can -v $PWD:/home/model-server/model-store \ -v $PWD/config.properties:/home/model-server/config.properties \ --net=host \ - intel/intel-optimized-pytorch:2.2.0-serving-cpu + intel/intel-optimized-pytorch:2.4.0-serving-cpu # Verify that the container has launched successfully docker logs server # Check the models list diff --git a/pytorch/tests/tests.yaml b/pytorch/tests/tests.yaml index 1011c7a0..d903cece 100644 --- a/pytorch/tests/tests.yaml +++ b/pytorch/tests/tests.yaml @@ -13,34 +13,34 @@ # limitations under the License. 
import-ipex-cpu-${PACKAGE_OPTION:-pip}: - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.3.0}-base + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.4.0}-base cmd: python -c "import torch;import intel_extension_for_pytorch as ipex;print(f'torch {torch.__version__} ipex {ipex.__version__}')" import-ipex-xpu-${PACKAGE_OPTION:-pip}: img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.1.30xpu}-xpu-base cmd: python -c "import torch; import intel_extension_for_pytorch as ipex;[print(f'[{i}] {torch.xpu.get_device_properties(i)}') for i in range(torch.xpu.device_count())];" device: ["/dev/dri"] import-cpu-jupyter-${PACKAGE_OPTION:-pip}: - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.3.0}-jupyter + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.4.0}-jupyter cmd: python -m jupyter --version import-xpu-jupyter-${PACKAGE_OPTION:-pip}: img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.1.30xpu}-xpu-jupyter cmd: python -m jupyter --version device: ["/dev/dri"] import-cpu-oneccl-${PACKAGE_OPTION:-pip}: - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.3.0}-oneccl-inc-${INC_VERSION:-2.6} + img: 
${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.4.0}-oneccl-inc-${INC_VERSION:-3.0} cmd: python -c "'import oneccl_bindings_for_pytorch as oneccl;print(oneccl.__version__)'" import-cpu-transformers-${PACKAGE_OPTION:-pip}: - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.3.0}-hf-${HF_VERSION:-4.41.2} + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.4.0}-hf-${HF_VERSION:-4.44.0} cmd: python -c "import transformers;print(f'transformers {transformers.__version__}');assert transformers.utils.import_utils.is_ipex_available()" import-cpu-inc-${PACKAGE_OPTION:-pip}: - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.3.0}-oneccl-inc-${INC_VERSION:-2.6} + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.4.0}-oneccl-inc-${INC_VERSION:-3.0} cmd: python -c "'import neural_compressor as inc;print(inc.__version__)'" import-cpu-deepspeed-${PACKAGE_OPTION:-pip}: - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.3.0}-oneccl-inc-${INC_VERSION:-2.6} + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.4.0}-oneccl-inc-${INC_VERSION:-3.0} cmd: ds_report shm_size: 2gb ipex-cpu-${PACKAGE_OPTION:-pip}: - img: 
${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.3.0}-base + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.4.0}-base cmd: python /tests/ipex-resnet50.py --ipex --device cpu --backend gloo volumes: - dst: /tests @@ -58,21 +58,21 @@ ipex-xpu-jupyter-${PACKAGE_OPTION:-pip}: device: ["/dev/dri"] notebook: True oneccl-${PACKAGE_OPTION:-pip}: - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.3.0}-oneccl-inc-${INC_VERSION:-2.6} + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.4.0}-oneccl-inc-${INC_VERSION:-3.0} cmd: ipexrun cpu /tests/ipex-resnet50.py --ipex --device cpu --backend ccl privileged: true volumes: - dst: /tests src: $PWD/pytorch/tests oneccl-ds-${PACKAGE_OPTION:-pip}: - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.3.0}-oneccl-inc-${INC_VERSION:-2.6} + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.4.0}-oneccl-inc-${INC_VERSION:-3.0} cmd: ipexrun cpu /tests/ipex-resnet50.py --ipex --device cpu --backend ccl --deepspeed privileged: true volumes: - dst: /tests src: $PWD/pytorch/tests inc-${PACKAGE_OPTION:-pip}: - img: 
${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.3.0}-oneccl-inc-${INC_VERSION:-2.6} + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.4.0}-oneccl-inc-${INC_VERSION:-3.0} cmd: python /tests/inc-quant.py volumes: - dst: /tests diff --git a/pytorch/torchserve-requirements.txt b/pytorch/torchserve-requirements.txt index 0dbb45d5..f495a686 100644 --- a/pytorch/torchserve-requirements.txt +++ b/pytorch/torchserve-requirements.txt @@ -7,6 +7,6 @@ torch-model-archiver==0.11.1 torch-workflow-archiver==0.2.14 torchserve==0.11.1 torchtext==0.18.0 -torchvision==0.18.1 --f https://developer.intel.com/ipex-whl-stable-cpu -intel_extension_for_pytorch==2.3.100+cpu +torchvision==0.19.0 +-f https://pytorch-extension.intel.com/release-whl/stable/cpu/us/ +intel_extension_for_pytorch==2.4.0+cpu diff --git a/pytorch/xpu-requirements.txt b/pytorch/xpu-requirements.txt index b64b92a4..73129f9d 100644 --- a/pytorch/xpu-requirements.txt +++ b/pytorch/xpu-requirements.txt @@ -3,5 +3,5 @@ torchvision==0.16.0.post2+cxx11.abi torchaudio==2.1.0.post2+cxx11.abi intel_extension_for_pytorch==2.1.30+xpu oneccl_bind_pt==2.1.300+xpu ---extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us +--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ setuptools==71.1.0 From 4a7f238c7c625dca543644d8633042b968bd290d Mon Sep 17 00:00:00 2001 From: Srikanth Ramakrishna Date: Wed, 14 Aug 2024 13:18:47 -0700 Subject: [PATCH 09/50] update ipex layers to 2.1.40-xpu (#305) Signed-off-by: Srikanth Ramakrishna Signed-off-by: sharvil10 Signed-off-by: Tyler Titsworth Signed-off-by: dependabot[bot] Signed-off-by: tylertitsworth Signed-off-by: Srikanth Ramakrishna Co-authored-by: Sharvil Shah Co-authored-by: Tyler Titsworth 
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- pytorch/Dockerfile | 18 +++-------- pytorch/README.md | 18 +++++++---- pytorch/docker-compose.yaml | 58 ++++++++++++++++-------------------- pytorch/tests/tests.yaml | 8 ++--- pytorch/xpu-requirements.txt | 15 +++++----- 5 files changed, 53 insertions(+), 64 deletions(-) diff --git a/pytorch/Dockerfile b/pytorch/Dockerfile index 0a68ac58..1a5b497d 100644 --- a/pytorch/Dockerfile +++ b/pytorch/Dockerfile @@ -36,11 +36,6 @@ ARG PYTHON_VERSION ARG PYTHON_BASE=${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER}-${BASE_IMAGE_NAME}-${BASE_IMAGE_TAG}-${PACKAGE_OPTION}-py${PYTHON_VERSION}-base FROM ${PYTHON_BASE} AS ipex-base-pip -ARG IPEX_VERSION -ARG PYTORCH_VERSION -ARG TORCHAUDIO_VERSION -ARG TORCHVISION_VERSION - WORKDIR / COPY requirements.txt . @@ -49,11 +44,6 @@ RUN python -m pip install --no-cache-dir -r requirements.txt && \ FROM ${PYTHON_BASE} AS ipex-base-idp -ARG IPEX_VERSION -ARG PYTORCH_VERSION -ARG TORCHAUDIO_VERSION -ARG TORCHVISION_VERSION - WORKDIR / COPY requirements.txt . 
@@ -158,8 +148,8 @@ RUN apt-get update && \ rm -rf /var/lib/apt/lists/* RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \ - gpg --dearmor --output /usr/share/keyrings/intel-graphics.gpg -RUN echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy/lts/2350 unified" | \ + gpg --dearmor --yes --output /usr/share/keyrings/intel-graphics.gpg +RUN echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy unified" | \ tee /etc/apt/sources.list.d/intel-gpu-jammy.list ARG ICD_VER @@ -171,8 +161,8 @@ RUN apt-get update && \ apt-get install -y --no-install-recommends --fix-missing \ intel-opencl-icd=${ICD_VER} \ intel-level-zero-gpu=${LEVEL_ZERO_GPU_VER} \ - level-zero=${LEVEL_ZERO_VER} \ - level-zero-dev=${LEVEL_ZERO_DEV_VER} && \ + libze1=${LEVEL_ZERO_VER} \ + libze-dev=${LEVEL_ZERO_DEV_VER} && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* diff --git a/pytorch/README.md b/pytorch/README.md index de9a45b3..53adcb1d 100644 --- a/pytorch/README.md +++ b/pytorch/README.md @@ -24,6 +24,7 @@ The images below include support for both CPU and GPU optimizations: | Tag(s) | Pytorch | IPEX | Driver | Dockerfile | | ---------------------- | -------- | -------------- | ------ | --------------- | +| `2.1.40-xpu` | [v2.1.0] | [v2.1.40+xpu] | [914] | [v0.4.0-Beta] | | `2.1.30-xpu` | [v2.1.0] | [v2.1.30+xpu] | [803] | [v0.4.0-Beta] | | `2.1.20-xpu` | [v2.1.0] | [v2.1.20+xpu] | [803] | [v0.3.4] | | `2.1.10-xpu` | [v2.1.0] | [v2.1.10+xpu] | [736] | [v0.2.3] | @@ -36,7 +37,7 @@ docker run -it --rm \ --device /dev/dri \ -v /dev/dri/by-path:/dev/dri/by-path \ --ipc=host \ - intel/intel-extension-for-pytorch:2.1.30-xpu + intel/intel-extension-for-pytorch:2.1.40-xpu ``` --- @@ -45,8 +46,9 @@ The images below additionally include [Jupyter Notebook](https://jupyter.org/) s | Tag(s) | Pytorch | IPEX | Driver | Jupyter Port | Dockerfile | | 
--------------------- | -------- | ------------- | ------ | ------------ | --------------- | -| `2.1.20-xpu-pip-jupyter` | [v2.1.0] | [v2.1.20+xpu] | [803] | `8888` | [v0.3.4] | -| `2.1.10-xpu-pip-jupyter` | [v2.1.0] | [v2.1.10+xpu] | [736] | `8888` | [v0.2.3] | +| `2.1.40-xpu-pip-jupyter` | [v2.1.0] | [v2.1.40+xpu] | [914] | `8888` | [v0.4.0-Beta] | +| `2.1.20-xpu-pip-jupyter` | [v2.1.0] | [v2.1.20+xpu] | [803] | `8888` | [v0.3.4] | +| `2.1.10-xpu-pip-jupyter` | [v2.1.0] | [v2.1.10+xpu] | [736] | `8888` | [v0.2.3] | ### Run the XPU Jupyter Container @@ -55,7 +57,7 @@ docker run -it --rm \ -p 8888:8888 \ --device /dev/dri \ -v /dev/dri/by-path:/dev/dri/by-path \ - intel/intel-extension-for-pytorch:2.1.20-xpu-pip-jupyter + intel/intel-extension-for-pytorch:2.1.40-xpu-pip-jupyter ``` After running the command above, copy the URL (something like `http://127.0.0.1:$PORT/?token=***`) into your browser to access the notebook server. @@ -308,6 +310,7 @@ The images below are built only with CPU and GPU optimizations and include [Inte | Tag(s) | Pytorch | IPEX | Driver | Dockerfile | | ---------------- | -------- | ------------ | -------- | ------ | +| `2.1.40-xpu-idp-base` | [v2.1.0] | [v2.1.40+xpu] | [914] | [v0.4.0-Beta] | | `2.1.30-xpu-idp-base` | [v2.1.0] | [v2.1.30+xpu] | [803] | [v0.4.0-Beta] | | `2.1.10-xpu-idp-base` | [v2.1.0] | [v2.1.10+xpu] | [736] | [v0.2.3] | @@ -315,8 +318,9 @@ The images below additionally include [Jupyter Notebook](https://jupyter.org/) s | Tag(s) | Pytorch | IPEX | Driver | Jupyter Port | Dockerfile | | --------------------- | -------- | ------------- | ------ | ------------ | --------------- | -| `2.1.20-xpu-idp-jupyter` | [v2.1.0] | [v2.1.20+xpu] | [803] | `8888` | [v0.3.4] | -| `2.1.10-xpu-idp-jupyter` | [v2.1.0] | [v2.1.10+xpu] | [736] | `8888` | [v0.2.3] | +| `2.1.40-xpu-idp-jupyter` | [v2.1.0] | [v2.1.40+xpu] | [914] | `8888` | [v0.4.0-Beta] | +| `2.1.20-xpu-idp-jupyter` | [v2.1.0] | [v2.1.20+xpu] | [803] | `8888` | [v0.3.4] | +| 
`2.1.10-xpu-idp-jupyter` | [v2.1.0] | [v2.1.10+xpu] | [736] | `8888` | [v0.2.3] | ## Build from Source @@ -382,6 +386,7 @@ It is the image user's responsibility to ensure that any use of The images below [v0.2.3]: https://github.com/intel/ai-containers/blob/v0.2.3/pytorch/Dockerfile [v0.1.0]: https://github.com/intel/ai-containers/blob/v0.1.0/pytorch/Dockerfile +[v2.1.40+xpu]: https://github.com/intel/intel-extension-for-pytorch/releases/tag/v2.1.40%2Bxpu [v2.1.30+xpu]: https://github.com/intel/intel-extension-for-pytorch/releases/tag/v2.1.30%2Bxpu [v2.1.20+xpu]: https://github.com/intel/intel-extension-for-pytorch/releases/tag/v2.1.20%2Bxpu [v2.1.10+xpu]: https://github.com/intel/intel-extension-for-pytorch/releases/tag/v2.1.10%2Bxpu @@ -419,6 +424,7 @@ It is the image user's responsibility to ensure that any use of The images below [v4.44.0]: https://github.com/huggingface/transformers/releases/tag/v4.44.0 +[914]: https://dgpu-docs.intel.com/releases/stable_914_33_20240730.html [803]: https://dgpu-docs.intel.com/releases/LTS_803.29_20240131.html [736]: https://dgpu-docs.intel.com/releases/stable_736_25_20231031.html [647]: https://dgpu-docs.intel.com/releases/stable_647_21_20230714.html diff --git a/pytorch/docker-compose.yaml b/pytorch/docker-compose.yaml index 75d2e8d2..838ee5cb 100644 --- a/pytorch/docker-compose.yaml +++ b/pytorch/docker-compose.yaml @@ -94,38 +94,34 @@ services: xpu: build: args: - CCL_VER: ${CCL_VER:-2021.12.0-309} - DPCPP_VER: ${DPCPP_VER:-2024.1.0-963} - ICD_VER: ${ICD_VER:-23.43.27642.40-803~22.04} - IPEX_XPU_VERSION: ${IPEX_VERSION:-2.1.20} - LEVEL_ZERO_DEV_VER: ${LEVEL_ZERO_DEV_VER:-1.14.0-744~22.04} - LEVEL_ZERO_GPU_VER: ${LEVEL_ZERO_GPU_VER:-1.3.27642.40-803~22.04} - LEVEL_ZERO_VER: ${LEVEL_ZERO_VER:-1.14.0-744~22.04} - MKL_VER: ${MKL_VER:-2024.1.0-691} + CCL_VER: ${CCL_VER:-2021.13.1-31} + DPCPP_VER: ${DPCPP_VER:-2024.2.1-1079} + ICD_VER: ${ICD_VER:-24.22.29735.27-914~22.04} + LEVEL_ZERO_DEV_VER: 
${LEVEL_ZERO_DEV_VER:-1.17.6-914~22.04} + LEVEL_ZERO_GPU_VER: ${LEVEL_ZERO_GPU_VER:-1.3.29735.27-914~22.04} + LEVEL_ZERO_VER: ${LEVEL_ZERO_VER:-1.17.6-914~22.04} + MKL_VER: ${MKL_VER:-2024.2.1-103} NO_PROXY: '' - ONECCL_VERSION: ${ONECCL_VERSION:-2.1.300} PACKAGE_OPTION: ${PACKAGE_OPTION:-pip} - PYTORCH_XPU_VERSION: ${PYTORCH_VERSION:-2.1.0} - TORCHVISION_XPU_VERSION: ${TORCHVISION_VERSION:-0.16.0} labels: dependency.apt.build-essential: true dependency.apt.clinfo: true dependency.apt.git: true dependency.apt.gnupg2: true dependency.apt.gpg-agent: true - dependency.apt.intel-level-zero-gpu: ${LEVEL_ZERO_GPU_VER:-1.3.27642.40-803~22.04} - dependency.apt.intel-oneapi-runtime-ccl: ${CCL_VER:-2021.12.0-309} - dependency.apt.intel-oneapi-runtime-dpcpp-cpp: ${DPCPP_VER:-2024.1.0-963} - dependency.apt.intel-oneapi-runtime-mkl: ${MKL_VER:-2024.1.0-691} + dependency.apt.intel-level-zero-gpu: ${LEVEL_ZERO_GPU_VER:-1.3.29735.27-914~22.04} + dependency.apt.intel-oneapi-runtime-ccl: ${CCL_VER:-2021.13.1-31} + dependency.apt.intel-oneapi-runtime-dpcpp-cpp: ${DPCPP_VER:-2024.2.1-1079} + dependency.apt.intel-oneapi-runtime-mkl: ${MKL_VER:-2024.2.1-103} dependency.apt.intel-opencl-icd: ${ICD_VER:-23.43.27642.40-803~22.04} - dependency.apt.level-zero: ${LEVEL_ZERO_VER:-1.14.0-744~22.04} - dependency.apt.level-zero-dev: ${LEVEL_ZERO_DEV_VER:-1.14.0-744~22.04} + dependency.apt.level-zero: ${LEVEL_ZERO_VER:-1.17.6-914~22.04} + dependency.apt.level-zero-dev: ${LEVEL_ZERO_DEV_VER:-1.17.6-914~22.04} dependency.apt.rsync: true dependency.apt.unzip: true dependency.idp.pip: false org.opencontainers.base.name: "intel/python:3.10-core" org.opencontainers.image.title: "Intel® Extension for PyTorch XPU Base Image" - org.opencontainers.image.version: ${IPEX_VERSION:-2.1.20}-xpu-${PACKAGE_OPTION:-pip}-base + org.opencontainers.image.version: ${IPEX_VERSION:-2.1.40}-xpu-${PACKAGE_OPTION:-pip}-base target: ipex-xpu-base command: > python -c "import torch;print(torch.device('xpu'));import @@ -135,33 
+131,29 @@ services: {ipex.xpu.get_device_properties(i)}') for i in range(ipex.xpu.device_count())];" extends: ipex-base - image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.1.30xpu}-xpu-base + image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.1.40xpu}-xpu-base xpu-jupyter: build: args: - CCL_VER: ${CCL_VER:-2021.12.0-309} - DPCPP_VER: ${DPCPP_VER:-2024.1.0-963} - ICD_VER: ${ICD_VER:-23.43.27642.40-803~22.04} - IPEX_XPU_VERSION: ${IPEX_VERSION:-2.1.20} - LEVEL_ZERO_DEV_VER: ${LEVEL_ZERO_DEV_VER:-1.14.0-744~22.04} - LEVEL_ZERO_GPU_VER: ${LEVEL_ZERO_GPU_VER:-1.3.27642.40-803~22.04} - LEVEL_ZERO_VER: ${LEVEL_ZERO_VER:-1.14.0-744~22.04} - MKL_VER: ${MKL_VER:-2024.1.0-691} + CCL_VER: ${CCL_VER:-2021.13.1-31} + DPCPP_VER: ${DPCPP_VER:-2024.2.1-1079} + ICD_VER: ${ICD_VER:-24.22.29735.27-914~22.04} + LEVEL_ZERO_DEV_VER: ${LEVEL_ZERO_DEV_VER:-1.17.6-914~22.04} + LEVEL_ZERO_GPU_VER: ${LEVEL_ZERO_GPU_VER:-1.3.29735.27-914~22.04} + LEVEL_ZERO_VER: ${LEVEL_ZERO_VER:-1.17.6-914~22.04} + MKL_VER: ${MKL_VER:-2024.2.1-103} NO_PROXY: '' - ONECCL_VERSION: ${ONECCL_VERSION:-2.1.200} PACKAGE_OPTION: ${PACKAGE_OPTION:-pip} - PYTORCH_XPU_VERSION: ${PYTORCH_VERSION:-2.1.0} - TORCHVISION_XPU_VERSION: ${TORCHVISION_VERSION:-0.16.0} labels: dependency.python.pip: jupyter-requirements.txt - org.opencontainers.base.name: "intel/intel-optimized-pytorch:${IPEX_VERSION:-2.1.20}-xpu-${PACKAGE_OPTION:-pip}-base" + org.opencontainers.base.name: "intel/intel-optimized-pytorch:${IPEX_VERSION:-2.1.40}-xpu-${PACKAGE_OPTION:-pip}-base" org.opencontainers.image.title: "Intel® Extension for PyTorch XPU Jupyter Image" - org.opencontainers.image.version: ${IPEX_VERSION:-2.1.20}-xpu-${PACKAGE_OPTION:-pip}-jupyter + org.opencontainers.image.version: 
${IPEX_VERSION:-2.1.40}-xpu-${PACKAGE_OPTION:-pip}-jupyter target: ipex-xpu-jupyter command: > bash -c "python -m jupyter --version" extends: ipex-base - image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.1.30xpu}-xpu-jupyter + image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.1.40xpu}-xpu-jupyter ports: - 8888:8888 torchserve: diff --git a/pytorch/tests/tests.yaml b/pytorch/tests/tests.yaml index d903cece..21aeeadc 100644 --- a/pytorch/tests/tests.yaml +++ b/pytorch/tests/tests.yaml @@ -16,14 +16,14 @@ import-ipex-cpu-${PACKAGE_OPTION:-pip}: img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.4.0}-base cmd: python -c "import torch;import intel_extension_for_pytorch as ipex;print(f'torch {torch.__version__} ipex {ipex.__version__}')" import-ipex-xpu-${PACKAGE_OPTION:-pip}: - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.1.30xpu}-xpu-base + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.1.40xpu}-xpu-base cmd: python -c "import torch; import intel_extension_for_pytorch as ipex;[print(f'[{i}] {torch.xpu.get_device_properties(i)}') for i in range(torch.xpu.device_count())];" device: ["/dev/dri"] import-cpu-jupyter-${PACKAGE_OPTION:-pip}: img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.4.0}-jupyter cmd: python -m jupyter 
--version import-xpu-jupyter-${PACKAGE_OPTION:-pip}: - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.1.30xpu}-xpu-jupyter + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.1.40xpu}-xpu-jupyter cmd: python -m jupyter --version device: ["/dev/dri"] import-cpu-oneccl-${PACKAGE_OPTION:-pip}: @@ -46,14 +46,14 @@ ipex-cpu-${PACKAGE_OPTION:-pip}: - dst: /tests src: $PWD/pytorch/tests ipex-xpu-${PACKAGE_OPTION:-pip}: - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.1.30xpu}-xpu-base + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.1.40xpu}-xpu-base cmd: python /tests/ipex-resnet50.py --ipex --device xpu device: ["/dev/dri"] volumes: - dst: /tests src: $PWD/pytorch/tests ipex-xpu-jupyter-${PACKAGE_OPTION:-pip}: - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.1.30xpu}-xpu-jupyter + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.1.40xpu}-xpu-jupyter cmd: papermill --log-output /jupyter/xpu.ipynb -k python3 device: ["/dev/dri"] notebook: True diff --git a/pytorch/xpu-requirements.txt b/pytorch/xpu-requirements.txt index 73129f9d..5d7d2e8a 100644 --- a/pytorch/xpu-requirements.txt +++ b/pytorch/xpu-requirements.txt @@ -1,7 +1,8 @@ -torch==2.1.0.post2+cxx11.abi -torchvision==0.16.0.post2+cxx11.abi 
-torchaudio==2.1.0.post2+cxx11.abi -intel_extension_for_pytorch==2.1.30+xpu -oneccl_bind_pt==2.1.300+xpu ---extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ -setuptools==71.1.0 +torch==2.1.0.post3+cxx11.abi +torchvision==0.16.0.post3+cxx11.abi +torchaudio==2.1.0.post3+cxx11.abi +intel_extension_for_pytorch==2.1.40+xpu +oneccl_bind_pt==2.1.400+xpu +--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us +setuptools==69.5.1 +numpy==1.26.4 From 14fa971ea5c82384f72fbd7cd01841622b3d0f4c Mon Sep 17 00:00:00 2001 From: Tyler Titsworth Date: Wed, 14 Aug 2024 14:33:28 -0700 Subject: [PATCH 10/50] update pytorch cpu whl index (#312) Signed-off-by: tylertitsworth --- pytorch/requirements.txt | 8 ++++---- pytorch/serving/build-kfs.sh | 2 +- pytorch/serving/patch.yaml | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pytorch/requirements.txt b/pytorch/requirements.txt index 33202d78..664b5ad8 100644 --- a/pytorch/requirements.txt +++ b/pytorch/requirements.txt @@ -1,6 +1,6 @@ -torch==2.4.0 -torchvision==0.19.0 -torchaudio==2.4.0 --f https://download.pytorch.org/whl/cpu/torch_stable.html +torch==2.4.0+cpu +torchvision==0.19.0+cpu +torchaudio==2.4.0+cpu +--extra-index-url https://download.pytorch.org/whl/cpu intel_extension_for_pytorch==2.4.0+cpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/ diff --git a/pytorch/serving/build-kfs.sh b/pytorch/serving/build-kfs.sh index 7cdedc93..89e30823 100755 --- a/pytorch/serving/build-kfs.sh +++ b/pytorch/serving/build-kfs.sh @@ -16,7 +16,7 @@ cd .. 
|| exit docker compose pull torchserve -docker tag "$(docker images -q | head -n1)" intel/torchserve:latest +docker tag "${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-ubuntu-22.04-py3.10-torchserve" intel/torchserve:latest git clone https://github.com/pytorch/serve cd serve/kubernetes/kserve || exit git apply ../../../serving/kfs.patch diff --git a/pytorch/serving/patch.yaml b/pytorch/serving/patch.yaml index 487eab34..cd18e847 100644 --- a/pytorch/serving/patch.yaml +++ b/pytorch/serving/patch.yaml @@ -242,7 +242,7 @@ spec: - grpc-v1 containers: - name: kserve-container - image: "intel/intel-extension-for-pytorch:2.2.0-serving-cpu-kserve" + image: "intel/intel-extension-for-pytorch:2.4.0-serving-cpu-kserve" args: - torchserve - --start From 7b8b19b19a36d3ac281ab7b87eb393f63bf5bb2c Mon Sep 17 00:00:00 2001 From: Tyler Titsworth Date: Fri, 16 Aug 2024 09:24:00 -0700 Subject: [PATCH 11/50] Update Intel Optimized ML Docs (#313) Signed-off-by: tylertitsworth --- classical-ml/README.md | 15 ++++++++++++--- classical-ml/docker-compose.yaml | 10 +++++----- classical-ml/tests/tests.yaml | 6 +++--- 3 files changed, 20 insertions(+), 11 deletions(-) diff --git a/classical-ml/README.md b/classical-ml/README.md index 06cfa613..9d63355c 100644 --- a/classical-ml/README.md +++ b/classical-ml/README.md @@ -10,7 +10,8 @@ The images below include [Intel® Extension for Scikit-learn*] and [XGBoost*]. 
| Tag(s) | Intel SKLearn | Scikit-learn | XGBoost | Dockerfile | | ------------------------------------------------- | -------------- | ------------ | -------- | --------------- | -| `2024.5.0-pip-base`, `latest` | [v2024.5.0] | [v1.5.0] | [v2.1.0] | [v0.4.0] | +| `2024.6.0-pip-base`, `latest` | [v2024.6.0] | [v1.5.0] | [v2.1.0] | [v0.4.0] | +| `2024.5.0-pip-base` | [v2024.5.0] | [v1.5.0] | [v2.1.0] | [v0.4.0] | | `2024.3.0-pip-base` | [v2024.3.0] | [v1.4.2] | [v2.0.3] | [v0.4.0-Beta] | | `2024.2.0-xgboost-2.0.3-pip-base` | [v2024.2.0] | [v1.4.1] | [v2.0.3] | [v0.4.0-Beta] | | `scikit-learning-2024.0.0-xgboost-2.0.2-pip-base` | [v2024.0.0] | [v1.3.2] | [v2.0.2] | [v0.3.4] | @@ -19,6 +20,7 @@ The images below additionally include [Jupyter Notebook](https://jupyter.org/) s | Tag(s) | Intel SKLearn | Scikit-learn | XGBoost | Dockerfile | | ---------------------------------------------------- | -------------- | ------------ | -------- | --------------- | +| `2024.6.0-pip-jupyter` | [v2024.6.0] | [v1.5.1] | [v2.1.1] | [v0.4.0] | | `2024.5.0-pip-jupyter` | [v2024.5.0] | [v1.5.0] | [v2.1.0] | [v0.4.0] | | `2024.3.0-pip-jupyter` | [v2024.3.0] | [v1.4.2] | [v2.0.3] | [v0.4.0-Beta] | | `2024.2.0-xgboost-2.0.3-pip-jupyter` | [v2024.2.0] | [v1.4.1] | [v2.0.3] | [v0.4.0-Beta] | @@ -43,7 +45,9 @@ The images below include [Intel® Distribution for Python*]: | Tag(s) | Intel SKLearn | Scikit-learn | XGBoost | Dockerfile | | ------------------------------------------------- | -------------- | ------------ | -------- | --------------- | -| `2024.3.0-idp-base` | [v2024.5.0] | [v1.5.0] | [v2.1.0] | [v0.4.0] | +| `2024.6.0-idp-base` | [v2024.6.0] | [v1.5.1] | [v2.1.1] | [v0.4.0] | +| `2024.5.0-idp-base` | [v2024.5.0] | [v1.5.0] | [v2.1.0] | [v0.4.0] | +| `2024.3.0-idp-base` | [v2024.3.0] | [v1.4.1] | [v2.1.0] | [v0.4.0] | | `2024.2.0-xgboost-2.0.3-idp-base` | [v2024.2.0] | [v1.4.1] | [v2.0.3] | [v0.4.0-Beta] | | `scikit-learning-2024.0.0-xgboost-2.0.2-idp-base` | [v2024.0.0] | [v1.3.2] 
| [v2.0.2] | [v0.3.4] | @@ -51,7 +55,9 @@ The images below additionally include [Jupyter Notebook](https://jupyter.org/) s | Tag(s) | Intel SKLearn | Scikit-learn | XGBoost | Dockerfile | | ---------------------------------------------------- | -------------- | ------------ | -------- | --------------- | -| `2024.3.0-idp-jupyter` | [v2024.5.0] | [v1.5.0] | [v2.1.0] | [v0.4.0] | +| `2024.6.0-idp-jupyter` | [v2024.6.0] | [v1.5.1] | [v2.1.1] | [v0.4.0] | +| `2024.5.0-idp-jupyter` | [v2024.5.0] | [v1.5.0] | [v2.1.0] | [v0.4.0] | +| `2024.3.0-idp-jupyter` | [v2024.3.0] | [v1.4.0] | [v2.1.0] | [v0.4.0] | | `2024.2.0-xgboost-2.0.3-idp-jupyter` | [v2024.2.0] | [v1.4.1] | [v2.0.3] | [v0.4.0-Beta] | | `scikit-learning-2024.0.0-xgboost-2.0.2-idp-jupyter` | [v2024.0.0] | [v1.3.2] | [v2.0.2] | [v0.3.4] | @@ -89,16 +95,19 @@ It is the image user's responsibility to ensure that any use of The images below [Scikit-learn*]: https://scikit-learn.org/stable/ [XGBoost*]: https://github.com/dmlc/xgboost +[v2024.6.0]: https://github.com/intel/scikit-learn-intelex/releases/tag/2024.6.0 [v2024.5.0]: https://github.com/intel/scikit-learn-intelex/releases/tag/2024.5.0 [v2024.3.0]: https://github.com/intel/scikit-learn-intelex/releases/tag/2024.3.0 [v2024.2.0]: https://github.com/intel/scikit-learn-intelex/releases/tag/2024.2.0 [v2024.0.0]: https://github.com/intel/scikit-learn-intelex/releases/tag/2024.0.0 +[v1.5.1]: https://github.com/scikit-learn/scikit-learn/releases/tag/1.5.1 [v1.5.0]: https://github.com/scikit-learn/scikit-learn/releases/tag/1.5.0 [v1.4.2]: https://github.com/scikit-learn/scikit-learn/releases/tag/1.4.2 [v1.4.1]: https://github.com/scikit-learn/scikit-learn/releases/tag/1.4.1 [v1.3.2]: https://github.com/scikit-learn/scikit-learn/releases/tag/1.3.2 +[v2.1.1]: https://github.com/dmlc/xgboost/releases/tag/v2.1.1 [v2.1.0]: https://github.com/dmlc/xgboost/releases/tag/v2.1.0 [v2.0.3]: https://github.com/dmlc/xgboost/releases/tag/v2.0.3 [v2.0.2]: 
https://github.com/dmlc/xgboost/releases/tag/v2.0.2 diff --git a/classical-ml/docker-compose.yaml b/classical-ml/docker-compose.yaml index 0a775bdc..491005de 100644 --- a/classical-ml/docker-compose.yaml +++ b/classical-ml/docker-compose.yaml @@ -40,21 +40,21 @@ services: org.opencontainers.base.name: "intel/python:3.10-core" org.opencontainers.image.name: "intel/intel-optimized-ml" org.opencontainers.image.title: "Intel® Optimized ML Base Image" - org.opencontainers.image.version: ${SKLEARN_VERSION:-2024.4.0}-${PACKAGE_OPTION:-pip}-base + org.opencontainers.image.version: ${SKLEARN_VERSION:-2024.6.0}-${PACKAGE_OPTION:-pip}-base target: ml-base-${PACKAGE_OPTION:-pip} command: > bash -c "python -c 'import sklearnex, sklearn; import xgboost as xgb; print(\"Scikit version:\", sklearn.__version__, \"\\nXGBoost version:\", xgb.__version__)'" depends_on: - ${PACKAGE_OPTION:-pip} - image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-scikit-learn-${SCIKIT_VERSION:-2024.5.0}-xgboost-${XGBOOST_VERSION:-2.1.0}-base + image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-scikit-learn-${SKLEARN_VERSION:-2024.6.0}-xgboost-${XGBOOST_VERSION:-2.1.0}-base pull_policy: always jupyter: build: labels: dependency.python.pip: jupyter-requirements.txt - org.opencontainers.base.name: "intel/intel-optimized-ml:${SKLEARN_VERSION:-2024.4.0}-${PACKAGE_OPTION:-pip}-base" + org.opencontainers.base.name: "intel/intel-optimized-ml:${SKLEARN_VERSION:-2024.6.0}-${PACKAGE_OPTION:-pip}-base" org.opencontainers.image.title: "Intel® Optimized ML Jupyter Base Image" - org.opencontainers.image.version: ${SKLEARN_VERSION:-2024.4.0}-${PACKAGE_OPTION:-pip}-jupyter + org.opencontainers.image.version: ${SKLEARN_VERSION:-2024.6.0}-${PACKAGE_OPTION:-pip}-jupyter target: jupyter command: > bash -c "python -m 
jupyter --version" @@ -62,5 +62,5 @@ services: http_proxy: ${http_proxy} https_proxy: ${https_proxy} extends: ml-base - image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-scikit-learn-${SCIKIT_VERSION:-2024.5.0}-xgboost-${XGBOOST_VERSION:-2.1.0}-jupyter + image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-scikit-learn-${SKLEARN_VERSION:-2024.6.0}-xgboost-${XGBOOST_VERSION:-2.1.0}-jupyter network_mode: host diff --git a/classical-ml/tests/tests.yaml b/classical-ml/tests/tests.yaml index 0016987b..197dd285 100644 --- a/classical-ml/tests/tests.yaml +++ b/classical-ml/tests/tests.yaml @@ -14,13 +14,13 @@ --- classical-ml-import-${PACKAGE_OPTION:-pip}: - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-scikit-learn-${SCIKIT_VERSION:-2024.5.0}-xgboost-${XGBOOST_VERSION:-2.1.0}-base + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-scikit-learn-${SKLEARN_VERSION:-2024.6.0}-xgboost-${XGBOOST_VERSION:-2.1.0}-base cmd: python -c "from sklearnex import patch_sklearn; patch_sklearn();import xgboost as xgb; print(xgb.__version__)" classical-ml-import-${PACKAGE_OPTION:-pip}-jupyter: - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-scikit-learn-${SCIKIT_VERSION:-2024.5.0}-xgboost-${XGBOOST_VERSION:-2.1.0}-jupyter + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-scikit-learn-${SKLEARN_VERSION:-2024.6.0}-xgboost-${XGBOOST_VERSION:-2.1.0}-jupyter cmd: sh -c "python -m 
jupyter --version" classical-ml-performance-${PACKAGE_OPTION:-pip}: - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-scikit-learn-${SCIKIT_VERSION:-2024.5.0}-xgboost-${XGBOOST_VERSION:-2.1.0}-base + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-scikit-learn-${SKLEARN_VERSION:-2024.6.0}-xgboost-${XGBOOST_VERSION:-2.1.0}-base cmd: python /tests/performance.py volumes: - src: $PWD/classical-ml/tests From d952100ba4bbf44826a25a92b33074bc16578347 Mon Sep 17 00:00:00 2001 From: Srikanth Ramakrishna Date: Fri, 16 Aug 2024 14:13:21 -0700 Subject: [PATCH 12/50] rework PR 310 for review (#314) Signed-off-by: Srikanth Ramakrishna --- tensorflow/Dockerfile | 7 +++---- tensorflow/README.md | 21 +++++++++++++++++++-- tensorflow/docker-compose.yaml | 26 +++++++++++++------------- tensorflow/tests/tests.yaml | 12 ++++++++---- tensorflow/xpu-requirements.txt | 4 ++-- 5 files changed, 45 insertions(+), 25 deletions(-) diff --git a/tensorflow/Dockerfile b/tensorflow/Dockerfile index 47dc728f..a54b5466 100644 --- a/tensorflow/Dockerfile +++ b/tensorflow/Dockerfile @@ -254,11 +254,10 @@ RUN no_proxy="" NO_PROXY="" apt-get update && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* -ARG TF_VER="2.15.0" +ARG ITEX_VER="2.15.0.1" -RUN conda install intel-extension-for-tensorflow=${TF_VER}=*xpu* \ - -c https://software.repos.intel.com/python/conda \ - -c conda-forge +RUN conda install -n idp -y intel-extension-for-tensorflow=${ITEX_VER}=*xpu* \ + -c https://software.repos.intel.com/python/conda ENV LD_LIBRARY_PATH=/opt/conda/envs/idp/lib:$LD_LIBRARY_PATH diff --git a/tensorflow/README.md b/tensorflow/README.md index c92533ef..195cebdf 100644 --- a/tensorflow/README.md +++ b/tensorflow/README.md @@ -37,6 +37,7 @@ The images below additionally include [Jupyter 
Notebook](https://jupyter.org/) s | Tag(s) | TensorFlow | IPEX | Driver | Dockerfile | | ------------- | ----------- | ------------- | ------ | --------------- | +| `2.15.0.1-xpu-jupyter` | [v2.15.1] | [v2.15.0.1] | [803.63]| [v0.4.0-Beta] | | `xpu-jupyter` | [v2.14.1] | [v2.14.0.1] | [736] | [v0.3.4] | ### Run the XPU Jupyter Container @@ -48,7 +49,7 @@ docker run -it --rm \ --device /dev/dri \ -v /dev/dri/by-path:/dev/dri/by-path \ --ipc=host \ - intel/intel-extension-for-tensorflow:xpu-jupyter + intel/intel-extension-for-tensorflow:2.15.0.1-xpu-jupyter ``` After running the command above, copy the URL (something like `http://127.0.0.1:$PORT/?token=***`) into your browser to access the notebook server. @@ -104,7 +105,7 @@ docker run -it --rm \ --net=host \ -v $PWD/workspace:/workspace \ -w /workspace \ - intel/intel-extension-for-tensorflow:xpu-jupyter + intel/intel-extension-for-tensorflow:2.15.0-pip-jupyter ``` After running the command above, copy the URL (something like `http://127.0.0.1:$PORT/?token=***`) into your browser to access the notebook server. 
@@ -170,6 +171,22 @@ The images below additionally include [Horovod]: | `2.14.0-idp-openmpi-multinode` | [v2.14.1] | [v2.14.0.1] | [v0.28.1] | [v0.3.4] | | `2.13-idp-openmpi-mulitnode` | [v2.13.0] | [v2.13.0.0] | [v0.28.0] | [v0.2.3] | +## XPU images with Intel® Distribution for Python* + +The images below are built only with CPU and GPU optimizations and include [Intel® Distribution for Python*]: + +| Tag(s) | Pytorch | ITEX | Driver | Dockerfile | +| ---------------- | -------- | ------------ | -------- | ------ | +| `2.15.0.1-xpu-idp-base` | [v2.15.1] | [v2.15.0.1] | [803] | [v0.4.0-Beta] | +| `2.15.0-xpu-idp-base` | [v2.15.0] | [v2.15.0.0] | [803] | [v0.4.0-Beta] | + +The images below additionally include [Jupyter Notebook](https://jupyter.org/) server: + +| Tag(s) | Pytorch | IPEX | Driver | Jupyter Port | Dockerfile | +| --------------------- | -------- | ------------- | ------ | ------------ | --------------- | +| `2.15.0.1-xpu-idp-jupyter` | [v2.15.1] | [v2.15.0.1] | [803] | `8888` | [v0.4.0-Beta] | +| `2.15.0-xpu-idp-jupyter` | [v2.1.0] | [v2.15.0.0] | [803] | `8888` | [v0.4.0-Beta] | + ## Build from Source To build the images from source, clone the [Intel® AI Containers](https://github.com/intel/ai-containers) repository, follow the main `README.md` file to setup your environment, and run the following command: diff --git a/tensorflow/docker-compose.yaml b/tensorflow/docker-compose.yaml index 2d7e84a0..9583b296 100644 --- a/tensorflow/docker-compose.yaml +++ b/tensorflow/docker-compose.yaml @@ -95,10 +95,10 @@ services: LEVEL_ZERO_GPU_VER: ${LEVEL_ZERO_GPU_VER:-1.3.27642.40-803~22.04} LEVEL_ZERO_VER: ${LEVEL_ZERO_VER:-1.14.0-744~22.04} LEVEL_ZERO_DEV_VER: ${LEVEL_ZERO_DEV_VER:-1.14.0-744~22.04} - DPCPP_VER: ${DPCPP_VER:-2024.1.0-963} - MKL_VER: ${MKL_VER:-2024.1.0-691} - CCL_VER: ${CCL_VER:-2021.12.0-309} - TF_VER: ${TF_VER:-2.15.0} + DPCPP_VER: ${DPCPP_VER:-2024.2.1-1079} + MKL_VER: ${MKL_VER:-2024.2.1-103} + CCL_VER: ${CCL_VER:-2021.13.1-31} + TF_VER: 
${TF_VER:-2.15.1} no_proxy: '' NO_PROXY: '' labels: @@ -108,9 +108,9 @@ services: dependency.apt.gnupg2: true dependency.apt.gpg-agent: true dependency.apt.intel-level-zero-gpu: ${LEVEL_ZERO_GPU_VER:-1.3.27642.40-803~22.04} - dependency.apt.intel-oneapi-runtime-ccl: ${CCL_VER:-2021.12.0-309} - dependency.apt.intel-oneapi-runtime-dpcpp-cpp: ${DPCPP_VER:-2024.1.0-963} - dependency.apt.intel-oneapi-runtime-mkl: ${MKL_VER:-2024.1.0-691} + dependency.apt.intel-oneapi-runtime-ccl: ${CCL_VER:-2021.13.1-31} + dependency.apt.intel-oneapi-runtime-dpcpp-cpp: ${DPCPP_VER:-2024.2.1-1079} + dependency.apt.intel-oneapi-runtime-mkl: ${MKL_VER:-2024.2.1-103} dependency.apt.intel-opencl-icd: ${ICD_VER:-23.43.27642.40-803~22.04} dependency.apt.level-zero: ${LEVEL_ZERO_VER:-1.14.0-744~22.04} dependency.apt.level-zero-dev: ${LEVEL_ZERO_DEV_VER:-1.14.0-744~22.04} @@ -124,7 +124,7 @@ services: command: > sh -c "python -c 'import tensorflow as tf;print(tf.__version__);from tensorflow.python.client import device_lib;print(device_lib.list_local_devices())'" extends: tf-base - image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-itex-${TF_VERSION:-2.15.0}-itex-xpu-base + image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-itex-${TF_VERSION:-2.15.1}-itex-xpu-base xpu-jupyter: build: args: @@ -132,10 +132,10 @@ services: LEVEL_ZERO_GPU_VER: ${LEVEL_ZERO_GPU_VER:-1.3.27642.40-803~22.04} LEVEL_ZERO_VER: ${LEVEL_ZERO_VER:-1.14.0-744~22.04} LEVEL_ZERO_DEV_VER: ${LEVEL_ZERO_DEV_VER:-1.14.0-744~22.04} - DPCPP_VER: ${DPCPP_VER:-2024.1.0-963} - MKL_VER: ${MKL_VER:-2024.1.0-691} - CCL_VER: ${CCL_VER:-2021.12.0-309} - TF_VER: ${TF_VER:-2.15.0} + DPCPP_VER: ${DPCPP_VER:-2024.2.1-1079} + MKL_VER: ${MKL_VER:-2024.2.1-103} + CCL_VER: ${CCL_VER:-2021.13.1-31} + ITEX_VER: ${ITEX_VER:-2.15.0.1} no_proxy: '' 
NO_PROXY: '' labels: @@ -147,4 +147,4 @@ services: extends: tf-base command: > bash -c "python -m jupyter --version" - image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-itex-${TF_VERSION:-2.15.0}-itex-xpu-jupyter + image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-itex-${TF_VERSION:-2.15.1}-itex-xpu-jupyter diff --git a/tensorflow/tests/tests.yaml b/tensorflow/tests/tests.yaml index 0d45d9e8..0fa5b2b3 100644 --- a/tensorflow/tests/tests.yaml +++ b/tensorflow/tests/tests.yaml @@ -17,8 +17,9 @@ import-itex-cpu-${PACKAGE_OPTION:-pip}: img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-${TF_VERSION:-2.15.0}-base cmd: python -c "from tensorflow.python.client import device_lib; print(device_lib.list_local_devices())" import-itex-xpu-${PACKAGE_OPTION:-pip}: - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-itex-${TF_VERSION:-2.15.0}-itex-xpu-base + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-itex-${TF_VERSION:-2.15.1}-itex-xpu-base cmd: python /tests/xpu_import_test.py + device: ["/dev/dri"] volumes: - src: ${PWD}/tensorflow/tests dst: /tests @@ -26,8 +27,9 @@ import-cpu-jupyter-${PACKAGE_OPTION:-pip}: img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-${TF_VERSION:-2.15.0}-jupyter cmd: python -m jupyter --version import-xpu-jupyter-${PACKAGE_OPTION:-pip}: - img: 
${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-itex-${TF_VERSION:-2.15.0}-itex-xpu-jupyter + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-itex-${TF_VERSION:-2.15.1}-itex-xpu-jupyter cmd: python -m jupyter --version + device: ["/dev/dri"] import-multinode-${PACKAGE_OPTION:-pip}: img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-${TF_VERSION:-2.15.0}-horovod-${HOROVOD_VERSION:-0.28.1}-inc-${INC_VERSION:-2.6} cmd: horovodrun --check-build && mpirun --version && python -c 'import horovod.tensorflow as hvd;hvd.init();import horovod.tensorflow' @@ -41,15 +43,17 @@ itex-cpu-${PACKAGE_OPTION:-pip}: - src: ${PWD}/tensorflow/tests dst: /tests itex-xpu-${PACKAGE_OPTION:-pip}: - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-itex-${TF_VERSION:-2.15.0}-itex-xpu-base + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-itex-${TF_VERSION:-2.15.1}-itex-xpu-base cmd: python /tests/tf_base_test.py + device: ["/dev/dri"] volumes: - dst: /tests src: $PWD/tensorflow/tests itex-xpu-jupyter-${PACKAGE_OPTION:-pip}: - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-itex-${TF_VERSION:-2.15.0}-itex-xpu-jupyter + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-itex-${TF_VERSION:-2.15.1}-itex-xpu-jupyter cmd: papermill --log-output /jupyter/xpu.ipynb -k python3 - notebook: True + 
device: ["/dev/dri"] multinode-${PACKAGE_OPTION:-pip}: img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-${TF_VERSION:-2.15.0}-horovod-${HOROVOD_VERSION:-0.28.1}-inc-${INC_VERSION:-2.6} cmd: horovodrun -np 2 -H localhost:2 --binding-args="-bind-to socket -map-by socket" python /tests/tf_base_test.py diff --git a/tensorflow/xpu-requirements.txt b/tensorflow/xpu-requirements.txt index c7099048..0280ef9d 100644 --- a/tensorflow/xpu-requirements.txt +++ b/tensorflow/xpu-requirements.txt @@ -1,2 +1,2 @@ -tensorflow==2.15.0 -intel-extension-for-tensorflow[xpu]==2.15.0.0 +tensorflow==2.15.1 +intel-extension-for-tensorflow[xpu]==2.15.0.1 From 47d5dfb067cce9ee5a60322671234979eb919cec Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 16 Aug 2024 19:23:50 -0700 Subject: [PATCH 13/50] Bump the tensorflow group across 1 directory with 8 updates (#315) Signed-off-by: dependabot[bot] Signed-off-by: tylertitsworth Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: tylertitsworth --- tensorflow/Dockerfile | 150 +++++++-------------- tensorflow/README.md | 103 +++++++++++++- tensorflow/docker-compose.yaml | 39 +++--- tensorflow/hvd-requirements.txt | 1 - tensorflow/jupyter-requirements.txt | 4 +- tensorflow/multinode-requirements.txt | 3 - tensorflow/multinode/dockerd-entrypoint.sh | 21 +++ tensorflow/multinode/generate_ssh_keys.sh | 28 ++++ tensorflow/multinode/requirements.txt | 5 + tensorflow/multinode/ssh_config | 4 + tensorflow/multinode/sshd_config | 12 ++ tensorflow/ompi-requirements.txt | 1 - tensorflow/requirements.txt | 6 +- tensorflow/serving/requirements.txt | 8 +- tensorflow/tests/tests.yaml | 14 +- tensorflow/xpu-requirements.txt | 2 +- 16 files changed, 258 insertions(+), 143 deletions(-) delete mode 100644 tensorflow/hvd-requirements.txt delete mode 100644 
tensorflow/multinode-requirements.txt create mode 100755 tensorflow/multinode/dockerd-entrypoint.sh create mode 100755 tensorflow/multinode/generate_ssh_keys.sh create mode 100644 tensorflow/multinode/requirements.txt create mode 100644 tensorflow/multinode/ssh_config create mode 100644 tensorflow/multinode/sshd_config delete mode 100644 tensorflow/ompi-requirements.txt diff --git a/tensorflow/Dockerfile b/tensorflow/Dockerfile index a54b5466..48fb7332 100644 --- a/tensorflow/Dockerfile +++ b/tensorflow/Dockerfile @@ -33,12 +33,11 @@ ENV KMP_AFFINITY='granularity=fine,verbose,compact,1,0' \ KMP_BLOCKTIME=1 \ KMP_SETTINGS=1 -ARG TF_VERSION - WORKDIR / COPY requirements.txt . -RUN python -m pip install --no-cache-dir -r requirements.txt +RUN python -m pip install --no-cache-dir -r requirements.txt && \ + rm -rf requirements.txt ADD https://raw.githubusercontent.com/intel/intel-extension-for-tensorflow/master/third-party-programs/dockerlayer/THIRD-PARTY-PROGRAMS.txt /licenses/ ADD https://raw.githubusercontent.com/intel/intel-extension-for-tensorflow/master/third-party-programs/dockerlayer/third-party-program-of-intel-extension-for-tensorflow.txt /licenses/ @@ -53,12 +52,13 @@ ENV KMP_AFFINITY='granularity=fine,verbose,compact,1,0' \ ENV PATH /usr/bin:/root/conda/envs/idp/bin:/root/conda/condabin:~/conda/bin/:${PATH} ENV TF_ENABLE_ONEDNN_OPTS=1 -ARG TF_VERSION WORKDIR / COPY requirements.txt . 
-RUN python -m pip install --no-cache-dir -r requirements.txt +RUN conda run -n idp python -m pip install --no-cache-dir -r requirements.txt && \ + rm -rf requirements.txt && \ + conda clean -y --all ADD https://raw.githubusercontent.com/intel/intel-extension-for-tensorflow/master/third-party-programs/dockerlayer/THIRD-PARTY-PROGRAMS.txt /licenses/ ADD https://raw.githubusercontent.com/intel/intel-extension-for-tensorflow/master/third-party-programs/dockerlayer/third-party-program-of-intel-extension-for-tensorflow.txt /licenses/ @@ -77,37 +77,43 @@ EXPOSE 8888 CMD ["bash", "-c", "source /etc/bash.bashrc && jupyter notebook --notebook-dir=/jupyter --port 8888 --ip 0.0.0.0 --no-browser --allow-root --ServerApp.token= --ServerApp.password= --ServerApp.allow_origin=* --ServerApp.base_url=$NB_PREFIX"] -FROM tf-base-${PACKAGE_OPTION} AS openmpi +FROM tf-base-${PACKAGE_OPTION} AS multinode RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ + build-essential \ + cmake \ + g++ \ + gcc \ + git \ + libgl1-mesa-glx \ + libglib2.0-0 \ libopenmpi-dev \ + numactl \ openmpi-bin \ - openmpi-common + openmpi-common \ + python3-dev \ + unzip \ + virtualenv -WORKDIR / -COPY ompi-requirements.txt . +ENV SIGOPT_PROJECT=. 
-RUN python -m pip install --no-cache-dir -r ompi-requirements.txt +WORKDIR / +COPY multinode/requirements.txt requirements.txt -FROM openmpi AS horovod +RUN python -m pip install --no-cache-dir -r requirements.txt && \ + rm -rf requirements.txt -ENV LD_LIBRARY_PATH /lib64/:/usr/lib64/:/usr/local/lib64 +ENV LD_LIBRARY_PATH="/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch/opt/mpi/libfabric/lib:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch/lib" RUN apt-get install -y --no-install-recommends --fix-missing \ - unzip \ openssh-client \ openssh-server && \ - rm /etc/ssh/ssh_host_*_key \ - /etc/ssh/ssh_host_*_key.pub - -ENV OMPI_ALLOW_RUN_AS_ROOT=1 -ENV OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 - -ENV OMPI_MCA_tl_tcp_if_exclude="lo,docker0" + rm /etc/ssh/ssh_host_*_key \ + /etc/ssh/ssh_host_*_key.pub && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* -# Install OpenSSH for MPI to communicate between containers -RUN mkdir -p /var/run/sshd && \ - echo 'LoginGraceTime 0' >> /etc/ssh/sshd_config +RUN mkdir -p /var/run/sshd # Install Horovod ARG HOROVOD_WITH_TENSORFLOW=1 @@ -116,43 +122,32 @@ ARG HOROVOD_WITHOUT_PYTORCH=1 ARG HOROVOD_WITHOUT_GLOO=1 ARG HOROVOD_WITH_MPI=1 -RUN apt-get install -y --no-install-recommends --fix-missing \ - build-essential \ - cmake \ - g++ \ - gcc \ - git \ - libgl1-mesa-glx \ - libglib2.0-0 \ - python3-dev && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* - -WORKDIR / -COPY hvd-requirements.txt . - -RUN python -m pip install --no-cache-dir -r hvd-requirements.txt - -ENV SIGOPT_PROJECT=. 
- -RUN wget --progress=dot:giga --no-check-certificate https://github.com/intel/neural-compressor/raw/master/docker/third-party-programs-tensorflow.txt -O /licenses/inc-third-party-programs-tensorflow.txt && \ - wget --progress=dot:giga --no-check-certificate https://raw.githubusercontent.com/intel/neural-compressor/master/LICENSE -O /licenses/INC_LICENSE +ENV LD_LIBRARY_PATH /lib64/:/usr/lib64/:/usr/local/lib64 -FROM horovod AS multinode-pip +RUN python -m pip install --no-cache-dir horovod==0.28.1 -WORKDIR / -COPY multinode-requirements.txt . +ARG PYTHON_VERSION -RUN python -m pip install --no-cache-dir -r multinode-requirements.txt +COPY multinode/generate_ssh_keys.sh /generate_ssh_keys.sh -FROM horovod AS multinode-idp +# modify generate_ssh_keys to be a helper script +# print how to use helper script on bash startup +# Avoids loop for further execution of the startup file +ARG PACKAGE_OPTION=pip +ARG PYPATH="/usr/local/lib/python${PYTHON_VERSION}/dist-packages" +RUN if [ "${PACKAGE_OPTION}" = "idp" ]; then PYPATH="/opt/conda/envs/idp/lib/python${PYTHON_VERSION}/site-packages"; fi && \ + echo "source ${PYPATH}/oneccl_bindings_for_pytorch/env/setvars.sh" >> ~/.startup && \ + cat '/generate_ssh_keys.sh' >> ~/.startup && \ + rm -rf /generate_ssh_keys.sh -WORKDIR / -COPY multinode-requirements.txt . 
+COPY multinode/dockerd-entrypoint.sh /usr/local/bin/dockerd-entrypoint.sh +COPY multinode/sshd_config /etc/ssh/sshd_config +COPY multinode/ssh_config /etc/ssh/ssh_config -RUN python -m pip install --no-cache-dir -r multinode-requirements.txt +RUN wget --progress=dot:giga --no-check-certificate https://github.com/intel/neural-compressor/raw/master/docker/third-party-programs-tensorflow.txt -O /licenses/inc-third-party-programs-tensorflow.txt && \ + wget --progress=dot:giga --no-check-certificate https://raw.githubusercontent.com/intel/neural-compressor/master/LICENSE -O /licenses/INC_LICENSE -FROM ${PYTHON_BASE} AS itex-xpu-base-pip +FROM ${PYTHON_BASE} AS itex-xpu-base RUN apt-get update && \ apt-get install -y --no-install-recommends --fix-missing \ @@ -219,54 +214,7 @@ ADD https://raw.githubusercontent.com/intel/intel-extension-for-tensorflow/maste ENV LD_LIBRARY_PATH=/opt/intel/oneapi/redist/lib:$LD_LIBRARY_PATH -FROM ${PYTHON_BASE} AS itex-xpu-base-idp - -RUN apt-get update && \ - apt-get install -y --no-install-recommends --fix-missing \ - apt-utils \ - build-essential \ - clinfo \ - git \ - gnupg2 \ - gpg-agent \ - rsync \ - unzip \ - wget && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* - -ARG ICD_VER -ARG LEVEL_ZERO_GPU_VER -ARG LEVEL_ZERO_VER -ARG LEVEL_ZERO_DEV_VER - -RUN no_proxy="" NO_PROXY="" wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \ - gpg --dearmor --output /usr/share/keyrings/intel-graphics.gpg -RUN echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy/lts/2350 unified" | \ - tee /etc/apt/sources.list.d/intel-gpu-jammy.list - -RUN no_proxy="" NO_PROXY="" apt-get update && \ - apt-get install -y --no-install-recommends --fix-missing \ - intel-opencl-icd=${ICD_VER} \ - intel-level-zero-gpu=${LEVEL_ZERO_GPU_VER} \ - level-zero=${LEVEL_ZERO_VER} \ - level-zero-dev=${LEVEL_ZERO_DEV_VER} && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* - -ARG 
ITEX_VER="2.15.0.1" - -RUN conda install -n idp -y intel-extension-for-tensorflow=${ITEX_VER}=*xpu* \ - -c https://software.repos.intel.com/python/conda - -ENV LD_LIBRARY_PATH=/opt/conda/envs/idp/lib:$LD_LIBRARY_PATH - -ADD https://raw.githubusercontent.com/intel/intel-extension-for-tensorflow/master/third-party-programs/dockerlayer/THIRD-PARTY-PROGRAMS.txt /licenses/ -ADD https://raw.githubusercontent.com/intel/intel-extension-for-tensorflow/master/third-party-programs/dockerlayer/third-party-program-of-intel-extension-for-tensorflow.txt /licenses/ -ADD https://raw.githubusercontent.com/intel/intel-extension-for-tensorflow/master/third-party-programs/dockerlayer/third-party-programs-of-intel-tensorflow.txt /licenses/ -ADD https://raw.githubusercontent.com/intel/intel-extension-for-tensorflow/master/third-party-programs/dockerlayer/third-party-programs-of-intel-optimization-for-horovod.txt /licenses/ - -FROM itex-xpu-base-${PACKAGE_OPTION} AS itex-xpu-jupyter +FROM itex-xpu-base AS itex-xpu-jupyter WORKDIR /jupyter COPY jupyter-requirements.txt . 
diff --git a/tensorflow/README.md b/tensorflow/README.md index 195cebdf..ac2c8b7c 100644 --- a/tensorflow/README.md +++ b/tensorflow/README.md @@ -85,7 +85,8 @@ The images below are built only with CPU optimizations (GPU acceleration support | Tag(s) | TensorFlow | ITEX | Dockerfile | | --------------------------- | ----------- | ------------ | --------------- | -| `2.15.0-pip-base`, `latest` | [v2.15.0] | [v2.15.0.0] | [v0.4.0-Beta] | +| `2.15.1-pip-base`, `latest` | [v2.15.1] | [v2.15.0.1] | [v0.4.0-Beta] | +| `2.15.0-pip-base` | [v2.15.0] | [v2.15.0.0] | [v0.4.0-Beta] | | `2.14.0-pip-base` | [v2.14.1] | [v2.14.0.1] | [v0.3.4] | | `2.13-pip-base` | [v2.13.0] | [v2.13.0.0] | [v0.2.3] | @@ -93,6 +94,7 @@ The images below additionally include [Jupyter Notebook](https://jupyter.org/) s | Tag(s) | TensorFlow | ITEX | Dockerfile | | -------------------- | ----------- | ------------- | --------------- | +| `2.15.1-pip-jupyter` | [v2.15.1] | [v2.15.0.1] | [v0.4.0-Beta] | | `2.15.0-pip-jupyter` | [v2.15.0] | [v2.15.0.0] | [v0.4.0-Beta] | | `2.14.0-pip-jupyter` | [v2.14.1] | [v2.14.0.1] | [v0.3.4] | | `2.13-pip-jupyter` | [v2.13.0] | [v2.13.0.0] | [v0.2.3] | @@ -105,7 +107,7 @@ docker run -it --rm \ --net=host \ -v $PWD/workspace:/workspace \ -w /workspace \ - intel/intel-extension-for-tensorflow:2.15.0-pip-jupyter + intel/intel-extension-for-tensorflow:2.15.1-pip-jupyter ``` After running the command above, copy the URL (something like `http://127.0.0.1:$PORT/?token=***`) into your browser to access the notebook server. 
@@ -116,10 +118,102 @@ The images below additionally include [Horovod]: | Tag(s) | Tensorflow | ITEX | Horovod | Dockerfile | | ------------------------------ | --------- | ------------ | --------- | --------------- | +| `2.15.1-pip-multinode` | [v2.15.1] | [v2.15.0.1] | [v0.28.1] | [v0.4.0-Beta] | | `2.15.0-pip-multinode` | [v2.15.0] | [v2.15.0.0] | [v0.28.1] | [v0.4.0-Beta] | | `2.14.0-pip-openmpi-multinode` | [v2.14.1] | [v2.14.0.1] | [v0.28.1] | [v0.3.4] | | `2.13-pip-openmpi-mulitnode` | [v2.13.0] | [v2.13.0.0] | [v0.28.0] | [v0.2.3] | +> [!NOTE] +> Passwordless SSH connection is also enabled in the image, but the container does not contain any SSH ID keys. The user needs to mount those keys at `/root/.ssh/id_rsa` and `/etc/ssh/authorized_keys`. + +> [!TIP] +> Before mounting any keys, modify the permissions of those files with `chmod 600 authorized_keys; chmod 600 id_rsa` to grant read access for the default user account. + +#### Setup and Run ITEX Multi-Node Container + +Some additional assembly is required to utilize this container with OpenSSH. To perform any kind of DDP (Distributed Data Parallel) execution, containers are assigned the roles of launcher and worker respectively: + +SSH Server (Worker) + +1. *Authorized Keys* : `/etc/ssh/authorized_keys` + +SSH Client (Launcher) + +1. *Private User Key* : `/root/.ssh/id_rsa` + +To add these files correctly please follow the steps described below. + +1. Setup ID Keys + + You can use the commands provided below to [generate the identity keys](https://www.ssh.com/academy/ssh/keygen#creating-an-ssh-key-pair-for-user-authentication) for OpenSSH. + + ```bash + ssh-keygen -q -N "" -t rsa -b 4096 -f ./id_rsa + touch authorized_keys + cat id_rsa.pub >> authorized_keys + ``` + +2. Configure the permissions and ownership for all of the files you have created so far + + ```bash + chmod 600 id_rsa config authorized_keys + chown root:root id_rsa.pub id_rsa config authorized_keys + ``` + +3. Create a hostfile for horovod. 
(Optional) + + ```txt + Host host1 + HostName + IdentitiesOnly yes + IdentityFile ~/.root/id_rsa + Port + Host host2 + HostName + IdentitiesOnly yes + IdentityFile ~/.root/id_rsa + Port + ... + ``` + +4. Configure [Horovod] in your python script + + ```python + import horovod.torch as hvd + + hvd.init() + ``` + +5. Now start the workers and execute DDP on the launcher + + 1. Worker run command: + + ```bash + docker run -it --rm \ + --net=host \ + -v $PWD/authorized_keys:/etc/ssh/authorized_keys \ + -v $PWD/tests:/workspace/tests \ + -w /workspace \ + intel/intel-optimized-tensorflow:2.15.1-pip-multinode \ + bash -c '/usr/sbin/sshd -D' + ``` + + 2. Launcher run command: + + ```bash + docker run -it --rm \ + --net=host \ + -v $PWD/id_rsa:/root/.ssh/id_rsa \ + -v $PWD/tests:/workspace/tests \ + -v $PWD/hostfile:/root/ssh/config \ + -w /workspace \ + intel/intel-optimized-tensorflow:2.15.1-pip-multinode \ + bash -c 'horovodrun --verbose -np 2 -H host1:1,host2:1 /workspace/tests/tf_base_test.py' + ``` + +> [!NOTE] +> [Intel® MPI] can be configured based on your machine settings. If the above commands do not work for you, see the documentation for how to configure based on your network. 
+ --- The images below are [TensorFlow* Serving] with CPU Optimizations: @@ -151,7 +245,8 @@ The images below are built only with CPU optimizations (GPU acceleration support | Tag(s) | TensorFlow | ITEX | Dockerfile | | --------------------------- | ----------- | ------------ | --------------- | -| `2.15.0-idp-base`, `latest` | [v2.15.0] | [v2.15.0.0] | [v0.4.0-Beta] | +| `2.15.1-idp-base` | [v2.15.1] | [v2.15.0.1] | [v0.4.0-Beta] | +| `2.15.0-idp-base` | [v2.15.0] | [v2.15.0.0] | [v0.4.0-Beta] | | `2.14.0-idp-base` | [v2.14.1] | [v2.14.0.1] | [v0.3.4] | | `2.13-idp-base` | [v2.13.0] | [v2.13.0.0] | [v0.2.3] | @@ -159,6 +254,7 @@ The images below additionally include [Jupyter Notebook](https://jupyter.org/) s | Tag(s) | TensorFlow | ITEX | Dockerfile | | -------------------- | ----------- | ------------- | --------------- | +| `2.15.1-idp-jupyter` | [v2.15.1] | [v2.15.0.1] | [v0.4.0-Beta] | | `2.15.0-idp-jupyter` | [v2.15.0] | [v2.15.0.0] | [v0.4.0-Beta] | | `2.14.0-idp-jupyter` | [v2.14.1] | [v2.14.0.1] | [v0.3.4] | | `2.13-idp-jupyter` | [v2.13.0] | [v2.13.0.0] | [v0.2.3] | @@ -167,6 +263,7 @@ The images below additionally include [Horovod]: | Tag(s) | Tensorflow | ITEX | Horovod | Dockerfile | | ------------------------------ | --------- | ------------ | --------- | --------------- | +| `2.15.1-idp-multinode` | [v2.15.1] | [v2.15.0.1] | [v0.28.1] | [v0.4.0-Beta] | | `2.15.0-idp-multinode` | [v2.15.0] | [v2.15.0.0] | [v0.28.1] | [v0.4.0-Beta] | | `2.14.0-idp-openmpi-multinode` | [v2.14.1] | [v2.14.0.1] | [v0.28.1] | [v0.3.4] | | `2.13-idp-openmpi-mulitnode` | [v2.13.0] | [v2.13.0.0] | [v0.28.0] | [v0.2.3] | diff --git a/tensorflow/docker-compose.yaml b/tensorflow/docker-compose.yaml index 9583b296..18aec65a 100644 --- a/tensorflow/docker-compose.yaml +++ b/tensorflow/docker-compose.yaml @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-version: '3' include: - path: - ../python/docker-compose.yaml @@ -31,7 +30,7 @@ services: PYTHON_VERSION: ${PYTHON_VERSION:-3.10} REGISTRY: ${REGISTRY} REPO: ${REPO} - TF_VERSION: ${TF_VERSION:-2.15.0} + TF_VERSION: ${TF_VERSION:-2.15.1} target: tf-base-${PACKAGE_OPTION:-pip} context: . labels: @@ -41,20 +40,20 @@ services: org.opencontainers.base.name: "intel/python:3.10-core" org.opencontainers.image.name: "intel/intel-optimized-tensorflow" org.opencontainers.image.title: "Intel® Extension for TensorFlow Base Image" - org.opencontainers.image.version: ${TF_VERSION:-2.15.0}-${PACKAGE_OPTION:-pip}-base + org.opencontainers.image.version: ${TF_VERSION:-2.15.1}-${PACKAGE_OPTION:-pip}-base depends_on: - ${PACKAGE_OPTION:-pip} command: > python -c 'import tensorflow as tf; print("Tensorflow Version:", tf.__version__)' - image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-${TF_VERSION:-2.15.0}-base + image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-${TF_VERSION:-2.15.1}-base pull_policy: always jupyter: build: labels: dependency.python.pip: jupyter-requirements.txt - org.opencontainers.base.name: "intel/intel-optimized-tensorflow:${TF_VERSION:-2.15.0}-${PACKAGE_OPTION:-pip}-base" + org.opencontainers.base.name: "intel/intel-optimized-tensorflow:${TF_VERSION:-2.15.1}-${PACKAGE_OPTION:-pip}-base" org.opencontainers.image.title: "Intel® Extension for TensorFlow Jupyter Image" - org.opencontainers.image.version: ${TF_VERSION:-2.15.0}-${PACKAGE_OPTION:-pip}-jupyter + org.opencontainers.image.version: ${TF_VERSION:-2.15.1}-${PACKAGE_OPTION:-pip}-jupyter target: jupyter command: > bash -c "python -m jupyter --version" @@ -62,32 +61,38 @@ services: http_proxy: ${http_proxy} https_proxy: ${https_proxy} extends: tf-base - image: 
${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-${TF_VERSION:-2.15.0}-jupyter + image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-${TF_VERSION:-2.15.1}-jupyter network_mode: host volumes: - /$PWD:/jupyter multinode: build: labels: + dependency.apt.build-essential: true + dependency.apt.cmake: true dependency.apt.gcc: true + dependency.apt.g++: true + dependency.apt.git: true dependency.apt.libgl1-mesa-glx: true dependency.apt.libglib2: true - dependency.apt.python3-dev: true - dependency.pip.apt.virtualenv: true dependency.apt.libopenmpi-dev: true + dependency.apt.numactl: true dependency.apt.openmpi-bin: true - dependency.apt.unzip: true dependency.apt.openssh-client: true dependency.apt.openssh-server: true - dependency.python.pip: multinode-requirements.txt - org.opencontainers.base.name: "intel/intel-optimized-tensorflow:${TF_VERSION:-2.15.0}-${PACKAGE_OPTION:-pip}-base" + dependency.apt.python3-dev: true + dependency.apt.unzip: true + dependency.pip.apt.virtualenv: true + dependency.pip.horovod: 0.28.1 + dependency.python.pip: multinode/requirements.txt + org.opencontainers.base.name: "intel/intel-optimized-tensorflow:${TF_VERSION:-2.15.1}-${PACKAGE_OPTION:-pip}-base" org.opencontainers.image.title: "Intel® Extension for TensorFlow MultiNode Image" - org.opencontainers.image.version: ${TF_VERSION:-2.15.0}-${PACKAGE_OPTION:-pip}-multinode - target: multinode-${PACKAGE_OPTION:-pip} + org.opencontainers.image.version: ${TF_VERSION:-2.15.1}-${PACKAGE_OPTION:-pip}-multinode + target: multinode command: > bash -c "horovodrun --check-build && mpirun --version && python -c 'import horovod.tensorflow as hvd;hvd.init();import horovod.tensorflow;import neural_compressor, tf2onnx; print(\"\\nNeural Compressor Version:\", neural_compressor.__version__, \"\\\nTensorFlow2ONNX 
Version:\", tf2onnx.__version__)'" extends: tf-base - image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-${TF_VERSION:-2.15.0}-horovod-${HOROVOD_VERSION:-0.28.1}-inc-${INC_VERSION:-2.6} + image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-${TF_VERSION:-2.15.1}-horovod-${HOROVOD_VERSION:-0.28.1}-inc-${INC_VERSION:-3.0} xpu: build: args: @@ -120,7 +125,7 @@ services: org.opencontainers.base.name: "intel/python:3.10-core" org.opencontainers.image.title: "Intel® Extension for TensorFlow XPU Base Image" org.opencontainers.image.version: ${TF_VER:-2.15.0}-xpu-${PACKAGE_OPTION:-pip}-base - target: itex-xpu-base-${PACKAGE_OPTION:-pip} + target: itex-xpu-base command: > sh -c "python -c 'import tensorflow as tf;print(tf.__version__);from tensorflow.python.client import device_lib;print(device_lib.list_local_devices())'" extends: tf-base @@ -140,7 +145,7 @@ services: NO_PROXY: '' labels: dependency.python.pip: jupyter-requirements.txt - org.opencontainers.base.name: "intel/intel-optimized-tensorflow:${TF_VERSION:-2.15.0}-xpu-${PACKAGE_OPTION:-pip}-base" + org.opencontainers.base.name: "intel/intel-optimized-tensorflow:${TF_VERSION:-2.15.1}-xpu-${PACKAGE_OPTION:-pip}-base" org.opencontainers.image.title: "Intel® Extension for TensorFlow XPU Jupyter Image" org.opencontainers.image.version: ${TF_VER:-2.15.0}-xpu-${PACKAGE_OPTION:-pip}-jupyter target: itex-xpu-jupyter diff --git a/tensorflow/hvd-requirements.txt b/tensorflow/hvd-requirements.txt deleted file mode 100644 index f2eadcce..00000000 --- a/tensorflow/hvd-requirements.txt +++ /dev/null @@ -1 +0,0 @@ -horovod==0.28.1 diff --git a/tensorflow/jupyter-requirements.txt b/tensorflow/jupyter-requirements.txt index 23a73885..9bdbed92 100644 --- a/tensorflow/jupyter-requirements.txt +++ b/tensorflow/jupyter-requirements.txt 
@@ -1,4 +1,4 @@ -jupyterlab==4.3.0a0 +jupyterlab>=4.2.4 jupyterhub==5.1.0 -notebook==7.3.0a0 +notebook>=7.1.3 jupyter-server-proxy>=4.1.2 diff --git a/tensorflow/multinode-requirements.txt b/tensorflow/multinode-requirements.txt deleted file mode 100644 index d9cff369..00000000 --- a/tensorflow/multinode-requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -cython -tf2onnx -neural-compressor==2.6 diff --git a/tensorflow/multinode/dockerd-entrypoint.sh b/tensorflow/multinode/dockerd-entrypoint.sh new file mode 100755 index 00000000..ba13c0f9 --- /dev/null +++ b/tensorflow/multinode/dockerd-entrypoint.sh @@ -0,0 +1,21 @@ +#!/bin/bash +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -e +set -a +# shellcheck disable=SC1091 +source "$HOME/.startup" +set +a +"$@" diff --git a/tensorflow/multinode/generate_ssh_keys.sh b/tensorflow/multinode/generate_ssh_keys.sh new file mode 100755 index 00000000..0ee61398 --- /dev/null +++ b/tensorflow/multinode/generate_ssh_keys.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: Apache-2.0 + +function gen_single_key() { + ALG_NAME=$1 + if [[ ! -f /etc/ssh/ssh_host_${ALG_NAME}_key ]]; then + ssh-keygen -q -N "" -t "${ALG_NAME}" -f "/etc/ssh/ssh_host_${ALG_NAME}_key" + fi +} + +gen_single_key dsa +gen_single_key rsa +gen_single_key ecdsa +gen_single_key ed25519 diff --git a/tensorflow/multinode/requirements.txt b/tensorflow/multinode/requirements.txt new file mode 100644 index 00000000..80747740 --- /dev/null +++ b/tensorflow/multinode/requirements.txt @@ -0,0 +1,5 @@ +cython>=3.0.11 +impi-rt>=2021.12.0 +mpi4py>=3.1.0 +neural-compressor==3.0 +tf2onnx>=1.16.1 diff --git a/tensorflow/multinode/ssh_config b/tensorflow/multinode/ssh_config new file mode 100644 index 00000000..9ac73017 --- /dev/null +++ b/tensorflow/multinode/ssh_config @@ -0,0 +1,4 @@ +Host * + Port 3022 + IdentityFile ~/.ssh/id_rsa + StrictHostKeyChecking no diff --git a/tensorflow/multinode/sshd_config b/tensorflow/multinode/sshd_config new file mode 100644 index 00000000..4796a48a --- /dev/null +++ b/tensorflow/multinode/sshd_config @@ -0,0 +1,12 @@ +HostKey /etc/ssh/ssh_host_dsa_key +HostKey /etc/ssh/ssh_host_rsa_key +HostKey /etc/ssh/ssh_host_ecdsa_key +HostKey /etc/ssh/ssh_host_ed25519_key +AuthorizedKeysFile /etc/ssh/authorized_keys +## Enable DEBUG log. 
You can ignore this but this may help you debug any issue while enabling SSHD for the first time +LogLevel DEBUG3 +Port 3022 +UsePAM yes +Subsystem sftp /usr/lib/openssh/sftp-server +# https://ubuntu.com/security/CVE-2024-6387 +LoginGraceTime 0 diff --git a/tensorflow/ompi-requirements.txt b/tensorflow/ompi-requirements.txt deleted file mode 100644 index 7b64c166..00000000 --- a/tensorflow/ompi-requirements.txt +++ /dev/null @@ -1 +0,0 @@ -impi-rt>=2021.12.0 diff --git a/tensorflow/requirements.txt b/tensorflow/requirements.txt index 9b50ec78..92fd1059 100644 --- a/tensorflow/requirements.txt +++ b/tensorflow/requirements.txt @@ -1,4 +1,4 @@ -tensorflow==2.15.0 -intel-extension-for-tensorflow[cpu]==2.15.0.0 +tensorflow==2.15.1 +intel-extension-for-tensorflow[cpu]>=2.15,<2.16 tensorflow-hub==0.16.1 -pillow==10.3.0 +pillow==10.4.0 diff --git a/tensorflow/serving/requirements.txt b/tensorflow/serving/requirements.txt index cf28053c..cd80fbcd 100644 --- a/tensorflow/serving/requirements.txt +++ b/tensorflow/serving/requirements.txt @@ -1,5 +1,5 @@ -numpy==2.0.0 -pillow==10.3.0 +numpy==2.0.1 +pillow==10.4.0 requests==2.32.3 -tensorflow==2.16.1 -tensorflow-serving-api==2.16.1 +tensorflow==2.17.0 +tensorflow-serving-api==2.17.0 diff --git a/tensorflow/tests/tests.yaml b/tensorflow/tests/tests.yaml index 0fa5b2b3..43af2239 100644 --- a/tensorflow/tests/tests.yaml +++ b/tensorflow/tests/tests.yaml @@ -14,7 +14,7 @@ --- import-itex-cpu-${PACKAGE_OPTION:-pip}: - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-${TF_VERSION:-2.15.0}-base + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-${TF_VERSION:-2.15.1}-base cmd: python -c "from tensorflow.python.client import device_lib; print(device_lib.list_local_devices())" import-itex-xpu-${PACKAGE_OPTION:-pip}: img: 
${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-itex-${TF_VERSION:-2.15.1}-itex-xpu-base @@ -24,20 +24,20 @@ import-itex-xpu-${PACKAGE_OPTION:-pip}: - src: ${PWD}/tensorflow/tests dst: /tests import-cpu-jupyter-${PACKAGE_OPTION:-pip}: - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-${TF_VERSION:-2.15.0}-jupyter + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-${TF_VERSION:-2.15.1}-jupyter cmd: python -m jupyter --version import-xpu-jupyter-${PACKAGE_OPTION:-pip}: img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-itex-${TF_VERSION:-2.15.1}-itex-xpu-jupyter cmd: python -m jupyter --version device: ["/dev/dri"] import-multinode-${PACKAGE_OPTION:-pip}: - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-${TF_VERSION:-2.15.0}-horovod-${HOROVOD_VERSION:-0.28.1}-inc-${INC_VERSION:-2.6} + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-${TF_VERSION:-2.15.1}-horovod-${HOROVOD_VERSION:-0.28.1}-inc-${INC_VERSION:-3.0} cmd: horovodrun --check-build && mpirun --version && python -c 'import horovod.tensorflow as hvd;hvd.init();import horovod.tensorflow' import-inc-${PACKAGE_OPTION:-pip}: - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-${TF_VERSION:-2.15.0}-horovod-${HOROVOD_VERSION:-0.28.1}-inc-${INC_VERSION:-2.6} + img: 
${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-${TF_VERSION:-2.15.1}-horovod-${HOROVOD_VERSION:-0.28.1}-inc-${INC_VERSION:-3.0} cmd: python -c "import neural_compressor as inc;print(inc.__version__)" itex-cpu-${PACKAGE_OPTION:-pip}: - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-${TF_VERSION:-2.15.0}-base + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-${TF_VERSION:-2.15.1}-base cmd: python /tests/tf_base_test.py volumes: - src: ${PWD}/tensorflow/tests @@ -55,13 +55,13 @@ itex-xpu-jupyter-${PACKAGE_OPTION:-pip}: notebook: True device: ["/dev/dri"] multinode-${PACKAGE_OPTION:-pip}: - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-${TF_VERSION:-2.15.0}-horovod-${HOROVOD_VERSION:-0.28.1}-inc-${INC_VERSION:-2.6} + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-${TF_VERSION:-2.15.1}-horovod-${HOROVOD_VERSION:-0.28.1}-inc-${INC_VERSION:-3.0} cmd: horovodrun -np 2 -H localhost:2 --binding-args="-bind-to socket -map-by socket" python /tests/tf_base_test.py volumes: - dst: /tests src: $PWD/tensorflow/tests inc-${PACKAGE_OPTION:-pip}: - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-${TF_VERSION:-2.15.0}-horovod-${HOROVOD_VERSION:-0.28.1}-inc-${INC_VERSION:-2.6} + img: 
${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-${TF_VERSION:-2.15.1}-horovod-${HOROVOD_VERSION:-0.28.1}-inc-${INC_VERSION:-3.0} cmd: bash /tests/inc_test.sh volumes: - dst: /tests diff --git a/tensorflow/xpu-requirements.txt b/tensorflow/xpu-requirements.txt index 0280ef9d..9e4bb523 100644 --- a/tensorflow/xpu-requirements.txt +++ b/tensorflow/xpu-requirements.txt @@ -1,2 +1,2 @@ -tensorflow==2.15.1 +tensorflow==2.15.0 intel-extension-for-tensorflow[xpu]==2.15.0.1 From 908fd4a56aacfea0fbc571d5cb9ad692a253d00d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 19 Aug 2024 08:58:28 -0700 Subject: [PATCH 14/50] Bump mkdocs-material from 9.5.31 to 9.5.32 in /docs in the docs group (#320) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- docs/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index bbcf99a8..54a9b029 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,7 +1,7 @@ mkdocs-callouts>=1.13.2 mkdocs-git-authors-plugin>=0.8.0 mkdocs-git-revision-date-localized-plugin>=1.2.5 -mkdocs-material==9.5.31 +mkdocs-material==9.5.32 mkdocs-table-reader-plugin>=2.1.0 mkdocs==1.6.0 pandas>=2.0.3 From 6197b787ed3fa71581f42bd60bc7104b4be20220 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 19 Aug 2024 09:08:35 -0700 Subject: [PATCH 15/50] Bump onnxruntime from 1.18.1 to 1.19.0 in /preset (#319) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- preset/inference-optimization/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/preset/inference-optimization/requirements.txt 
b/preset/inference-optimization/requirements.txt index 8f0091ac..3a3f0f13 100644 --- a/preset/inference-optimization/requirements.txt +++ b/preset/inference-optimization/requirements.txt @@ -2,4 +2,4 @@ dataset-librarian==1.0.4 evaluate==0.4.2 git+https://github.com/huggingface/optimum-intel.git tf2onnx==1.16.1 -onnxruntime==1.18.1 +onnxruntime==1.19.0 From f5024ac2be58254da6638e739580e2d4c3870a55 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 19 Aug 2024 09:14:31 -0700 Subject: [PATCH 16/50] Bump matplotlib from 3.9.1.post1 to 3.9.2 in /classical-ml in the classical-ml group (#323) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- classical-ml/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/classical-ml/requirements.txt b/classical-ml/requirements.txt index 484856df..b7ff293b 100644 --- a/classical-ml/requirements.txt +++ b/classical-ml/requirements.txt @@ -1,5 +1,5 @@ daal4py==2024.6.0 -matplotlib==3.9.1.post1 +matplotlib==3.9.2 numpy==1.26.4 scikit-learn-intelex==2024.6.0 threadpoolctl==3.5.0 From e938f411a9f9ab369532774a44e55eae5625b057 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 19 Aug 2024 09:16:28 -0700 Subject: [PATCH 17/50] Bump github/codeql-action from 3.26.0 to 3.26.2 (#322) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/container-ci.yaml | 2 +- .github/workflows/scorecard.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/container-ci.yaml b/.github/workflows/container-ci.yaml index 20213f5b..48fc214d 100644 --- a/.github/workflows/container-ci.yaml +++ b/.github/workflows/container-ci.yaml @@ -155,7 +155,7 @@ jobs: - name: Cleanup if: always() run: docker rmi -f ${{ secrets.REGISTRY }}/${{ 
secrets.REPO }}:${{ matrix.container }} - - uses: github/codeql-action/upload-sarif@eb055d739abdc2e8de2e5f4ba1a8b246daa779aa # v3.26.0 + - uses: github/codeql-action/upload-sarif@429e1977040da7a23b6822b13c129cd1ba93dbb2 # v3.26.2 with: sarif_file: '${{ matrix.container }}-scan.sarif' category: '${{ matrix.container }}' diff --git a/.github/workflows/scorecard.yaml b/.github/workflows/scorecard.yaml index 086103d8..2c387c4a 100644 --- a/.github/workflows/scorecard.yaml +++ b/.github/workflows/scorecard.yaml @@ -53,6 +53,6 @@ jobs: name: SARIF file path: results.sarif retention-days: 5 - - uses: github/codeql-action/upload-sarif@eb055d739abdc2e8de2e5f4ba1a8b246daa779aa # v3.26.0 + - uses: github/codeql-action/upload-sarif@429e1977040da7a23b6822b13c129cd1ba93dbb2 # v3.26.2 with: sarif_file: results.sarif From cdef1e491a1f1d8f67edf1d57ac67c0e177eb3ab Mon Sep 17 00:00:00 2001 From: Srikanth Ramakrishna Date: Tue, 20 Aug 2024 09:25:13 -0700 Subject: [PATCH 18/50] update xpu requirements and image tags (#325) Signed-off-by: Srikanth Ramakrishna --- pytorch/README.md | 2 +- pytorch/xpu-requirements.txt | 4 ++++ tensorflow/README.md | 6 +++--- tensorflow/xpu-requirements.txt | 4 ++++ 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/pytorch/README.md b/pytorch/README.md index 53adcb1d..f036502f 100644 --- a/pytorch/README.md +++ b/pytorch/README.md @@ -24,7 +24,7 @@ The images below include support for both CPU and GPU optimizations: | Tag(s) | Pytorch | IPEX | Driver | Dockerfile | | ---------------------- | -------- | -------------- | ------ | --------------- | -| `2.1.40-xpu` | [v2.1.0] | [v2.1.40+xpu] | [914] | [v0.4.0-Beta] | +| `2.1.40-xpu-pip-base`,`2.1.40-xpu` | [v2.1.0] | [v2.1.40+xpu] | [914] | [v0.4.0-Beta] | | `2.1.30-xpu` | [v2.1.0] | [v2.1.30+xpu] | [803] | [v0.4.0-Beta] | | `2.1.20-xpu` | [v2.1.0] | [v2.1.20+xpu] | [803] | [v0.3.4] | | `2.1.10-xpu` | [v2.1.0] | [v2.1.10+xpu] | [736] | [v0.2.3] | diff --git a/pytorch/xpu-requirements.txt 
b/pytorch/xpu-requirements.txt index 5d7d2e8a..09badb28 100644 --- a/pytorch/xpu-requirements.txt +++ b/pytorch/xpu-requirements.txt @@ -6,3 +6,7 @@ oneccl_bind_pt==2.1.400+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us setuptools==69.5.1 numpy==1.26.4 +idna==3.7 +requests==2.32.0 +tqdm==4.66.3 +urllib3==2.2.2 diff --git a/tensorflow/README.md b/tensorflow/README.md index ac2c8b7c..d71dc349 100644 --- a/tensorflow/README.md +++ b/tensorflow/README.md @@ -16,7 +16,7 @@ The images below include support for both CPU and GPU optimizations: | Tag(s) | TensorFlow | ITEX | Driver | Dockerfile | | ---------------------- | ----------- | -------------- | ------- | --------------- | -| `2.15.0.1-xpu`, `xpu` | [v2.15.1] | [v2.15.0.1] | [803.63]| [v0.4.0-Beta] | +| `2.15.0.1-xpu-pip-base`, `xpu` | [v2.15.1] | [v2.15.0.1] | [803.63]| [v0.4.0-Beta] | | `2.15.0.0-xpu` | [v2.15.0] | [v2.15.0.0] | [803] | [v0.4.0-Beta] | | `2.14.0.1-xpu` | [v2.14.1] | [v2.14.0.1] | [736] | [v0.3.4] | | `2.13.0.0-xpu` | [v2.13.0] | [v2.13.0.0] | [647] | [v0.2.3] | @@ -37,7 +37,7 @@ The images below additionally include [Jupyter Notebook](https://jupyter.org/) s | Tag(s) | TensorFlow | IPEX | Driver | Dockerfile | | ------------- | ----------- | ------------- | ------ | --------------- | -| `2.15.0.1-xpu-jupyter` | [v2.15.1] | [v2.15.0.1] | [803.63]| [v0.4.0-Beta] | +| `2.15.0.1-xpu-pip-jupyter` | [v2.15.1] | [v2.15.0.1] | [803.63]| [v0.4.0-Beta] | | `xpu-jupyter` | [v2.14.1] | [v2.14.0.1] | [736] | [v0.3.4] | ### Run the XPU Jupyter Container @@ -49,7 +49,7 @@ docker run -it --rm \ --device /dev/dri \ -v /dev/dri/by-path:/dev/dri/by-path \ --ipc=host \ - intel/intel-extension-for-tensorflow:2.15.0.1-xpu-jupyter + intel/intel-extension-for-tensorflow:2.15.0.1-xpu-pip-jupyter ``` After running the command above, copy the URL (something like `http://127.0.0.1:$PORT/?token=***`) into your browser to access the notebook server. 
diff --git a/tensorflow/xpu-requirements.txt b/tensorflow/xpu-requirements.txt index 9e4bb523..cbb01fc4 100644 --- a/tensorflow/xpu-requirements.txt +++ b/tensorflow/xpu-requirements.txt @@ -1,2 +1,6 @@ tensorflow==2.15.0 intel-extension-for-tensorflow[xpu]==2.15.0.1 +idna==3.7 +requests==2.32.0 +tqdm==4.66.3 +urllib3==2.2.2 From 4139104eeea434bbd7f84ca797d7c5b93148190c Mon Sep 17 00:00:00 2001 From: Tyler Titsworth Date: Fri, 23 Aug 2024 10:50:32 -0700 Subject: [PATCH 19/50] Add TGI on Intel Chart (#283) Signed-off-by: tylertitsworth --- workflows/charts/tgi/.helmignore | 23 ++++++ workflows/charts/tgi/Chart.yaml | 42 ++++++++++ workflows/charts/tgi/README.md | 30 +++++++ workflows/charts/tgi/README.md.gotmpl | 16 ++++ workflows/charts/tgi/templates/NOTES.txt | 22 +++++ workflows/charts/tgi/templates/_helpers.tpl | 76 +++++++++++++++++ workflows/charts/tgi/templates/deploy.yaml | 81 +++++++++++++++++++ workflows/charts/tgi/templates/ingress.yaml | 76 +++++++++++++++++ workflows/charts/tgi/templates/secret.yaml | 22 +++++ workflows/charts/tgi/templates/service.yaml | 29 +++++++ .../tgi/templates/tests/test-connection.yaml | 29 +++++++ workflows/charts/tgi/values.yaml | 64 +++++++++++++++ 12 files changed, 510 insertions(+) create mode 100644 workflows/charts/tgi/.helmignore create mode 100644 workflows/charts/tgi/Chart.yaml create mode 100644 workflows/charts/tgi/README.md create mode 100644 workflows/charts/tgi/README.md.gotmpl create mode 100644 workflows/charts/tgi/templates/NOTES.txt create mode 100644 workflows/charts/tgi/templates/_helpers.tpl create mode 100644 workflows/charts/tgi/templates/deploy.yaml create mode 100644 workflows/charts/tgi/templates/ingress.yaml create mode 100644 workflows/charts/tgi/templates/secret.yaml create mode 100644 workflows/charts/tgi/templates/service.yaml create mode 100644 workflows/charts/tgi/templates/tests/test-connection.yaml create mode 100644 workflows/charts/tgi/values.yaml diff --git 
a/workflows/charts/tgi/.helmignore b/workflows/charts/tgi/.helmignore new file mode 100644 index 00000000..0e8a0eb3 --- /dev/null +++ b/workflows/charts/tgi/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/workflows/charts/tgi/Chart.yaml b/workflows/charts/tgi/Chart.yaml new file mode 100644 index 00000000..761d8b0c --- /dev/null +++ b/workflows/charts/tgi/Chart.yaml @@ -0,0 +1,42 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +apiVersion: v2 +name: tgi-on-intel +description: A Rust, Python and gRPC server for text generation inference by huggingface on Intel GPUs. + +# A chart can be either an 'application' or a 'library' chart. +# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. +# +# Library charts provide useful utilities or functions for the chart developer. They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. 
+maintainers: + - name: tylertitsworth + email: tyler.titsworth@intel.com + url: https://github.com/tylertitsworth +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 0.1.0 + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. +# It is recommended to use it with quotes. +appVersion: "1.16.0" diff --git a/workflows/charts/tgi/README.md b/workflows/charts/tgi/README.md new file mode 100644 index 00000000..7c020fe1 --- /dev/null +++ b/workflows/charts/tgi/README.md @@ -0,0 +1,30 @@ +# Text Generation Inference on Intel GPU + +A Rust, Python and gRPC server for text generation inference by huggingface on Intel GPUs. + +For more information about how to use Huggingface text-generation-inference with Intel optimizations, check out [huggingface's documentation](https://huggingface.co/docs/text-generation-inference/installation_intel). + +> [!TIP] +> For Gaudi-related documentation, check out [tgi-gaudi](https://github.com/huggingface/tgi-gaudi). 
+ +![Version: 0.1.0](https://img.shields.io/badge/Version-0.1.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.16.0](https://img.shields.io/badge/AppVersion-1.16.0-informational?style=flat-square) + +## Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| deploy.configMapName | string | `"intel-proxy-config"` | ConfigMap of Environment Variables | +| deploy.image | string | `"ghcr.io/huggingface/text-generation-inference:latest-intel"` | Intel TGI Image | +| deploy.model | string | `"HuggingFaceTB/SmolLM-135M"` | Model to be loaded | +| deploy.quantize | string | `""` | Enable Quantization (ex: bitsandbytes-nf4) | +| deploy.replicaCount | int | `1` | Number of pods | +| deploy.resources | object | `{"limits":{"cpu":"4000m","gpu.intel.com/i915":1},"requests":{"cpu":"1000m","memory":"1Gi"}}` | Resource configuration | +| deploy.resources.limits."gpu.intel.com/i915" | int | `1` | Intel GPU Device Configuration | +| fullnameOverride | string | `""` | Full qualified Domain Name | +| ingress | object | `{"annotations":{},"className":"","enabled":false,"hosts":[{"host":"chart-example.local","paths":[{"path":"/","pathType":"ImplementationSpecific"}]}],"tls":[]}` | Ingress configuration | +| nameOverride | string | `""` | Name of the serving service | +| secret.encodedToken | string | `""` | Base64 Encoded Huggingface Hub API Token | +| service | object | `{"port":80,"type":"NodePort"}` | Service configuration | + +---------------------------------------------- +Autogenerated from chart metadata using [helm-docs v1.14.2](https://github.com/norwoodj/helm-docs/releases/v1.14.2) diff --git a/workflows/charts/tgi/README.md.gotmpl b/workflows/charts/tgi/README.md.gotmpl new file mode 100644 index 00000000..0d773d1a --- /dev/null +++ b/workflows/charts/tgi/README.md.gotmpl @@ -0,0 +1,16 @@ +# Text Generation Inference on Intel GPU + +{{ 
template "chart.description" . }} + +For more information about how to use Huggingface text-generation-inference with Intel optimizations, check out [huggingface's documentation](https://huggingface.co/docs/text-generation-inference/installation_intel). + +> [!TIP] +> For Gaudi-related documentation, check out [tgi-gaudi](https://github.com/huggingface/tgi-gaudi). + +{{ template "chart.versionBadge" . }}{{ template "chart.typeBadge" . }}{{ template "chart.appVersionBadge" . }} + +{{ template "chart.requirementsSection" . }} + +{{ template "chart.valuesSection" . }} + +{{ template "helm-docs.versionFooter" . }} diff --git a/workflows/charts/tgi/templates/NOTES.txt b/workflows/charts/tgi/templates/NOTES.txt new file mode 100644 index 00000000..fc906eb6 --- /dev/null +++ b/workflows/charts/tgi/templates/NOTES.txt @@ -0,0 +1,22 @@ +1. Get the application URL by running these commands: +{{- if .Values.ingress.enabled }} +{{- range $host := .Values.ingress.hosts }} + {{- range .paths }} + http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }} + {{- end }} +{{- end }} +{{- else if contains "NodePort" .Values.service.type }} + export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "tgi.fullname" . }}) + export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}") + echo http://$NODE_IP:$NODE_PORT +{{- else if contains "LoadBalancer" .Values.service.type }} + NOTE: It may take a few minutes for the LoadBalancer IP to be available. + You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "tgi.fullname" . }}' + export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "tgi.fullname" . 
}} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}") + echo http://$SERVICE_IP:{{ .Values.service.port }} +{{- else if contains "ClusterIP" .Values.service.type }} + export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "tgi.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}") + export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}") + echo "Visit http://127.0.0.1:8080 to use your application" + kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT +{{- end }} diff --git a/workflows/charts/tgi/templates/_helpers.tpl b/workflows/charts/tgi/templates/_helpers.tpl new file mode 100644 index 00000000..b98dd8cb --- /dev/null +++ b/workflows/charts/tgi/templates/_helpers.tpl @@ -0,0 +1,76 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +{{/* +Expand the name of the chart. +*/}} +{{- define "tgi.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. 
+*/}} +{{- define "tgi.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "tgi.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "tgi.labels" -}} +helm.sh/chart: {{ include "tgi.chart" . }} +{{ include "tgi.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "tgi.selectorLabels" -}} +app.kubernetes.io/name: {{ include "tgi.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "tgi.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "tgi.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/workflows/charts/tgi/templates/deploy.yaml b/workflows/charts/tgi/templates/deploy.yaml new file mode 100644 index 00000000..6c5a5bd5 --- /dev/null +++ b/workflows/charts/tgi/templates/deploy.yaml @@ -0,0 +1,81 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "tgi.fullname" . }} + labels: + {{- include "tgi.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.deploy.replicaCount }} + selector: + matchLabels: + {{- include "tgi.selectorLabels" . | nindent 6 }} + template: + metadata: + labels: + {{- include "tgi.selectorLabels" . | nindent 8 }} + spec: + securityContext: + fsGroup: 1000 + runAsUser: 1000 + containers: + - name: {{ .Chart.Name }} + args: + - '--model-id' + - {{ .Values.deploy.model | quote }} + {{- if index .Values.deploy.resources.limits "gpu.intel.com/i915" }} + - '--num-shard' + - {{ index .Values.deploy.resources.limits "gpu.intel.com/i915" | quote }} + {{- end }} + - '-p' + - {{ .Values.service.port | quote }} + {{- if .Values.quantize }} + - '--quantize' + - {{ .Values.deploy.quantize | quote }} + {{- end }} + - '--cuda-graphs=0' + envFrom: + - configMapRef: + name: {{ .Values.deploy.configMapName }} + - secretRef: + name: {{ .Release.Name }}-hf-token + env: + - name: NUMBA_CACHE_DIR # https://github.com/huggingface/text-generation-inference/pull/2443 + value: /data/numba_cache + image: {{ .Values.deploy.image }} + livenessProbe: + httpGet: + path: /health + port: {{ .Values.service.port }} + initialDelaySeconds: 5 + periodSeconds: 5 + ports: + - name: http + containerPort: {{ .Values.service.port }} + protocol: TCP + resources: + {{- toYaml .Values.deploy.resources | nindent 12 }} + volumeMounts: + - mountPath: /dev/shm + name: dshm + - mountPath: /data + name: hf-data + volumes: + - name: dshm + emptyDir: + medium: 
Memory + - name: hf-data + emptyDir: {} diff --git a/workflows/charts/tgi/templates/ingress.yaml b/workflows/charts/tgi/templates/ingress.yaml new file mode 100644 index 00000000..f87f6cb0 --- /dev/null +++ b/workflows/charts/tgi/templates/ingress.yaml @@ -0,0 +1,76 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +{{- if .Values.ingress.enabled -}} +{{- $fullName := include "tgi.fullname" . -}} +{{- $svcPort := .Values.service.port -}} +{{- if and .Values.ingress.className (not (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion)) }} + {{- if not (hasKey .Values.ingress.annotations "kubernetes.io/ingress.class") }} + {{- $_ := set .Values.ingress.annotations "kubernetes.io/ingress.class" .Values.ingress.className}} + {{- end }} +{{- end }} +{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}} +apiVersion: networking.k8s.io/v1 +{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}} +apiVersion: networking.k8s.io/v1beta1 +{{- else -}} +apiVersion: extensions/v1beta1 +{{- end }} +kind: Ingress +metadata: + name: {{ $fullName }} + labels: + {{- include "tgi.labels" . | nindent 4 }} + {{- with .Values.ingress.annotations }} + annotations: + kubernetes.io/ingress.allow-http: "false" + {{- toYaml . 
| nindent 4 }} + {{- end }} +spec: + {{- if and .Values.ingress.className (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion) }} + ingressClassName: {{ .Values.ingress.className }} + {{- end }} + {{- if .Values.ingress.tls }} + tls: + {{- range .Values.ingress.tls }} + - hosts: + {{- range .hosts }} + - {{ . | quote }} + {{- end }} + secretName: {{ .secretName }} + {{- end }} + {{- end }} + rules: + {{- range .Values.ingress.hosts }} + - host: {{ .host | quote }} + http: + paths: + {{- range .paths }} + - path: {{ .path }} + {{- if and .pathType (semverCompare ">=1.18-0" $.Capabilities.KubeVersion.GitVersion) }} + pathType: {{ .pathType }} + {{- end }} + backend: + {{- if semverCompare ">=1.19-0" $.Capabilities.KubeVersion.GitVersion }} + service: + name: {{ $fullName }} + port: + number: {{ $svcPort }} + {{- else }} + serviceName: {{ $fullName }} + servicePort: {{ $svcPort }} + {{- end }} + {{- end }} + {{- end }} +{{- end }} diff --git a/workflows/charts/tgi/templates/secret.yaml b/workflows/charts/tgi/templates/secret.yaml new file mode 100644 index 00000000..0507543e --- /dev/null +++ b/workflows/charts/tgi/templates/secret.yaml @@ -0,0 +1,22 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +{{- $name := .Values.secret.encodedToken | required ".Values.secret.encodedToken is required in Base64 Format." 
-}} +apiVersion: v1 +kind: Secret +metadata: + name: {{ .Release.Name }}-hf-token +type: Opaque +data: + HF_TOKEN: {{ .Values.secret.encodedToken }} diff --git a/workflows/charts/tgi/templates/service.yaml b/workflows/charts/tgi/templates/service.yaml new file mode 100644 index 00000000..7aff68e5 --- /dev/null +++ b/workflows/charts/tgi/templates/service.yaml @@ -0,0 +1,29 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +apiVersion: v1 +kind: Service +metadata: + name: {{ include "tgi.fullname" . }} + labels: + {{- include "tgi.labels" . | nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + targetPort: http + protocol: TCP + name: http + selector: + {{- include "tgi.selectorLabels" . | nindent 4 }} diff --git a/workflows/charts/tgi/templates/tests/test-connection.yaml b/workflows/charts/tgi/templates/tests/test-connection.yaml new file mode 100644 index 00000000..113d8acf --- /dev/null +++ b/workflows/charts/tgi/templates/tests/test-connection.yaml @@ -0,0 +1,29 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +apiVersion: v1 +kind: Pod +metadata: + name: "{{ include "tgi.fullname" . }}-test-connection" + labels: + {{- include "tgi.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": test +spec: + containers: + - name: info + image: curlimages/curl + command: ['sh', '-c'] + args: ['curl --noproxy "*" -f {{ include "tgi.fullname" . }}:{{ .Values.service.port }}/info'] + restartPolicy: OnFailure diff --git a/workflows/charts/tgi/values.yaml b/workflows/charts/tgi/values.yaml new file mode 100644 index 00000000..7d2434cc --- /dev/null +++ b/workflows/charts/tgi/values.yaml @@ -0,0 +1,64 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# -- Name of the serving service +nameOverride: "" +# -- Full qualified Domain Name +fullnameOverride: "" +deploy: + # -- ConfigMap of Environment Variables + configMapName: intel-proxy-config + # -- Intel TGI Image + image: ghcr.io/huggingface/text-generation-inference:latest-intel + # -- Model to be loaded + model: HuggingFaceTB/SmolLM-135M + # -- Enable Quantization (ex: bitsandbytes-nf4) + quantize: "" + # -- Number of pods + replicaCount: 1 + # -- Resource configuration + resources: + limits: + cpu: 4000m + # -- Intel GPU Device Configuration + gpu.intel.com/i915: 1 + # habana.ai/gaudi: 1 + # memory: 409Gi + # hugepages-2Mi: 95000Mi + requests: + cpu: 1000m + memory: "1Gi" +secret: + # -- Base64 Encoded Huggingface Hub API Token + encodedToken: "" +# -- Service configuration +service: + port: 80 + type: NodePort +# -- Ingress configuration +ingress: + enabled: false + className: "" + annotations: {} + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + hosts: + - host: chart-example.local + paths: + - path: / + pathType: ImplementationSpecific + tls: [] + # - secretName: chart-example-tls + # hosts: + # - chart-example.local From 13a241680d9c6869d9afd7b156dcbd953db4f463 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 26 Aug 2024 08:37:21 -0700 Subject: [PATCH 20/50] Bump github/codeql-action from 3.26.2 to 3.26.5 (#333) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/container-ci.yaml | 2 +- .github/workflows/scorecard.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/container-ci.yaml b/.github/workflows/container-ci.yaml index 48fc214d..27ccabad 100644 --- a/.github/workflows/container-ci.yaml +++ b/.github/workflows/container-ci.yaml @@ -155,7 +155,7 @@ jobs: - name: Cleanup if: always() run: docker rmi -f ${{ secrets.REGISTRY }}/${{ 
secrets.REPO }}:${{ matrix.container }} - - uses: github/codeql-action/upload-sarif@429e1977040da7a23b6822b13c129cd1ba93dbb2 # v3.26.2 + - uses: github/codeql-action/upload-sarif@2c779ab0d087cd7fe7b826087247c2c81f27bfa6 # v3.26.5 with: sarif_file: '${{ matrix.container }}-scan.sarif' category: '${{ matrix.container }}' diff --git a/.github/workflows/scorecard.yaml b/.github/workflows/scorecard.yaml index 2c387c4a..1c08423e 100644 --- a/.github/workflows/scorecard.yaml +++ b/.github/workflows/scorecard.yaml @@ -53,6 +53,6 @@ jobs: name: SARIF file path: results.sarif retention-days: 5 - - uses: github/codeql-action/upload-sarif@429e1977040da7a23b6822b13c129cd1ba93dbb2 # v3.26.2 + - uses: github/codeql-action/upload-sarif@2c779ab0d087cd7fe7b826087247c2c81f27bfa6 # v3.26.5 with: sarif_file: results.sarif From 262a89c27763c9af7b27bdf37c80bf52cdd9ede5 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 26 Aug 2024 15:48:26 +0000 Subject: [PATCH 21/50] Bump mkdocs-material from 9.5.32 to 9.5.33 in /docs in the docs group (#331) --- docs/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index 54a9b029..1058a38f 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,7 +1,7 @@ mkdocs-callouts>=1.13.2 mkdocs-git-authors-plugin>=0.8.0 mkdocs-git-revision-date-localized-plugin>=1.2.5 -mkdocs-material==9.5.32 +mkdocs-material==9.5.33 mkdocs-table-reader-plugin>=2.1.0 mkdocs==1.6.0 pandas>=2.0.3 From 658e61e414029596e99a7342855e7762862155c7 Mon Sep 17 00:00:00 2001 From: Srikanth Ramakrishna Date: Mon, 26 Aug 2024 10:53:24 -0700 Subject: [PATCH 22/50] IPEX XPU Torchserve support (#336) Signed-off-by: Srikanth Ramakrishna --- pytorch/Dockerfile | 104 +++++++++++++++--- pytorch/docker-compose.yaml | 31 +++++- pytorch/serving/README.md | 56 +++++++++- pytorch/serving/config-xpu.properties | 15 +++ 
.../serving/model-archive/ipex_squeezenet.py | 57 ++++++++++ pytorch/serving/model-archive/mar-test.sh | 16 ++- pytorch/serving/tests.yaml | 47 ++++++-- .../{ => serving}/torchserve-requirements.txt | 0 .../serving/torchserve-xpu-requirements.txt | 15 +++ 9 files changed, 302 insertions(+), 39 deletions(-) create mode 100644 pytorch/serving/config-xpu.properties create mode 100644 pytorch/serving/model-archive/ipex_squeezenet.py rename pytorch/{ => serving}/torchserve-requirements.txt (100%) create mode 100644 pytorch/serving/torchserve-xpu-requirements.txt diff --git a/pytorch/Dockerfile b/pytorch/Dockerfile index 1a5b497d..2f7903d1 100644 --- a/pytorch/Dockerfile +++ b/pytorch/Dockerfile @@ -34,6 +34,7 @@ ARG BASE_IMAGE_TAG ARG PACKAGE_OPTION=pip ARG PYTHON_VERSION ARG PYTHON_BASE=${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER}-${BASE_IMAGE_NAME}-${BASE_IMAGE_TAG}-${PACKAGE_OPTION}-py${PYTHON_VERSION}-base +ARG TORCHSERVE_BASE=${PYTHON_BASE} FROM ${PYTHON_BASE} AS ipex-base-pip WORKDIR / @@ -181,13 +182,17 @@ RUN apt-get update && \ intel-oneapi-runtime-mkl=${MKL_VER} \ intel-oneapi-runtime-ccl=${CCL_VER}; +RUN rm -rf /etc/apt/sources.list.d/intel-gpu-jammy.list /etc/apt/sources.list.d/oneAPI.list + +ENV LD_LIBRARY_PATH=/opt/intel/oneapi/redist/lib:$LD_LIBRARY_PATH + +FROM ipex-xpu-base AS ipex-xpu-base-wheels + WORKDIR / COPY xpu-requirements.txt . 
RUN python -m pip install --no-cache-dir -r xpu-requirements.txt && \ - rm -rf xpu-requirements.txt /etc/apt/sources.list.d/intel-gpu-jammy.list /etc/apt/sources.list.d/oneAPI.list - -ENV LD_LIBRARY_PATH=/opt/intel/oneapi/redist/lib:$LD_LIBRARY_PATH + rm -rf xpu-requirements.txt FROM ipex-xpu-base AS ipex-xpu-jupyter @@ -205,7 +210,8 @@ EXPOSE 8888 CMD ["bash", "-c", "source /etc/bash.bashrc && jupyter notebook --notebook-dir=/jupyter --port 8888 --ip 0.0.0.0 --no-browser --allow-root --ServerApp.token= --ServerApp.password= --ServerApp.allow_origin=* --ServerApp.base_url=$NB_PREFIX"] -FROM ${PYTHON_BASE} as torchserve-base + +FROM ${TORCHSERVE_BASE} as torchserve-base ENV PYTHONUNBUFFERED=TRUE @@ -221,8 +227,6 @@ RUN useradd -m -s /bin/bash model-server && \ mkdir -p /home/model-server/model-store && \ chown -R model-server /home/model-server/ -FROM torchserve-base AS compile - RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ g++ \ git \ @@ -237,11 +241,6 @@ RUN python3 -m venv /home/venv ENV PATH="/home/venv/bin:$PATH" WORKDIR /home/model-server -COPY torchserve-requirements.txt . -COPY requirements.txt . - -RUN python -m pip install --no-cache-dir -r requirements.txt && \ - python -m pip install --no-cache-dir -r torchserve-requirements.txt RUN echo -e "#!/bin/bash \n\ set -e \n\ @@ -253,13 +252,29 @@ else \n\ fi \n\ tail -f /dev/null" >> /usr/local/bin/dockerd-entrypoint.sh -FROM torchserve-base AS torchserve +FROM torchserve-base AS compile-cpu + +COPY serving/torchserve-requirements.txt . +COPY requirements.txt . + +RUN python -m pip install --no-cache-dir -r requirements.txt && \ + python -m pip install --no-cache-dir -r torchserve-requirements.txt && \ + rm -rf requirements.txt torchserve-requirements.txt + +FROM torchserve-base AS compile-xpu + +COPY serving/torchserve-xpu-requirements.txt . 
+ +RUN python -m pip install --no-cache-dir -r torchserve-xpu-requirements.txt && \ + rm -rf torchserve-xpu-requirements.txt + +FROM torchserve-base AS torchserve-cpu USER model-server WORKDIR /home/model-server -COPY --chown=model-server --from=compile /home/venv /home/venv -COPY --chown=model-server --chmod=755 --from=compile /usr/local/bin/dockerd-entrypoint.sh /usr/local/bin/dockerd-entrypoint.sh +COPY --chown=model-server --from=compile-cpu /home/venv /home/venv +COPY --chown=model-server --chmod=755 --from=compile-cpu /usr/local/bin/dockerd-entrypoint.sh /usr/local/bin/dockerd-entrypoint.sh COPY --chown=model-server serving/config.properties /home/model-server/config.properties ENV PATH="/home/venv/bin:$PATH" @@ -270,3 +285,64 @@ EXPOSE 8080 8081 8082 7070 7071 ENTRYPOINT ["/usr/local/bin/dockerd-entrypoint.sh"] CMD ["serve"] + +FROM torchserve-base AS torchserve-xpu + +RUN apt-get update && \ + apt-get install -y --no-install-recommends --fix-missing \ + gnupg2 \ + gpg-agent \ + rsync && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \ + gpg --dearmor --yes --output /usr/share/keyrings/intel-graphics.gpg +RUN echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy unified" | \ + tee /etc/apt/sources.list.d/intel-gpu-jammy.list + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + jq \ + curl \ + libnl-genl-3-200 \ + intel-gsc \ + libdrm2 \ + intel-metrics-discovery \ + intel-metrics-library && \ + apt-get autoremove -y && \ + rm -rf /var/lib/apt/lists/* + +ARG XPU_SMI_VERSION + +ARG API_URL=https://api.github.com/repos/intel/xpumanager/releases/tags/V${XPU_SMI_VERSION} + +RUN wget -q --header="Accept: application/vnd.github.v3+json" --header="User-Agent: MyClient/1.0.0" -O - "$API_URL" | tee /tmp/asset_data.txt && \ + wget -q --no-check-certificate "$(jq -r '.assets[] | select(.name | 
test("^xpu-smi.*u22\\.04_amd64\\.deb$")) | .browser_download_url' < /tmp/asset_data.txt)" && \ + ldconfig && dpkg -i --force-all -- *.deb && \ + rm -rf -- *.deb /etc/apt/sources.list.d/intel-gpu-jammy.list /etc/apt/sources.list.d/oneAPI.list /tmp/asset_data.txt + +ARG GID=109 + +RUN groupadd -g ${GID} render &&\ + usermod -aG video,render model-server + +USER model-server + +WORKDIR /home/model-server + +RUN wget --progress=dot:giga https://raw.githubusercontent.com/pytorch/serve/master/examples/intel_extension_for_pytorch/intel_gpu_metric_collector.py && \ + wget --progress=dot:giga https://raw.githubusercontent.com/pytorch/serve/master/examples/intel_extension_for_pytorch/intel_gpu.py + +COPY --chown=model-server --from=compile-xpu /home/venv /home/venv +COPY --chown=model-server --chmod=755 --from=compile-xpu /usr/local/bin/dockerd-entrypoint.sh /usr/local/bin/dockerd-entrypoint.sh +COPY --chown=model-server serving/config-xpu.properties /home/model-server/config.properties + +ENV PATH="/home/venv/bin:$PATH" +ENV TEMP=/home/model-server/tmp + +# 8080/8081/8082 REST and 7070/7071 gRPC +EXPOSE 8080 8081 8082 7070 7071 + +ENTRYPOINT ["/usr/local/bin/dockerd-entrypoint.sh"] +CMD ["serve"] diff --git a/pytorch/docker-compose.yaml b/pytorch/docker-compose.yaml index 838ee5cb..6aeeefc9 100644 --- a/pytorch/docker-compose.yaml +++ b/pytorch/docker-compose.yaml @@ -122,7 +122,7 @@ services: org.opencontainers.base.name: "intel/python:3.10-core" org.opencontainers.image.title: "Intel® Extension for PyTorch XPU Base Image" org.opencontainers.image.version: ${IPEX_VERSION:-2.1.40}-xpu-${PACKAGE_OPTION:-pip}-base - target: ipex-xpu-base + target: ipex-xpu-base-wheels command: > python -c "import torch;print(torch.device('xpu'));import intel_extension_for_pytorch as @@ -156,7 +156,7 @@ services: image: 
${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.1.40xpu}-xpu-jupyter ports: - 8888:8888 - torchserve: + torchserve-cpu: build: args: PACKAGE_OPTION: pip @@ -165,22 +165,43 @@ services: dependency.apt.openjdk-17-jdk: true dependency.idp: false dependency.python.ipex: requirements.txt - dependency.python.pip: torchserve-requirements.txt + dependency.python.pip: serving/torchserve-requirements.txt docs: serving org.opencontainers.base.name: "intel/python:3.10-core" org.opencontainers.image.title: "Intel® Extension for PyTorch Serving Image" org.opencontainers.image.version: ${IPEX_VERSION:-2.4.0}-serving-cpu - target: torchserve + target: torchserve-cpu command: torchserve --version entrypoint: "" extends: ipex-base - image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-py${PYTHON_VERSION:-3.10}-torchserve + image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-py${PYTHON_VERSION:-3.10}-torchserve-cpu ports: - 8080:8080 - 8081:8081 - 8082:8082 - 7070:7070 - 7071:7071 + torchserve-xpu: + build: + args: + PACKAGE_OPTION: pip + XPU_SMI_VERSION: ${XPU_SMI_VERSION:-1.2.38} + TORCHSERVE_BASE: ipex-xpu-base + labels: + dependency.apt.numactl: true + dependency.apt.openjdk-17-jdk: true + dependency.apt.xpu-smi: ${XPU_SMI_VERSION:-1.2.38} + dependency.idp: false + dependency.python.pip: serving/torchserve-xpu-requirements.txt + docs: serving + org.opencontainers.base.name: "intel/python:3.10-core" + org.opencontainers.image.title: "Intel® Extension for PyTorch XPU Serving Image" + org.opencontainers.image.version: ${IPEX_VERSION:-2.1.40}-serving-xpu + target: torchserve-xpu + command: torchserve --version + entrypoint: "" + extends: xpu + image: 
${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-py${PYTHON_VERSION:-3.10}-torchserve-xpu hf-genai: build: args: diff --git a/pytorch/serving/README.md b/pytorch/serving/README.md index 5e48251f..6ca33ef1 100644 --- a/pytorch/serving/README.md +++ b/pytorch/serving/README.md @@ -12,29 +12,73 @@ The [Torchserve Model Archiver](https://github.com/pytorch/serve/blob/master/mod Follow the instructions found in the link above depending on whether you are intending to archive a model or a workflow. Use the provided container rather than installing the archiver with the example command below: +#### Create a Model Archive for CPU device + ```bash curl -O https://download.pytorch.org/models/squeezenet1_1-b8a52dc0.pth docker run --rm -it \ + --entrypoint='' \ + -u root \ -v $PWD:/home/model-server \ intel/intel-optimized-pytorch:2.4.0-serving-cpu \ - torch-model-archiver --model-name squeezenet \ - --version 1.0 \ - --model-file model-archive/model.py \ - --serialized-file squeezenet1_1-b8a52dc0.pth \ - --handler image_classifier \ - --export-path /home/model-server + torch-model-archiver --model-name squeezenet1_1 \ + --version 1.1 \ + --model-file model-archive/model.py \ + --serialized-file squeezenet1_1-b8a52dc0.pth \ + --handler image_classifier \ + --export-path /home/model-server +``` + +### Create a Model Archive for XPU device + +Use a squeezenet model [optimized](./model-store/ipex_squeezenet.py) for XPU using Intel® Extension for PyTorch*. + +```bash +docker run --rm -it \ + --entrypoint='' \ + -u root \ + -v $PWD:/home/model-server \ + --device /dev/dri \ + intel/intel-optimized-pytorch:2.1.40-serving-xpu \ + sh -c 'python model-archive/ipex_squeezenet.py && \ + torch-model-archiver --model-name squeezenet1_1 \ + --version 1.1 \ + --serialized-file squeezenet1_1-jit.pt \ + --handler image_classifier \ + --export-path /home/model-server' ``` ### Test Model Test Torchserve with the new archived model. 
The example below is for the squeezenet model. +#### Run Torchserve for CPU device + ```bash # Assuming that the above pre-archived model is in the current working directory docker run -d --rm --name server \ -v $PWD:/home/model-server/model-store \ + -v $PWD/wf-store:/home/model-server/wf-store \ --net=host \ intel/intel-optimized-pytorch:2.4.0-serving-cpu +``` + +#### Run Torchserve for XPU device + +```bash +# Assuming that the above pre-archived model is in the current working directory +docker run -d --rm --name server \ + -v $PWD:/home/model-server/model-store \ + -v $PWD/wf-store:/home/model-server/wf-store \ + -v $PWD/config-xpu.properties:/home/model-server/config.properties \ + --net=host \ + --device /dev/dri \ + intel/intel-optimized-pytorch:2.1.40-serving-xpu +``` + +After lauching the container, follow the steps below: + +```bash # Verify that the container has launched successfully docker logs server # Attempt to register the model and make an inference request diff --git a/pytorch/serving/config-xpu.properties b/pytorch/serving/config-xpu.properties new file mode 100644 index 00000000..170a1485 --- /dev/null +++ b/pytorch/serving/config-xpu.properties @@ -0,0 +1,15 @@ +inference_address=http://0.0.0.0:8080 +management_address=http://0.0.0.0:8081 +metrics_address=http://0.0.0.0:8082 +number_of_netty_threads=32 +install_py_dep_per_model=true +job_queue_size=1000 +model_store=/home/model-server/model-store +workflow_store=/home/model-server/wf-store +allowed_urls=https://s3.amazonaws.com/.*,https://torchserve.pytorch.org/.* +ipex_enable=true +ipex_gpu_enable=true +system_metrics_cmd=/home/model-server/intel_gpu_metric_collector.py --gpu 1 +disable_token_authorization=true +enable_model_api=true +enable_envvars_config=true diff --git a/pytorch/serving/model-archive/ipex_squeezenet.py b/pytorch/serving/model-archive/ipex_squeezenet.py new file mode 100644 index 00000000..14c0dcb4 --- /dev/null +++ b/pytorch/serving/model-archive/ipex_squeezenet.py @@ 
-0,0 +1,57 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# +# This file was assembled from multiple pieces, whose use is documented +# throughout. Please refer to the TensorFlow dockerfiles documentation +# for more information. +# based on https://github.com/pytorch/pytorch/blob/master/Dockerfile +# +# NOTE: To build this you will need a docker version >= 19.03 and DOCKER_BUILDKIT=1 +# +# If you do not use buildkit you are not going to have a good time +# +# For reference: +# https://docs.docker.com/develop/develop-images/build_enhancements/ + +# pylint: skip-file + +import intel_extension_for_pytorch as ipex +import torch +import torchvision.models as models + +# load the model +model = models.squeezenet1_1(pretrained=True) +model = model.eval() + +# define dummy input tensor to use for the model's forward call to record operations in the model for tracing +N, C, H, W = 1, 3, 224, 224 +data = torch.randn(N, C, H, W) + +model.eval() +data = torch.rand(1, 3, 224, 224) + +#################### code changes ################# +model = model.to("xpu") +data = data.to("xpu") +model = ipex.optimize(model, dtype=torch.bfloat16) +#################### code changes ################# + +with torch.no_grad(): + with torch.xpu.amp.autocast(enabled=True, dtype=torch.bfloat16): + ############################# code changes ##################### + model = 
torch.jit.trace(model, data) + model = torch.jit.freeze(model) + model(data) +torch.jit.save(model, "squeezenet1_1-jit.pt") diff --git a/pytorch/serving/model-archive/mar-test.sh b/pytorch/serving/model-archive/mar-test.sh index f07b83ad..aabee71f 100644 --- a/pytorch/serving/model-archive/mar-test.sh +++ b/pytorch/serving/model-archive/mar-test.sh @@ -26,8 +26,18 @@ # For reference: # https://docs.docker.com/develop/develop-images/build_enhancements/ -wget https://download.pytorch.org/models/squeezenet1_1-b8a52dc0.pth -torch-model-archiver --model-name squeezenet1_1 --version 1.1 --model-file /home/model-server/model-archive/model.py --serialized-file squeezenet1_1-b8a52dc0.pth --handler image_classifier --export-path /home/model-server/model-store +if [[ "$1" == "cpu" ]]; then + wget https://download.pytorch.org/models/squeezenet1_1-b8a52dc0.pth + torch-model-archiver --model-name squeezenet1_1 --version 1.1 --model-file /home/model-server/model-archive/model.py --serialized-file squeezenet1_1-b8a52dc0.pth --handler image_classifier --export-path /home/model-server/model-store + rm -rf squeezenet1_1-b8a52dc0.pth +elif [[ "$1" == "xpu" ]]; then + python /home/model-server/model-archive/ipex_squeezenet.py + torch-model-archiver --model-name squeezenet1_1 --version 1.1 --serialized-file squeezenet1_1-jit.pt --handler image_classifier --export-path /home/model-server/model-store + rm -rf squeezenet1_1-jit.pt +else + echo "Only cpu and xpu devices supported" + exit 1 +fi + [ -f "/home/model-server/model-store/squeezenet1_1.mar" ] && echo "squeezenet1_1.pth Archived Succesfully at /home/model-server/model-store/squeezenet1_1.mar" -rm -rf squeezenet1_1-b8a52dc0.pth find . 
| grep -E "(/__pycache__$|\.pyc$|\.pyo$)" | xargs rm -rf diff --git a/pytorch/serving/tests.yaml b/pytorch/serving/tests.yaml index 3c91eced..986e220a 100644 --- a/pytorch/serving/tests.yaml +++ b/pytorch/serving/tests.yaml @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -ipex-serving-model-archive: - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-py${PYTHON_VERSION:-3.10}-torchserve - cmd: /home/model-server/model-archive/mar-test.sh +ipex-serving-cpu-model-archive: + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-py${PYTHON_VERSION:-3.10}-torchserve-cpu + cmd: /home/model-server/model-archive/mar-test.sh cpu entrypoint: /bin/bash volumes: - src: $PWD/pytorch/serving/model-archive @@ -23,8 +23,20 @@ ipex-serving-model-archive: dst: /home/model-server/model-store user: root workdir: /home/model-server/model-archive -ipex-serving-workflow-archive: - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-py${PYTHON_VERSION:-3.10}-torchserve +ipex-serving-xpu-model-archive: + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-py${PYTHON_VERSION:-3.10}-torchserve-xpu + cmd: /home/model-server/model-archive/mar-test.sh xpu + entrypoint: /bin/bash + device: ["/dev/dri"] + volumes: + - src: $PWD/pytorch/serving/model-archive + dst: /home/model-server/model-archive + - src: $PWD/pytorch/serving/model-store + dst: /home/model-server/model-store + user: root + workdir: /home/model-server/model-archive +ipex-serving-cpu-workflow-archive: + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-py${PYTHON_VERSION:-3.10}-torchserve-cpu cmd: /home/model-server/model-archive/war-test.sh entrypoint: /bin/bash volumes: @@ -34,10 +46,23 @@ 
ipex-serving-workflow-archive: dst: /home/model-server/wf-store user: root workdir: /home/model-server/model-archive -ipex-serving-rest-workflow: - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-py${PYTHON_VERSION:-3.10}-torchserve +ipex-serving-cpu-rest-workflow: + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-py${PYTHON_VERSION:-3.10}-torchserve-cpu + cmd: bash /home/model-server/wf-store/rest-test.sh + serving: True + volumes: + - src: $PWD/pytorch/serving/model-store + dst: /home/model-server/model-store + - src: $PWD/pytorch/serving/wf-store + dst: /home/model-server/wf-store + env: + ENABLE_TORCH_PROFILER: 'true' + shm_size: 1g +ipex-serving-xpu-rest-workflow: + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-py${PYTHON_VERSION:-3.10}-torchserve-xpu cmd: bash /home/model-server/wf-store/rest-test.sh serving: True + device: ["/dev/dri"] volumes: - src: $PWD/pytorch/serving/model-store dst: /home/model-server/model-store @@ -47,8 +72,8 @@ ipex-serving-rest-workflow: ENABLE_TORCH_PROFILER: 'true' shm_size: 1g workdir: /home/model-server/wf-store -ipex-serving-rest-inference: - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-py${PYTHON_VERSION:-3.10}-torchserve +ipex-serving-cpu-rest-inference: + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-py${PYTHON_VERSION:-3.10}-torchserve-cpu cmd: bash /home/model-server/model-store/rest-test.sh serving: True volumes: @@ -60,8 +85,8 @@ ipex-serving-rest-inference: ENABLE_TORCH_PROFILER: 'true' shm_size: 1g workdir: /home/model-server/model-store -ipex-serving-grpc-inference: - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-py${PYTHON_VERSION:-3.10}-torchserve +ipex-serving-cpu-grpc-inference: + 
img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-py${PYTHON_VERSION:-3.10}-torchserve-cpu cmd: bash /home/model-server/model-store/grpc-test.sh serving: True volumes: diff --git a/pytorch/torchserve-requirements.txt b/pytorch/serving/torchserve-requirements.txt similarity index 100% rename from pytorch/torchserve-requirements.txt rename to pytorch/serving/torchserve-requirements.txt diff --git a/pytorch/serving/torchserve-xpu-requirements.txt b/pytorch/serving/torchserve-xpu-requirements.txt new file mode 100644 index 00000000..534f6514 --- /dev/null +++ b/pytorch/serving/torchserve-xpu-requirements.txt @@ -0,0 +1,15 @@ +torch==2.1.0.post3+cxx11.abi +torchvision==0.16.0.post3+cxx11.abi +torchaudio==2.1.0.post3+cxx11.abi +intel_extension_for_pytorch==2.1.40+xpu +--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us +setuptools==69.5.1 +numpy==1.26.4 +captum>=0.7.0 +cython>=3.0.10 +pynvml>=11.5.0 +pyyaml>=6.0.1 +-f https://download.pytorch.org/whl/torch_stable.html +torch-model-archiver==0.11.1 +torch-workflow-archiver==0.2.14 +torchserve==0.11.1 From 18f2760ba0d74df7835240f2f5efaa634743a82a Mon Sep 17 00:00:00 2001 From: Tyler Titsworth Date: Mon, 26 Aug 2024 15:47:02 -0700 Subject: [PATCH 23/50] Correct TM&B (#329) Signed-off-by: tylertitsworth --- CONTRIBUTING.md | 4 ++-- README.md | 4 ++-- classical-ml/README.md | 2 +- mkdocs.yml | 2 +- python/README.md | 2 +- pytorch/README.md | 4 +--- pytorch/serving/README.md | 2 +- tensorflow/README.md | 2 +- workflows/README.md | 8 ++++---- workflows/charts/huggingface-llm/README.md | 2 +- workflows/charts/torchserve/Chart.yaml | 4 ++-- workflows/charts/torchserve/README.md | 8 ++++---- workflows/charts/torchserve/README.md.gotmpl | 4 ++-- 13 files changed, 23 insertions(+), 25 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f431ef44..9d61a8ac 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,6 +1,6 @@ # Contributing 
-Thank you for considering contributing to Intel® AI Containers! We welcome your help to make this project better. Contributing to an open source project can be a daunting task, but the Intel AI Containers team is here to help you through the process. If at any point in this process you feel out of your depth or confused by our processes, please don't hesitate to reach out to a maintainer or file an [issue](https://github.com/intel/ai-containers/issues). +Thank you for considering contributing to AI Containers! We welcome your help to make this project better. Contributing to an open source project can be a daunting task, but the Intel AI Containers team is here to help you through the process. If at any point in this process you feel out of your depth or confused by our processes, please don't hesitate to reach out to a maintainer or file an [issue](https://github.com/intel/ai-containers/issues). ## Getting Started @@ -138,4 +138,4 @@ commit automatically with `git commit -s`. ## License -Intel® AI Containers is licensed under the terms in [LICENSE](./LICENSE). By contributing to the project, you agree to the license and copyright terms therein and release your contribution under these terms. +AI Containers is licensed under the terms in [LICENSE](./LICENSE). By contributing to the project, you agree to the license and copyright terms therein and release your contribution under these terms. 
diff --git a/README.md b/README.md index 23705123..a152105c 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Intel® AI Containers +# AI Containers [![OpenSSF Best Practices](https://www.bestpractices.dev/projects/8270/badge)](https://www.bestpractices.dev/projects/8270) [![OpenSSF Scorecard](https://api.securityscorecards.dev/projects/github.com/intel/ai-containers/badge)](https://securityscorecards.dev/viewer/?uri=github.com/intel/ai-containers) @@ -28,7 +28,7 @@ docker login $REGISTRY docker pull $REGISTRY/$REPO:latest ``` -The maintainers of Intel® AI Containers use Azure to store containers, but an open source container registry like [harbor](https://github.com/goharbor/harbor) is preferred. +The maintainers of AI Containers use Azure to store containers, but an open source container registry like [harbor](https://github.com/goharbor/harbor) is preferred. > [!WARNING] > You can optionally skip this step and use some placeholder values, however some container groups depend on other images and will pull from a registry that you have not defined and result in an error. 
diff --git a/classical-ml/README.md b/classical-ml/README.md index 9d63355c..97cc50c2 100644 --- a/classical-ml/README.md +++ b/classical-ml/README.md @@ -63,7 +63,7 @@ The images below additionally include [Jupyter Notebook](https://jupyter.org/) s ## Build from Source -To build the images from source, clone the [Intel® AI Containers](https://github.com/intel/ai-containers) repository, follow the main `README.md` file to setup your environment, and run the following command: +To build the images from source, clone the [AI Containers](https://github.com/intel/ai-containers) repository, follow the main `README.md` file to setup your environment, and run the following command: ```bash cd classical-ml diff --git a/mkdocs.yml b/mkdocs.yml index 20c73487..94322e7d 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -51,7 +51,7 @@ plugins: - read_csv repo_name: intel/ai-containers repo_url: https://github.com/intel/ai-containers -site_name: Intel® AI Containers +site_name: AI Containers #TODO: Get previous container versions in an easy way # https://squidfunk.github.io/mkdocs-material/setup/setting-up-versioning/ theme: diff --git a/python/README.md b/python/README.md index 0b9f95dc..25d3d230 100644 --- a/python/README.md +++ b/python/README.md @@ -15,7 +15,7 @@ The images below include variations for only the core packages in the [Intel® D ## Build from Source -To build the images from source, clone the [Intel® AI Containers](https://github.com/intel/ai-containers) repository, follow the main `README.md` file to setup your environment, and run the following command: +To build the images from source, clone the [AI Containers](https://github.com/intel/ai-containers) repository, follow the main `README.md` file to setup your environment, and run the following command: ```bash cd python diff --git a/pytorch/README.md b/pytorch/README.md index f036502f..f3fcda4e 100644 --- a/pytorch/README.md +++ b/pytorch/README.md @@ -241,8 +241,6 @@ Additionally, if you have a [DeepSpeed* 
configuration](https://www.deepspeed.ai/ --- -#### Hugging Face Generative AI Container - The image below is an extension of the IPEX Multi-Node Container designed to run Hugging Face Generative AI scripts. The container has the typical installations needed to run and fine tune PyTorch generative text models from Hugging Face. It can be used to run multinode jobs using the same instructions from the [IPEX Multi-Node container](#setup-and-run-ipex-multi-node-container). | Tag(s) | Pytorch | IPEX | oneCCL | HF Transformers | Dockerfile | @@ -324,7 +322,7 @@ The images below additionally include [Jupyter Notebook](https://jupyter.org/) s ## Build from Source -To build the images from source, clone the [Intel® AI Containers](https://github.com/intel/ai-containers) repository, follow the main `README.md` file to setup your environment, and run the following command: +To build the images from source, clone the [AI Containers](https://github.com/intel/ai-containers) repository, follow the main `README.md` file to setup your environment, and run the following command: ```bash cd pytorch diff --git a/pytorch/serving/README.md b/pytorch/serving/README.md index 6ca33ef1..08114bba 100644 --- a/pytorch/serving/README.md +++ b/pytorch/serving/README.md @@ -155,7 +155,7 @@ As demonstrated in the above example, models must be registered before they can ### KServe -Apply Intel Optimizations to KServe by patching the serving runtimes to use Intel Optimized Serving Containers with `kubectl apply -f patch.yaml` +Apply Intel Optimizations to KServe by patching the serving runtimes to use Serving Containers with Intel Optimizations via `kubectl apply -f patch.yaml` > [!NOTE] > You can modify this `patch.yaml` file to change the serving runtime pod configuration. 
diff --git a/tensorflow/README.md b/tensorflow/README.md index d71dc349..990ecf71 100644 --- a/tensorflow/README.md +++ b/tensorflow/README.md @@ -286,7 +286,7 @@ The images below additionally include [Jupyter Notebook](https://jupyter.org/) s ## Build from Source -To build the images from source, clone the [Intel® AI Containers](https://github.com/intel/ai-containers) repository, follow the main `README.md` file to setup your environment, and run the following command: +To build the images from source, clone the [AI Containers](https://github.com/intel/ai-containers) repository, follow the main `README.md` file to setup your environment, and run the following command: ```bash cd pytorch diff --git a/workflows/README.md b/workflows/README.md index 21269eb7..1f6c9ea6 100644 --- a/workflows/README.md +++ b/workflows/README.md @@ -1,6 +1,6 @@ # Intel® AI Workflows -Demonstrating showing how the [Intel® AI Containers] can be used for different use cases: +Demonstrating showing how the [AI Containers] can be used for different use cases: ## PyTorch Workflows @@ -11,7 +11,7 @@ Demonstrating showing how the [Intel® AI Containers] can be used for different ## Build from Source -To build the images from source, clone the [Intel® AI Containers] repository, follow the main `README.md` file to setup your environment, and run the following command: +To build the images from source, clone the [AI Containers] repository, follow the main `README.md` file to setup your environment, and run the following command: ```bash cd workflows/charts/huggingface-llm @@ -21,7 +21,7 @@ docker compose run huggingface-llm sh -c "python /workspace/scripts/finetune.py ## License -View the [License](https://github.com/intel/ai-containers/blob/main/LICENSE) for the [Intel® AI Containers]. +View the [License](https://github.com/intel/ai-containers/blob/main/LICENSE) for the [AI Containers]. 
The images below also contain other software which may be under other licenses (such as Pytorch*, Jupyter*, Bash, etc. from the base). @@ -31,6 +31,6 @@ It is the image user's responsibility to ensure that any use of The images below -[Intel® AI Containers]: https://github.com/intel/ai-containers +[AI Containers]: https://github.com/intel/ai-containers [Distributed LLM Fine Tuning with Kubernetes]: https://github.com/intel/ai-containers/tree/main/workflows/charts/huggingface-llm [TorchServe* with Kubernetes]: https://github.com/intel/ai-containers/tree/main/workflows/charts/torchserve diff --git a/workflows/charts/huggingface-llm/README.md b/workflows/charts/huggingface-llm/README.md index e2439830..47755eef 100644 --- a/workflows/charts/huggingface-llm/README.md +++ b/workflows/charts/huggingface-llm/README.md @@ -347,4 +347,4 @@ fine tune the model. ``` ---------------------------------------------- -Autogenerated from chart metadata using [helm-docs v1.13.1](https://github.com/norwoodj/helm-docs/releases/v1.13.1) +Autogenerated from chart metadata using [helm-docs v1.14.2](https://github.com/norwoodj/helm-docs/releases/v1.14.2) diff --git a/workflows/charts/torchserve/Chart.yaml b/workflows/charts/torchserve/Chart.yaml index f3472530..99db0496 100644 --- a/workflows/charts/torchserve/Chart.yaml +++ b/workflows/charts/torchserve/Chart.yaml @@ -13,8 +13,8 @@ # limitations under the License. apiVersion: v2 -name: intel-torchserve -description: Intel TorchServe is a performant, flexible and easy to use tool for serving PyTorch models in production. +name: torchserve-on-intel +description: TorchServe on Intel is a performant, flexible and easy to use tool for serving PyTorch models in production. # A chart can be either an 'application' or a 'library' chart. 
# diff --git a/workflows/charts/torchserve/README.md b/workflows/charts/torchserve/README.md index b35cc7d4..c1a717f5 100644 --- a/workflows/charts/torchserve/README.md +++ b/workflows/charts/torchserve/README.md @@ -1,8 +1,8 @@ -# Intel TorchServe +# TorchServe with Intel Optimizations -Intel TorchServe is a performant, flexible and easy to use tool for serving PyTorch models in production. +TorchServe on Intel is a performant, flexible and easy to use tool for serving PyTorch models in production. -For more information about how to use Intel Optimized TorchServe, check out the [container documentation](../../../pytorch/serving/README.md). +For more information about how to use TorchServe with Intel Optimizations, check out the [container documentation](../../../pytorch/serving/README.md). ![Version: 0.1.0](https://img.shields.io/badge/Version-0.1.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.16.0](https://img.shields.io/badge/AppVersion-1.16.0-informational?style=flat-square) @@ -18,7 +18,7 @@ For more information about how to use Intel Optimized TorchServe, check out the | deploy.resources.limits | object | `{"cpu":"4000m","memory":"1Gi"}` | Maximum resources per pod | | deploy.resources.requests | object | `{"cpu":"1000m","memory":"512Mi"}` | Minimum resources per pod | | deploy.storage.nfs | object | `{"enabled":false,"path":"nil","readOnly":true,"server":"nil","subPath":"nil"}` | Network File System (NFS) storage for models | -| deploy.tokens_disabled | bool | `false` | Set token authentication on or off. Checkout the latest [torchserve docs](https://github.com/pytorch/serve/blob/master/docs/token_authorization_api.md) for more details. | +| deploy.tokens_disabled | bool | `true` | Set token authentication on or off. Checkout the latest [torchserve docs](https://github.com/pytorch/serve/blob/master/docs/token_authorization_api.md) for more details. 
| | fullnameOverride | string | `""` | Full qualified Domain Name | | nameOverride | string | `""` | Name of the serving service | | pvc.size | string | `"1Gi"` | Size of the storage | diff --git a/workflows/charts/torchserve/README.md.gotmpl b/workflows/charts/torchserve/README.md.gotmpl index 1ddf329d..465c03ae 100644 --- a/workflows/charts/torchserve/README.md.gotmpl +++ b/workflows/charts/torchserve/README.md.gotmpl @@ -1,8 +1,8 @@ -# Intel TorchServe +# TorchServe with Intel Optimizations {{ template "chart.description" . }} -For more information about how to use Intel Optimized TorchServe, check out the [container documentation](../../../pytorch/serving/README.md). +For more information about how to use TorchServe with Intel Optimizations, check out the [container documentation](../../../pytorch/serving/README.md). {{ template "chart.versionBadge" . }}{{ template "chart.typeBadge" . }}{{ template "chart.appVersionBadge" . }} From cd824cf70f6f8a28e31643891328f6d5f59c4c44 Mon Sep 17 00:00:00 2001 From: Tyler Titsworth Date: Tue, 27 Aug 2024 09:27:07 -0700 Subject: [PATCH 24/50] Open Up XPU Conda Reqs (#330) Signed-off-by: tylertitsworth Signed-off-by: Tyler Titsworth --- pytorch/xpu-requirements.txt | 10 +++++----- tensorflow/xpu-requirements.txt | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/pytorch/xpu-requirements.txt b/pytorch/xpu-requirements.txt index 09badb28..e7771aa0 100644 --- a/pytorch/xpu-requirements.txt +++ b/pytorch/xpu-requirements.txt @@ -5,8 +5,8 @@ intel_extension_for_pytorch==2.1.40+xpu oneccl_bind_pt==2.1.400+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us setuptools==69.5.1 -numpy==1.26.4 -idna==3.7 -requests==2.32.0 -tqdm==4.66.3 -urllib3==2.2.2 +numpy>=1.26.4 +idna>=3.7 +requests>=2.32.0 +tqdm>=4.66.3 +urllib3>=2.2.2 diff --git a/tensorflow/xpu-requirements.txt b/tensorflow/xpu-requirements.txt index cbb01fc4..a338e80f 100644 --- a/tensorflow/xpu-requirements.txt +++ 
b/tensorflow/xpu-requirements.txt @@ -1,6 +1,6 @@ tensorflow==2.15.0 intel-extension-for-tensorflow[xpu]==2.15.0.1 -idna==3.7 -requests==2.32.0 -tqdm==4.66.3 -urllib3==2.2.2 +idna>=3.7 +requests>=2.32.0 +tqdm>=4.66.3 +urllib3>=2.2.2 From 7ad40200d702be4ddeef9d2e9e176ff63b3c689f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 27 Aug 2024 11:01:02 -0700 Subject: [PATCH 25/50] Bump numpy from 2.0.1 to 2.1.0 in /tensorflow in the tensorflow group across 1 directory (#341) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- tensorflow/serving/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/serving/requirements.txt b/tensorflow/serving/requirements.txt index cd80fbcd..1f2e56e4 100644 --- a/tensorflow/serving/requirements.txt +++ b/tensorflow/serving/requirements.txt @@ -1,4 +1,4 @@ -numpy==2.0.1 +numpy==2.1.0 pillow==10.4.0 requests==2.32.3 tensorflow==2.17.0 From 4bbf3e5429f6fdf273b557368de8237e91a9f290 Mon Sep 17 00:00:00 2001 From: Srikanth Ramakrishna Date: Tue, 27 Aug 2024 12:52:51 -0700 Subject: [PATCH 26/50] validate deepspeed on single node single gpu Signed-off-by: Srikanth Ramakrishna --- python/docker-compose.yaml | 2 + pytorch/Dockerfile | 150 ++++++++++++++++++++---------------- pytorch/docker-compose.yaml | 29 ++++++- pytorch/tests/tests.yaml | 14 +++- 4 files changed, 124 insertions(+), 71 deletions(-) diff --git a/python/docker-compose.yaml b/python/docker-compose.yaml index a8039de4..8db2d586 100644 --- a/python/docker-compose.yaml +++ b/python/docker-compose.yaml @@ -17,6 +17,8 @@ services: build: args: MINIFORGE_VERSION: ${MINIFORGE_VERSION:-Linux-x86_64} + no_proxy: "" + NO_PROXY: "" context: . 
labels: dependency.apt.wget: true diff --git a/pytorch/Dockerfile b/pytorch/Dockerfile index 2f7903d1..4c1b84e7 100644 --- a/pytorch/Dockerfile +++ b/pytorch/Dockerfile @@ -35,6 +35,7 @@ ARG PACKAGE_OPTION=pip ARG PYTHON_VERSION ARG PYTHON_BASE=${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER}-${BASE_IMAGE_NAME}-${BASE_IMAGE_TAG}-${PACKAGE_OPTION}-py${PYTHON_VERSION}-base ARG TORCHSERVE_BASE=${PYTHON_BASE} +ARG MULTINODE_BASE=ipex-base-${PACKAGE_OPTION} FROM ${PYTHON_BASE} AS ipex-base-pip WORKDIR / @@ -66,73 +67,6 @@ EXPOSE 8888 CMD ["bash", "-c", "source /etc/bash.bashrc && jupyter notebook --notebook-dir=/jupyter --port 8888 --ip 0.0.0.0 --no-browser --allow-root --ServerApp.token= --ServerApp.password= --ServerApp.allow_origin=* --ServerApp.base_url=$NB_PREFIX"] -FROM ipex-base-${PACKAGE_OPTION} AS multinode - -RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ - python3-dev \ - gcc \ - g++ \ - libgl1-mesa-glx \ - libglib2.0-0 \ - libopenmpi-dev \ - numactl \ - virtualenv - -ENV SIGOPT_PROJECT=. 
- -WORKDIR / -COPY multinode/requirements.txt requirements.txt - -RUN python -m pip install --no-cache-dir -r requirements.txt && \ - DS_BUILD_OPS=1 python -m pip install --no-cache-dir deepspeed==0.14.4 && \ - echo "Y" | pip uninstall nvidia-ml-py && \ - rm -rf requirements.txt - -ENV LD_LIBRARY_PATH="/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch/opt/mpi/libfabric/lib:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch/lib" - -RUN apt-get install -y --no-install-recommends --fix-missing \ - openssh-client \ - openssh-server && \ - rm /etc/ssh/ssh_host_*_key \ - /etc/ssh/ssh_host_*_key.pub && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* - -RUN mkdir -p /var/run/sshd - -ARG PYTHON_VERSION - -COPY multinode/generate_ssh_keys.sh /generate_ssh_keys.sh - -# modify generate_ssh_keys to be a helper script -# print how to use helper script on bash startup -# Avoids loop for further execution of the startup file -ARG PACKAGE_OPTION=pip -ARG PYPATH="/usr/local/lib/python${PYTHON_VERSION}/dist-packages" -RUN if [ "${PACKAGE_OPTION}" = "idp" ]; then PYPATH="/opt/conda/envs/idp/lib/python${PYTHON_VERSION}/site-packages"; fi && \ - echo "source ${PYPATH}/oneccl_bindings_for_pytorch/env/setvars.sh" >> ~/.startup && \ - cat '/generate_ssh_keys.sh' >> ~/.startup && \ - rm -rf /generate_ssh_keys.sh - -COPY multinode/dockerd-entrypoint.sh /usr/local/bin/dockerd-entrypoint.sh -COPY multinode/sshd_config /etc/ssh/sshd_config -COPY multinode/ssh_config /etc/ssh/ssh_config - -RUN mkdir -p /licensing - -RUN wget -q --no-check-certificate https://raw.githubusercontent.com/oneapi-src/oneCCL/b7d66de16e17f88caffd7c6df4cd5e12b266af84/third-party-programs.txt -O /licensing/oneccl_third_party_programs.txt && \ - wget -q --no-check-certificate https://raw.githubusercontent.com/intel/neural-compressor/master/docker/third-party-programs-pytorch.txt -O 
/licensing/third-party-programs-pytorch.txt && \ - wget -q --no-check-certificate https://raw.githubusercontent.com/intel/neural-compressor/master/LICENSE -O /licensing/LICENSE - -ENTRYPOINT ["/usr/local/bin/dockerd-entrypoint.sh"] - -FROM multinode AS hf-genai - -COPY hf-genai-requirements.txt . - -RUN python -m pip install --no-cache-dir -r hf-genai-requirements.txt && \ - rm -rf hf-genai-requirements.txt - FROM ${PYTHON_BASE} AS ipex-xpu-base RUN apt-get update && \ @@ -194,6 +128,75 @@ COPY xpu-requirements.txt . RUN python -m pip install --no-cache-dir -r xpu-requirements.txt && \ rm -rf xpu-requirements.txt +FROM ${MULTINODE_BASE} AS multinode + +RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ + python3-dev \ + gcc \ + g++ \ + libgl1-mesa-glx \ + libglib2.0-0 \ + libopenmpi-dev \ + numactl \ + virtualenv + +RUN apt-get install -y --no-install-recommends --fix-missing \ + openssh-client \ + openssh-server && \ + rm /etc/ssh/ssh_host_*_key \ + /etc/ssh/ssh_host_*_key.pub && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +RUN mkdir -p /var/run/sshd + +ARG PYTHON_VERSION + +COPY multinode/generate_ssh_keys.sh /generate_ssh_keys.sh + +# modify generate_ssh_keys to be a helper script +# print how to use helper script on bash startup +# Avoids loop for further execution of the startup file +ARG PACKAGE_OPTION=pip +ARG PYPATH="/usr/local/lib/python${PYTHON_VERSION}/dist-packages" +RUN if [ "${PACKAGE_OPTION}" = "idp" ]; then PYPATH="/opt/conda/envs/idp/lib/python${PYTHON_VERSION}/site-packages"; fi && \ + echo "source ${PYPATH}/oneccl_bindings_for_pytorch/env/setvars.sh" >> ~/.startup && \ + cat '/generate_ssh_keys.sh' >> ~/.startup && \ + rm -rf /generate_ssh_keys.sh + +COPY multinode/dockerd-entrypoint.sh /usr/local/bin/dockerd-entrypoint.sh +COPY multinode/sshd_config /etc/ssh/sshd_config +COPY multinode/ssh_config /etc/ssh/ssh_config + +RUN mkdir -p /licensing + +RUN wget -q --no-check-certificate 
https://raw.githubusercontent.com/oneapi-src/oneCCL/b7d66de16e17f88caffd7c6df4cd5e12b266af84/third-party-programs.txt -O /licensing/oneccl_third_party_programs.txt && \ + wget -q --no-check-certificate https://raw.githubusercontent.com/intel/neural-compressor/master/docker/third-party-programs-pytorch.txt -O /licensing/third-party-programs-pytorch.txt && \ + wget -q --no-check-certificate https://raw.githubusercontent.com/intel/neural-compressor/master/LICENSE -O /licensing/LICENSE + +ENTRYPOINT ["/usr/local/bin/dockerd-entrypoint.sh"] + +FROM multinode as deepspeed-cpu + +ENV SIGOPT_PROJECT=. + +WORKDIR / +COPY multinode/requirements.txt requirements.txt + +RUN python -m pip install --no-cache-dir -r requirements.txt && \ + DS_BUILD_OPS=1 python -m pip install --no-cache-dir deepspeed==0.14.4 && \ + echo "Y" | pip uninstall nvidia-ml-py && \ + rm -rf requirements.txt + +ENV LD_LIBRARY_PATH="/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch/opt/mpi/libfabric/lib:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch/lib" + +FROM deepspeed-cpu AS hf-genai + +COPY hf-genai-requirements.txt . + +RUN python -m pip install --no-cache-dir -r hf-genai-requirements.txt && \ + rm -rf hf-genai-requirements.txt + FROM ipex-xpu-base AS ipex-xpu-jupyter WORKDIR /jupyter @@ -346,3 +349,16 @@ EXPOSE 8080 8081 8082 7070 7071 ENTRYPOINT ["/usr/local/bin/dockerd-entrypoint.sh"] CMD ["serve"] + +FROM multinode as deepspeed-xpu + +ENV SIGOPT_PROJECT=. 
+ +WORKDIR / +COPY multinode/xpu-requirements.txt xpu-requirements.txt + +RUN python -m pip install --no-cache-dir -r xpu-requirements.txt && \ + rm -rf xpu-requirements.txt + +RUN python -m pip install --no-cache-dir deepspeed==0.14.4 && \ + echo "Y" | pip uninstall nvidia-ml-py diff --git a/pytorch/docker-compose.yaml b/pytorch/docker-compose.yaml index 6aeeefc9..37bf1597 100644 --- a/pytorch/docker-compose.yaml +++ b/pytorch/docker-compose.yaml @@ -69,7 +69,7 @@ services: network_mode: host ports: - 8888:8888 - multinode: + multinode-cpu: build: labels: dependency.apt.gcc: true @@ -81,8 +81,8 @@ services: dependency.python.pip: multinode/requirements.txt org.opencontainers.base.name: "intel/intel-optimized-pytorch:${IPEX_VERSION:-2.4.0}-${PACKAGE_OPTION:-pip}-base" org.opencontainers.image.title: "Intel® Extension for PyTorch MultiNode Image" - org.opencontainers.image.version: ${IPEX_VERSION:-2.4.0}-${PACKAGE_OPTION:-pip}-multinode - target: multinode + org.opencontainers.image.version: ${IPEX_VERSION:-2.4.0}-${PACKAGE_OPTION:-pip}-multinode-cpu + target: deepspeed-cpu command: > bash -c "python -c 'import neural_compressor;import oneccl_bindings_for_pytorch as oneccl;import deepspeed; print(\"Neural Compressor:\", neural_compressor.__version__, @@ -216,3 +216,26 @@ services: image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.4.0}-hf-${HF_VERSION:-4.44.0} command: > bash -c "python -c 'import transformers; print(transformers.__version__)'" + multinode-xpu: + build: + args: + MULTINODE_BASE: ipex-xpu-base-wheels + labels: + dependency.apt.gcc: true + dependency.apt.libgl1-mesa-glx: true + dependency.apt.libglib2: true + dependency.apt.python3-dev: true + dependency.pip.apt.virtualenv: true + dependency.python.pip: multinode/xpu-requirements.txt + org.opencontainers.base.name: 
"intel/intel-optimized-pytorch:${IPEX_VERSION:-2.1.40}-xpu-${PACKAGE_OPTION:-pip}-base" + org.opencontainers.image.title: "Intel® Extension for PyTorch MultiNode XPU Image" + org.opencontainers.image.version: ${IPEX_VERSION:-2.1.40}-${PACKAGE_OPTION:-pip}-multinode-cpu-xpu + target: deepspeed-xpu + command: > + bash -c "python -c 'import neural_compressor;import oneccl_bindings_for_pytorch as oneccl;import deepspeed; + print(\"Neural Compressor:\", neural_compressor.__version__, + \"\\nOneCCL:\", oneccl.__version__, + \"\\nDeepspeed:\", deepspeed.__version__)'" + extends: xpu + image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.4.0}-oneccl-inc-${INC_VERSION:-3.0}-xpu + shm_size: 2gb diff --git a/pytorch/tests/tests.yaml b/pytorch/tests/tests.yaml index 21aeeadc..3c67bdf1 100644 --- a/pytorch/tests/tests.yaml +++ b/pytorch/tests/tests.yaml @@ -39,6 +39,10 @@ import-cpu-deepspeed-${PACKAGE_OPTION:-pip}: img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.4.0}-oneccl-inc-${INC_VERSION:-3.0} cmd: ds_report shm_size: 2gb +import-xpu-deepspeed-${PACKAGE_OPTION:-pip}: + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.4.0}-oneccl-inc-${INC_VERSION:-3.0}-xpu + cmd: ds_report + shm_size: 2gb ipex-cpu-${PACKAGE_OPTION:-pip}: img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.4.0}-base cmd: python /tests/ipex-resnet50.py --ipex --device cpu --backend gloo @@ -64,13 +68,21 @@ oneccl-${PACKAGE_OPTION:-pip}: volumes: - dst: /tests src: $PWD/pytorch/tests -oneccl-ds-${PACKAGE_OPTION:-pip}: 
+oneccl-ds-cpu-${PACKAGE_OPTION:-pip}: img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.4.0}-oneccl-inc-${INC_VERSION:-3.0} cmd: ipexrun cpu /tests/ipex-resnet50.py --ipex --device cpu --backend ccl --deepspeed privileged: true volumes: - dst: /tests src: $PWD/pytorch/tests +oneccl-ds-xpu-${PACKAGE_OPTION:-pip}: + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.4.0}-oneccl-inc-${INC_VERSION:-3.0}-xpu + cmd: ipexrun cpu /tests/ipex-resnet50.py --ipex --device xpu --backend ccl --deepspeed + privileged: true + device: ["/dev/dri"] + volumes: + - dst: /tests + src: $PWD/pytorch/tests inc-${PACKAGE_OPTION:-pip}: img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.4.0}-oneccl-inc-${INC_VERSION:-3.0} cmd: python /tests/inc-quant.py From 56be26ec9562f0a7f34c2ff2d92dda3a3c11161d Mon Sep 17 00:00:00 2001 From: Srikanth Ramakrishna Date: Tue, 27 Aug 2024 12:59:28 -0700 Subject: [PATCH 27/50] add missing file Signed-off-by: Srikanth Ramakrishna --- pytorch/multinode/xpu-requirements.txt | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 pytorch/multinode/xpu-requirements.txt diff --git a/pytorch/multinode/xpu-requirements.txt b/pytorch/multinode/xpu-requirements.txt new file mode 100644 index 00000000..b9d84777 --- /dev/null +++ b/pytorch/multinode/xpu-requirements.txt @@ -0,0 +1,4 @@ +intel_extension_for_pytorch_deepspeed==2.1.40 +--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us +py-cpuinfo==9.0.0 +mpi4py>=3.1.0 From 05e358305123bd6941433a9a141c3e7c7ea10ba6 Mon Sep 17 00:00:00 2001 From: Srikanth Ramakrishna Date: Tue, 27 Aug 2024 13:43:39 -0700 Subject: [PATCH 
28/50] add neural compressor and change deepspeed version Signed-off-by: Srikanth Ramakrishna --- pytorch/Dockerfile | 21 ++++++++++++--------- pytorch/docker-compose.yaml | 1 + pytorch/multinode/xpu-requirements.txt | 1 + 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/pytorch/Dockerfile b/pytorch/Dockerfile index 4c1b84e7..7b65c8c5 100644 --- a/pytorch/Dockerfile +++ b/pytorch/Dockerfile @@ -138,9 +138,11 @@ RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missin libglib2.0-0 \ libopenmpi-dev \ numactl \ - virtualenv + virtualenv && \ + apt-get clean -RUN apt-get install -y --no-install-recommends --fix-missing \ +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ openssh-client \ openssh-server && \ rm /etc/ssh/ssh_host_*_key \ @@ -150,18 +152,13 @@ RUN apt-get install -y --no-install-recommends --fix-missing \ RUN mkdir -p /var/run/sshd -ARG PYTHON_VERSION - COPY multinode/generate_ssh_keys.sh /generate_ssh_keys.sh # modify generate_ssh_keys to be a helper script # print how to use helper script on bash startup # Avoids loop for further execution of the startup file ARG PACKAGE_OPTION=pip -ARG PYPATH="/usr/local/lib/python${PYTHON_VERSION}/dist-packages" -RUN if [ "${PACKAGE_OPTION}" = "idp" ]; then PYPATH="/opt/conda/envs/idp/lib/python${PYTHON_VERSION}/site-packages"; fi && \ - echo "source ${PYPATH}/oneccl_bindings_for_pytorch/env/setvars.sh" >> ~/.startup && \ - cat '/generate_ssh_keys.sh' >> ~/.startup && \ +RUN cat '/generate_ssh_keys.sh' >> ~/.startup && \ rm -rf /generate_ssh_keys.sh COPY multinode/dockerd-entrypoint.sh /usr/local/bin/dockerd-entrypoint.sh @@ -178,6 +175,12 @@ ENTRYPOINT ["/usr/local/bin/dockerd-entrypoint.sh"] FROM multinode as deepspeed-cpu +ARG PYTHON_VERSION +ARG PYPATH="/usr/local/lib/python${PYTHON_VERSION}/dist-packages" + +RUN if [ "${PACKAGE_OPTION}" = "idp" ]; then PYPATH="/opt/conda/envs/idp/lib/python${PYTHON_VERSION}/site-packages"; fi && \ + echo "source 
${PYPATH}/oneccl_bindings_for_pytorch/env/setvars.sh" >> ~/.startup + ENV SIGOPT_PROJECT=. WORKDIR / @@ -360,5 +363,5 @@ COPY multinode/xpu-requirements.txt xpu-requirements.txt RUN python -m pip install --no-cache-dir -r xpu-requirements.txt && \ rm -rf xpu-requirements.txt -RUN python -m pip install --no-cache-dir deepspeed==0.14.4 && \ +RUN python -m pip install --no-cache-dir deepspeed==0.14.2 && \ echo "Y" | pip uninstall nvidia-ml-py diff --git a/pytorch/docker-compose.yaml b/pytorch/docker-compose.yaml index 37bf1597..620e32ec 100644 --- a/pytorch/docker-compose.yaml +++ b/pytorch/docker-compose.yaml @@ -226,6 +226,7 @@ services: dependency.apt.libglib2: true dependency.apt.python3-dev: true dependency.pip.apt.virtualenv: true + dependency.pip.deepspeed: 0.14.2 dependency.python.pip: multinode/xpu-requirements.txt org.opencontainers.base.name: "intel/intel-optimized-pytorch:${IPEX_VERSION:-2.1.40}-xpu-${PACKAGE_OPTION:-pip}-base" org.opencontainers.image.title: "Intel® Extension for PyTorch MultiNode XPU Image" diff --git a/pytorch/multinode/xpu-requirements.txt b/pytorch/multinode/xpu-requirements.txt index b9d84777..113b656d 100644 --- a/pytorch/multinode/xpu-requirements.txt +++ b/pytorch/multinode/xpu-requirements.txt @@ -1,3 +1,4 @@ +neural-compressor==3.0 intel_extension_for_pytorch_deepspeed==2.1.40 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us py-cpuinfo==9.0.0 From ecdeddb2b2ed5c22804f69fc67324e1bff19e872 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 28 Aug 2024 09:07:53 -0700 Subject: [PATCH 29/50] Bump the apptainer group across 1 directory with 5 updates (#335) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Srikanth Ramakrishna --- apptainer/python/requirements.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git 
a/apptainer/python/requirements.txt b/apptainer/python/requirements.txt index 9dede726..b259254d 100644 --- a/apptainer/python/requirements.txt +++ b/apptainer/python/requirements.txt @@ -1,6 +1,6 @@ -numpy==2.0.1 -setuptools==72.1.0 +numpy==2.1.0 +setuptools==73.0.1 psutil==6.0.0 -mkl==2024.2.0 -mkl-include==2024.2.0 -intel-openmp==2024.2.0 +mkl==2024.2.1 +mkl-include==2024.2.1 +intel-openmp==2024.2.1 From 577f83bf52228fedcb05c85346bf193e22c7b574 Mon Sep 17 00:00:00 2001 From: Srikanth Ramakrishna Date: Wed, 28 Aug 2024 11:40:17 -0700 Subject: [PATCH 30/50] fix idp build Signed-off-by: Srikanth Ramakrishna --- pytorch/Dockerfile | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pytorch/Dockerfile b/pytorch/Dockerfile index 7b65c8c5..060876a5 100644 --- a/pytorch/Dockerfile +++ b/pytorch/Dockerfile @@ -141,8 +141,11 @@ RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missin virtualenv && \ apt-get clean -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ +ENV SIGOPT_PROJECT=. + +ENV LD_LIBRARY_PATH="/lib/x86_64-linux-gnu":${LD_LIBRARY_PATH} + +RUN apt-get install -y --no-install-recommends \ openssh-client \ openssh-server && \ rm /etc/ssh/ssh_host_*_key \ @@ -181,8 +184,6 @@ ARG PYPATH="/usr/local/lib/python${PYTHON_VERSION}/dist-packages" RUN if [ "${PACKAGE_OPTION}" = "idp" ]; then PYPATH="/opt/conda/envs/idp/lib/python${PYTHON_VERSION}/site-packages"; fi && \ echo "source ${PYPATH}/oneccl_bindings_for_pytorch/env/setvars.sh" >> ~/.startup -ENV SIGOPT_PROJECT=. 
- WORKDIR / COPY multinode/requirements.txt requirements.txt @@ -191,7 +192,7 @@ RUN python -m pip install --no-cache-dir -r requirements.txt && \ echo "Y" | pip uninstall nvidia-ml-py && \ rm -rf requirements.txt -ENV LD_LIBRARY_PATH="/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch/opt/mpi/libfabric/lib:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch/lib" +ENV LD_LIBRARY_PATH="/usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch/opt/mpi/libfabric/lib:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch/lib" FROM deepspeed-cpu AS hf-genai From 3c44cb37d7c6a72ea31d32a66883cd9f079532ff Mon Sep 17 00:00:00 2001 From: Srikanth Ramakrishna Date: Wed, 28 Aug 2024 12:54:02 -0700 Subject: [PATCH 31/50] fix xpu jupyter build Signed-off-by: Srikanth Ramakrishna --- pytorch/Dockerfile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pytorch/Dockerfile b/pytorch/Dockerfile index 060876a5..21b7fc33 100644 --- a/pytorch/Dockerfile +++ b/pytorch/Dockerfile @@ -201,11 +201,12 @@ COPY hf-genai-requirements.txt . RUN python -m pip install --no-cache-dir -r hf-genai-requirements.txt && \ rm -rf hf-genai-requirements.txt -FROM ipex-xpu-base AS ipex-xpu-jupyter +FROM ipex-xpu-base-wheels AS ipex-xpu-jupyter WORKDIR /jupyter COPY jupyter-requirements.txt . 
-RUN python -m pip install --no-cache-dir -r jupyter-requirements.txt +RUN python -m pip install --no-cache-dir -r jupyter-requirements.txt && \ + rm -rf jupyter-requirements.txt RUN if eval "which conda >/dev/null )"; then \ echo "conda activate idp" >> ~/.bashrc; \ From 1d643a672eab8f99f6dc0c0df82c74b384e50823 Mon Sep 17 00:00:00 2001 From: Tyler Titsworth Date: Thu, 29 Aug 2024 11:23:20 -0700 Subject: [PATCH 32/50] Add Perf Sample Example Section (#307) Signed-off-by: Tyler Titsworth --- python/README.md | 43 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 34 insertions(+), 9 deletions(-) diff --git a/python/README.md b/python/README.md index 25d3d230..07d30dc0 100644 --- a/python/README.md +++ b/python/README.md @@ -1,19 +1,44 @@ -# Intel® Distribution for Python +# Intel® Distribution for Python* -[Intel® Distribution for Python] enhances performance and can improve your program speed from 10 to 100 times faster. It is a Python distribution that includes the [Intel® Math Kernel Library] (oneMKL) and other Intel performance libraries to enable near-native performance through acceleration of core numerical and machine learning packages. - -[Intel® Distribution for Python] is available as part of the [Intel® oneAPI Base Toolkit](https://software.intel.com/content/www/us/en/develop/tools/oneapi/base-toolkit.html). +[Intel® Distribution for Python*] enhances performance and can improve your program speed from 10 to 100 times faster. It is a Python* distribution that includes the [Intel® Math Kernel Library] (oneMKL) and other Intel performance libraries to enable near-native performance through acceleration of core numerical and machine learning packages. ## Images -The images below include variations for only the core packages in the [Intel® Distribution for Python] installation, or all of the packages. +The images below include variations for only the core packages in the [Intel® Distribution for Python*] installation, or all of the packages. 
| Tag(s) | IDP | | ---------------------- | ---------- | | `3.10-full`, `latest` | `2024.2.0` | | `3.10-core` | `2024.2.0` | -## Build from Source +## Run a Performance Sample + +To run a performance sample run the following commands: + +```bash +git clone https://github.com/intel/ai-containers +cd ai-containers/python +docker run --rm -it \ + -v $PWD/tests:/tests \ + intel/python:latest \ + python /tests/perf_sample.py +``` + +### Compare the results against stock python + +In the previous command, you should see a result at the bottom like: `Time Consuming: 0.03897857666015625`. We can compare this against `python:3.11-slim-bullseye` + +```bash +# Use the working directory from the above command +docker run --rm -it \ + -v $PWD/tests:/tests \ + python:3.11-slim-bullseye \ + bash +pip install numpy +python /tests/perf_sample.py +``` + +## Build from Source (Advanced) To build the images from source, clone the [AI Containers](https://github.com/intel/ai-containers) repository, follow the main `README.md` file to setup your environment, and run the following command: @@ -27,8 +52,8 @@ You can find the list of services below for each container in the group: | Service Name | Description | | ------------ | ------------------------------------------------------------------- | -| `idp` | Base image with [Intel® Distribution for Python] | -| `pip` | Equivalent python image without [Intel® Distribution for Python] | +| `idp` | Base image with [Intel® Distribution for Python*] | +| `pip` | Equivalent python image without [Intel® Distribution for Python*] | ## License @@ -40,5 +65,5 @@ It is the image user's responsibility to ensure that any use of The images below -[Intel® Distribution for Python]: https://www.intel.com/content/www/us/en/developer/tools/oneapi/distribution-for-python.html#gs.9bos9m +[Intel® Distribution for Python*]: https://www.intel.com/content/www/us/en/developer/tools/oneapi/distribution-for-python.html#gs.9bos9m [Intel® Math Kernel Library]: 
https://www.intel.com/content/www/us/en/developer/tools/oneapi/onemkl.html From 57291b3e3ddab4ada5b1b57ea01bd930bcf77c39 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 29 Aug 2024 11:33:48 -0700 Subject: [PATCH 33/50] Bump notebook from 7.2.1 to 7.2.2 in /classical-ml in the pip group (#347) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- classical-ml/jupyter-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/classical-ml/jupyter-requirements.txt b/classical-ml/jupyter-requirements.txt index 2cae0f91..2be0be31 100644 --- a/classical-ml/jupyter-requirements.txt +++ b/classical-ml/jupyter-requirements.txt @@ -1,4 +1,4 @@ jupyterlab==4.2.4 jupyterhub==5.1.0 -notebook==7.2.1 +notebook==7.2.2 jupyter-server-proxy>=4.1.2 From 3f6e3a5bc457cc82f4557134d36111d62209270e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 29 Aug 2024 11:41:03 -0700 Subject: [PATCH 34/50] Bump jupyterlab from 4.2.4 to 4.2.5 in /classical-ml in the pip group (#348) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- classical-ml/jupyter-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/classical-ml/jupyter-requirements.txt b/classical-ml/jupyter-requirements.txt index 2be0be31..d98ce88b 100644 --- a/classical-ml/jupyter-requirements.txt +++ b/classical-ml/jupyter-requirements.txt @@ -1,4 +1,4 @@ -jupyterlab==4.2.4 +jupyterlab==4.2.5 jupyterhub==5.1.0 notebook==7.2.2 jupyter-server-proxy>=4.1.2 From 69b24d3d5e2df5da32fdff7f3b3be256edee9bc1 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 2 Sep 2024 10:46:44 -0700 Subject: [PATCH 35/50] Bump the docs group in /docs with 2 updates (#351) Signed-off-by: 
dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- docs/requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index 1058a38f..33207ff8 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,9 +1,9 @@ mkdocs-callouts>=1.13.2 mkdocs-git-authors-plugin>=0.8.0 mkdocs-git-revision-date-localized-plugin>=1.2.5 -mkdocs-material==9.5.33 +mkdocs-material==9.5.34 mkdocs-table-reader-plugin>=2.1.0 -mkdocs==1.6.0 +mkdocs==1.6.1 pandas>=2.0.3 pymdown-extensions>=10.8.1 python_on_whales>=0.71.0 From b95845e7e676321799a0b695623f81171a73b098 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 2 Sep 2024 18:00:24 +0000 Subject: [PATCH 36/50] Bump actions/upload-artifact from 4.3.6 to 4.4.0 (#354) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/scorecard.yaml | 2 +- .github/workflows/security-report.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/scorecard.yaml b/.github/workflows/scorecard.yaml index 1c08423e..5c5ab9e5 100644 --- a/.github/workflows/scorecard.yaml +++ b/.github/workflows/scorecard.yaml @@ -48,7 +48,7 @@ jobs: results_format: sarif repo_token: ${{ secrets.GITHUB_TOKEN }} publish_results: true - - uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6 + - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 with: name: SARIF file path: results.sarif diff --git a/.github/workflows/security-report.yaml b/.github/workflows/security-report.yaml index a9d3b98b..07290f08 100644 --- a/.github/workflows/security-report.yaml +++ b/.github/workflows/security-report.yaml @@ -35,7 +35,7 @@ jobs: sarifReportDir: ${{ github.workspace }} template: report token: ${{ secrets.GITHUB_TOKEN }} - - uses: 
actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6 + - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 with: name: Security Report Summary path: ./*.pdf From f7e8c19499d13f7999cc3396996e54301b23ae02 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 2 Sep 2024 11:13:29 -0700 Subject: [PATCH 37/50] Bump github/codeql-action from 3.26.5 to 3.26.6 (#355) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/container-ci.yaml | 2 +- .github/workflows/scorecard.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/container-ci.yaml b/.github/workflows/container-ci.yaml index 27ccabad..a64e2307 100644 --- a/.github/workflows/container-ci.yaml +++ b/.github/workflows/container-ci.yaml @@ -155,7 +155,7 @@ jobs: - name: Cleanup if: always() run: docker rmi -f ${{ secrets.REGISTRY }}/${{ secrets.REPO }}:${{ matrix.container }} - - uses: github/codeql-action/upload-sarif@2c779ab0d087cd7fe7b826087247c2c81f27bfa6 # v3.26.5 + - uses: github/codeql-action/upload-sarif@4dd16135b69a43b6c8efb853346f8437d92d3c93 # v3.26.6 with: sarif_file: '${{ matrix.container }}-scan.sarif' category: '${{ matrix.container }}' diff --git a/.github/workflows/scorecard.yaml b/.github/workflows/scorecard.yaml index 5c5ab9e5..ecdde523 100644 --- a/.github/workflows/scorecard.yaml +++ b/.github/workflows/scorecard.yaml @@ -53,6 +53,6 @@ jobs: name: SARIF file path: results.sarif retention-days: 5 - - uses: github/codeql-action/upload-sarif@2c779ab0d087cd7fe7b826087247c2c81f27bfa6 # v3.26.5 + - uses: github/codeql-action/upload-sarif@4dd16135b69a43b6c8efb853346f8437d92d3c93 # v3.26.6 with: sarif_file: results.sarif From 536e096785a2c6905decd050a8e95513c614a784 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 2 
Sep 2024 18:27:02 +0000 Subject: [PATCH 38/50] Bump actions/setup-python from 5.1.1 to 5.2.0 (#357) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/docs.yaml | 2 +- .github/workflows/test-runner-ci.yaml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index 0b8742c6..997f3e6a 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -36,7 +36,7 @@ jobs: with: egress-policy: audit - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 - - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + - uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.8 cache: pip diff --git a/.github/workflows/test-runner-ci.yaml b/.github/workflows/test-runner-ci.yaml index 448e0970..75aa8c06 100644 --- a/.github/workflows/test-runner-ci.yaml +++ b/.github/workflows/test-runner-ci.yaml @@ -45,7 +45,7 @@ jobs: registry: ${{ secrets.REGISTRY }} username: ${{ secrets.REGISTRY_USER }} password: ${{ secrets.REGISTRY_TOKEN }} - - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + - uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: ${{ matrix.python }} - name: Install requirements @@ -88,7 +88,7 @@ jobs: registry: ${{ secrets.REGISTRY }} username: ${{ secrets.REGISTRY_USER }} password: ${{ secrets.REGISTRY_TOKEN }} - - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + - uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: "3.8" - name: Test Container Group From bde3ae0131b032a5562097bf91a2d62dd7c7cd44 Mon Sep 17 00:00:00 2001 From: Tyler Titsworth Date: Wed, 4 Sep 2024 08:20:15 -0700 Subject: [PATCH 39/50] Compare against same Python Version (#358) Signed-off-by: 
Tyler Titsworth --- python/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/README.md b/python/README.md index 07d30dc0..16b1d591 100644 --- a/python/README.md +++ b/python/README.md @@ -32,7 +32,7 @@ In the previous command, you should see a result at the bottom like: `Time Consu # Use the working directory from the above command docker run --rm -it \ -v $PWD/tests:/tests \ - python:3.11-slim-bullseye \ + python:3.10-slim-bullseye \ bash pip install numpy python /tests/perf_sample.py From 31730a917ebe8528313cdeb61de566ba8528f3df Mon Sep 17 00:00:00 2001 From: Tyler Titsworth Date: Thu, 5 Sep 2024 09:27:46 -0700 Subject: [PATCH 40/50] Create Container Map (#360) Signed-off-by: tylertitsworth --- .github/release/v0.4.0.json | 230 ++++++++++++++++++++++++++++++++++++ 1 file changed, 230 insertions(+) create mode 100644 .github/release/v0.4.0.json diff --git a/.github/release/v0.4.0.json b/.github/release/v0.4.0.json new file mode 100644 index 00000000..2fc6b506 --- /dev/null +++ b/.github/release/v0.4.0.json @@ -0,0 +1,230 @@ +[ + { + "base": "ubuntu:22.04", + "dockerfile": "python/Dockerfile", + "repo": "intel/python", + "tag": "3.10-core" + }, + { + "base": "ubuntu:22.04", + "dockerfile": "python/Dockerfile", + "repo": "intel/python", + "tag": "3.10-full" + }, + { + "base": "ubuntu:22.04", + "dockerfile": "classical-ml/Dockerfile", + "repo": "intel/intel-optimized-ml", + "tag": "2024.6.0-pip-base" + }, + { + "base": "intel/intel-optimized-ml:2024.6.0-pip-base", + "dockerfile": "classical-ml/Dockerfile", + "repo": "intel/intel-optimized-ml", + "tag": "2024.6.0-pip-jupyter" + }, + { + "base": "intel/python:3.10-core", + "dockerfile": "classical-ml/Dockerfile", + "repo": "intel/intel-optimized-ml", + "tag": "2024.6.0-idp-base" + }, + { + "base": "intel/intel-optimized-ml:2024.6.0-idp-base", + "dockerfile": "classical-ml/Dockerfile", + "repo": "intel/intel-optimized-ml", + "tag": "2024.6.0-idp-jupyter" + }, + { + "base": 
"ubuntu:22.04", + "dockerfile": "tensorflow/Dockerfile", + "repo": "intel/intel-optimized-tensorflow", + "tag": "2.15.0.1-xpu-pip-base" + }, + { + "base": "intel/intel-optimized-tensorflow:2.15.0.1-xpu-pip-base", + "dockerfile": "tensorflow/Dockerfile", + "repo": "intel/intel-optimized-tensorflow", + "tag": "2.15.0.1-xpu-pip-jupyter" + }, + { + "base": "intel/python:3.10-core", + "dockerfile": "tensorflow/Dockerfile", + "repo": "intel/intel-optimized-tensorflow", + "tag": "2.15.0.1-xpu-idp-base" + }, + { + "base": "intel/intel-optimized-tensorflow:2.15.0.1-xpu-idp-base", + "dockerfile": "tensorflow/Dockerfile", + "repo": "intel/intel-optimized-tensorflow", + "tag": "2.15.0.1-xpu-idp-jupyter" + }, + { + "base": "ubuntu:22.04", + "dockerfile": "tensorflow/Dockerfile", + "repo": "intel/intel-optimized-tensorflow", + "tag": "2.15.1-pip-base" + }, + { + "base": "intel/intel-optimized-tensorflow:2.15.1-pip-base", + "dockerfile": "tensorflow/Dockerfile", + "repo": "intel/intel-optimized-tensorflow", + "tag": "2.15.1-pip-jupyter" + }, + { + "base": "intel/intel-optimized-tensorflow:2.15.1-pip-base", + "dockerfile": "tensorflow/Dockerfile", + "repo": "intel/intel-optimized-tensorflow", + "tag": "2.15.1-pip-multinode" + }, + { + "base": "intel/python:3.10-core", + "dockerfile": "tensorflow/Dockerfile", + "repo": "intel/intel-optimized-tensorflow", + "tag": "2.15.1-idp-base" + }, + { + "base": "intel/intel-optimized-tensorflow:2.15.1-idp-base", + "dockerfile": "tensorflow/Dockerfile", + "repo": "intel/intel-optimized-tensorflow", + "tag": "2.15.1-idp-jupyter" + }, + { + "base": "intel/intel-optimized-tensorflow:2.15.1-idp-base", + "dockerfile": "tensorflow/Dockerfile", + "repo": "intel/intel-optimized-tensorflow", + "tag": "2.15.1-idp-multinode" + }, + { + "base": "ubuntu:22.04", + "dockerfile": "pytorch/Dockerfile", + "repo": "intel/intel-optimized-pytorch", + "tag": "2.1.40-xpu-pip-base" + }, + { + "base": "intel/intel-optimized-pytorch:2.1.40-xpu-pip-base", + "dockerfile": 
"pytorch/Dockerfile", + "repo": "intel/intel-optimized-pytorch", + "tag": "2.1.40-xpu-pip-jupyter" + }, + { + "base": "intel/python:3.10-core", + "dockerfile": "pytorch/Dockerfile", + "repo": "intel/intel-optimized-pytorch", + "tag": "2.1.40-xpu-idp-base" + }, + { + "base": "intel/intel-optimized-pytorch:2.1.40-xpu-idp-base", + "dockerfile": "pytorch/Dockerfile", + "repo": "intel/intel-optimized-pytorch", + "tag": "2.1.40-xpu-idp-jupyter" + }, + { + "base": "ubuntu:22.04", + "dockerfile": "pytorch/Dockerfile", + "repo": "intel/intel-optimized-pytorch", + "tag": "2.4.0-pip-base" + }, + { + "base": "intel/intel-optimized-pytorch:2.4.0-pip-base", + "dockerfile": "pytorch/Dockerfile", + "repo": "intel/intel-optimized-pytorch", + "tag": "2.4.0-pip-jupyter" + }, + { + "base": "intel/intel-optimized-pytorch:2.4.0-pip-base", + "dockerfile": "pytorch/Dockerfile", + "repo": "intel/intel-optimized-pytorch", + "tag": "2.4.0-pip-multinode" + }, + { + "base": "intel/intel-optimized-pytorch:2.4.0-pip-multinode", + "dockerfile": "pytorch/Dockerfile", + "repo": "intel/intel-optimized-pytorch", + "tag": "2.4.0-pip-multinode-hf-4.44.0-genai" + }, + { + "base": "intel/python:3.10-core", + "dockerfile": "pytorch/Dockerfile", + "repo": "intel/intel-optimized-pytorch", + "tag": "2.4.0-idp-base" + }, + { + "base": "intel/intel-optimized-pytorch:2.4.0-idp-base", + "dockerfile": "pytorch/Dockerfile", + "repo": "intel/intel-optimized-pytorch", + "tag": "2.4.0-idp-jupyter" + }, + { + "base": "intel/intel-optimized-pytorch:2.4.0-idp-base", + "dockerfile": "pytorch/Dockerfile", + "repo": "intel/intel-optimized-pytorch", + "tag": "2.4.0-idp-multinode" + }, + { + "base": "intel/intel-optimized-pytorch:2.4.0-idp-multinode", + "dockerfile": "pytorch/Dockerfile", + "repo": "intel/intel-optimized-pytorch", + "tag": "2.4.0-idp-multinode-hf-4.44.0-genai" + }, + { + "base": "ubuntu:22.04", + "dockerfile": "pytorch/Dockerfile", + "repo": "intel/intel-optimized-pytorch", + "tag": "2.4.0-serving-cpu" + }, 
+ { + "base": "ubuntu:22.04", + "dockerfile": "pytorch/Dockerfile", + "repo": "intel/intel-optimized-pytorch", + "tag": "2.4.0-serving-xpu" + }, + { + "base": "ubuntu:22.04", + "dockerfile": "preset/classical-ml", + "repo": "intel/classical-ml", + "tag": "latest-py3.9" + }, + { + "base": "ubuntu:22.04", + "dockerfile": "preset/classical-ml", + "repo": "intel/classical-ml", + "tag": "latest-py3.10" + }, + { + "base": "ubuntu:22.04", + "dockerfile": "preset/data-analytics", + "repo": "intel/data-analytics", + "tag": "latest-py3.9" + }, + { + "base": "ubuntu:22.04", + "dockerfile": "preset/data-analytics", + "repo": "intel/data-analytics", + "tag": "latest-py3.10" + }, + { + "base": "ubuntu:22.04", + "dockerfile": "preset/deep-learning", + "repo": "intel/deep-learning", + "tag": "latest-py3.9" + }, + { + "base": "ubuntu:22.04", + "dockerfile": "preset/deep-learning", + "repo": "intel/deep-learning", + "tag": "latest-py3.10" + }, + { + "base": "ubuntu:22.04", + "dockerfile": "preset/inference-optimization", + "repo": "intel/inference-optimization", + "tag": "latest-py3.9" + }, + { + "base": "ubuntu:22.04", + "dockerfile": "preset/inference-optimization", + "repo": "intel/inference-optimization", + "tag": "latest-py3.10" + } +] From 0af8f7ff5574664f2687f5467b6dc6a0ee153493 Mon Sep 17 00:00:00 2001 From: Tyler Titsworth Date: Fri, 6 Sep 2024 08:19:56 -0700 Subject: [PATCH 41/50] Patch Container Mapping (#361) Signed-off-by: Tyler Titsworth --- .github/release/v0.4.0.json | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/.github/release/v0.4.0.json b/.github/release/v0.4.0.json index 2fc6b506..f9cd6ef2 100644 --- a/.github/release/v0.4.0.json +++ b/.github/release/v0.4.0.json @@ -81,19 +81,19 @@ "base": "intel/python:3.10-core", "dockerfile": "tensorflow/Dockerfile", "repo": "intel/intel-optimized-tensorflow", - "tag": "2.15.1-idp-base" + "tag": "2.15.0-idp-base" }, { - "base": 
"intel/intel-optimized-tensorflow:2.15.1-idp-base", + "base": "intel/intel-optimized-tensorflow:2.15.0-idp-base", "dockerfile": "tensorflow/Dockerfile", "repo": "intel/intel-optimized-tensorflow", - "tag": "2.15.1-idp-jupyter" + "tag": "2.15.0-idp-jupyter" }, { - "base": "intel/intel-optimized-tensorflow:2.15.1-idp-base", + "base": "intel/intel-optimized-tensorflow:2.15.0-idp-base", "dockerfile": "tensorflow/Dockerfile", "repo": "intel/intel-optimized-tensorflow", - "tag": "2.15.1-idp-multinode" + "tag": "2.15.0-idp-multinode" }, { "base": "ubuntu:22.04", @@ -181,49 +181,49 @@ }, { "base": "ubuntu:22.04", - "dockerfile": "preset/classical-ml", + "dockerfile": "preset/classical-ml/Dockerfile", "repo": "intel/classical-ml", "tag": "latest-py3.9" }, { "base": "ubuntu:22.04", - "dockerfile": "preset/classical-ml", + "dockerfile": "preset/classical-ml/Dockerfile", "repo": "intel/classical-ml", "tag": "latest-py3.10" }, { "base": "ubuntu:22.04", - "dockerfile": "preset/data-analytics", + "dockerfile": "preset/data-analytics/Dockerfile", "repo": "intel/data-analytics", "tag": "latest-py3.9" }, { "base": "ubuntu:22.04", - "dockerfile": "preset/data-analytics", + "dockerfile": "preset/data-analytics/Dockerfile", "repo": "intel/data-analytics", "tag": "latest-py3.10" }, { "base": "ubuntu:22.04", - "dockerfile": "preset/deep-learning", + "dockerfile": "preset/deep-learning/Dockerfile", "repo": "intel/deep-learning", "tag": "latest-py3.9" }, { "base": "ubuntu:22.04", - "dockerfile": "preset/deep-learning", + "dockerfile": "preset/deep-learning/Dockerfile", "repo": "intel/deep-learning", "tag": "latest-py3.10" }, { "base": "ubuntu:22.04", - "dockerfile": "preset/inference-optimization", + "dockerfile": "preset/inference-optimization/Dockerfile", "repo": "intel/inference-optimization", "tag": "latest-py3.9" }, { "base": "ubuntu:22.04", - "dockerfile": "preset/inference-optimization", + "dockerfile": "preset/inference-optimization/Dockerfile", "repo": 
"intel/inference-optimization", "tag": "latest-py3.10" } From 2e9bee7260fc0f9b2d15a2f993fb0517bdbe561e Mon Sep 17 00:00:00 2001 From: Tyler Titsworth Date: Fri, 6 Sep 2024 09:51:51 -0700 Subject: [PATCH 42/50] Patch container mapping (2) (#363) Signed-off-by: Tyler Titsworth --- .github/release/v0.4.0.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/release/v0.4.0.json b/.github/release/v0.4.0.json index f9cd6ef2..671769e9 100644 --- a/.github/release/v0.4.0.json +++ b/.github/release/v0.4.0.json @@ -141,7 +141,7 @@ "base": "intel/intel-optimized-pytorch:2.4.0-pip-multinode", "dockerfile": "pytorch/Dockerfile", "repo": "intel/intel-optimized-pytorch", - "tag": "2.4.0-pip-multinode-hf-4.44.0-genai" + "tag": "2.4.0-pip-hf-4.44.0-genai" }, { "base": "intel/python:3.10-core", @@ -165,7 +165,7 @@ "base": "intel/intel-optimized-pytorch:2.4.0-idp-multinode", "dockerfile": "pytorch/Dockerfile", "repo": "intel/intel-optimized-pytorch", - "tag": "2.4.0-idp-multinode-hf-4.44.0-genai" + "tag": "2.4.0-idp-hf-4.44.0-genai" }, { "base": "ubuntu:22.04", From 02e20979861586db7e849654e793be96849c996d Mon Sep 17 00:00:00 2001 From: Tyler Titsworth Date: Fri, 6 Sep 2024 17:04:14 -0700 Subject: [PATCH 43/50] Test CPU Validation Runners (#362) Signed-off-by: Tyler Titsworth Signed-off-by: tylertitsworth --- classical-ml/.actions.json | 2 +- preset/classical-ml/.actions.json | 2 +- preset/classical-ml/tests.yaml | 12 ++++++------ preset/data-analytics/.actions.json | 2 +- preset/data-analytics/tests.yaml | 6 +++--- python/.actions.json | 2 +- 6 files changed, 13 insertions(+), 13 deletions(-) diff --git a/classical-ml/.actions.json b/classical-ml/.actions.json index 36e21ad8..e7e793d3 100644 --- a/classical-ml/.actions.json +++ b/classical-ml/.actions.json @@ -1,5 +1,5 @@ { "PACKAGE_OPTION": ["idp", "pip"], "experimental": [true], - "runner_label": ["PVC"] + "runner_label": ["clx"] } diff --git a/preset/classical-ml/.actions.json 
b/preset/classical-ml/.actions.json index 639f025c..bc955304 100644 --- a/preset/classical-ml/.actions.json +++ b/preset/classical-ml/.actions.json @@ -1,5 +1,5 @@ { "PYTHON_VERSION": ["3.9", "3.10"], "experimental": [true], - "runner_label": ["PVC"] + "runner_label": ["clx"] } diff --git a/preset/classical-ml/tests.yaml b/preset/classical-ml/tests.yaml index 14529526..919eb4e9 100644 --- a/preset/classical-ml/tests.yaml +++ b/preset/classical-ml/tests.yaml @@ -21,23 +21,23 @@ # img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-classical-ml-${RELEASE:-2024.1.0}-py3.9 modin-${PYTHON_VERSION:-3.9}: cmd: conda run -n classical-ml sample-tests/modin/test_modin.sh - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-classical-ml-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-classical-ml-${RELEASE:-2024.2.0}-py${PYTHON_VERSION:-3.9} shm_size: 10.24G modin-notebook-${PYTHON_VERSION:-3.9}: cmd: papermill --log-output jupyter/modin/IntelModin_Vs_Pandas.ipynb -k classical-ml - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-classical-ml-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-classical-ml-${RELEASE:-2024.2.0}-py${PYTHON_VERSION:-3.9} notebook: True scikit-${PYTHON_VERSION:-3.9}: cmd: conda run -n classical-ml sample-tests/scikit/test_scikit.sh - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-classical-ml-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-classical-ml-${RELEASE:-2024.2.0}-py${PYTHON_VERSION:-3.9} scikit-notebook-${PYTHON_VERSION:-3.9}: cmd: papermill --log-output jupyter/sklearn/Intel_Extension_For_SKLearn_GettingStarted.ipynb -k classical-ml - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-classical-ml-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-classical-ml-${RELEASE:-2024.2.0}-py${PYTHON_VERSION:-3.9} notebook: True 
xgboost-${PYTHON_VERSION:-3.9}: cmd: conda run -n classical-ml sample-tests/xgboost/test_xgboost.sh - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-classical-ml-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-classical-ml-${RELEASE:-2024.2.0}-py${PYTHON_VERSION:-3.9} xgboost-notebook-${PYTHON_VERSION:-3.9}: cmd: papermill --log-output jupyter/xgboost/IntelPython_XGBoost_Performance.ipynb -k classical-ml - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-classical-ml-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-classical-ml-${RELEASE:-2024.2.0}-py${PYTHON_VERSION:-3.9} notebook: True diff --git a/preset/data-analytics/.actions.json b/preset/data-analytics/.actions.json index 639f025c..bc955304 100644 --- a/preset/data-analytics/.actions.json +++ b/preset/data-analytics/.actions.json @@ -1,5 +1,5 @@ { "PYTHON_VERSION": ["3.9", "3.10"], "experimental": [true], - "runner_label": ["PVC"] + "runner_label": ["clx"] } diff --git a/preset/data-analytics/tests.yaml b/preset/data-analytics/tests.yaml index 5aff8c81..846bf0b9 100644 --- a/preset/data-analytics/tests.yaml +++ b/preset/data-analytics/tests.yaml @@ -14,12 +14,12 @@ dataset-librarian-${PYTHON_VERSION:-3.9}: cmd: conda run -n data-analytics bash -c 'yes | python -m dataset_librarian.dataset -n msmarco --download -d ~/msmarco' - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-data-analytics-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-data-analytics-${RELEASE:-2024.2.0}-py${PYTHON_VERSION:-3.9} modin-${PYTHON_VERSION:-3.9}: cmd: conda run -n data-analytics sample-tests/modin/test_modin.sh - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-data-analytics-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-data-analytics-${RELEASE:-2024.2.0}-py${PYTHON_VERSION:-3.9} shm_size: 10G 
modin-notebook-${PYTHON_VERSION:-3.9}: cmd: papermill --log-output jupyter/modin/IntelModin_Vs_Pandas.ipynb -k data-analytics - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-data-analytics-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-data-analytics-${RELEASE:-2024.2.0}-py${PYTHON_VERSION:-3.9} notebook: True diff --git a/python/.actions.json b/python/.actions.json index db103a36..1112d076 100644 --- a/python/.actions.json +++ b/python/.actions.json @@ -1,5 +1,5 @@ { "IDP_VERSION": ["full", "core"], "experimental": [true], - "runner_label": ["PVC"] + "runner_label": ["clx"] } From d1cc89f0c568c1a8c0ce6a23076cc9d33544ba53 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 9 Sep 2024 08:34:15 -0700 Subject: [PATCH 44/50] Bump pydantic from 2.8.2 to 2.9.1 in /test-runner in the test-runner group (#371) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- test-runner/dev-requirements.txt | 2 +- test-runner/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test-runner/dev-requirements.txt b/test-runner/dev-requirements.txt index 2f4f8fbf..e409fb0f 100644 --- a/test-runner/dev-requirements.txt +++ b/test-runner/dev-requirements.txt @@ -3,7 +3,7 @@ coverage>=7.5.0 coveralls>=4.0.1 expandvars>=0.12.0 hypothesis>=6.100.1 -pydantic==2.8.2 +pydantic==2.9.1 pylint>=3.1.0 pytest>=8.1.1 python_on_whales>=0.70.1 diff --git a/test-runner/requirements.txt b/test-runner/requirements.txt index 79752623..ee95cc0a 100644 --- a/test-runner/requirements.txt +++ b/test-runner/requirements.txt @@ -1,5 +1,5 @@ expandvars>=0.12.0 -pydantic==2.8.2 +pydantic==2.9.1 python_on_whales>=0.70.1 pyyaml>=6.0.1 tabulate>=0.9.0 From 70bfa5655e2ed6e7023eaa2e195a5f0eae61e58f Mon Sep 17 00:00:00 2001 From: Sharvil Shah Date: Tue, 10 Sep 2024 10:03:50 -0700 Subject: [PATCH 45/50] Gaudi 
OpenShift notebook container (#365) Signed-off-by: sharvil10 --- .github/dependabot.yml | 8 + .../gaudi/demo/oneapi-sample.ipynb | 315 ++++++++++++++++++ .../gaudi/docker/Dockerfile.rhel9.2 | 236 +++++++++++++ .../gaudi/docker/Dockerfile.rhel9.4 | 249 ++++++++++++++ .../openshift-ai/gaudi/docker/builder/run | 39 +++ .../gaudi/docker/docker-compose.yaml | 64 ++++ .../gaudi/docker/install-python310.sh | 103 ++++++ .../openshift-ai/gaudi/docker/install_efa.sh | 40 +++ .../gaudi/docker/install_packages.sh | 46 +++ .../gaudi/docker/requirements.txt | 43 +++ .../gaudi/docker/start-notebook.sh | 59 ++++ .../gaudi/docker/utils/process.sh | 34 ++ .../openshift-ai/{ => oneapi}/README.md | 0 .../{ => oneapi}/assets/step-1.png | Bin .../{ => oneapi}/assets/step-2.png | Bin .../{ => oneapi}/assets/step-3.png | Bin .../{ => oneapi}/assets/step-4.png | Bin .../manifests/intel-optimized-ml.yaml | 0 .../manifests/intel-optimized-pytorch.yaml | 0 .../manifests/intel-optimized-tensorflow.yaml | 0 20 files changed, 1236 insertions(+) create mode 100644 enterprise/redhat/openshift-ai/gaudi/demo/oneapi-sample.ipynb create mode 100644 enterprise/redhat/openshift-ai/gaudi/docker/Dockerfile.rhel9.2 create mode 100644 enterprise/redhat/openshift-ai/gaudi/docker/Dockerfile.rhel9.4 create mode 100755 enterprise/redhat/openshift-ai/gaudi/docker/builder/run create mode 100644 enterprise/redhat/openshift-ai/gaudi/docker/docker-compose.yaml create mode 100755 enterprise/redhat/openshift-ai/gaudi/docker/install-python310.sh create mode 100755 enterprise/redhat/openshift-ai/gaudi/docker/install_efa.sh create mode 100755 enterprise/redhat/openshift-ai/gaudi/docker/install_packages.sh create mode 100644 enterprise/redhat/openshift-ai/gaudi/docker/requirements.txt create mode 100755 enterprise/redhat/openshift-ai/gaudi/docker/start-notebook.sh create mode 100755 enterprise/redhat/openshift-ai/gaudi/docker/utils/process.sh rename enterprise/redhat/openshift-ai/{ => oneapi}/README.md (100%) rename 
enterprise/redhat/openshift-ai/{ => oneapi}/assets/step-1.png (100%) rename enterprise/redhat/openshift-ai/{ => oneapi}/assets/step-2.png (100%) rename enterprise/redhat/openshift-ai/{ => oneapi}/assets/step-3.png (100%) rename enterprise/redhat/openshift-ai/{ => oneapi}/assets/step-4.png (100%) rename enterprise/redhat/openshift-ai/{ => oneapi}/manifests/intel-optimized-ml.yaml (100%) rename enterprise/redhat/openshift-ai/{ => oneapi}/manifests/intel-optimized-pytorch.yaml (100%) rename enterprise/redhat/openshift-ai/{ => oneapi}/manifests/intel-optimized-tensorflow.yaml (100%) diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 1fe07741..4289ee44 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -95,3 +95,11 @@ updates: package-ecosystem: pip schedule: interval: weekly + - directory: enterprise/redhat/openshift-ai/gaudi/docker + groups: + gaudi-openshift: + patterns: + - "requirements.txt" + package-ecosystem: pip + schedule: + interval: weekly diff --git a/enterprise/redhat/openshift-ai/gaudi/demo/oneapi-sample.ipynb b/enterprise/redhat/openshift-ai/gaudi/demo/oneapi-sample.ipynb new file mode 100644 index 00000000..9bd94af3 --- /dev/null +++ b/enterprise/redhat/openshift-ai/gaudi/demo/oneapi-sample.ipynb @@ -0,0 +1,315 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "1e973d1b-c6d0-48a5-a774-0f114101e81e", + "metadata": {}, + "source": [ + "# Getting started with PyTorch on Intel® Gaudi.\n", + "\n", + "This notebook is to help you get started quickly using the Intel® Gaudi accelerator in this container. A simple MNIST model is trained on the Gaudi acclerator. You can tune some of the parameters below to change configuration of the training. For more information and reference please refer to the official documentation of [Intel® Gaudi acclerator](https://docs.habana.ai/en/latest/index.html)." 
+ ] + }, + { + "cell_type": "markdown", + "id": "7eaacf55-bea2-43be-bb48-163848db1a30", + "metadata": { + "tags": [] + }, + "source": [ + "### Setup modes for training\n", + "\n", + "1. lazy_mode: Set to True(False) to enable(disable) lazy mode.\n", + "2. enable_amp: Set to True(False) to enable Automatic Mixed Precision.\n", + "3. epochs: Number of epochs for training\n", + "4. lr: Learning rate for training\n", + "5. batch_size: Number of samples in a batch\n", + "6. milestones: Milestone epochs for the stepLR scheduler." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5e7cf831-6fe6-46ed-a6fd-f2651cc226af", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "lazy_mode = False\n", + "enable_amp = False\n", + "epochs = 20\n", + "batch_size = 128\n", + "lr = 0.01\n", + "milestones = [10,15]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cee8ad90-c52d-4a50-876f-ce0762cb1b62", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import os\n", + "os.environ['HABANA_LOGS']='/opt/app-root/logs'\n", + "if lazy_mode:\n", + " os.environ['PT_HPU_LAZY_MODE'] = '1'\n", + "else:\n", + " os.environ['PT_HPU_LAZY_MODE'] = '0'" + ] + }, + { + "cell_type": "markdown", + "id": "6eac33d0-2e64-4233-8b3f-40bb7217fef8", + "metadata": { + "tags": [] + }, + "source": [ + "### Import packages" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "06ad44ff-9744-4d6f-af90-375e64717b59", + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "import torch.nn as nn\n", + "import torch.optim as optim\n", + "import torch.nn.functional as F\n", + "import torchvision\n", + "import torchvision.transforms as transforms\n", + "import os\n", + "\n", + "# Import Habana Torch Library\n", + "import habana_frameworks.torch.core as htcore" + ] + }, + { + "cell_type": "markdown", + "id": "062de7f3-4561-4af3-a9ed-2c4cfc918f2f", + "metadata": {}, + "source": [ + "### Define Model" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "id": "9df57abb-0b63-4e1c-9d9b-87e74964300e", + "metadata": {}, + "outputs": [], + "source": [ + "class SimpleModel(nn.Module):\n", + " def __init__(self):\n", + " super(SimpleModel, self).__init__()\n", + "\n", + " self.fc1 = nn.Linear(784, 256)\n", + " self.fc2 = nn.Linear(256, 64)\n", + " self.fc3 = nn.Linear(64, 10)\n", + "\n", + " def forward(self, x):\n", + "\n", + " out = x.view(-1,28*28)\n", + " out = F.relu(self.fc1(out))\n", + " out = F.relu(self.fc2(out))\n", + " out = self.fc3(out)\n", + "\n", + " return out" + ] + }, + { + "cell_type": "markdown", + "id": "d899885b-5b4d-4557-a90c-9d507875c2ee", + "metadata": {}, + "source": [ + "### Define training routine" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7b17e9aa-fa11-4870-a7d4-183b803177ab", + "metadata": {}, + "outputs": [], + "source": [ + "def train(net,criterion,optimizer,trainloader,device):\n", + "\n", + " net.train()\n", + " if not lazy_mode:\n", + " net = torch.compile(net,backend=\"hpu_backend\")\n", + " train_loss = 0.0\n", + " correct = 0\n", + " total = 0\n", + "\n", + " for batch_idx, (data, targets) in enumerate(trainloader):\n", + "\n", + " data, targets = data.to(device), targets.to(device)\n", + "\n", + " optimizer.zero_grad()\n", + " if enable_amp:\n", + " with torch.autocast(device_type=\"hpu\", dtype=torch.bfloat16):\n", + " outputs = net(data)\n", + " loss = criterion(outputs, targets)\n", + " else:\n", + " outputs = net(data)\n", + " loss = criterion(outputs, targets)\n", + "\n", + " loss.backward()\n", + " \n", + " # API call to trigger execution\n", + " if lazy_mode:\n", + " htcore.mark_step()\n", + " \n", + " optimizer.step()\n", + "\n", + " # API call to trigger execution\n", + " if lazy_mode:\n", + " htcore.mark_step()\n", + "\n", + " train_loss += loss.item()\n", + " _, predicted = outputs.max(1)\n", + " total += targets.size(0)\n", + " correct += predicted.eq(targets).sum().item()\n", + "\n", + " 
train_loss = train_loss/(batch_idx+1)\n", + " train_acc = 100.0*(correct/total)\n", + " print(\"Training loss is {} and training accuracy is {}\".format(train_loss,train_acc))" + ] + }, + { + "cell_type": "markdown", + "id": "b7a22d69-a91f-48e1-8fac-e1cfe68590b7", + "metadata": {}, + "source": [ + "### Define testing routine" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f9aa379b-b376-4623-9b5c-f778c3d90ce7", + "metadata": {}, + "outputs": [], + "source": [ + "def test(net,criterion,testloader,device):\n", + "\n", + " net.eval()\n", + " test_loss = 0\n", + " correct = 0\n", + " total = 0\n", + "\n", + " with torch.no_grad():\n", + "\n", + " for batch_idx, (data, targets) in enumerate(testloader):\n", + "\n", + " data, targets = data.to(device), targets.to(device)\n", + " \n", + " if enable_amp:\n", + " with torch.autocast(device_type=\"hpu\", dtype=torch.bfloat16):\n", + " outputs = net(data)\n", + " loss = criterion(outputs, targets)\n", + " else:\n", + " outputs = net(data)\n", + " loss = criterion(outputs, targets)\n", + "\n", + "\n", + " # API call to trigger execution\n", + " if lazy_mode:\n", + " htcore.mark_step()\n", + "\n", + " test_loss += loss.item()\n", + " _, predicted = outputs.max(1)\n", + " total += targets.size(0)\n", + " correct += predicted.eq(targets).sum().item()\n", + "\n", + " test_loss = test_loss/(batch_idx+1)\n", + " test_acc = 100.0*(correct/total)\n", + " print(\"Testing loss is {} and testing accuracy is {}\".format(test_loss,test_acc))" + ] + }, + { + "cell_type": "markdown", + "id": "22e76af9-e355-4299-b84d-f34c9a25e76d", + "metadata": {}, + "source": [ + "### Run the main routine to train and test the model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c8ddfb1-d4f7-44b2-aff0-f86f1db8c971", + "metadata": {}, + "outputs": [], + "source": [ + "load_path = './data'\n", + "save_path = './checkpoints'\n", + "\n", + "if(not os.path.exists(save_path)):\n", + " os.makedirs(save_path)\n", + 
"\n", + "# Target the Gaudi HPU device\n", + "device = torch.device(\"hpu\")\n", + "\n", + "# Data\n", + "transform = transforms.Compose([\n", + " transforms.ToTensor(),\n", + "])\n", + "\n", + "trainset = torchvision.datasets.MNIST(root=load_path, train=True,\n", + " download=True, transform=transform)\n", + "trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,\n", + " shuffle=True, num_workers=2)\n", + "testset = torchvision.datasets.MNIST(root=load_path, train=False,\n", + " download=True, transform=transform)\n", + "testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,\n", + " shuffle=False, num_workers=2)\n", + "\n", + "net = SimpleModel()\n", + "net.to(device)\n", + "\n", + "criterion = nn.CrossEntropyLoss()\n", + "optimizer = optim.SGD(net.parameters(), lr=lr,\n", + " momentum=0.9, weight_decay=5e-4)\n", + "scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=milestones, gamma=0.1)\n", + "\n", + "for epoch in range(1, epochs+1):\n", + " print(\"=====================================================================\")\n", + " print(\"Epoch : {}\".format(epoch))\n", + " train(net,criterion,optimizer,trainloader,device)\n", + " test(net,criterion,testloader,device)\n", + "\n", + " torch.save(net.state_dict(), os.path.join(save_path,'epoch_{}.pth'.format(epoch)))\n", + "\n", + " scheduler.step()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.10", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/enterprise/redhat/openshift-ai/gaudi/docker/Dockerfile.rhel9.2 b/enterprise/redhat/openshift-ai/gaudi/docker/Dockerfile.rhel9.2 new file mode 100644 index 00000000..d1449fd8 
--- /dev/null +++ b/enterprise/redhat/openshift-ai/gaudi/docker/Dockerfile.rhel9.2 @@ -0,0 +1,236 @@ +ARG BASE_IMAGE +ARG BASE_TAG +FROM ${BASE_IMAGE}:${BASE_TAG} AS gaudi-base +ARG ARTIFACTORY_URL +ARG VERSION +ARG REVISION + +LABEL vendor="Intel Corporation" +LABEL release="${VERSION}-${REVISION}" + +ENV HOME="/opt/app-root/src" +WORKDIR /opt/app-root/src + +RUN dnf install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && \ + dnf clean all && rm -rf /var/cache/yum + +RUN echo "[BaseOS]" > /etc/yum.repos.d/CentOS-Linux-BaseOS.repo && \ + echo "name=CentOS Linux 9 - BaseOS" >> /etc/yum.repos.d/CentOS-Linux-BaseOS.repo && \ + echo "baseurl=https://mirror.stream.centos.org/9-stream/BaseOS/x86_64/os" >> /etc/yum.repos.d/CentOS-Linux-BaseOS.repo && \ + echo "gpgkey=https://www.centos.org/keys/RPM-GPG-KEY-CentOS-Official-SHA256" >> /etc/yum.repos.d/CentOS-Linux-BaseOS.repo && \ + echo "gpgcheck=1" >> /etc/yum.repos.d/CentOS-Linux-BaseOS.repo + +RUN echo "[centos9]" > /etc/yum.repos.d/CentOS-Linux-AppStream.repo && \ + echo "name=CentOS Linux 9 - AppStream" >> /etc/yum.repos.d/CentOS-Linux-AppStream.repo && \ + echo "baseurl=https://mirror.stream.centos.org/9-stream/AppStream/x86_64/os" >> /etc/yum.repos.d/CentOS-Linux-AppStream.repo && \ + echo "gpgkey=https://www.centos.org/keys/RPM-GPG-KEY-CentOS-Official-SHA256" >> /etc/yum.repos.d/CentOS-Linux-AppStream.repo && \ + echo "gpgcheck=1" >> /etc/yum.repos.d/CentOS-Linux-AppStream.repo + +RUN dnf install -y \ + clang \ + cmake3 \ + cpp \ + gcc \ + gcc-c++ \ + glibc \ + glibc-headers \ + glibc-devel \ + jemalloc \ + libarchive \ + libksba \ + unzip \ + llvm \ + lsof \ + python3-devel \ + openssh-clients \ + openssl \ + openssl-devel \ + libjpeg-devel \ + openssh-server \ + lsb_release \ + wget \ + git \ + libffi-devel \ + bzip2-devel \ + zlib-devel \ + mesa-libGL \ + iproute \ + python3-dnf-plugin-versionlock && \ + # update pkgs (except OS version) for resolving potentials CVEs + dnf 
versionlock add redhat-release* && \ + dnf update -y && \ + dnf clean all && rm -rf /var/cache/yum + +RUN mkdir -p /licenses && \ + wget -O /licenses/LICENSE https://raw.githubusercontent.com/intel/ai-containers/main/LICENSE + +ENV PYTHON_VERSION=3.10 +COPY install-python310.sh . +RUN ./install-python310.sh rhel9.2 && rm install-python310.sh +ENV LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH + +COPY install_efa.sh . +RUN ./install_efa.sh && rm install_efa.sh && rm -rf /etc/ld.so.conf.d/efa.conf /etc/profile.d/efa.sh + +ENV LIBFABRIC_VERSION="1.20.0" +ENV LIBFABRIC_ROOT="/opt/habanalabs/libfabric-${LIBFABRIC_VERSION}" +ENV MPI_ROOT=/opt/amazon/openmpi +ENV LD_LIBRARY_PATH=$LIBFABRIC_ROOT/lib:${MPI_ROOT}/lib:/usr/lib/habanalabs:$LD_LIBRARY_PATH +ENV PATH=${LIBFABRIC_ROOT}/bin:${MPI_ROOT}/bin:$PATH +ENV OPAL_PREFIX=${MPI_ROOT} +ENV MPICC=${MPI_ROOT}/bin/mpicc +ENV RDMAV_FORK_SAFE=1 +ENV FI_EFA_USE_DEVICE_RDMA=1 + +RUN echo "[habanalabs]" > /etc/yum.repos.d/habanalabs.repo && \ + echo "name=Habana RH9 Linux repo" >> /etc/yum.repos.d/habanalabs.repo && \ + echo "baseurl=https://${ARTIFACTORY_URL}/artifactory/rhel/9/9.2" >> /etc/yum.repos.d/habanalabs.repo && \ + echo "gpgkey=https://${ARTIFACTORY_URL}/artifactory/rhel/9/9.2/repodata/repomd.xml.key" >> /etc/yum.repos.d/habanalabs.repo && \ + echo "gpgcheck=1" >> /etc/yum.repos.d/habanalabs.repo + +# for Habana GPG key with SHA-1 signature +RUN update-crypto-policies --set DEFAULT:SHA1 + +RUN dnf install -y habanalabs-rdma-core-"$VERSION"-"$REVISION".el9 \ + habanalabs-thunk-"$VERSION"-"$REVISION".el9 \ + habanalabs-firmware-tools-"$VERSION"-"$REVISION".el9 \ + habanalabs-graph-"$VERSION"-"$REVISION".el9 && \ + rm -f /etc/yum.repos.d/habanalabs.repo && rm -f /etc/yum.repos.d/habana.repo && rm -rf /tmp/* && \ + dnf clean all && rm -rf /var/cache/yum + +RUN rpm -V habanalabs-rdma-core && rpm -V habanalabs-thunk && rpm -V habanalabs-firmware-tools && rpm -V habanalabs-graph + +# There is no need to store pip installation 
files inside docker image +ENV PIP_NO_CACHE_DIR=on +ENV PIP_DISABLE_PIP_VERSION_CHECK=1 +ENV RDMA_CORE_ROOT=/opt/habanalabs/rdma-core/src +ENV RDMA_CORE_LIB=${RDMA_CORE_ROOT}/build/lib + +RUN wget -nv -O /tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 https://github.com/ofiwg/libfabric/releases/download/v${LIBFABRIC_VERSION}/libfabric-${LIBFABRIC_VERSION}.tar.bz2 && \ + cd /tmp/ && tar xf /tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 && \ + cd /tmp/libfabric-${LIBFABRIC_VERSION} && \ + ./configure --prefix=$LIBFABRIC_ROOT --enable-psm3-verbs --enable-verbs=yes --with-synapseai=/usr && \ + make && make install && cd / && rm -rf /tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 /tmp/libfabric-${LIBFABRIC_VERSION} + +RUN wget -nv -O /tmp/main.zip https://github.com/HabanaAI/hccl_ofi_wrapper/archive/refs/heads/main.zip && \ + unzip /tmp/main.zip -d /tmp && \ + cd /tmp/hccl_ofi_wrapper-main && \ + make && cp -f libhccl_ofi_wrapper.so /usr/lib/habanalabs/libhccl_ofi_wrapper.so && \ + cd / && \ + rm -rf /tmp/main.zip /tmp/hccl_ofi_wrapper-main + +ENV GC_KERNEL_PATH=/usr/lib/habanalabs/libtpc_kernels.so +ENV HABANA_LOGS=/opt/app-root/log/habana_logs/ +ENV HABANA_SCAL_BIN_PATH=/opt/habanalabs/engines_fw +ENV HABANA_PLUGINS_LIB_PATH=/opt/habanalabs/habana_plugins + +ENV APP_ROOT="/opt/app-root" + +RUN python3.10 -m pip install "pip>=23.3" "setuptools>=70.0.0" "wheel==0.38.4" + +WORKDIR ${APP_ROOT} + +RUN python3.10 -m venv ${APP_ROOT} && \ + wget -O ${APP_ROOT}/bin/fix-permissions \ + https://raw.githubusercontent.com/sclorg/s2i-python-container/master/3.9-minimal/root/usr/bin/fix-permissions && \ + chown -R 1001:0 ${APP_ROOT} && \ + chmod +x ${APP_ROOT}/bin/fix-permissions && \ + ${APP_ROOT}/bin/fix-permissions ${APP_ROOT} -P && \ + echo "unset BASH_ENV PROMPT_COMMAND ENV" >> ${APP_ROOT}/bin/activate + +USER 1001 + +ENV BASH_ENV="${APP_ROOT}/bin/activate" +ENV ENV="${APP_ROOT}/bin/activate" +ENV PROMPT_COMMAND=". 
${APP_ROOT}/bin/activate" + +SHELL ["/bin/bash", "-c"] + +RUN python -m pip install habana_media_loader=="${VERSION}"."${REVISION}" + + +FROM gaudi-base AS gaudi-pytorch + +ARG PT_VERSION +ARG VERSION +ARG REVISION +ARG ARTIFACTORY_URL +ENV BASE_NAME=rhel9.2 + +LABEL name="PyTorch Installer" +LABEL summary="Habanalabs PyTorch installer layer for RHEL9.2" +LABEL description="Image with pre installed Habanalabs packages for PyTorch" + +RUN echo "/usr/lib/habanalabs" > $(python -c "import sysconfig; print(sysconfig.get_path('platlib'))")/habanalabs-graph.pth + +USER root + +RUN echo "[CRB]" > /etc/yum.repos.d/CentOS-Linux-CRB.repo && \ + echo "name=CentOS Linux 9 - CRB" >> /etc/yum.repos.d/CentOS-Linux-CRB.repo && \ + echo "baseurl=https://mirror.stream.centos.org/9-stream/CRB/x86_64/os" >> /etc/yum.repos.d/CentOS-Linux-CRB.repo && \ + echo "gpgkey=https://www.centos.org/keys/RPM-GPG-KEY-CentOS-Official-SHA256" >> /etc/yum.repos.d/CentOS-Linux-CRB.repo && \ + echo "gpgcheck=1" >> /etc/yum.repos.d/CentOS-Linux-CRB.repo + +RUN dnf install --allowerasing -y \ + curl \ + cairo-devel \ + numactl-devel \ + iproute \ + which \ + zlib-devel \ + lapack-devel \ + openblas-devel \ + numactl \ + gperftools-devel && \ + dnf clean all && rm -rf /var/cache/yum + +RUN dnf config-manager --add-repo https://yum.repos.intel.com/mkl/setup/intel-mkl.repo -y && \ + dnf install --allowerasing -y intel-mkl-64bit-2020.4-912 && \ + dnf clean all && rm -rf /var/cache/yum + +# Set LD_PRELOAD after all required installations to +# avoid warnings during docker creation +ENV LD_PRELOAD=/lib64/libtcmalloc.so.4 +ENV TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD=7516192768 + +RUN rm -rf /tmp/* + +USER 1001 + +COPY --chown=1001:0 install_packages.sh . 
+RUN ./install_packages.sh && rm -f install_packages.sh + +USER root + +RUN /sbin/ldconfig && echo "source /etc/profile.d/habanalabs.sh" >> ~/.bashrc && \ + chown 1001:0 ~/.bashrc + +USER 1001 + +FROM gaudi-pytorch AS gaudi-notebooks + +WORKDIR ${APP_ROOT}/src + +COPY --chown=1001:0 requirements.txt requirements.txt +COPY --chown=1001:0 start-notebook.sh /opt/app-root/bin +COPY --chown=1001:0 builder /opt/app-root/builder +COPY --chown=1001:0 utils /opt/app-root/bin/utils + +USER 1001 + +RUN python -m pip install -r requirements.txt && \ + chmod -R g+w ${APP_ROOT}/lib/python3.10/site-packages && \ + fix-permissions ${APP_ROOT} -P && \ + chmod -R g+w /opt/app-root/src && \ + sed -i -e "s/Python.*/$(python --version | cut -d '.' -f-2)\",/" /opt/app-root/share/jupyter/kernels/python3/kernel.json && \ + jupyter labextension disable "@jupyterlab/apputils-extension:announcements" + +RUN cd ${APP_ROOT}/ && \ + git clone https://github.com/HabanaAI/vllm-fork.git && \ + cd vllm-fork && \ + VLLM_TARGET_DEVICE=hpu pip install -e . 
+ +WORKDIR ${APP_ROOT}/src +ENV NOTEBOOK_SAMPLE_LINK="https://raw.githubusercontent.com/sharvil10/ai-containers/main/enterprise/redhat/openshift-ai/gaudi/demo/Getting-started.ipynb" + +ENTRYPOINT ["bash", "-c", "/opt/app-root/builder/run"] diff --git a/enterprise/redhat/openshift-ai/gaudi/docker/Dockerfile.rhel9.4 b/enterprise/redhat/openshift-ai/gaudi/docker/Dockerfile.rhel9.4 new file mode 100644 index 00000000..18eeef28 --- /dev/null +++ b/enterprise/redhat/openshift-ai/gaudi/docker/Dockerfile.rhel9.4 @@ -0,0 +1,249 @@ +ARG BASE_IMAGE +ARG BASE_TAG +FROM ${BASE_IMAGE}:${BASE_TAG} AS gaudi-base +ARG ARTIFACTORY_URL +ARG VERSION +ARG REVISION + +LABEL vendor="Intel Corporation" +LABEL release="${VERSION}-${REVISION}" + +ENV HOME="/opt/app-root/src" +WORKDIR /opt/app-root/src + +RUN echo "[BaseOS]" > /etc/yum.repos.d/CentOS-Linux-BaseOS.repo && \ + echo "name=CentOS Linux 9 - BaseOS" >> /etc/yum.repos.d/CentOS-Linux-BaseOS.repo && \ + echo "baseurl=https://mirror.stream.centos.org/9-stream/BaseOS/x86_64/os" >> /etc/yum.repos.d/CentOS-Linux-BaseOS.repo && \ + echo "gpgkey=https://www.centos.org/keys/RPM-GPG-KEY-CentOS-Official-SHA256" >> /etc/yum.repos.d/CentOS-Linux-BaseOS.repo && \ + echo "gpgcheck=1" >> /etc/yum.repos.d/CentOS-Linux-BaseOS.repo + +RUN echo "[centos9]" > /etc/yum.repos.d/CentOS-Linux-AppStream.repo && \ + echo "name=CentOS Linux 9 - AppStream" >> /etc/yum.repos.d/CentOS-Linux-AppStream.repo && \ + echo "baseurl=https://mirror.stream.centos.org/9-stream/AppStream/x86_64/os" >> /etc/yum.repos.d/CentOS-Linux-AppStream.repo && \ + echo "gpgkey=https://www.centos.org/keys/RPM-GPG-KEY-CentOS-Official-SHA256" >> /etc/yum.repos.d/CentOS-Linux-AppStream.repo && \ + echo "gpgcheck=1" >> /etc/yum.repos.d/CentOS-Linux-AppStream.repo + +RUN echo "[CRB]" > /etc/yum.repos.d/CentOS-Linux-CRB.repo && \ + echo "name=CentOS Linux 9 - CRB" >> /etc/yum.repos.d/CentOS-Linux-CRB.repo && \ + echo "baseurl=https://mirror.stream.centos.org/9-stream/CRB/x86_64/os" >> 
/etc/yum.repos.d/CentOS-Linux-CRB.repo && \ + echo "gpgkey=https://www.centos.org/keys/RPM-GPG-KEY-CentOS-Official-SHA256" >> /etc/yum.repos.d/CentOS-Linux-CRB.repo && \ + echo "gpgcheck=1" >> /etc/yum.repos.d/CentOS-Linux-CRB.repo + +RUN dnf install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && \ + dnf clean all && rm -rf /var/cache/yum + +RUN dnf install -y \ + clang \ + cmake3 \ + cpp \ + gcc \ + gcc-c++ \ + glibc \ + glibc-headers \ + glibc-devel \ + jemalloc \ + libarchive \ + libksba \ + unzip \ + llvm \ + lsof \ + python3-devel \ + openssh-clients \ + openssl-1:3.0.7-27.el9 \ + openssl-devel-1:3.0.7-27.el9 \ + libjpeg-devel \ + openssh-server \ + lsb_release \ + wget \ + git \ + libffi-devel \ + bzip2-devel \ + zlib-devel \ + mesa-libGL \ + iproute \ + python3.11 \ + python3.11-pip \ + python3.11-devel \ + ffmpeg-free \ + perl-Net-SSLeay-1.92-2.el9 \ + python3-dnf-plugin-versionlock && \ + # update pkgs (except OS version) for resolving potentials CVEs + dnf versionlock add redhat-release* openssl* perl-Net-SSLeay && \ + dnf update -y && \ + dnf clean all && rm -rf /var/cache/yum + +RUN mkdir -p /licenses && \ + wget -O /licenses/LICENSE https://raw.githubusercontent.com/intel/ai-containers/main/LICENSE + +RUN alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 2 && \ + alternatives --install /usr/bin/python3 python3 /usr/bin/python3.9 1 && \ + alternatives --set python3 /usr/bin/python3.11 && \ + alternatives --install /usr/bin/pip3 pip3 /usr/bin/pip3.11 2 && \ + alternatives --install /usr/bin/pip3 pip3 /usr/bin/pip3.9 1 && \ + alternatives --set pip3 /usr/bin/pip3.11 + +COPY install_efa.sh . 
+RUN ./install_efa.sh && rm install_efa.sh && rm -rf /etc/ld.so.conf.d/efa.conf /etc/profile.d/efa.sh + +ENV LIBFABRIC_VERSION="1.20.0" +ENV LIBFABRIC_ROOT="/opt/habanalabs/libfabric-${LIBFABRIC_VERSION}" +ENV MPI_ROOT=/opt/amazon/openmpi +ENV LD_LIBRARY_PATH=$LIBFABRIC_ROOT/lib:${MPI_ROOT}/lib:/usr/lib/habanalabs:$LD_LIBRARY_PATH +ENV PATH=${LIBFABRIC_ROOT}/bin:${MPI_ROOT}/bin:$PATH +ENV OPAL_PREFIX=${MPI_ROOT} +ENV MPICC=${MPI_ROOT}/bin/mpicc +ENV RDMAV_FORK_SAFE=1 +ENV FI_EFA_USE_DEVICE_RDMA=1 + +RUN echo "[habanalabs]" > /etc/yum.repos.d/habanalabs.repo && \ + echo "name=Habana RH9 Linux repo" >> /etc/yum.repos.d/habanalabs.repo && \ + echo "baseurl=https://${ARTIFACTORY_URL}/artifactory/rhel/9/9.4" >> /etc/yum.repos.d/habanalabs.repo && \ + echo "gpgkey=https://${ARTIFACTORY_URL}/artifactory/rhel/9/9.4/repodata/repomd.xml.key" >> /etc/yum.repos.d/habanalabs.repo && \ + echo "gpgcheck=1" >> /etc/yum.repos.d/habanalabs.repo + +# for Habana GPG key with SHA-1 signature +RUN update-crypto-policies --set DEFAULT:SHA1 + +RUN dnf install -y habanalabs-rdma-core-"$VERSION"-"$REVISION".el9 \ + habanalabs-thunk-"$VERSION"-"$REVISION".el9 \ + habanalabs-firmware-tools-"$VERSION"-"$REVISION".el9 \ + habanalabs-graph-"$VERSION"-"$REVISION".el9 && \ + rm -f /etc/yum.repos.d/habanalabs.repo && rm -f /etc/yum.repos.d/habana.repo && rm -rf /tmp/* && \ + dnf clean all && rm -rf /var/cache/yum + +RUN rpm -V habanalabs-rdma-core && rpm -V habanalabs-thunk && rpm -V habanalabs-firmware-tools && rpm -V habanalabs-graph + +# There is no need to store pip installation files inside docker image +ENV PIP_NO_CACHE_DIR=on +ENV PIP_DISABLE_PIP_VERSION_CHECK=1 +ENV RDMA_CORE_ROOT=/opt/habanalabs/rdma-core/src +ENV RDMA_CORE_LIB=${RDMA_CORE_ROOT}/build/lib + +RUN wget -nv -O /tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 https://github.com/ofiwg/libfabric/releases/download/v${LIBFABRIC_VERSION}/libfabric-${LIBFABRIC_VERSION}.tar.bz2 && \ + cd /tmp/ && tar xf 
/tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 && \ + cd /tmp/libfabric-${LIBFABRIC_VERSION} && \ + ./configure --prefix=$LIBFABRIC_ROOT --enable-psm3-verbs --enable-verbs=yes --with-synapseai=/usr && \ + make && make install && cd / && rm -rf /tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 /tmp/libfabric-${LIBFABRIC_VERSION} + +RUN wget -nv -O /tmp/main.zip https://github.com/HabanaAI/hccl_ofi_wrapper/archive/refs/heads/main.zip && \ + unzip /tmp/main.zip -d /tmp && \ + cd /tmp/hccl_ofi_wrapper-main && \ + make && cp -f libhccl_ofi_wrapper.so /usr/lib/habanalabs/libhccl_ofi_wrapper.so && \ + cd / && \ + rm -rf /tmp/main.zip /tmp/hccl_ofi_wrapper-main + +ENV APP_ROOT="/opt/app-root" + +RUN python3.11 -m pip install pip==23.3.1 setuptools==67.3.3 wheel==0.38.4 + +WORKDIR ${APP_ROOT} + +RUN python3.11 -m venv ${APP_ROOT} && \ + wget -O ${APP_ROOT}/bin/fix-permissions \ + https://raw.githubusercontent.com/sclorg/s2i-python-container/master/3.9-minimal/root/usr/bin/fix-permissions && \ + chown -R 1001:0 ${APP_ROOT} && \ + chmod +x ${APP_ROOT}/bin/fix-permissions && \ + ${APP_ROOT}/bin/fix-permissions ${APP_ROOT} -P && \ + echo "unset BASH_ENV PROMPT_COMMAND ENV" >> ${APP_ROOT}/bin/activate + +USER 1001 + +ENV BASH_ENV="${APP_ROOT}/bin/activate" +ENV ENV="${APP_ROOT}/bin/activate" +ENV PROMPT_COMMAND=". 
${APP_ROOT}/bin/activate" + +SHELL ["/bin/bash", "-c"] + +RUN python -m pip install habana_media_loader=="${VERSION}"."${REVISION}" + +ENV GC_KERNEL_PATH=/usr/lib/habanalabs/libtpc_kernels.so +ENV HABANA_LOGS=/opt/app-root/log/habana_logs/ +ENV HABANA_SCAL_BIN_PATH=/opt/habanalabs/engines_fw +ENV HABANA_PLUGINS_LIB_PATH=/opt/habanalabs/habana_plugins + +FROM gaudi-base AS gaudi-pytorch + +ARG PT_VERSION +ARG VERSION +ARG REVISION +ARG ARTIFACTORY_URL +ENV BASE_NAME=rhel9.4 + +LABEL name="PyTorch Installer" +LABEL summary="Habanalabs PyTorch installer layer for RHEL9.2" +LABEL description="Image with pre installed Habanalabs packages for PyTorch" + +RUN echo "/usr/lib/habanalabs" > $(python -c "import sysconfig; print(sysconfig.get_path('platlib'))")/habanalabs-graph.pt + +USER root + +RUN echo "[CRB]" > /etc/yum.repos.d/CentOS-Linux-CRB.repo && \ + echo "name=CentOS Linux 9 - CRB" >> /etc/yum.repos.d/CentOS-Linux-CRB.repo && \ + echo "baseurl=https://mirror.stream.centos.org/9-stream/CRB/x86_64/os" >> /etc/yum.repos.d/CentOS-Linux-CRB.repo && \ + echo "gpgkey=https://www.centos.org/keys/RPM-GPG-KEY-CentOS-Official-SHA256" >> /etc/yum.repos.d/CentOS-Linux-CRB.repo && \ + echo "gpgcheck=1" >> /etc/yum.repos.d/CentOS-Linux-CRB.repo + +RUN dnf install --allowerasing -y \ + curl \ + cairo-devel \ + numactl-devel \ + iproute \ + which \ + zlib-devel \ + lapack-devel \ + openblas-devel \ + numactl \ + gperftools-devel && \ + dnf clean all && rm -rf /var/cache/yum + +RUN dnf config-manager --add-repo https://yum.repos.intel.com/mkl/setup/intel-mkl.repo -y && \ + dnf install --allowerasing -y intel-mkl-64bit-2020.4-912 && \ + dnf clean all && rm -rf /var/cache/yum + +RUN rm -rf /tmp/* + +USER 1001 + +COPY --chown=1001:0 install_packages.sh . 
+ +# Set LD_PRELOAD after all required installations to +# avoid warnings during docker creation +ENV LD_PRELOAD=/lib64/libtcmalloc.so.4 +ENV TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD=7516192768 + +RUN ./install_packages.sh && rm -f install_packages.sh + +USER root + +RUN /sbin/ldconfig && echo "source /etc/profile.d/habanalabs.sh" >> ~/.bashrc && \ + chown 1001:0 ~/.bashrc + +USER 1001 + +FROM gaudi-pytorch AS gaudi-notebooks + +WORKDIR ${APP_ROOT}/src + +COPY --chown=1001:0 requirements.txt requirements.txt +COPY --chown=1001:0 start-notebook.sh /opt/app-root/bin +COPY --chown=1001:0 builder /opt/app-root/builder +COPY --chown=1001:0 utils /opt/app-root/bin/utils + +USER 1001 + +RUN python -m pip install -r requirements.txt && \ + chmod -R g+w ${APP_ROOT}/lib/python3.11/site-packages && \ + fix-permissions ${APP_ROOT} -P && \ + chmod -R g+w /opt/app-root/src && \ + sed -i -e "s/Python.*/$(python --version | cut -d '.' -f-2)\",/" /opt/app-root/share/jupyter/kernels/python3/kernel.json && \ + jupyter labextension disable "@jupyterlab/apputils-extension:announcements" + +RUN cd ${APP_ROOT}/ && \ + git clone https://github.com/HabanaAI/vllm-fork.git && \ + cd vllm-fork && \ + VLLM_TARGET_DEVICE=hpu pip install -e . 
+ +WORKDIR ${APP_ROOT}/src +ENV JUPYTER_PRELOAD_REPOS="https://github.com/IntelAI/oneAPI-samples" +ENV REPO_BRANCH="main" +ENTRYPOINT ["bash", "-c", "/opt/app-root/builder/run"] diff --git a/enterprise/redhat/openshift-ai/gaudi/docker/builder/run b/enterprise/redhat/openshift-ai/gaudi/docker/builder/run new file mode 100755 index 00000000..f91d869e --- /dev/null +++ b/enterprise/redhat/openshift-ai/gaudi/docker/builder/run @@ -0,0 +1,39 @@ +#!/bin/bash + +set -eo pipefail + +set -x + +APP_ROOT=${APP_ROOT:-/opt/app-root} + +# Pre-clone repositories defined in JUPYTER_PRELOAD_REPOS +if [ -n "${JUPYTER_PRELOAD_REPOS}" ]; then + for repo in $(echo "${JUPYTER_PRELOAD_REPOS}" | tr ',' ' '); do + # Check for the presence of "@branch" in the repo string + REPO_BRANCH=$(echo "${repo}" | cut -s -d'@' -f2) + if [[ -n ${REPO_BRANCH} ]]; then + # Remove the branch from the repo string and convert REPO_BRANCH to git clone arg + repo=$(echo "${repo}" | cut -d'@' -f1) + REPO_BRANCH="-b ${REPO_BRANCH}" + fi + echo "Checking if repository $repo exists locally" + REPO_DIR=$(basename "${repo}") + if [ -d "${REPO_DIR}" ]; then + pushd "${REPO_DIR}" + # Do nothing if the repo already exists + echo "The ${repo} has already been cloned" + : + popd + else + GIT_SSL_NO_VERIFY=true git clone "${repo}" "${REPO_DIR}" "${REPO_BRANCH}" + fi + done +fi + +if [ -n "${NOTEBOOK_SAMPLES_LINK}" ]; then + for link in $(echo "${NOTEBOOK_SAMPLES_LINK}" | tr ',' ' '); do + wget "${link}" + done +fi + +"${APP_ROOT}"/bin/start-notebook.sh "$@" diff --git a/enterprise/redhat/openshift-ai/gaudi/docker/docker-compose.yaml b/enterprise/redhat/openshift-ai/gaudi/docker/docker-compose.yaml new file mode 100644 index 00000000..d2901e32 --- /dev/null +++ b/enterprise/redhat/openshift-ai/gaudi/docker/docker-compose.yaml @@ -0,0 +1,64 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +services: + gaudi-base: + build: + args: + BASE_IMAGE: ${BASE_IMAGE:-registry.access.redhat.com/ubi9/ubi} + BASE_TAG: ${RHEL_OS:-9.2} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + no_proxy: "" + ARTIFACTORY_URL: ${ARTIFACTORY_URL:-vault.habana.ai} + VERSION: ${VERSION:-1.17.0} + REVISION: ${REVISION:-495} + context: . + target: gaudi-base + dockerfile: Dockerfile.rhel${RHEL_OS:-9.2} + image: gaudi-base:${RHEL_OS:-9.2}-${VERSION:-1.17.0}-${REVISION:-495} + gaudi-pytorch: + build: + args: + BASE_IMAGE: ${BASE_IMAGE:-registry.access.redhat.com/ubi9/ubi} + BASE_TAG: ${RHEL_OS:-9.2} + BASE_NAME: rhel${RHEL_OS:-rhel9.2} + PT_VERSION: ${PT_VERSION:-2.3.1} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + no_proxy: "" + ARTIFACTORY_URL: ${ARTIFACTORY_URL:-vault.habana.ai} + VERSION: ${VERSION:-1.17.0} + REVISION: ${REVISION:-495} + context: . + target: gaudi-pytorch + dockerfile: Dockerfile.rhel${RHEL_OS:-9.2} + image: gaudi-pytorch:${RHEL_OS:-9.2}-${VERSION:-1.17.0}-${REVISION:-495} + gaudi-notebooks: + build: + args: + BASE_IMAGE: ${BASE_IMAGE:-registry.access.redhat.com/ubi9/ubi} + BASE_TAG: ${RHEL_OS:-9.2} + BASE_NAME: ${BASE_NAME:-rhel9.2} + PT_VERSION: ${PT_VERSION:-2.3.1} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + no_proxy: "" + ARTIFACTORY_URL: ${ARTIFACTORY_URL:-vault.habana.ai} + VERSION: ${VERSION:-1.17.0} + REVISION: ${REVISION:-495} + context: . 
+ target: gaudi-notebooks + dockerfile: Dockerfile.rhel${RHEL_OS:-9.2} + image: gaudi-notebooks:${RHEL_OS:-9.2}-${VERSION:-1.17.0}-${REVISION:-495} diff --git a/enterprise/redhat/openshift-ai/gaudi/docker/install-python310.sh b/enterprise/redhat/openshift-ai/gaudi/docker/install-python310.sh new file mode 100755 index 00000000..a9d25005 --- /dev/null +++ b/enterprise/redhat/openshift-ai/gaudi/docker/install-python310.sh @@ -0,0 +1,103 @@ +#!/bin/bash +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -e + +_BASE_NAME=${1:-"ubuntu22.04"} +_SSL_LIB="" + +# preinstall dependencies and define variables +case "${_BASE_NAME}" in +*ubuntu22.04*) + echo "Skip install Python3.10 from source on Ubuntu22.04" + exit 0 + ;; +*debian* | *ubuntu*) + apt update + apt install -y libsqlite3-dev libreadline-dev + ;; +*rhel*) + yum install -y sqlite-devel readline-devel xz-devel + ;; +*tencentos3.1*) + dnf install -y sqlite-devel readline-devel zlib-devel xz-devel bzip2-devel libffi-devel + wget -nv -O /opt/openssl-1.1.1w.tar.gz https://github.com/openssl/openssl/releases/download/OpenSSL_1_1_1w/openssl-1.1.1w.tar.gz && + cd /opt/ && + tar xzf openssl-1.1.1w.tar.gz && + rm -rf openssl-1.1.1w.tar.gz && + cd openssl-1.1.1w && + ./config --prefix=/usr/local/openssl-1.1.1w shared zlib && + make && make install + ln -s /etc/pki/tls/cert.pem /usr/local/openssl-1.1.1w/ssl/cert.pem + + PATH=$PATH:/usr/local/protoc/bin:/usr/local/openssl-1.1.1w/bin + LD_LIBRARY_PATH=/usr/local/openssl-1.1.1w/lib:$LD_LIBRARY_PATH + _SSL_LIB="--with-openssl=/usr/local/openssl-1.1.1w" + ;; +*amzn2*) + yum install -y sqlite-devel readline-devel + wget -nv -O /opt/openssl-1.1.1w.tar.gz https://github.com/openssl/openssl/releases/download/OpenSSL_1_1_1w/openssl-1.1.1w.tar.gz && + cd /opt/ && + tar xzf openssl-1.1.1w.tar.gz && + rm -rf openssl-1.1.1w.tar.gz && + cd openssl-1.1.1w && + ./config --prefix=/usr/local/openssl-1.1.1w shared zlib && + make && make install + ln -s /etc/pki/tls/cert.pem /usr/local/openssl-1.1.1w/ssl/cert.pem + + PATH=$PATH:/usr/local/protoc/bin:/usr/local/openssl-1.1.1w/bin + LD_LIBRARY_PATH=/usr/local/openssl-1.1.1w/lib:$LD_LIBRARY_PATH + _SSL_LIB="--with-openssl=/usr/local/openssl-1.1.1w" + ;; +esac + +# install Python +wget -nv -O /opt/Python-3.10.14.tgz https://www.python.org/ftp/python/3.10.14/Python-3.10.14.tgz +cd /opt/ +tar xzf Python-3.10.14.tgz +rm -f Python-3.10.14.tgz +cd Python-3.10.14 +./configure --enable-optimizations --enable-loadable-sqlite-extensions 
--enable-shared $_SSL_LIB +make -j && make altinstall + +# post install +case "${_BASE_NAME}" in +*rhel9*) + alternatives --install /usr/bin/python3 python3 /usr/local/bin/python3.10 2 && + alternatives --install /usr/bin/python3 python3 /usr/bin/python3.9 1 && + alternatives --set python3 /usr/local/bin/python3.10 + export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH + ;; +*tencentos3.1*) + alternatives --install /usr/bin/python3 python3 /usr/local/bin/python3.10 4 && + alternatives --install /usr/bin/python3 python3 /usr/bin/python3.8 3 && + alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 2 && + alternatives --install /usr/bin/python3 python3 /usr/bin/python3.6 1 && + alternatives --set python3 /usr/local/bin/python3.10 + export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH + ;; +*amzn2*) + update-alternatives --install /usr/bin/python3 python3 /usr/local/bin/python3.10 3 && + update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.8 2 && + update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.7 1 + ;; +*debian*) + update-alternatives --install /usr/bin/python3 python3 /usr/local/bin/python3.10 3 + update-alternatives --install /usr/bin/python3 python3 /usr/local/bin/python3.8 2 + update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.7 1 + ;; +esac + +python3 -m pip install --upgrade pip setuptools diff --git a/enterprise/redhat/openshift-ai/gaudi/docker/install_efa.sh b/enterprise/redhat/openshift-ai/gaudi/docker/install_efa.sh new file mode 100755 index 00000000..4175e8f8 --- /dev/null +++ b/enterprise/redhat/openshift-ai/gaudi/docker/install_efa.sh @@ -0,0 +1,40 @@ +#!/bin/bash -ex + +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +DEFAULT_EFA_INSTALLER_VER=1.29.0 +efa_installer_version=${1:-$DEFAULT_EFA_INSTALLER_VER} + +tmp_dir=$(mktemp -d) +wget -nv https://efa-installer.amazonaws.com/aws-efa-installer-"$efa_installer_version".tar.gz -P "$tmp_dir" +tar -xf "$tmp_dir"/aws-efa-installer-"$efa_installer_version".tar.gz -C "$tmp_dir" +pushd "$tmp_dir"/aws-efa-installer +# shellcheck disable=SC1091 +case $( + . /etc/os-release + echo -n "$ID" +) in +rhel) + # we cannot install dkms packages on RHEL images due to OCP rules + rm -f RPMS/RHEL8/x86_64/dkms*.rpm + ;; +tencentos) + dnf install -y RPMS/ROCKYLINUX8/x86_64/rdma-core/libibverbs-46.0-1.el8.x86_64.rpm RPMS/ROCKYLINUX8/x86_64/rdma-core/libibverbs-utils-46.0-1.el8.x86_64.rpm + patch -f -p1 -i /tmp/tencentos_efa_patch.txt --reject-file=tencentos_efa_patch.rej --no-backup-if-mismatch + ;; +esac +./efa_installer.sh -y --skip-kmod --skip-limit-conf --no-verify +popd +rm -rf "$tmp_dir" diff --git a/enterprise/redhat/openshift-ai/gaudi/docker/install_packages.sh b/enterprise/redhat/openshift-ai/gaudi/docker/install_packages.sh new file mode 100755 index 00000000..d67bb4f3 --- /dev/null +++ b/enterprise/redhat/openshift-ai/gaudi/docker/install_packages.sh @@ -0,0 +1,46 @@ +#!/bin/bash +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -ex + +pt_package_name="pytorch_modules-v${PT_VERSION}_${VERSION}_${REVISION}.tgz" +os_string="ubuntu${OS_NUMBER}" +case "${BASE_NAME}" in +*rhel9.2*) + os_string="rhel92" + ;; +*rhel9.4*) + os_string="rhel94" + ;; +*rhel8*) + os_string="rhel86" + ;; +*amzn2*) + os_string="amzn2" + ;; +*tencentos*) + os_string="tencentos31" + ;; +esac +pt_artifact_path="https://${ARTIFACTORY_URL}/artifactory/gaudi-pt-modules/${VERSION}/${REVISION}/pytorch/${os_string}" + +tmp_path=$(mktemp --directory) +wget --no-verbose "${pt_artifact_path}/${pt_package_name}" +tar -xf "${pt_package_name}" -C "${tmp_path}"/. 
+pushd "${tmp_path}" +./install.sh "$VERSION" "$REVISION" +popd +# cleanup +rm -rf "${tmp_path}" "${pt_package_name}" diff --git a/enterprise/redhat/openshift-ai/gaudi/docker/requirements.txt b/enterprise/redhat/openshift-ai/gaudi/docker/requirements.txt new file mode 100644 index 00000000..9d3a3ca7 --- /dev/null +++ b/enterprise/redhat/openshift-ai/gaudi/docker/requirements.txt @@ -0,0 +1,43 @@ +# LLM Packages +deepspeed @ git+https://github.com/HabanaAI/DeepSpeed.git@1.17.0 + +# Datascience and useful extensions +kafka-python~=2.0.2 +matplotlib~=3.8.3 +pandas~=2.2.0 +plotly~=5.20.0 +scikit-learn +scipy~=1.12.0 +skl2onnx~=1.16.0 +codeflare-sdk~=0.18.0 + +# DB connectors +pymongo~=4.6.2 +psycopg~=3.1.18 +pyodbc~=5.1.0 +mysql-connector-python~=8.3.0 + +# JupyterLab packages +odh-elyra~=3.16.7 +jupyterlab~=3.6.7 # Wait on upgrade till plugins are ready +jupyter-bokeh~=3.0.7 # Upgrade would bring in jupyterlab 4 +jupyter-server~=2.14.1 +jupyter-server-proxy~=4.2.0 # Upgrade would bring in jupyterlab 4 +jupyter-server-terminals~=0.5.3 +jupyterlab-git~=0.44.0 +jupyterlab-lsp~=4.2.0 +jupyterlab-widgets~=3.0.10 +jupyter-resource-usage~=0.7.2 +nbdime~=3.2.1 +nbgitpuller~=1.2.0 + +# pycodestyle is dependency of below packages +# and to achieve compatible of pycodestyle with python-lsp-server[all] +# pinned the below packages +autopep8~=2.0.4 +flake8~=7.0.0 +# Base packages +wheel~=0.43.0 +setuptools>=70.0.0 +pip>=23.3 +aiohttp==3.10.2 diff --git a/enterprise/redhat/openshift-ai/gaudi/docker/start-notebook.sh b/enterprise/redhat/openshift-ai/gaudi/docker/start-notebook.sh new file mode 100755 index 00000000..f13aa7d8 --- /dev/null +++ b/enterprise/redhat/openshift-ai/gaudi/docker/start-notebook.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash + +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Load bash libraries +SCRIPT_DIR=${APP_ROOT}/bin +# shellcheck disable=SC1091 +source "${SCRIPT_DIR}"/utils/process.sh + +if [ -f "${SCRIPT_DIR}/utils/setup-elyra.sh" ]; then + # shellcheck disable=SC1091 + source "${SCRIPT_DIR}"/utils/setup-elyra.sh +fi + +# Initialize notebooks arguments variable +NOTEBOOK_PROGRAM_ARGS="" + +# Set default ServerApp.port value if NOTEBOOK_PORT variable is defined +if [ -n "${NOTEBOOK_PORT}" ]; then + NOTEBOOK_PROGRAM_ARGS+="--ServerApp.port=${NOTEBOOK_PORT} " +fi + +# Set default ServerApp.base_url value if NOTEBOOK_BASE_URL variable is defined +if [ -n "${NOTEBOOK_BASE_URL}" ]; then + NOTEBOOK_PROGRAM_ARGS+="--ServerApp.base_url=${NOTEBOOK_BASE_URL} " +fi + +# Set default ServerApp.root_dir value if NOTEBOOK_ROOT_DIR variable is defined +if [ -n "${NOTEBOOK_ROOT_DIR}" ]; then + NOTEBOOK_PROGRAM_ARGS+="--ServerApp.root_dir=${NOTEBOOK_ROOT_DIR} " +else + NOTEBOOK_PROGRAM_ARGS+="--ServerApp.root_dir=${HOME} " +fi + +# Add additional arguments if NOTEBOOK_ARGS variable is defined +if [ -n "${NOTEBOOK_ARGS}" ]; then + NOTEBOOK_PROGRAM_ARGS+=${NOTEBOOK_ARGS} +fi + +echo "${NOTEBOOK_PROGRAM_ARGS}" + +# Start the JupyterLab notebook +# shellcheck disable=SC2086 +start_process jupyter lab ${NOTEBOOK_PROGRAM_ARGS} \ + --ServerApp.ip=0.0.0.0 \ + --ServerApp.allow_origin="*" \ + --ServerApp.open_browser=False diff --git a/enterprise/redhat/openshift-ai/gaudi/docker/utils/process.sh b/enterprise/redhat/openshift-ai/gaudi/docker/utils/process.sh new file mode 100755 index 00000000..95028188 --- /dev/null +++ 
b/enterprise/redhat/openshift-ai/gaudi/docker/utils/process.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash + +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +function start_process() { + trap stop_process TERM INT + + echo "Running command:" "$@" + echo -e "$@" + "$@" & + + PID=$! + wait $PID + trap - TERM INT + wait $PID + STATUS=$? + exit $STATUS +} + +function stop_process() { + kill -TERM "$PID" +} diff --git a/enterprise/redhat/openshift-ai/README.md b/enterprise/redhat/openshift-ai/oneapi/README.md similarity index 100% rename from enterprise/redhat/openshift-ai/README.md rename to enterprise/redhat/openshift-ai/oneapi/README.md diff --git a/enterprise/redhat/openshift-ai/assets/step-1.png b/enterprise/redhat/openshift-ai/oneapi/assets/step-1.png similarity index 100% rename from enterprise/redhat/openshift-ai/assets/step-1.png rename to enterprise/redhat/openshift-ai/oneapi/assets/step-1.png diff --git a/enterprise/redhat/openshift-ai/assets/step-2.png b/enterprise/redhat/openshift-ai/oneapi/assets/step-2.png similarity index 100% rename from enterprise/redhat/openshift-ai/assets/step-2.png rename to enterprise/redhat/openshift-ai/oneapi/assets/step-2.png diff --git a/enterprise/redhat/openshift-ai/assets/step-3.png b/enterprise/redhat/openshift-ai/oneapi/assets/step-3.png similarity index 100% rename from enterprise/redhat/openshift-ai/assets/step-3.png rename to enterprise/redhat/openshift-ai/oneapi/assets/step-3.png diff 
--git a/enterprise/redhat/openshift-ai/assets/step-4.png b/enterprise/redhat/openshift-ai/oneapi/assets/step-4.png similarity index 100% rename from enterprise/redhat/openshift-ai/assets/step-4.png rename to enterprise/redhat/openshift-ai/oneapi/assets/step-4.png diff --git a/enterprise/redhat/openshift-ai/manifests/intel-optimized-ml.yaml b/enterprise/redhat/openshift-ai/oneapi/manifests/intel-optimized-ml.yaml similarity index 100% rename from enterprise/redhat/openshift-ai/manifests/intel-optimized-ml.yaml rename to enterprise/redhat/openshift-ai/oneapi/manifests/intel-optimized-ml.yaml diff --git a/enterprise/redhat/openshift-ai/manifests/intel-optimized-pytorch.yaml b/enterprise/redhat/openshift-ai/oneapi/manifests/intel-optimized-pytorch.yaml similarity index 100% rename from enterprise/redhat/openshift-ai/manifests/intel-optimized-pytorch.yaml rename to enterprise/redhat/openshift-ai/oneapi/manifests/intel-optimized-pytorch.yaml diff --git a/enterprise/redhat/openshift-ai/manifests/intel-optimized-tensorflow.yaml b/enterprise/redhat/openshift-ai/oneapi/manifests/intel-optimized-tensorflow.yaml similarity index 100% rename from enterprise/redhat/openshift-ai/manifests/intel-optimized-tensorflow.yaml rename to enterprise/redhat/openshift-ai/oneapi/manifests/intel-optimized-tensorflow.yaml From e4a00731ccb3a3b68df2a46150385e4f167f5e39 Mon Sep 17 00:00:00 2001 From: Srikanth Ramakrishna Date: Wed, 11 Sep 2024 14:42:08 -0700 Subject: [PATCH 46/50] IPEX XPU 2.3.110 support (#378) Signed-off-by: Srikanth Ramakrishna Signed-off-by: Srikanth Ramakrishna Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Tyler Titsworth --- python/docker-compose.yaml | 1 + pytorch/Dockerfile | 27 ++++------ pytorch/README.md | 20 +++++-- pytorch/docker-compose.yaml | 53 ++++++++----------- pytorch/notebooks/ipex-xpu.ipynb | 8 +-- pytorch/serving/README.md | 4 +- .../serving/torchserve-xpu-requirements.txt | 9 ++-- 
pytorch/serving/wf-store/rest-test.sh | 4 +- pytorch/tests/tests.yaml | 8 +-- pytorch/xpu-requirements.txt | 11 ++-- 10 files changed, 71 insertions(+), 74 deletions(-) diff --git a/python/docker-compose.yaml b/python/docker-compose.yaml index a8039de4..2d674ae3 100644 --- a/python/docker-compose.yaml +++ b/python/docker-compose.yaml @@ -17,6 +17,7 @@ services: build: args: MINIFORGE_VERSION: ${MINIFORGE_VERSION:-Linux-x86_64} + no_proxy: "" context: . labels: dependency.apt.wget: true diff --git a/pytorch/Dockerfile b/pytorch/Dockerfile index 2f7903d1..4015c2a5 100644 --- a/pytorch/Dockerfile +++ b/pytorch/Dockerfile @@ -167,34 +167,27 @@ RUN apt-get update && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* -RUN no_proxy=$no_proxy wget -q -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \ - | gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && \ - echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" \ - | tee /etc/apt/sources.list.d/oneAPI.list +RUN rm -rf /etc/apt/sources.list.d/intel-gpu-jammy.list -ARG DPCPP_VER -ARG MKL_VER -ARG CCL_VER +ENV OCL_ICD_VENDORS=/etc/OpenCL/vendors -RUN apt-get update && \ - apt-get install -y --no-install-recommends --fix-missing \ - intel-oneapi-runtime-dpcpp-cpp=${DPCPP_VER} \ - intel-oneapi-runtime-mkl=${MKL_VER} \ - intel-oneapi-runtime-ccl=${CCL_VER}; +FROM ipex-xpu-base AS ipex-xpu-base-wheels-pip -RUN rm -rf /etc/apt/sources.list.d/intel-gpu-jammy.list /etc/apt/sources.list.d/oneAPI.list +WORKDIR / +COPY xpu-requirements.txt . -ENV LD_LIBRARY_PATH=/opt/intel/oneapi/redist/lib:$LD_LIBRARY_PATH +RUN python -m pip install --no-cache-dir -r xpu-requirements.txt && \ + rm -rf xpu-requirements.txt -FROM ipex-xpu-base AS ipex-xpu-base-wheels +FROM ipex-xpu-base AS ipex-xpu-base-wheels-idp WORKDIR / COPY xpu-requirements.txt . 
-RUN python -m pip install --no-cache-dir -r xpu-requirements.txt && \ +RUN conda run -n idp python -m pip install --no-cache-dir -r xpu-requirements.txt && \ rm -rf xpu-requirements.txt -FROM ipex-xpu-base AS ipex-xpu-jupyter +FROM ipex-xpu-base-wheels-${PACKAGE_OPTION} AS ipex-xpu-jupyter WORKDIR /jupyter COPY jupyter-requirements.txt . diff --git a/pytorch/README.md b/pytorch/README.md index f3fcda4e..a96170fb 100644 --- a/pytorch/README.md +++ b/pytorch/README.md @@ -24,7 +24,8 @@ The images below include support for both CPU and GPU optimizations: | Tag(s) | Pytorch | IPEX | Driver | Dockerfile | | ---------------------- | -------- | -------------- | ------ | --------------- | -| `2.1.40-xpu-pip-base`,`2.1.40-xpu` | [v2.1.0] | [v2.1.40+xpu] | [914] | [v0.4.0-Beta] | +| `2.3.110-xpu-pip-base`,`2.3.110-xpu` | [torch-2.3.1] | [v2.3.110+xpu] | [950] | [v0.4.0] | +| `2.1.40-xpu-pip-base`,`2.1.40-xpu` | [v2.1.0] | [v2.1.40+xpu] | [914] | [v0.4.0-Beta] | | `2.1.30-xpu` | [v2.1.0] | [v2.1.30+xpu] | [803] | [v0.4.0-Beta] | | `2.1.20-xpu` | [v2.1.0] | [v2.1.20+xpu] | [803] | [v0.3.4] | | `2.1.10-xpu` | [v2.1.0] | [v2.1.10+xpu] | [736] | [v0.2.3] | @@ -37,7 +38,7 @@ docker run -it --rm \ --device /dev/dri \ -v /dev/dri/by-path:/dev/dri/by-path \ --ipc=host \ - intel/intel-extension-for-pytorch:2.1.40-xpu + intel/intel-extension-for-pytorch:2.3.110-xpu ``` --- @@ -46,6 +47,7 @@ The images below additionally include [Jupyter Notebook](https://jupyter.org/) s | Tag(s) | Pytorch | IPEX | Driver | Jupyter Port | Dockerfile | | --------------------- | -------- | ------------- | ------ | ------------ | --------------- | +| `2.3.110-xpu-pip-jupyter` | [torch-2.3.1] | [v2.3.110+xpu] | [950] | `8888` | [v0.4.0-Beta] | | `2.1.40-xpu-pip-jupyter` | [v2.1.0] | [v2.1.40+xpu] | [914] | `8888` | [v0.4.0-Beta] | | `2.1.20-xpu-pip-jupyter` | [v2.1.0] | [v2.1.20+xpu] | [803] | `8888` | [v0.3.4] | | `2.1.10-xpu-pip-jupyter` | [v2.1.0] | [v2.1.10+xpu] | [736] | `8888` | [v0.2.3] | @@ -57,7 
+59,7 @@ docker run -it --rm \ -p 8888:8888 \ --device /dev/dri \ -v /dev/dri/by-path:/dev/dri/by-path \ - intel/intel-extension-for-pytorch:2.1.40-xpu-pip-jupyter + intel/intel-extension-for-pytorch:2.3.110-xpu-pip-jupyter ``` After running the command above, copy the URL (something like `http://127.0.0.1:$PORT/?token=***`) into your browser to access the notebook server. @@ -270,6 +272,12 @@ The images below are [TorchServe*] with CPU Optimizations: For more details, follow the procedure in the [TorchServe](https://github.com/pytorch/serve/blob/master/examples/intel_extension_for_pytorch/README.md) instructions. +The images below are [TorchServe*] with XPU Optimizations: + +| Tag(s) | Pytorch | IPEX | Dockerfile | +| ------------------- | -------- | ------------ | --------------- | +| `2.3.110-serving-xpu` | [torch-2.3.1] | [v2.3.110+xpu] | [v0.4.0-Beta] | + ## CPU only images with Intel® Distribution for Python* The images below are built only with CPU optimizations (GPU acceleration support was deliberately excluded) and include [Intel® Distribution for Python*]: @@ -308,6 +316,7 @@ The images below are built only with CPU and GPU optimizations and include [Inte | Tag(s) | Pytorch | IPEX | Driver | Dockerfile | | ---------------- | -------- | ------------ | -------- | ------ | +| `2.3.110-xpu-idp-base` | [torch-v2.3.1] | [v2.3.110+xpu] | [950] | [v0.4.0-Beta] | | `2.1.40-xpu-idp-base` | [v2.1.0] | [v2.1.40+xpu] | [914] | [v0.4.0-Beta] | | `2.1.30-xpu-idp-base` | [v2.1.0] | [v2.1.30+xpu] | [803] | [v0.4.0-Beta] | | `2.1.10-xpu-idp-base` | [v2.1.0] | [v2.1.10+xpu] | [736] | [v0.2.3] | @@ -316,6 +325,7 @@ The images below additionally include [Jupyter Notebook](https://jupyter.org/) s | Tag(s) | Pytorch | IPEX | Driver | Jupyter Port | Dockerfile | | --------------------- | -------- | ------------- | ------ | ------------ | --------------- | +| `2.3.110-xpu-idp-jupyter` | [torch-v2.3.1] | [v2.3.110+xpu] | [950] | `8888` | [v0.4.0-Beta] | | `2.1.40-xpu-idp-jupyter` 
| [v2.1.0] | [v2.1.40+xpu] | [914] | `8888` | [v0.4.0-Beta] | | `2.1.20-xpu-idp-jupyter` | [v2.1.0] | [v2.1.20+xpu] | [803] | `8888` | [v0.3.4] | | `2.1.10-xpu-idp-jupyter` | [v2.1.0] | [v2.1.10+xpu] | [736] | `8888` | [v0.2.3] | @@ -384,6 +394,7 @@ It is the image user's responsibility to ensure that any use of The images below [v0.2.3]: https://github.com/intel/ai-containers/blob/v0.2.3/pytorch/Dockerfile [v0.1.0]: https://github.com/intel/ai-containers/blob/v0.1.0/pytorch/Dockerfile +[v2.3.110+xpu]: https://github.com/intel/intel-extension-for-pytorch/releases/tag/v2.3.110%2Bxpu [v2.1.40+xpu]: https://github.com/intel/intel-extension-for-pytorch/releases/tag/v2.1.40%2Bxpu [v2.1.30+xpu]: https://github.com/intel/intel-extension-for-pytorch/releases/tag/v2.1.30%2Bxpu [v2.1.20+xpu]: https://github.com/intel/intel-extension-for-pytorch/releases/tag/v2.1.20%2Bxpu @@ -399,6 +410,8 @@ It is the image user's responsibility to ensure that any use of The images below [v2.0.1]: https://github.com/pytorch/pytorch/releases/tag/v2.0.1 [v2.0.0]: https://github.com/pytorch/pytorch/releases/tag/v2.0.0 +[torch-v2.3.1]: https://github.com/pytorch/pytorch/tree/v2.3.1 + [v3.0]: https://github.com/intel/neural-compressor/releases/tag/v3.0 [v2.6]: https://github.com/intel/neural-compressor/releases/tag/v2.6 [v2.4.1]: https://github.com/intel/neural-compressor/releases/tag/v2.4.1 @@ -422,6 +435,7 @@ It is the image user's responsibility to ensure that any use of The images below [v4.44.0]: https://github.com/huggingface/transformers/releases/tag/v4.44.0 +[950]: https://dgpu-docs.intel.com/releases/stable_950_13_20240814.html [914]: https://dgpu-docs.intel.com/releases/stable_914_33_20240730.html [803]: https://dgpu-docs.intel.com/releases/LTS_803.29_20240131.html [736]: https://dgpu-docs.intel.com/releases/stable_736_25_20231031.html diff --git a/pytorch/docker-compose.yaml b/pytorch/docker-compose.yaml index 6aeeefc9..efacdea1 100644 --- a/pytorch/docker-compose.yaml +++ 
b/pytorch/docker-compose.yaml @@ -94,13 +94,10 @@ services: xpu: build: args: - CCL_VER: ${CCL_VER:-2021.13.1-31} - DPCPP_VER: ${DPCPP_VER:-2024.2.1-1079} - ICD_VER: ${ICD_VER:-24.22.29735.27-914~22.04} - LEVEL_ZERO_DEV_VER: ${LEVEL_ZERO_DEV_VER:-1.17.6-914~22.04} - LEVEL_ZERO_GPU_VER: ${LEVEL_ZERO_GPU_VER:-1.3.29735.27-914~22.04} - LEVEL_ZERO_VER: ${LEVEL_ZERO_VER:-1.17.6-914~22.04} - MKL_VER: ${MKL_VER:-2024.2.1-103} + ICD_VER: ${ICD_VER:-24.26.30049.10-950~22.04} + LEVEL_ZERO_DEV_VER: ${LEVEL_ZERO_DEV_VER:-1.17.6-950~22.04} + LEVEL_ZERO_GPU_VER: ${LEVEL_ZERO_GPU_VER:-1.3.30049.10-950~22.04} + LEVEL_ZERO_VER: ${LEVEL_ZERO_VER:-1.17.6-950~22.04} NO_PROXY: '' PACKAGE_OPTION: ${PACKAGE_OPTION:-pip} labels: @@ -109,51 +106,45 @@ services: dependency.apt.git: true dependency.apt.gnupg2: true dependency.apt.gpg-agent: true - dependency.apt.intel-level-zero-gpu: ${LEVEL_ZERO_GPU_VER:-1.3.29735.27-914~22.04} - dependency.apt.intel-oneapi-runtime-ccl: ${CCL_VER:-2021.13.1-31} - dependency.apt.intel-oneapi-runtime-dpcpp-cpp: ${DPCPP_VER:-2024.2.1-1079} - dependency.apt.intel-oneapi-runtime-mkl: ${MKL_VER:-2024.2.1-103} - dependency.apt.intel-opencl-icd: ${ICD_VER:-23.43.27642.40-803~22.04} - dependency.apt.level-zero: ${LEVEL_ZERO_VER:-1.17.6-914~22.04} - dependency.apt.level-zero-dev: ${LEVEL_ZERO_DEV_VER:-1.17.6-914~22.04} + dependency.apt.intel-level-zero-gpu: ${LEVEL_ZERO_GPU_VER:-1.3.30049.10-950~22.04} + dependency.apt.intel-opencl-icd: ${ICD_VER:-24.26.30049.10-950~22.04} + dependency.apt.level-zero: ${LEVEL_ZERO_VER:-1.17.6-950~22.04} + dependency.apt.level-zero-dev: ${LEVEL_ZERO_DEV_VER:-1.17.6-950~22.04} dependency.apt.rsync: true dependency.apt.unzip: true dependency.idp.pip: false org.opencontainers.base.name: "intel/python:3.10-core" org.opencontainers.image.title: "Intel® Extension for PyTorch XPU Base Image" - org.opencontainers.image.version: ${IPEX_VERSION:-2.1.40}-xpu-${PACKAGE_OPTION:-pip}-base - target: ipex-xpu-base-wheels + 
org.opencontainers.image.version: ${IPEX_VERSION:-2.3.110}-xpu-${PACKAGE_OPTION:-pip}-base + target: ipex-xpu-base-wheels-${PACKAGE_OPTION:-pip} command: > python -c "import torch;print(torch.device('xpu'));import intel_extension_for_pytorch as - ipex;print(ipex.xpu.is_available());print(torch.__version__); + ipex;print(torch.xpu.has_xpu());print(torch.__version__); print(ipex.__version__); [print(f'[{i}]: - {ipex.xpu.get_device_properties(i)}') for i in - range(ipex.xpu.device_count())];" + {torch.xpu.get_device_properties(i)}') for i in + range(torch.xpu.device_count())];" extends: ipex-base - image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.1.40xpu}-xpu-base + image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.3.110xpu}-xpu-base xpu-jupyter: build: args: - CCL_VER: ${CCL_VER:-2021.13.1-31} - DPCPP_VER: ${DPCPP_VER:-2024.2.1-1079} - ICD_VER: ${ICD_VER:-24.22.29735.27-914~22.04} - LEVEL_ZERO_DEV_VER: ${LEVEL_ZERO_DEV_VER:-1.17.6-914~22.04} - LEVEL_ZERO_GPU_VER: ${LEVEL_ZERO_GPU_VER:-1.3.29735.27-914~22.04} - LEVEL_ZERO_VER: ${LEVEL_ZERO_VER:-1.17.6-914~22.04} - MKL_VER: ${MKL_VER:-2024.2.1-103} + ICD_VER: ${ICD_VER:-24.26.30049.10-950~22.04} + LEVEL_ZERO_DEV_VER: ${LEVEL_ZERO_DEV_VER:-1.17.6-950~22.04} + LEVEL_ZERO_GPU_VER: ${LEVEL_ZERO_GPU_VER:-1.3.30049.10-950~22.04} + LEVEL_ZERO_VER: ${LEVEL_ZERO_VER:-1.17.6-950~22.04} NO_PROXY: '' PACKAGE_OPTION: ${PACKAGE_OPTION:-pip} labels: dependency.python.pip: jupyter-requirements.txt - org.opencontainers.base.name: "intel/intel-optimized-pytorch:${IPEX_VERSION:-2.1.40}-xpu-${PACKAGE_OPTION:-pip}-base" + org.opencontainers.base.name: "intel/intel-optimized-pytorch:${IPEX_VERSION:-2.3.110}-xpu-${PACKAGE_OPTION:-pip}-base" org.opencontainers.image.title: "Intel® Extension 
for PyTorch XPU Jupyter Image" - org.opencontainers.image.version: ${IPEX_VERSION:-2.1.40}-xpu-${PACKAGE_OPTION:-pip}-jupyter + org.opencontainers.image.version: ${IPEX_VERSION:-2.3.110}-xpu-${PACKAGE_OPTION:-pip}-jupyter target: ipex-xpu-jupyter command: > bash -c "python -m jupyter --version" extends: ipex-base - image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.1.40xpu}-xpu-jupyter + image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.3.110xpu}-xpu-jupyter ports: - 8888:8888 torchserve-cpu: @@ -196,7 +187,7 @@ services: docs: serving org.opencontainers.base.name: "intel/python:3.10-core" org.opencontainers.image.title: "Intel® Extension for PyTorch XPU Serving Image" - org.opencontainers.image.version: ${IPEX_VERSION:-2.1.40}-serving-xpu + org.opencontainers.image.version: ${IPEX_VERSION:-2.3.110}-serving-xpu target: torchserve-xpu command: torchserve --version entrypoint: "" diff --git a/pytorch/notebooks/ipex-xpu.ipynb b/pytorch/notebooks/ipex-xpu.ipynb index 662dd634..45df4c35 100644 --- a/pytorch/notebooks/ipex-xpu.ipynb +++ b/pytorch/notebooks/ipex-xpu.ipynb @@ -25,13 +25,13 @@ "outputs": [], "source": [ "import intel_extension_for_pytorch as ipex\n", - "print(ipex.xpu.is_available())\n", - "if (not ipex.xpu.is_available()):\n", + "print(torch.xpu.has_xpu())\n", + "if (not torch.xpu.is_available()):\n", " print('Intel GPU not detected. 
Please install GPU with compatible drivers')\n", " sys.exit(1)\n", - "print(ipex.xpu.has_onemkl())\n", + "print(torch.xpu.has_onemkl())\n", "print(torch.__version__); print(ipex.__version__)\n", - "[print(f'[{i}]: {ipex.xpu.get_device_properties(i)}') for i in range(ipex.xpu.device_count())]\n" + "[print(f'[{i}]: {torch.xpu.get_device_properties(i)}') for i in range(torch.xpu.device_count())]\n" ] } ], diff --git a/pytorch/serving/README.md b/pytorch/serving/README.md index 08114bba..c0a5413c 100644 --- a/pytorch/serving/README.md +++ b/pytorch/serving/README.md @@ -39,7 +39,7 @@ docker run --rm -it \ -u root \ -v $PWD:/home/model-server \ --device /dev/dri \ - intel/intel-optimized-pytorch:2.1.40-serving-xpu \ + intel/intel-optimized-pytorch:2.3.110-serving-xpu \ sh -c 'python model-archive/ipex_squeezenet.py && \ torch-model-archiver --model-name squeezenet1_1 \ --version 1.1 \ @@ -73,7 +73,7 @@ docker run -d --rm --name server \ -v $PWD/config-xpu.properties:/home/model-server/config.properties \ --net=host \ --device /dev/dri \ - intel/intel-optimized-pytorch:2.1.40-serving-xpu + intel/intel-optimized-pytorch:2.3.110-serving-xpu ``` After lauching the container, follow the steps below: diff --git a/pytorch/serving/torchserve-xpu-requirements.txt b/pytorch/serving/torchserve-xpu-requirements.txt index 534f6514..693402fe 100644 --- a/pytorch/serving/torchserve-xpu-requirements.txt +++ b/pytorch/serving/torchserve-xpu-requirements.txt @@ -1,9 +1,8 @@ -torch==2.1.0.post3+cxx11.abi -torchvision==0.16.0.post3+cxx11.abi -torchaudio==2.1.0.post3+cxx11.abi -intel_extension_for_pytorch==2.1.40+xpu +torch==2.3.1+cxx11.abi +torchvision==0.18.1+cxx11.abi +torchaudio==2.3.1+cxx11.abi +intel_extension_for_pytorch==2.3.110+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us -setuptools==69.5.1 numpy==1.26.4 captum>=0.7.0 cython>=3.0.10 diff --git a/pytorch/serving/wf-store/rest-test.sh b/pytorch/serving/wf-store/rest-test.sh index 
2e5850aa..2c37871f 100644 --- a/pytorch/serving/wf-store/rest-test.sh +++ b/pytorch/serving/wf-store/rest-test.sh @@ -33,13 +33,13 @@ apt-get -y install curl curl --fail -X GET http://localhost:8080/ping -cd ../model-store || exit +cd /home/model-server/model-store || exit curl --fail -O https://torchserve.pytorch.org/mar_files/cat_dog_classification.mar curl --fail -O https://torchserve.pytorch.org/mar_files/dog_breed_classification.mar curl --fail -X POST "http://127.0.0.1:8081/models?url=cat_dog_classification.mar" curl --fail -X POST "http://127.0.0.1:8081/models?url=dog_breed_classification.mar" -cd ../wf-store || exit +cd /home/model-server/wf-store || exit curl --fail -X POST "http://127.0.0.1:8081/workflows?url=dog_breed_wf.war" curl --fail -O https://raw.githubusercontent.com/pytorch/serve/master/examples/Workflows/dog_breed_classification/model_input/Cat.jpg diff --git a/pytorch/tests/tests.yaml b/pytorch/tests/tests.yaml index 21aeeadc..ceb3df74 100644 --- a/pytorch/tests/tests.yaml +++ b/pytorch/tests/tests.yaml @@ -16,14 +16,14 @@ import-ipex-cpu-${PACKAGE_OPTION:-pip}: img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.4.0}-base cmd: python -c "import torch;import intel_extension_for_pytorch as ipex;print(f'torch {torch.__version__} ipex {ipex.__version__}')" import-ipex-xpu-${PACKAGE_OPTION:-pip}: - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.1.40xpu}-xpu-base + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.3.110xpu}-xpu-base cmd: python -c "import torch; import intel_extension_for_pytorch as ipex;[print(f'[{i}] {torch.xpu.get_device_properties(i)}') for i in 
range(torch.xpu.device_count())];" device: ["/dev/dri"] import-cpu-jupyter-${PACKAGE_OPTION:-pip}: img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.4.0}-jupyter cmd: python -m jupyter --version import-xpu-jupyter-${PACKAGE_OPTION:-pip}: - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.1.40xpu}-xpu-jupyter + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.3.110xpu}-xpu-jupyter cmd: python -m jupyter --version device: ["/dev/dri"] import-cpu-oneccl-${PACKAGE_OPTION:-pip}: @@ -46,14 +46,14 @@ ipex-cpu-${PACKAGE_OPTION:-pip}: - dst: /tests src: $PWD/pytorch/tests ipex-xpu-${PACKAGE_OPTION:-pip}: - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.1.40xpu}-xpu-base + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.3.110xpu}-xpu-base cmd: python /tests/ipex-resnet50.py --ipex --device xpu device: ["/dev/dri"] volumes: - dst: /tests src: $PWD/pytorch/tests ipex-xpu-jupyter-${PACKAGE_OPTION:-pip}: - img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.1.40xpu}-xpu-jupyter + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.3.110xpu}-xpu-jupyter cmd: papermill --log-output /jupyter/xpu.ipynb -k python3 device: 
["/dev/dri"] notebook: True diff --git a/pytorch/xpu-requirements.txt b/pytorch/xpu-requirements.txt index e7771aa0..217ecdf7 100644 --- a/pytorch/xpu-requirements.txt +++ b/pytorch/xpu-requirements.txt @@ -1,10 +1,9 @@ -torch==2.1.0.post3+cxx11.abi -torchvision==0.16.0.post3+cxx11.abi -torchaudio==2.1.0.post3+cxx11.abi -intel_extension_for_pytorch==2.1.40+xpu -oneccl_bind_pt==2.1.400+xpu +torch==2.3.1+cxx11.abi +torchvision==0.18.1+cxx11.abi +torchaudio==2.3.1+cxx11.abi +intel_extension_for_pytorch==2.3.110+xpu +oneccl_bind_pt==2.3.100+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us -setuptools==69.5.1 numpy>=1.26.4 idna>=3.7 requests>=2.32.0 From 5ca74f7e12029ed622964d16f2d3cd5b658ce90f Mon Sep 17 00:00:00 2001 From: Tyler Titsworth Date: Thu, 12 Sep 2024 11:39:59 -0700 Subject: [PATCH 47/50] Update Dependabot Patterns (#377) Signed-off-by: Tyler Titsworth --- .github/dependabot.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 4289ee44..79f0dfb8 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -83,7 +83,7 @@ updates: groups: preset: patterns: - - "*requirements.txt" + - "*" package-ecosystem: pip schedule: interval: weekly @@ -99,7 +99,7 @@ updates: groups: gaudi-openshift: patterns: - - "requirements.txt" + - "*" package-ecosystem: pip schedule: interval: weekly From fd50d3f79c0b17c5c7edca8b8e84592f7bd4b646 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 12 Sep 2024 13:03:53 -0700 Subject: [PATCH 48/50] Bump step-security/harden-runner from 2.9.1 to 2.10.1 (#381) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/chart-ci.yaml | 2 +- .github/workflows/container-ci.yaml | 10 +++++----- .github/workflows/dependency-review.yaml | 2 +- .github/workflows/dockerhub-description.yml | 4 ++-- 
.github/workflows/docs.yaml | 2 +- .github/workflows/integration-test.yaml | 4 ++-- .github/workflows/lint.yaml | 2 +- .github/workflows/scorecard.yaml | 2 +- .github/workflows/security-report.yaml | 2 +- .github/workflows/test-runner-ci.yaml | 6 +++--- .github/workflows/weekly-test.yaml | 6 +++--- 11 files changed, 21 insertions(+), 21 deletions(-) diff --git a/.github/workflows/chart-ci.yaml b/.github/workflows/chart-ci.yaml index 916423b2..6f698aa6 100644 --- a/.github/workflows/chart-ci.yaml +++ b/.github/workflows/chart-ci.yaml @@ -26,7 +26,7 @@ jobs: runs-on: kubectl steps: - name: Harden Runner - uses: step-security/harden-runner@5c7944e73c4c2a096b17a9cb74d65b6c2bbafbde # v2.9.1 + uses: step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7 # v2.10.1 with: egress-policy: audit - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 diff --git a/.github/workflows/container-ci.yaml b/.github/workflows/container-ci.yaml index a64e2307..6d8a22f7 100644 --- a/.github/workflows/container-ci.yaml +++ b/.github/workflows/container-ci.yaml @@ -66,7 +66,7 @@ jobs: runs-on: ${{ github.repository_owner == 'intel' && 'intel-ubuntu-latest' || 'ubuntu-latest' }} steps: - name: Harden Runner - uses: step-security/harden-runner@5c7944e73c4c2a096b17a9cb74d65b6c2bbafbde # v2.9.1 + uses: step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7 # v2.10.1 with: egress-policy: audit - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 @@ -117,7 +117,7 @@ jobs: matrix: ${{ steps.scan-matrix.outputs.matrix }} steps: - name: Harden Runner - uses: step-security/harden-runner@5c7944e73c4c2a096b17a9cb74d65b6c2bbafbde # v2.9.1 + uses: step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7 # v2.10.1 with: egress-policy: audit - uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 @@ -136,7 +136,7 @@ jobs: fail-fast: false steps: - name: Harden Runner - uses: 
step-security/harden-runner@5c7944e73c4c2a096b17a9cb74d65b6c2bbafbde # v2.9.1 + uses: step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7 # v2.10.1 with: egress-policy: audit - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 @@ -170,7 +170,7 @@ jobs: matrix: ${{ steps.test-matrix.outputs.matrix }} steps: - name: Harden Runner - uses: step-security/harden-runner@5c7944e73c4c2a096b17a9cb74d65b6c2bbafbde # v2.9.1 + uses: step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7 # v2.10.1 with: egress-policy: audit - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 @@ -187,7 +187,7 @@ jobs: experimental: [true] fail-fast: false steps: - - uses: step-security/harden-runner@5c7944e73c4c2a096b17a9cb74d65b6c2bbafbde # v2.9.1 + - uses: step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7 # v2.10.1 with: egress-policy: audit - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 diff --git a/.github/workflows/dependency-review.yaml b/.github/workflows/dependency-review.yaml index cce8357f..635a8176 100644 --- a/.github/workflows/dependency-review.yaml +++ b/.github/workflows/dependency-review.yaml @@ -34,7 +34,7 @@ jobs: pull-requests: write steps: - name: Harden Runner - uses: step-security/harden-runner@5c7944e73c4c2a096b17a9cb74d65b6c2bbafbde # v2.9.1 + uses: step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7 # v2.10.1 with: egress-policy: audit - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 diff --git a/.github/workflows/dockerhub-description.yml b/.github/workflows/dockerhub-description.yml index 201e8888..1dbd23b9 100644 --- a/.github/workflows/dockerhub-description.yml +++ b/.github/workflows/dockerhub-description.yml @@ -24,7 +24,7 @@ jobs: matrix: ${{ steps.set-matrix.outputs.matrix }} steps: - name: Harden Runner - uses: step-security/harden-runner@5c7944e73c4c2a096b17a9cb74d65b6c2bbafbde # v2.9.1 + uses: 
step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7 # v2.10.1 with: egress-policy: audit - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 @@ -39,7 +39,7 @@ jobs: fail-fast: false steps: - name: Harden Runner - uses: step-security/harden-runner@5c7944e73c4c2a096b17a9cb74d65b6c2bbafbde # v2.9.1 + uses: step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7 # v2.10.1 with: egress-policy: audit - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index 997f3e6a..e51dddbb 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -32,7 +32,7 @@ jobs: pages: write steps: - name: Harden Runner - uses: step-security/harden-runner@5c7944e73c4c2a096b17a9cb74d65b6c2bbafbde # v2.9.1 + uses: step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7 # v2.10.1 with: egress-policy: audit - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 diff --git a/.github/workflows/integration-test.yaml b/.github/workflows/integration-test.yaml index 2a112c5b..10bc3879 100644 --- a/.github/workflows/integration-test.yaml +++ b/.github/workflows/integration-test.yaml @@ -26,7 +26,7 @@ jobs: groups: ${{ steps.group-list.outputs.FOLDERS }} steps: - name: Harden Runner - uses: step-security/harden-runner@5c7944e73c4c2a096b17a9cb74d65b6c2bbafbde # v2.9.1 + uses: step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7 # v2.10.1 with: egress-policy: audit - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 @@ -118,7 +118,7 @@ jobs: if: always() steps: - name: Harden Runner - uses: step-security/harden-runner@5c7944e73c4c2a096b17a9cb74d65b6c2bbafbde # v2.9.1 + uses: step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7 # v2.10.1 with: egress-policy: audit - run: exit 1 diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml index 
2e550689..057aab1e 100644 --- a/.github/workflows/lint.yaml +++ b/.github/workflows/lint.yaml @@ -31,7 +31,7 @@ jobs: statuses: write steps: - name: Harden Runner - uses: step-security/harden-runner@5c7944e73c4c2a096b17a9cb74d65b6c2bbafbde # v2.9.1 + uses: step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7 # v2.10.1 with: egress-policy: audit - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 diff --git a/.github/workflows/scorecard.yaml b/.github/workflows/scorecard.yaml index ecdde523..45c83ef4 100644 --- a/.github/workflows/scorecard.yaml +++ b/.github/workflows/scorecard.yaml @@ -36,7 +36,7 @@ jobs: actions: read steps: - name: Harden Runner - uses: step-security/harden-runner@5c7944e73c4c2a096b17a9cb74d65b6c2bbafbde # v2.9.1 + uses: step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7 # v2.10.1 with: egress-policy: audit - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 diff --git a/.github/workflows/security-report.yaml b/.github/workflows/security-report.yaml index 07290f08..2aaa7655 100644 --- a/.github/workflows/security-report.yaml +++ b/.github/workflows/security-report.yaml @@ -27,7 +27,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Harden Runner - uses: step-security/harden-runner@5c7944e73c4c2a096b17a9cb74d65b6c2bbafbde # v2.9.1 + uses: step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7 # v2.10.1 with: egress-policy: audit - uses: rsdmike/github-security-report-action@a149b24539044c92786ec39af8ba38c93496495d # v3.0.4 diff --git a/.github/workflows/test-runner-ci.yaml b/.github/workflows/test-runner-ci.yaml index 75aa8c06..e9f4bb88 100644 --- a/.github/workflows/test-runner-ci.yaml +++ b/.github/workflows/test-runner-ci.yaml @@ -33,7 +33,7 @@ jobs: fail-fast: true steps: - name: Harden Runner - uses: step-security/harden-runner@5c7944e73c4c2a096b17a9cb74d65b6c2bbafbde # v2.9.1 + uses: 
step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7 # v2.10.1 with: egress-policy: audit - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 @@ -66,7 +66,7 @@ jobs: runs-on: ${{ github.repository_owner == 'intel' && 'intel-ubuntu-latest' || 'ubuntu-latest' }} steps: - name: Harden Runner - uses: step-security/harden-runner@5c7944e73c4c2a096b17a9cb74d65b6c2bbafbde # v2.9.1 + uses: step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7 # v2.10.1 with: egress-policy: audit - uses: coverallsapp/github-action@643bc377ffa44ace6394b2b5d0d3950076de9f63 # v2.3.0 @@ -76,7 +76,7 @@ jobs: runs-on: ${{ github.repository_owner == 'intel' && 'intel-ubuntu-latest' || 'ubuntu-latest' }} steps: - name: Harden Runner - uses: step-security/harden-runner@5c7944e73c4c2a096b17a9cb74d65b6c2bbafbde # v2.9.1 + uses: step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7 # v2.10.1 with: egress-policy: audit - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 diff --git a/.github/workflows/weekly-test.yaml b/.github/workflows/weekly-test.yaml index 41189eed..388c021f 100644 --- a/.github/workflows/weekly-test.yaml +++ b/.github/workflows/weekly-test.yaml @@ -25,7 +25,7 @@ jobs: groups: ${{ steps.group-list.outputs.FOLDERS }} steps: - name: Harden Runner - uses: step-security/harden-runner@5c7944e73c4c2a096b17a9cb74d65b6c2bbafbde # v2.9.1 + uses: step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7 # v2.10.1 with: egress-policy: audit - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 @@ -56,7 +56,7 @@ jobs: runs-on: kubectl steps: - name: Harden Runner - uses: step-security/harden-runner@5c7944e73c4c2a096b17a9cb74d65b6c2bbafbde # v2.9.1 + uses: step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7 # v2.10.1 with: egress-policy: audit - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 @@ -72,7 +72,7 @@ jobs: runs-on: ${{ 
github.repository_owner == 'intel' && 'intel-ubuntu-latest' || 'ubuntu-latest' }} steps: - name: Harden Runner - uses: step-security/harden-runner@5c7944e73c4c2a096b17a9cb74d65b6c2bbafbde # v2.9.1 + uses: step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7 # v2.10.1 with: egress-policy: audit - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 From 5b55cc1246643f38c5f54229ae6ad4ca045bd1e8 Mon Sep 17 00:00:00 2001 From: Srikanth Ramakrishna Date: Thu, 12 Sep 2024 15:24:00 -0700 Subject: [PATCH 49/50] update setuptools and pip to resolve trivy vulnerabilities (#380) Signed-off-by: Srikanth Ramakrishna --- pytorch/Dockerfile | 8 +++++++- pytorch/serving/torchserve-xpu-requirements.txt | 2 +- pytorch/venv-requirements.txt | 5 +++++ 3 files changed, 13 insertions(+), 2 deletions(-) create mode 100644 pytorch/venv-requirements.txt diff --git a/pytorch/Dockerfile b/pytorch/Dockerfile index 4015c2a5..0ad9c07f 100644 --- a/pytorch/Dockerfile +++ b/pytorch/Dockerfile @@ -229,7 +229,13 @@ RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missin python3-venv && \ rm -rf /var/lib/apt/lists/* -RUN python3 -m venv /home/venv +WORKDIR / +COPY venv-requirements.txt . 
+ +RUN python3 -m venv /home/venv && \ + /home/venv/bin/python -m pip install --no-cache-dir --upgrade pip && \ + /home/venv/bin/python -m pip install --no-cache-dir -r venv-requirements.txt && \ + rm -rf venv-requirements.txt ENV PATH="/home/venv/bin:$PATH" diff --git a/pytorch/serving/torchserve-xpu-requirements.txt b/pytorch/serving/torchserve-xpu-requirements.txt index 693402fe..6cd3ff99 100644 --- a/pytorch/serving/torchserve-xpu-requirements.txt +++ b/pytorch/serving/torchserve-xpu-requirements.txt @@ -3,7 +3,7 @@ torchvision==0.18.1+cxx11.abi torchaudio==2.3.1+cxx11.abi intel_extension_for_pytorch==2.3.110+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us -numpy==1.26.4 +numpy==2.1.1 captum>=0.7.0 cython>=3.0.10 pynvml>=11.5.0 diff --git a/pytorch/venv-requirements.txt b/pytorch/venv-requirements.txt new file mode 100644 index 00000000..4d686efe --- /dev/null +++ b/pytorch/venv-requirements.txt @@ -0,0 +1,5 @@ +setuptools>=70.0.0 +psutil==6.0.0 +mkl==2024.2.1 +mkl-include==2024.2.1 +intel-openmp==2024.2.1 From 1b1c1d4002c2ca45f970c280e0163344df44383a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 12 Sep 2024 15:25:13 -0700 Subject: [PATCH 50/50] Bump the pytorch group across 1 directory with 13 updates (#379) Signed-off-by: dependabot[bot] Signed-off-by: Srikanth Ramakrishna Signed-off-by: Srikanth Ramakrishna Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Srikanth Ramakrishna Co-authored-by: Tyler Titsworth --- pytorch/hf-genai-requirements.txt | 16 ++++++++-------- pytorch/jupyter-requirements.txt | 2 +- pytorch/multinode/requirements.txt | 2 +- pytorch/serving/torchserve-requirements.txt | 2 +- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/pytorch/hf-genai-requirements.txt b/pytorch/hf-genai-requirements.txt index 6671cbaf..8eb7fb3a 100644 --- a/pytorch/hf-genai-requirements.txt +++ 
b/pytorch/hf-genai-requirements.txt @@ -1,13 +1,13 @@ -accelerate==0.33.0 -datasets==2.21.0 +accelerate==0.34.2 +datasets==3.0.0 einops==0.8.0 -evaluate==0.4.2 -onnxruntime-extensions==0.11.0 -onnxruntime==1.18.1 +evaluate==0.4.3 +onnxruntime-extensions==0.12.0 +onnxruntime==1.19.2 peft==0.12.0 -protobuf==5.27.3 +protobuf==5.28.1 py-cpuinfo==9.0.0 -scikit-learn==1.5.1 +scikit-learn==1.5.2 SentencePiece==0.2.0 tokenizers==0.19.1 -transformers==4.44.0 +transformers==4.44.2 diff --git a/pytorch/jupyter-requirements.txt b/pytorch/jupyter-requirements.txt index e95ad6e8..4313b738 100644 --- a/pytorch/jupyter-requirements.txt +++ b/pytorch/jupyter-requirements.txt @@ -1,4 +1,4 @@ -jupyterlab==4.3.0b0 +jupyterlab==4.3.0b1 jupyterhub==5.1.0 notebook==7.3.0a1 jupyter-server-proxy>=4.1.2 diff --git a/pytorch/multinode/requirements.txt b/pytorch/multinode/requirements.txt index c941708a..a303e658 100644 --- a/pytorch/multinode/requirements.txt +++ b/pytorch/multinode/requirements.txt @@ -1,4 +1,4 @@ -neural-compressor==3.0 +neural-compressor==3.0.2 oneccl_bind_pt==2.4.0+cpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/ oneccl-devel>=2021.13.0 # required to build deepspeed ops diff --git a/pytorch/serving/torchserve-requirements.txt b/pytorch/serving/torchserve-requirements.txt index f495a686..41d78b17 100644 --- a/pytorch/serving/torchserve-requirements.txt +++ b/pytorch/serving/torchserve-requirements.txt @@ -6,7 +6,7 @@ pyyaml>=6.0.1 torch-model-archiver==0.11.1 torch-workflow-archiver==0.2.14 torchserve==0.11.1 -torchtext==0.18.0 +torchtext==0.18.0+cpu torchvision==0.19.0 -f https://pytorch-extension.intel.com/release-whl/stable/cpu/us/ intel_extension_for_pytorch==2.4.0+cpu