Add IPEX inference_mode context manager to enable optimizations on Intel platforms. (#125)

mfuntowicz · web-flow · commit d16ca550c050 · 2022-12-05T14:42:24.000+01:00
* Add IPEX dependency

* Initial support for IPEX inference mode.

* Added docker image

* Force float32 for now with kernel selection.

* Implement default fallback in case of Exception for optimized model.

* Move IPEX to an optional dependency

* Simplify the usage of inference_mode by forcing usage of oneDNN

* Enable the use of AMP for bfloat16

* Added documentation.

* Style.

* Make sure we do not import IPEX when it is not available.
diff --git a/docker/Dockerfile.intel b/docker/Dockerfile.intel
@@ -0,0 +1,70 @@
+# syntax = docker/dockerfile:1
+# based on https://github.com/pytorch/pytorch/blob/master/Dockerfile
+#
+# NOTE: To build this you will need a docker version >= 19.03 and DOCKER_BUILDKIT=1
+#
+#       If you do not use buildkit you are not going to have a good time
+#
+#       For reference:
+#           https://docs.docker.com/develop/develop-images/build_enhancements/
+
+ARG BASE_IMAGE=ubuntu:22.04
+FROM ${BASE_IMAGE} AS dev-base
+RUN --mount=type=cache,id=apt-dev,target=/var/cache/apt \
+    apt-get update && \
+    DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
+    ca-certificates \
+    git \
+    curl \
+    vim \
+    build-essential \
+    ccache \
+    libgoogle-perftools-dev \
+    numactl \
+    cmake \
+    libjpeg-dev \
+    pybind11-dev \
+    libpng-dev \
+    pybind11-dev \
+    && rm -rf /var/lib/apt/lists/*
+RUN /usr/sbin/update-ccache-symlinks
+RUN mkdir /opt/ccache && ccache --set-config=cache_dir=/opt/ccache
+ENV PATH /opt/conda/bin:$PATH
+
+FROM dev-base as conda
+ARG PYTHON_VERSION=3.10
+RUN curl -fsSL -v -o ~/miniconda.sh -O  https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh  && \
+    chmod +x ~/miniconda.sh && \
+    ~/miniconda.sh -b -p /opt/conda && \
+    rm ~/miniconda.sh && \
+    /opt/conda/bin/conda install -y python=${PYTHON_VERSION} conda-build pyyaml numpy ipython mkl mkl-include ninja cython typing pybind11 Pillow && \
+    /opt/conda/bin/conda clean -ya
+
+FROM dev-base AS build
+ARG IPEX_VERSION=v1.13.0
+ARG PYTORCH_VERSION=v1.13.0
+ARG TORCHVISION_VERSION=0.13.0+cpu
+ARG TORCHAUDIO_VERSION=0.13.0+cpu
+COPY --from=conda /opt/conda /opt/conda
+RUN --mount=type=cache,target=/opt/ccache \
+    python -m pip install --no-cache-dir torch==${PYTORCH_VERSION}+cpu torchvision==${TORCHVISION_VERSION} torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torch_stable.html && \
+    git clone https://github.com/intel/intel-extension-for-pytorch && \
+    cd intel-extension-for-pytorch && \
+    git checkout ${IPEX_VERSION} && \
+    git submodule sync && \
+    git submodule update --init --recursive && \
+    python -m pip install --no-cache-dir -r requirements.txt && \
+    python setup.py bdist_wheel && \
+    python -m pip install --no-cache-dir dist/*.whl && \
+    cd .. && rm -rf intel-extension-for-pytorch
+
+FROM dev-base as dev
+COPY --from=build /opt/conda /opt/conda
+ARG OMP_NUM_THREADS=1
+ENV OMP_NUM_THREADS ${OMP_NUM_THREADS}
+ARG KMP_BLOCKTIME=1
+ENV KMP_BLOCKTIME ${KMP_BLOCKTIME}
+ARG KMP_HW_SUBSET=1T
+ENV KMP_HW_SUBSET ${KMP_HW_SUBSET}
+ENV LD_PRELOAD "/opt/conda/lib/libiomp5.so /usr/lib/x86_64-linux-gnu/libtcmalloc.so"
+ENV LD_LIBRARY_PATH "/opt/conda/lib/python3.8/site-packages/lib/"
diff --git a/optimum/intel/__init__.py b/optimum/intel/__init__.py
@@ -12,4 +12,5 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
 
+from .ipex import inference_mode
 from .version import __version__
diff --git a/optimum/intel/ipex/__init__.py b/optimum/intel/ipex/__init__.py
@@ -0,0 +1 @@
+from .inference import inference_mode
diff --git a/optimum/intel/ipex/inference.py b/optimum/intel/ipex/inference.py
@@ -0,0 +1,107 @@
+from typing import Union
+
+import torch
+from torch import nn
+from transformers import add_start_docstrings
+from transformers.pipelines import Pipeline
+from transformers.utils import is_ipex_available
+
+
+IPEX_NOT_AVAILABLE_ERROR_MSG = (
+    "Intel PyTorch Extensions was not found."
+    "please make sure you've installed the package or run "
+    "pip install intel_extension_for_pytorch"
+)
+
+if is_ipex_available():
+    import intel_extension_for_pytorch as ipex
+
+
+class _ModelFallbackWrapper:
+
+    __slots__ = ("_optimized", "_default")
+
+    def __init__(self, optimized, default):
+        self._optimized = optimized
+        self._default = default
+
+    def __call__(self, *args, **kwargs):
+        try:
+            return self._optimized(*args, **kwargs)
+        except Exception:
+            return self._default(*args, **kwargs)
+
+    def __getattr__(self, item):
+        if not item.startswith("__"):
+            return getattr(self._default, item)
+        else:
+            return self.item
+
+
+@add_start_docstrings(
+    """
+    inference_mode is an Intel specific context-manager analogous to PyTorch's inference_mode to use for inference
+    workload on Intel CPUs, especially Intel Xeon Scalable CPUs.
+    """,
+)
+class inference_mode:
+    __slots__ = ("_model", "_dtype", "_graph_mode", "_verbose", "_original")
+
+    def __init__(self, model: Union[nn.Module, Pipeline], dtype: torch.dtype = torch.float32, verbose: bool = False):
+        """
+        Args:
+            model (`torch.nn.Module` or `transformers.Pipeline`):
+                The model or pipeline instance to optimize.
+            dtype (`torch.dtype = torch.float32`, *optional*):
+                The data type used to do the computation.
+                Acceptable types are `torch.float32` (default) and `torch.bfloat16`.
+                Please note that `torch.bfloat16` requires the `avx512_bf16` instruction set, as present on
+                4th Generation Intel Xeon Scalable CPUs (Sapphire Rapids).
+            verbose (`boolean = False`, *optional*):
+                Enable IPEX verbose output to see the kernels and optimizations applied.
+        """
+        if not is_ipex_available():
+            raise ImportError(IPEX_NOT_AVAILABLE_ERROR_MSG)
+
+        self._model = model
+        self._verbose = ipex.utils.verbose.VERBOSE_ON if verbose else ipex.utils.verbose.VERBOSE_OFF
+        self._dtype = dtype
+        self._graph_mode = False  # Let's keep for future use when it doesn't hang anymore
+        self._original = None
+
+    def __enter__(self):
+        with torch.inference_mode():
+            with ipex.verbose(self._verbose):
+                ipex.enable_onednn_fusion(True)
+                if isinstance(self._model, Pipeline):
+                    self._original = self._model.model
+
+                    model = ipex.optimize(
+                        self._model.model,
+                        dtype=self._dtype,
+                        graph_mode=self._graph_mode,
+                        level="O1",
+                        auto_kernel_selection=True,
+                    )
+
+                    # Enable automatic mixed precision (AMP) if we are going to target `bfloat16`
+                    with torch.cpu.amp.autocast(enabled=(self._dtype == torch.bfloat16)):
+                        # Patching model with the new one
+                        self._model.model = _ModelFallbackWrapper(model, self._original)
+                        return self._model
+                else:
+                    self._original = self._model
+                    model = ipex.optimize(
+                        self._model,
+                        dtype=self._dtype,
+                        graph_mode=self._graph_mode,
+                        level="O1",
+                        auto_kernel_selection=True,
+                    )
+
+                    # Enable automatic mixed precision (AMP) if we are going to target `bfloat16`
+                    with torch.cpu.amp.autocast(enabled=(self._dtype == torch.bfloat16)):
+                        return model
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self._model = self._original
diff --git a/setup.py b/setup.py
@@ -32,6 +32,7 @@
     "neural-compressor": "neural-compressor>=1.13.0",
     "openvino": ["openvino>=2022.2.0", "transformers>=4.20.0,<4.24.1"],
     "nncf": ["nncf"],
+    "ipex": ["intel_extension_for_pytorch"],
     "quality": QUALITY_REQUIRES,
     "tests": TESTS_REQUIRE,
 }

Original file line number	Diff line number	Diff line change
`@@ -32,6 +32,7 @@`
`32`	`32`	`"neural-compressor": "neural-compressor>=1.13.0",`
`33`	`33`	`"openvino": ["openvino>=2022.2.0", "transformers>=4.20.0,<4.24.1"],`
`34`	`34`	`"nncf": ["nncf"],`
	`35`	`+ "ipex": ["intel_extension_for_pytorch"],`
`35`	`36`	`"quality": QUALITY_REQUIRES,`
`36`	`37`	`"tests": TESTS_REQUIRE,`
`37`	`38`	`}`