Commit be20c15
Squashed commit of the following:

commit 2bb257e Author: Kaihui-intel <kaihui.tang@intel.com> Date: Thu Oct 10 19:27:11 2024 +0800 Add woq examples (#1982) Signed-off-by: Kaihui-intel <kaihui.tang@intel.com> Signed-off-by: Sun, Xuehao <xuehao.sun@intel.com> Co-authored-by: Sun, Xuehao <xuehao.sun@intel.com>
commit 586eb88 Author: Huang, Tai <tai.huang@intel.com> Date: Wed Oct 9 09:22:39 2024 +0800 add transformers-like api link in readme (#2022) Signed-off-by: Huang, Tai <tai.huang@intel.com>
commit 4e9c764 Author: Kaihui-intel <kaihui.tang@intel.com> Date: Tue Oct 8 13:13:45 2024 +0800 Remove itrex dependency for 3x example (#2016) Signed-off-by: Kaihui-intel <kaihui.tang@intel.com> Co-authored-by: Sun, Xuehao <xuehao.sun@intel.com>
commit a0066d4 Author: Kaihui-intel <kaihui.tang@intel.com> Date: Mon Sep 30 18:17:32 2024 +0800 Fix transformers rtn layer-wise quant (#2008) Signed-off-by: Kaihui-intel <kaihui.tang@intel.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
commit 802a5af Author: Huang, Tai <tai.huang@intel.com> Date: Mon Sep 30 17:02:52 2024 +0800 add autoround EMNLP24 to pub list (#2014) Signed-off-by: Huang, Tai <tai.huang@intel.com>
commit 44795a1 Author: Kaihui-intel <kaihui.tang@intel.com> Date: Mon Sep 30 16:55:22 2024 +0800 Adapt transformers 4.45.1 (#2019) Signed-off-by: Kaihui-intel <kaihui.tang@intel.com> Co-authored-by: changwangss <chang1.wang@intel.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
commit d4662ad Author: Kaihui-intel <kaihui.tang@intel.com> Date: Mon Sep 30 15:52:17 2024 +0800 Add transformers-like api doc (#2018) Signed-off-by: Kaihui-intel <kaihui.tang@intel.com>
commit 72398b6 Author: Wang, Chang <chang1.wang@intel.com> Date: Fri Sep 27 15:11:04 2024 +0800 fix xpu device set weight and bias (#2010) Signed-off-by: changwangss <chang1.wang@intel.com> Co-authored-by: Sun, Xuehao <xuehao.sun@intel.com>
commit 9d27743 Author: Sun, Xuehao <xuehao.sun@intel.com> Date: Fri Sep 27 14:17:24 2024 +0800 Update model accuracy (#2006) Signed-off-by: Sun, Xuehao <xuehao.sun@intel.com>
commit 7bbc473 Author: xinhe <xin3.he@intel.com> Date: Fri Sep 27 11:47:00 2024 +0800 add pad_to_buckets in evaluation for hpu performance (#2011) Signed-off-by: xin3he <xin3.he@intel.com>
commit b6b7d7c Author: Kaihui-intel <kaihui.tang@intel.com> Date: Thu Sep 26 17:21:54 2024 +0800 Update auto_round requirements for transformers example (#2013) Signed-off-by: Kaihui-intel <kaihui.tang@intel.com>
commit ee600ba Author: Wang, Chang <chang1.wang@intel.com> Date: Fri Sep 20 13:54:06 2024 +0800 add repack_awq_to_optimum_format function (#1998) Signed-off-by: changwangss <chang1.wang@intel.com>
commit 4ee6861 Author: Sun, Xuehao <xuehao.sun@intel.com> Date: Thu Sep 19 22:27:05 2024 +0800 remove accelerate version in unit test (#2007) Signed-off-by: Sun, Xuehao <xuehao.sun@intel.com>
commit 2445811 Author: WeiweiZhang1 <weiwei1.zhang@intel.com> Date: Sat Sep 14 18:13:30 2024 +0800 enable auto_round format export (#2002) Signed-off-by: Zhang, Weiwei1 <weiwei1.zhang@intel.com>
commit 906333a Author: Kaihui-intel <kaihui.tang@intel.com> Date: Sat Sep 14 16:17:46 2024 +0800 Replace FORCE_DEVICE with INC_TARGET_DEVICE [transformers] (#2005) Signed-off-by: Kaihui-intel <kaihui.tang@intel.com>
commit 443d007 Author: xinhe <xin3.he@intel.com> Date: Fri Sep 13 21:35:32 2024 +0800 add INC_FORCE_DEVICE introduction, later renamed to INC_TARGET_DEVICE (#1988) Signed-off-by: xin3he <xin3.he@intel.com> Co-authored-by: Yi Liu <yi4.liu@intel.com>
commit 5de9a4f Author: Kaihui-intel <kaihui.tang@intel.com> Date: Fri Sep 13 20:48:22 2024 +0800 Support transformers-like api for woq quantization (#1987) Signed-off-by: Kaihui-intel <kaihui.tang@intel.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Wang, Chang <chang1.wang@intel.com>
commit 9c39b42 Author: chen, suyue <suyue.chen@intel.com> Date: Thu Sep 12 14:34:49 2024 +0800 update docker image prune rules (#2003) Signed-off-by: chensuyue <suyue.chen@intel.com>
commit 09d4f2d Author: Huang, Tai <tai.huang@intel.com> Date: Mon Sep 9 09:24:35 2024 +0800 Add recent publications (#1995) (adds recent publications and updates the total count) Signed-off-by: Huang, Tai <tai.huang@intel.com>
commit 399cd44 Author: Kaihui-intel <kaihui.tang@intel.com> Date: Tue Sep 3 16:37:09 2024 +0800 Remove the save of gptq config (#1993) Signed-off-by: Kaihui-intel <kaihui.tang@intel.com>
commit 05272c4 Author: Yi Liu <yi4.liu@intel.com> Date: Tue Sep 3 10:21:51 2024 +0800 add per_channel_minmax (#1990) Signed-off-by: yiliu30 <yi4.liu@intel.com>
commit 82d8c06 Author: chen, suyue <suyue.chen@intel.com> Date: Fri Aug 30 21:21:00 2024 +0800 update 3x pt binary build (#1992) Signed-off-by: chensuyue <suyue.chen@intel.com>
commit e9f06af Author: Huang, Tai <tai.huang@intel.com> Date: Fri Aug 30 17:49:48 2024 +0800 Update installation_guide.md (#1989) Correct typo in installation doc
commit 093c966 Author: Wang, Chang <chang1.wang@intel.com> Date: Fri Aug 30 17:45:54 2024 +0800 add quantize, save, load function for transformers-like api (#1986) Signed-off-by: changwangss <chang1.wang@intel.com>
commit 4dd49a4 Author: xinhe <xin3.he@intel.com> Date: Thu Aug 29 17:23:18 2024 +0800 add hasattr check for torch fp8 dtype (#1985) Signed-off-by: xin3he <xin3.he@intel.com>
commit f2c454f Author: chen, suyue <suyue.chen@intel.com> Date: Thu Aug 29 13:45:39 2024 +0800 update installation and ci test for 3x api (#1991) Signed-off-by: chensuyue <suyue.chen@intel.com>
commit 7ba9fdc Author: Kaihui-intel <kaihui.tang@intel.com> Date: Mon Aug 19 14:50:50 2024 +0800 support gptq `true_sequential` and `quant_lm_head` (#1977) Signed-off-by: Kaihui-intel <kaihui.tang@intel.com>
commit 68b1f8b Author: Sun, Xuehao <xuehao.sun@intel.com> Date: Fri Aug 16 09:43:46 2024 +0800 Fix UT env and upgrade torch to 2.4.0 (#1978) Signed-off-by: Sun, Xuehao <xuehao.sun@intel.com>
commit f9dfd54 Author: Yi Liu <yi4.liu@intel.com> Date: Thu Aug 15 14:13:26 2024 +0800 Skip some tests for torch 2.4 (#1981) Signed-off-by: yiliu30 <yi4.liu@intel.com>
commit 46d9192 Author: xinhe <xin3.he@intel.com> Date: Thu Aug 15 09:57:22 2024 +0800 update readme for fp8 (#1979) Signed-off-by: xinhe3 <xinhe3@habana.ai>
commit 842b715 Author: chen, suyue <suyue.chen@intel.com> Date: Tue Aug 13 12:09:25 2024 +0800 bump main version into v3.1 (#1974) Signed-off-by: chensuyue <suyue.chen@intel.com>
commit 3845cdc Author: Neo Zhang Jianyu <jianyu.zhang@intel.com> Date: Tue Aug 13 12:09:09 2024 +0800 fix online doc search issue (#1975) Co-authored-by: ZhangJianyu <zhang.jianyu@outlook.com>
commit 7056720 Author: chen, suyue <suyue.chen@intel.com> Date: Sun Aug 11 20:58:34 2024 +0800 update main page (#1973) Signed-off-by: chensuyue <suyue.chen@intel.com>
commit 95197d1 Author: xinhe <xin3.he@intel.com> Date: Sat Aug 10 23:28:43 2024 +0800 Cherry pick v1.17.0 (#1964):
    * [SW-184941] INC CI, CD and Promotion
    * [SW-183320] updated setup.py
    * [SW-177474] add HQT FP8 porting code
    * [SW-189361] Fix white list extend
    * [SW-191317] Raise exception according to hqt config object
    * [SW-184714] Port HQT code into INC: HQT lib content was copied as-is under fp8_quant; tests were copied to the 3.x torch location
    * [SW-184714] Add internal folder to fp8 quant (used for experiments, not intended for users)
    * [SW-177468] Removed unused code + cleanup
    * Fix errors in regression_detection
    * [SW-187731] Save orig module as member of patched module; this allows direct usage of the original module methods, which solves a torch compile issue
    * [SW-190899] Install packages according to configuration
    * [SW-184689] use finalize_calibration internally for one-step flow
    * [SW-191945] align requirement_pt.txt in gerrit INC with GitHub INC
    * [SW-192358] Remove HQT reference in INC
    * [SW-191415] update fp8 maxAbs observer using torch.copy_
    * [SW-184943] Enhance INC WOQ model loading: support loading Huggingface WOQ models, abstract a WeightOnlyLinear base class, add INCWeightOnlyLinear and HPUWeighOnlyLinear subclasses, load WOQ linear weights module by module, and save HPU-format tensors to reuse them once loaded again
    * [SW-190303] Implement HPUWeightOnlyLinear class in INC
    * [SW-192809] fix json_file bug when instantiating FP8Config class
    * [SW-192931] align setup.py with github INC and remove fp8_convert
    * [SW-192917] Update all HQT logic files with pre-commit check
    * assorted follow-ups: update docstring; add fp8 example and document (#1639); update settings to be compatible with gerrit; enhance ut; move fp8 sample to helloworld folder; update torch version of habana docker; update readme demo; update WeightOnlyLinear to INCWeightOnlyLinear; add docstring for FP8Config; fix pylint; update fp8 test scripts and test scope; delete deps; update container into v1.17.0; update docker version; update pt ut; add lib path; fix dir issue; fix typo; update pre-commit-ci; work around for hpu; fix UT; fix parameter; omit some test; update main page example to llm loading; fix autotune
    Signed-off-by: Zhou Yuwen <zyuwen@habana.ai> Signed-off-by: xinhe3 <xinhe3@hababa.ai> Signed-off-by: yuwenzho <yuwen.zhou@intel.com> Signed-off-by: chensuyue <suyue.chen@intel.com> Co-authored-by: yan tomsinsky <ytomsinsky@habana.ai> Co-authored-by: Ron Ben Moshe <rbenmoshe@habana.ai> Co-authored-by: Uri Livne <ulivne@habana.ai> Co-authored-by: Danny Semiat <dsemiat@habana.ai> Co-authored-by: smarkovichgolan <smarkovich@habana.ai> Co-authored-by: Dudi Lester <dlester@habana.ai>
commit de0fa21 Author: Huang, Tai <tai.huang@intel.com> Date: Fri Aug 9 22:32:37 2024 +0800 Fix broken link in docs (#1969) Signed-off-by: Huang, Tai <tai.huang@intel.com>
commit 385da7c Author: Sun, Xuehao <xuehao.sun@intel.com> Date: Fri Aug 9 21:53:51 2024 +0800 Add 3.x readme (#1971) Signed-off-by: Sun, Xuehao <xuehao.sun@intel.com>
commit acd8f4f Author: Huang, Tai <tai.huang@intel.com> Date: Fri Aug 9 15:24:14 2024 +0800 Add version mapping between INC and Gaudi SW Stack (#1967) Signed-off-by: Huang, Tai <tai.huang@intel.com>
commit 74a4641 Author: Sun, Xuehao <xuehao.sun@intel.com> Date: Fri Aug 9 10:23:59 2024 +0800 remove unnecessary CI (#1966) Signed-off-by: Sun, Xuehao <xuehao.sun@intel.com>
commit b99abae Author: Kaihui-intel <kaihui.tang@intel.com> Date: Tue Aug 6 16:02:03 2024 +0800 Fix `opt_125m_woq_gptq_int4_dq_ggml` issue (#1965) Signed-off-by: Kaihui-intel <kaihui.tang@intel.com>
commit b35ff8f Author: Zixuan Cheng <110808245+violetch24@users.noreply.github.com> Date: Fri Aug 2 09:06:35 2024 +0800 example update for 3.x ipex sq (#1902) Signed-off-by: violetch24 <zixuan@aia-sdp-spr-117706.jf.intel.com>
commit 000946f Author: Zixuan Cheng <110808245+violetch24@users.noreply.github.com> Date: Thu Aug 1 10:19:32 2024 +0800 add SDXL model example to INC 3.x (#1887) (adds evaluation and test scripts, an iteration limit, requirements, and minor fixes) Signed-off-by: Cheng, Zixuan <zixuan.cheng@intel.com> Signed-off-by: violetch24 <zixuan@aia-sdp-spr-117706.jf.intel.com> Signed-off-by: chensuyue <suyue.chen@intel.com> Co-authored-by: violetch24 <zixuan@aia-sdp-spr-117706.jf.intel.com> Co-authored-by: chensuyue <suyue.chen@intel.com>
commit aa42e5e Author: xinhe <xin3.he@intel.com> Date: Wed Jul 31 15:36:06 2024 +0800 replenish docstring (#1955) (also updates the Quantizer API docstring, includes "Add docstring for auto accelerator" (#1956), and temporarily removes torch/quantization until the fp8 code is updated) Signed-off-by: xin3he <xin3.he@intel.com> Signed-off-by: yiliu30 <yi4.liu@intel.com> Co-authored-by: Yi Liu <106061964+yiliu30@users.noreply.github.com>
commit 81a076d Author: Neo Zhang Jianyu <jianyu.zhang@intel.com> Date: Wed Jul 31 13:51:33 2024 +0800 fix welcome.html link issue (#1962) Co-authored-by: ZhangJianyu <zhang.jianyu@outlook.com>
commit 87f02c1 Author: chen, suyue <suyue.chen@intel.com> Date: Wed Jul 31 10:09:47 2024 +0800 fix docs link (#1959) Signed-off-by: chensuyue <suyue.chen@intel.com>
commit 03813e2 Author: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed Jul 31 10:09:29 2024 +0800 Bump tensorflow version (#1961) Signed-off-by: dependabot[bot] <support@github.com>
commit 3b5dbf6 Author: Kaihui-intel <kaihui.tang@intel.com> Date: Tue Jul 30 17:27:21 2024 +0800 Set low_gpu_mem_usage=False for AutoRound Signed-off-by: Kaihui-intel <kaihui.tang@intel.com>
commit 41244d3 Author: chen, suyue <suyue.chen@intel.com> Date: Mon Jul 29 23:05:36 2024 +0800 new previous results could not find all raise issues in CI model test (#1958) Signed-off-by: chensuyue <suyue.chen@intel.com>
commit 190e6b2 Author: Kaihui-intel <kaihui.tang@intel.com> Date: Mon Jul 29 19:39:57 2024 +0800 Fix itrex qbits nf4/int8 training core dumped issue (#1954) Signed-off-by: Kaihui-intel <kaihui.tang@intel.com> Signed-off-by: chensuyue <suyue.chen@intel.com>
commit 0e724a4 Author: Kaihui-intel <kaihui.tang@intel.com> Date: Mon Jul 29 16:22:13 2024 +0800 Add save/load for pt2e example (#1927) Signed-off-by: Kaihui-intel <kaihui.tang@intel.com>
commit 50eb6fb Author: chen, suyue <suyue.chen@intel.com> Date: Mon Jul 29 13:40:36 2024 +0800 update 3x torch installation (#1957) Signed-off-by: chensuyue <suyue.chen@intel.com>
commit 6e1b1da Author: Zixuan Cheng <110808245+violetch24@users.noreply.github.com> Date: Fri Jul 26 15:58:00 2024 +0800 add ipex xpu example to 3x API (#1948) Signed-off-by: violetch24 <zixuan@aia-sdp-spr-117706.jf.intel.com>
commit 19024b3 Author: zehao-intel <zehao.huang@intel.com> Date: Fri Jul 26 14:52:01 2024 +0800 Enable yolov5 Example for TF 3x API (#1943) Signed-off-by: zehao-intel <zehao.huang@intel.com>
commit d84a93f Author: zehao-intel <zehao.huang@intel.com> Date: Thu Jul 25 14:45:19 2024 +0800 Complement UT of calibration function for TF 3x API (#1945) Signed-off-by: zehao-intel <zehao.huang@intel.com>
commit fb85779 Author: zehao-intel <zehao.huang@intel.com> Date: Thu Jul 25 14:04:25 2024 +0800 Update Examples for TF 3x API (#1901) Signed-off-by: zehao-intel <zehao.huang@intel.com>
commit 6b30207 Author: zehao-intel <zehao.huang@intel.com> Date: Thu Jul 25 13:39:06 2024 +0800 Add Docstring for TF 3x API and Torch 3x Mixed Precision (#1944) Signed-off-by: zehao-intel <zehao.huang@intel.com>
commit d254d50 Author: Yi Liu <106061964+yiliu30@users.noreply.github.com> Date: Wed Jul 24 21:50:44 2024 +0800 Update doc for client-usage and LWQ (#1947) Signed-off-by: yiliu30 <yi4.liu@intel.com>
commit f253d35 Author: Neo Zhang Jianyu <jianyu.zhang@intel.com> Date: Wed Jul 24 17:48:05 2024 +0800 Update publish.yml (#1950)
commit 6cda338 Author: Neo Zhang Jianyu <jianyu.zhang@intel.com> Date: Wed Jul 24 17:31:19 2024 +0800 Update publish.yml (#1949)
commit c80b68a Author: Kaihui-intel <kaihui.tang@intel.com> Date: Tue Jul 23 21:26:53 2024 +0800 Update AutoRound commit version (#1941) Signed-off-by: Kaihui-intel <kaihui.tang@intel.com>
commit 9077b38 Author: zehao-intel <zehao.huang@intel.com> Date: Tue Jul 23 17:04:37 2024 +0800 Refine Pytorch 3x Mixed Precision Example (#1946) Signed-off-by: zehao-intel <zehao.huang@intel.com>
commit efcb293 Author: Neo Zhang Jianyu <jianyu.zhang@intel.com> Date: Tue Jul 23 10:15:41 2024 +0800 Update for API 3.0 online doc (#1940) Co-authored-by: ZhangJianyu <zhang.jianyu@outlook.com>
commit b787940 Author: Wang, Mengni <mengni.wang@intel.com> Date: Tue Jul 23 10:12:34 2024 +0800 add docstring for mx quant (#1932) Signed-off-by: Mengni Wang <mengni.wang@intel.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: xinhe <xin3.he@intel.com>
commit 0c52e12 Author: Kaihui-intel <kaihui.tang@intel.com> Date: Tue Jul 23 09:59:17 2024 +0800 Add docstring for WOQ&LayerWise (#1938) Signed-off-by: Kaihui-intel <kaihui.tang@intel.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: xinhe <xin3.he@intel.com>
commit 08914d6 Author: Huang, Tai <tai.huang@intel.com> Date: Mon Jul 22 11:14:44 2024 +0800 add read permission token (#1942) Signed-off-by: Huang, Tai <tai.huang@intel.com>
commit e106dea Author: zehao-intel <zehao.huang@intel.com> Date: Sun Jul 21 21:48:51 2024 +0800 Update Example for Pytorch 3x Mixed Precision (#1882) Signed-off-by: zehao-intel <zehao.huang@intel.com>
commit 1ebf698 Author: Zixuan Cheng <110808245+violetch24@users.noreply.github.com> Date: Fri Jul 19 15:56:09 2024 +0800 add docstring for static quant and smooth quant (#1936) (includes format fix and scan-path update) Signed-off-by: violetch24 <zixuan@aia-sdp-spr-117706.jf.intel.com>
commit 296c5d4 Author: Yi Liu <106061964+yiliu30@users.noreply.github.com> Date: Fri Jul 19 15:08:05 2024 +0800 Add docstring for PT2E and HQQ (#1937) Signed-off-by: yiliu30 <yi4.liu@intel.com>
commit 437c8e7 Author: Kaihui-intel <kaihui.tang@intel.com> Date: Thu Jul 18 10:00:41 2024 +0800 Fix unused pkgs import (#1931) Signed-off-by: Kaihui-intel <kaihui.tang@intel.com>
commit ff37401 Author: chen, suyue <suyue.chen@intel.com> Date: Wed Jul 17 23:11:15 2024 +0800 3.X API installation update (#1935) Signed-off-by: chensuyue <suyue.chen@intel.com>
commit 6c27c19 Author: zehao-intel <zehao.huang@intel.com> Date: Wed Jul 17 20:35:42 2024 +0800 Support calib_func on TF 3x API (#1934) Signed-off-by: zehao-intel <zehao.huang@intel.com>
commit 53e6ee6 Author: Zixuan Cheng <110808245+violetch24@users.noreply.github.com> Date: Wed Jul 17 20:35:03 2024 +0800 Support xpu for ipex static quant (#1916) Signed-off-by: violetch24 <zixuan@aia-sdp-spr-117706.jf.intel.com>
commit a1cc618 Author: chen, suyue <suyue.chen@intel.com> Date: Wed Jul 17 17:29:49 2024 +0800 remove peft version limit (#1933) Signed-off-by: chensuyue <suyue.chen@intel.com>
commit 3058388 Author: Yi Liu <106061964+yiliu30@users.noreply.github.com> Date: Wed Jul 17 15:31:38 2024 +0800 Add doc for client usage (#1914) Signed-off-by: yiliu30 <yi4.liu@intel.com>
commit 29471df Author: Kaihui-intel <kaihui.tang@intel.com> Date: Wed Jul 17 12:12:40 2024 +0800 Enhance load_empty_model import (#1930) Signed-off-by: Kaihui-intel <kaihui.tang@intel.com>
commit fd96851 Author: Kaihui-intel <kaihui.tang@intel.com> Date: Wed Jul 17 12:05:32 2024 +0800 Integrate AutoRound v0.3 to 2x (#1926) Signed-off-by: Kaihui-intel <kaihui.tang@intel.com>
commit bfa27e4 Author: Kaihui-intel <kaihui.tang@intel.com> Date: Wed Jul 17 09:33:13 2024 +0800 Integrate AutoRound v0.3 (#1925) Signed-off-by: Kaihui-intel <kaihui.tang@intel.com>
commit 5767aed Author: xinhe <xin3.he@intel.com> Date: Wed Jul 17 09:16:37 2024 +0800 add docstring for torch.quantization and torch.utils (#1928) Signed-off-by: xin3he <xin3.he@intel.com>
commit f909bca Author: chen, suyue <suyue.chen@intel.com> Date: Tue Jul 16 21:12:54 2024 +0800 update itrex ut test (#1929) Signed-off-by: chensuyue <suyue.chen@intel.com>
commit 649e6b1 Author: Kaihui-intel <kaihui.tang@intel.com> Date: Tue Jul 16 21:05:55 2024 +0800 Support LayerWise for RTN/GPTQ (#1883) Signed-off-by: Kaihui-intel <kaihui.tang@intel.com> Co-authored-by: chensuyue <suyue.chen@intel.com>
commit de43d85 Author: Kaihui-intel <kaihui.tang@intel.com> Date: Tue Jul 16 17:18:12 2024 +0800 Support absorb dict for awq (#1920) Signed-off-by: Kaihui-intel <kaihui.tang@intel.com>
commit e976595 Author: Kaihui-intel <kaihui.tang@intel.com> Date: Tue Jul 16 17:17:56 2024 +0800 Support woq Autotune (#1921) Signed-off-by: Kaihui-intel <kaihui.tang@intel.com>
commit d56075c Author: Huang, Tai <tai.huang@intel.com> Date: Tue Jul 16 15:21:06 2024 +0800 fix typo in architecture diagram (#1924) Signed-off-by: Huang, Tai <tai.huang@intel.com>
commit 0a54239 Author: chen, suyue <suyue.chen@intel.com> Date: Tue Jul 16 15:12:43 2024 +0800 update documentation for 3x API (#1923) Signed-off-by: chensuyue <suyue.chen@intel.com> Signed-off-by: xin3he <xin3.he@intel.com> Signed-off-by: yiliu30 <yi4.liu@intel.com>
commit be42d03 Author: xinhe <xin3.he@intel.com> Date: Tue Jul 16 09:48:48 2024 +0800 implement TorchBaseConfig (#1911) Signed-off-by: xin3he <xin3.he@intel.com>
commit 7a4715c Author: Kaihui-intel <kaihui.tang@intel.com> Date: Mon Jul 15 14:59:03 2024 +0800 Support PT2E save and load (#1918) Signed-off-by: Kaihui-intel <kaihui.tang@intel.com>
commit 34f0a9f Author: Yi Liu <106061964+yiliu30@users.noreply.github.com> Date: Mon Jul 15 09:10:14 2024 +0800 Add `save`/`load` support for HQQ (#1913) Signed-off-by: yiliu30 <yi4.liu@intel.com> Co-authored-by: chen, suyue <suyue.chen@intel.com>
commit d320460 Author: Yi Liu <106061964+yiliu30@users.noreply.github.com> Date: Fri Jul 12 14:48:12 2024 +0800 remove 1x docs (#1900) Signed-off-by: yiliu30 <yi4.liu@intel.com>
commit 6c547f7 Author: chen, suyue <suyue.chen@intel.com> Date: Fri Jul 12 14:42:04 2024 +0800 fix CI docker container clean up issue (#1917) Signed-off-by: chensuyue <suyue.chen@intel.com>
commit 1703658 Author: chen, suyue <suyue.chen@intel.com> Date: Fri Jul 12 11:14:48 2024 +0800 Remove deprecated modules (#1872) Signed-off-by: chensuyue <suyue.chen@intel.com>
commit f698c96 Author: chen, suyue <suyue.chen@intel.com> Date: Thu Jul 11 18:00:28 2024 +0800 update Gaudi CI baseline artifacts name (#1912) Signed-off-by: chensuyue <suyue.chen@intel.com>
commit 4a45093 Author: Yi Liu <106061964+yiliu30@users.noreply.github.com> Date: Thu Jul 11 17:47:47 2024 +0800 Add export support for TEQ (#1910) Signed-off-by: yiliu30 <yi4.liu@intel.com>
commit 16a7b11 Author: Yi Liu <106061964+yiliu30@users.noreply.github.com> Date: Thu Jul 11 17:13:24 2024 +0800 Get default config based on the auto-detect CPU type (#1904) Signed-off-by: yiliu30 <yi4.liu@intel.com>
commit 2fc7255 Author: xinhe <xin3.he@intel.com> Date: Thu Jul 11 13:22:52 2024 +0800 implement `incbench` command for ease-of-use benchmark (#1884): automatically checks NUMA/socket info and dumps it as a table for ease of understanding, supports both Linux and Windows platforms, adds benchmark documents, dumps a benchmark summary, and adds benchmark UTs. Usage:
    incbench main.py: run 1 instance on NUMA:0
    incbench --num_i 2 main.py: run 2 instances on NUMA:0
    incbench --num_c 2 main.py: run multi-instances with 2 cores per instance on NUMA:0
    incbench -C 24-47 main.py: run 1 instance on COREs:24-47
    incbench -C 24-47 --num_c 4 main.py: run multi-instances with 4 COREs per instance on COREs:24-47
    Signed-off-by: xin3he <xin3.he@intel.com> Co-authored-by: chen, suyue <suyue.chen@intel.com>
commit de8577e Author: chen, suyue <suyue.chen@intel.com> Date: Wed Jul 10 17:21:45 2024 +0800 bump version into 3.0 (#1908) Signed-off-by: chensuyue <suyue.chen@intel.com>
commit 01f16c4 Author: chen, suyue <suyue.chen@intel.com> Date: Wed Jul 10 17:19:57 2024 +0800 support habana fp8 UT test in CI (#1909) Signed-off-by: chensuyue <suyue.chen@intel.com>
commit 28578b9 Author: Yi Liu <106061964+yiliu30@users.noreply.github.com> Date: Wed Jul 10 13:19:27 2024 +0800 Add docstring for `common` module (#1905) Signed-off-by: yiliu30 <yi4.liu@intel.com>
commit 5fde50f Author: Wang, Chang <chang1.wang@intel.com> Date: Wed Jul 10 10:34:46 2024 +0800 update fp4_e2m1 mapping list (#1906) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
commit 3fe2fd9 Author: xinhe <xin3.he@intel.com> Date: Tue Jul 9 15:01:25 2024 +0800 fix bf16 symbolic_trace bug (#1892): the trace caused abnormal recursive calling and missing necessary attributes; fixed by moving BF16 fallback ahead of quantization and removing bf16_symbolic_trace Signed-off-by: xin3he <xin3.he@intel.com> Co-authored-by: Sun, Xuehao <xuehao.sun@intel.com>
commit e080e06 Author: Sun, Xuehao <xuehao.sun@intel.com> Date: Tue Jul 9 11:04:30 2024 +0800 remove neural insight CI (#1903) Signed-off-by: Sun, Xuehao <xuehao.sun@intel.com>
commit f28fcee Author: Yi Liu <106061964+yiliu30@users.noreply.github.com> Date: Fri Jul 5 15:47:37 2024 +0800 Remove 1x API (#1865) Signed-off-by: yiliu30 <yi4.liu@intel.com> Co-authored-by: chen, suyue <suyue.chen@intel.com>
commit 1386ac5 Author: Yi Liu <106061964+yiliu30@users.noreply.github.com> Date: Thu Jul 4 12:18:03 2024 +0800 Port auto-detect absorb layers for TEQ (#1895) Signed-off-by: yiliu30 <yi4.liu@intel.com>
commit 856118e Author: Wang, Chang <chang1.wang@intel.com> Date: Wed Jul 3 13:50:00 2024 +0800 remove import pdb (#1897) Signed-off-by: changwangss <chang1.wang@intel.com>
commit f75ff40 Author: xinhe <xin3.he@intel.com> Date: Wed Jul 3 13:07:48 2024 +0800 support auto_host2device on RTN and GPTQ (#1894) Signed-off-by: He, Xin3 <xin3.he@intel.com>
commit b9e73f5 Author: chen, suyue <suyue.chen@intel.com> Date: Wed Jul 3 11:10:45 2024 +0800 tmp fix nas deps issue (#1896) Signed-off-by: chensuyue <suyue.chen@intel.com>
commit 63b2912 Author: Yi Liu <106061964+yiliu30@users.noreply.github.com> Date: Tue Jul 2 14:46:02 2024 +0800 Refine HQQ UTs (#1888) Signed-off-by: yiliu30 <yi4.liu@intel.com>
commit 5592acc Author: zehao-intel <zehao.huang@intel.com> Date: Tue Jul 2 14:18:51 2024 +0800 Remove Gelu Fusion for TF Newapi (#1886) Signed-off-by: zehao-intel <zehao.huang@intel.com>
commit 4372a76 Author: Kaihui-intel <kaihui.tang@intel.com> Date: Fri Jun 28 14:55:10 2024 +0800 Fix sql injection for Neural Solution gRPC (#1879) Signed-off-by: Kaihui-intel <kaihui.tang@intel.com>
commit 4ae2e87 Author: xinhe <xin3.he@intel.com> Date: Thu Jun 27 09:56:52 2024 +0800 support quant_lm_head arg in all WOQ configs (#1881) Signed-off-by: xin3he <xin3.he@intel.com>
commit cc763f5 Author: Dina Suehiro Jones <dina.s.jones@intel.com> Date: Wed Jun 26 18:29:06 2024 -0700 Update the Gaudi container example in the README (#1885)
commit 1f58f02 Author: Yi Liu <106061964+yiliu30@users.noreply.github.com> Date: Thu Jun 20 22:03:45 2024 +0800 Add `set_local` support for static quant with pt2e (#1870) Signed-off-by: yiliu30 <yi4.liu@intel.com>
commit 0341295 Author: Yi Liu <106061964+yiliu30@users.noreply.github.com> Date: Wed Jun 19 09:40:11 2024 +0800 rm cov (#1878) Signed-off-by: yiliu30 <yi4.liu@intel.com>
commit 503d9ef Author: Kaihui-intel <kaihui.tang@intel.com> Date: Tue Jun 18 17:12:12 2024 +0800 Add op statistics dump for woq (#1876) Signed-off-by: Kaihui-intel <kaihui.tang@intel.com>
commit 5a0374e Author: Yi Liu <106061964+yiliu30@users.noreply.github.com> Date: Tue Jun 18 16:21:05 2024 +0800 Enhance autotune to return the best `q_model` directly (#1875) Signed-off-by: yiliu30 <yi4.liu@intel.com>
commit 90fb431 Author: Kaihui-intel <kaihui.tang@intel.com> Date: Tue Jun 18 16:06:04 2024 +0800 fix layer match (#1873) Signed-off-by: Kaihui-intel <kaihui.tang@intel.com> Co-authored-by: Sun, Xuehao <xuehao.sun@intel.com>
commit f4eb660 Author: Sun, Xuehao <xuehao.sun@intel.com> Date: Mon Jun 17 16:12:06 2024 +0800 Limit numpy versions (#1874) Signed-off-by: Sun, Xuehao <xuehao.sun@intel.com>
commit 2928d85 Author: chen, suyue <suyue.chen@intel.com> Date: Fri Jun 14 21:51:13 2024 +0800 update v2.6 release readme (#1871) Signed-off-by: chensuyue <suyue.chen@intel.com>
commit 48c5e3a Author: Kaihui-intel <kaihui.tang@intel.com> Date: Fri Jun 14 21:10:14 2024 +0800 Modify WOQ examples structure (#1866) Signed-off-by: Kaihui-intel <kaihui.tang@intel.com> Signed-off-by: chensuyue <suyue.chen@intel.com>
commit 498af74 Author: Sun, Xuehao <xuehao.sun@intel.com> Date: Fri Jun 14 21:09:36 2024 +0800 Update SQ/WOQ status (#1869) Signed-off-by: Sun, Xuehao <xuehao.sun@intel.com> Co-authored-by: chen, suyue <suyue.chen@intel.com>
commit b401b02 Author: Kaihui-intel <kaihui.tang@intel.com> Date: Fri Jun 14 17:48:03 2024 +0800 Add PT2E cv&llm example (#1853) Signed-off-by: Kaihui-intel <kaihui.tang@intel.com>
commit e470f6c Author: xinhe <xin3.he@intel.com> Date: Fri Jun 14 17:34:26 2024 +0800 [3x] add recommendation examples (#1844) Signed-off-by: xin3he <xin3.he@intel.com>
commit a141512 Author: zehao-intel <zehao.huang@intel.com> Date: Fri Jun 14 14:56:30 2024 +0800 Improve UT Branch Coverage for TF 3x (#1867) Signed-off-by: zehao-intel <zehao.huang@intel.com>
commit b99a79d Author: Zixuan Cheng <110808245+violetch24@users.noreply.github.com> Date: Fri Jun 14 14:10:49 2024 +0800 modify 3.x ipex example structure (#1858) (adds json path, sq and act_algo fixes, removes old files) Signed-off-by: Cheng, Zixuan <zixuan.cheng@intel.com> Co-authored-by: xinhe <xin3.he@intel.com>
commit 922b247 Author: zehao-intel <zehao.huang@intel.com> Date: Fri Jun 14 12:33:39 2024 +0800 Add TF 3x Examples (#1839) Signed-off-by: zehao-intel <zehao.huang@intel.com>
commit 70a1d50 Author: Zixuan Cheng <110808245+violetch24@users.noreply.github.com> Date: Fri Jun 14 10:17:33 2024 +0800 fix 3x ipex static quant regression (#1864): could not fall back by op type name ('linear') and dumped wrong op stats (no 'Linear&relu' op type) Signed-off-by: Cheng, Zixuan <zixuan.cheng@intel.com>
commit 4e45f8f Author: zehao-intel <zehao.huang@intel.com> Date: Fri Jun 14 10:04:11 2024 +0800 Improve UT Coverage for TF 3x (#1852) Signed-off-by: zehao-intel <zehao.huang@intel.com> Signed-off-by: chensuyue <suyue.chen@intel.com>
commit 794b276 Author: xinhe <xin3.he@intel.com> Date: Thu Jun 13 18:02:04 2024 +0800 migrate export to 2x and 3x from deprecated (#1845) Signed-off-by: xin3he <xin3.he@intel.com>
commit 0eced14 Author: yuwenzho <yuwen.zhou@intel.com> Date: Wed Jun 12 18:49:17 2024 -0700 Enhance INC WOQ model loading & support Huggingface WOQ model loading (#1826) Signed-off-by: yuwenzho <yuwen.zhou@intel.com>
commit 6733dab Author: Wang, Mengni <mengni.wang@intel.com> Date: Wed Jun 12 17:08:31 2024 +0800 update mx script (#1838) Signed-off-by: Mengni Wang <mengni.wang@intel.com>
commit a0dee94 Author: Wang, Chang <chang1.wang@intel.com> Date: Wed Jun 12 15:01:25 2024 +0800 Remove export_compressed_model in AWQConfig (#1831)
commit 2c3556d Author: Huang, Tai <tai.huang@intel.com> Date: Wed Jun 12 14:46:14 2024 +0800 Add 3x architecture diagram (#1849) Signed-off-by: Huang, Tai <tai.huang@intel.com>
commit 0e2cade Author: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed Jun 12 14:20:06 2024 +0800 Bump braces from 3.0.2 to 3.0.3 in /neural_insights/gui (#1862) Signed-off-by: dependabot[bot] <support@github.com>
commit 5b5579b Author: Kaihui-intel <kaihui.tang@intel.com> Date: Wed Jun 12 14:12:00 2024 +0800 Fix Neural Solution security issue (#1856) Signed-off-by: Kaihui-intel <kaihui.tang@intel.com>
commit e9cb48c Author: xinhe <xin3.he@intel.com> Date: Wed Jun 12 11:19:47 2024 +0800 improve UT coverage of PT Utils and Quantization (#1842) Signed-off-by: xin3he <xin3.he@intel.com> Signed-off-by: xinhe3 <xinhe3@habana.ai>
commit 6b27383 Author: Yi Liu <106061964+yiliu30@users.noreply.github.com> Date: Wed Jun 12 11:11:50 2024 +0800 Fix config expansion with empty options (#1861) Signed-off-by: yiliu30 <yi4.liu@intel.com>
commit 25c71aa Author: WenjiaoYue <wenjiao.yue@intel.com> Date: Tue Jun 11 17:54:31 2024 +0800 Delete the static resources of the JupyterLab extension after packaging (#1860) Signed-off-by: Yue, Wenjiao <wenjiao.yue@intel.com>
commit 455f1e1 Author: Wang, Mengni <mengni.wang@intel.com> Date: Tue Jun 11 15:28:40 2024 +0800 Add UT and remove unused code for torch MX quant (#1854) Signed-off-by: Mengni Wang <mengni.wang@intel.com> Signed-off-by: xinhe3 <xinhe3@habana.ai>
1 parent 23fe77e commit be20c15

File tree

99 files changed: +76801 −1750 lines changed

.azure-pipelines/scripts/fwk_version.sh

+3-3
```diff
@@ -2,9 +2,9 @@

 echo "export FWs version..."
 export tensorflow_version='2.15.0-official'
-export pytorch_version='2.3.0+cpu'
-export torchvision_version='0.18.0+cpu'
-export ipex_version='2.3.0+cpu'
+export pytorch_version='2.4.0+cpu'
+export torchvision_version='0.19.0'
+export ipex_version='2.4.0+cpu'
 export onnx_version='1.16.0'
 export onnxruntime_version='1.18.0'
 export mxnet_version='1.9.1'
```

.azure-pipelines/scripts/install_nc.sh

+10-5
```diff
@@ -3,16 +3,21 @@
 echo -e "\n Install Neural Compressor ... "
 cd /neural-compressor
 if [[ $1 = *"3x_pt"* ]]; then
-    if [[ $1 != *"3x_pt_fp8"* ]]; then
+    python -m pip install --no-cache-dir -r requirements_pt.txt
+    if [[ $1 = *"3x_pt_fp8"* ]]; then
+        pip uninstall neural_compressor_3x_pt -y || true
+        python setup.py pt bdist_wheel
+    else
         echo -e "\n Install torch CPU ... "
-        pip install torch==2.3.0 --index-url https://download.pytorch.org/whl/cpu
+        pip install torch==2.4.0 --index-url https://download.pytorch.org/whl/cpu
+        python -m pip install --no-cache-dir -r requirements.txt
+        python setup.py bdist_wheel
     fi
-    python -m pip install --no-cache-dir -r requirements_pt.txt
-    python setup.py pt bdist_wheel
     pip install --no-deps dist/neural_compressor*.whl --force-reinstall
 elif [[ $1 = *"3x_tf"* ]]; then
+    python -m pip install --no-cache-dir -r requirements.txt
     python -m pip install --no-cache-dir -r requirements_tf.txt
-    python setup.py tf bdist_wheel
+    python setup.py bdist_wheel
     pip install dist/neural_compressor*.whl --force-reinstall
 else
     python -m pip install --no-cache-dir -r requirements.txt
```

.azure-pipelines/scripts/models/env_setup.sh

+6-2
```diff
@@ -51,6 +51,10 @@ SCRIPTS_PATH="/neural-compressor/.azure-pipelines/scripts/models"
 log_dir="/neural-compressor/.azure-pipelines/scripts/models"
 if [[ "${inc_new_api}" == "3x"* ]]; then
     WORK_SOURCE_DIR="/neural-compressor/examples/3.x_api/${framework}"
+    git clone https://github.com/intel/intel-extension-for-transformers.git /itrex
+    cd /itrex
+    pip install -r requirements.txt
+    pip install -v .
 else
     WORK_SOURCE_DIR="/neural-compressor/examples/${framework}"
 fi
@@ -95,8 +99,8 @@ if [[ "${fwk_ver}" != "latest" ]]; then
         pip install intel-tensorflow==${fwk_ver}
     fi
 elif [[ "${framework}" == "pytorch" ]]; then
-    pip install torch==${fwk_ver} -f https://download.pytorch.org/whl/torch_stable.html
-    pip install torchvision==${torch_vision_ver} -f https://download.pytorch.org/whl/torch_stable.html
+    pip install torch==${fwk_ver} --index-url https://download.pytorch.org/whl/cpu
+    pip install torchvision==${torch_vision_ver} --index-url https://download.pytorch.org/whl/cpu
 elif [[ "${framework}" == "onnxrt" ]]; then
     pip install onnx==1.15.0
     pip install onnxruntime==${fwk_ver}
```

.azure-pipelines/scripts/ut/3x/run_3x_pt.sh

+4-1
```diff
@@ -21,7 +21,10 @@ rm -rf torch/quantization/fp8_quant
 LOG_DIR=/neural-compressor/log_dir
 mkdir -p ${LOG_DIR}
 ut_log_name=${LOG_DIR}/ut_3x_pt.log
-pytest --cov="${inc_path}" -vs --disable-warnings --html=report.html --self-contained-html . 2>&1 | tee -a ${ut_log_name}
+
+find . -name "test*.py" | sed "s,\.\/,python -m pytest --cov=\"${inc_path}\" --cov-report term --html=report.html --self-contained-html --cov-report xml:coverage.xml --cov-append -vs --disable-warnings ,g" > run.sh
+cat run.sh
+bash run.sh 2>&1 | tee ${ut_log_name}

 cp report.html ${LOG_DIR}/
```

.azure-pipelines/scripts/ut/3x/run_3x_pt_fp8.sh

+2
```diff
@@ -7,6 +7,8 @@ echo "${test_case}"
 echo "set up UT env..."
 export LD_LIBRARY_PATH=/usr/local/lib/:$LD_LIBRARY_PATH
 sed -i '/^intel_extension_for_pytorch/d' /neural-compressor/test/3x/torch/requirements.txt
+sed -i '/^auto_round/d' /neural-compressor/test/3x/torch/requirements.txt
+cat /neural-compressor/test/3x/torch/requirements.txt
 pip install -r /neural-compressor/test/3x/torch/requirements.txt
 pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.16.0
 pip install pytest-cov
```

.azure-pipelines/scripts/ut/env_setup.sh

+1-1
```diff
@@ -92,7 +92,7 @@ elif [[ $(echo "${test_case}" | grep -c "tf pruning") != 0 ]]; then
 fi

 if [[ $(echo "${test_case}" | grep -c "api") != 0 ]] || [[ $(echo "${test_case}" | grep -c "adaptor") != 0 ]]; then
-    pip install git+https://github.com/intel/auto-round.git@e24b9074af6cdb099e31c92eb81b7f5e9a4a244e
+    pip install git+https://github.com/intel/auto-round.git@5dd16fc34a974a8c2f5a4288ce72e61ec3b1410f
 fi

 # test deps
```

.azure-pipelines/scripts/ut/run_basic_pt_pruning.sh

+2-2
```diff
@@ -4,9 +4,9 @@ test_case="run basic pt pruning"
 echo "${test_case}"

 echo "specify fwk version..."
-export pytorch_version='2.3.0+cpu'
+export pytorch_version='2.4.0+cpu'
 export torchvision_version='0.18.0+cpu'
-export ipex_version='2.3.0+cpu'
+export ipex_version='2.4.0+cpu'

 echo "set up UT env..."
 bash /neural-compressor/.azure-pipelines/scripts/ut/env_setup.sh "${test_case}"
```

.azure-pipelines/scripts/ut/run_itrex.sh

+2-1
```diff
@@ -18,7 +18,8 @@ bash /intel-extension-for-transformers/.github/workflows/script/install_binary.sh
 sed -i '/neural-compressor.git/d' /intel-extension-for-transformers/tests/requirements.txt
 pip install -r /intel-extension-for-transformers/tests/requirements.txt
 # workaround
-pip install onnx==1.15.0
+pip install onnx==1.16.0
+pip install onnxruntime==1.18.0
 echo "pip list itrex ut deps..."
 pip list
 LOG_DIR=/neural-compressor/log_dir
```

.azure-pipelines/template/docker-template.yml

+2-2
```diff
@@ -36,19 +36,18 @@ steps:
   - ${{ if eq(parameters.dockerConfigName, 'commonDockerConfig') }}:
       - script: |
           rm -fr ${BUILD_SOURCESDIRECTORY} || sudo rm -fr ${BUILD_SOURCESDIRECTORY} || true
-          echo y | docker image prune -a
         displayName: "Clean workspace"

   - checkout: self
     clean: true
     displayName: "Checkout out Repo"
+    fetchDepth: 0

   - ${{ if eq(parameters.dockerConfigName, 'gitCloneDockerConfig') }}:
       - script: |
           rm -fr ${BUILD_SOURCESDIRECTORY} || sudo rm -fr ${BUILD_SOURCESDIRECTORY} || true
           mkdir ${BUILD_SOURCESDIRECTORY}
           chmod 777 ${BUILD_SOURCESDIRECTORY}
-          echo y | docker image prune -a
         displayName: "Clean workspace"

   - checkout: none
@@ -62,6 +61,7 @@ steps:

   - ${{ if eq(parameters.imageSource, 'build') }}:
       - script: |
+          docker image prune -a -f
           if [[ ! $(docker images | grep -i ${{ parameters.repoName }}:${{ parameters.repoTag }}) ]]; then
            docker build -f ${BUILD_SOURCESDIRECTORY}/.azure-pipelines/docker/${{parameters.dockerFileName}}.devel -t ${{ parameters.repoName }}:${{ parameters.repoTag }} .
           fi
```
.azure-pipelines/ut-basic.yml

+2
```diff
@@ -19,6 +19,8 @@ pr:
       - neural_compressor/torch
       - neural_compressor/tensorflow
       - neural_compressor/onnxrt
+      - neural_compressor/transformers
+      - neural_compressor/evaluation
       - .azure-pipelines/scripts/ut/3x

 pool: ICX-16C
```

.pre-commit-config.yaml

+2-1
```diff
@@ -129,7 +129,8 @@ repos:
             examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/prompt.json|
             examples/notebook/dynas/ResNet50_Quantiation_Search_Supernet_NAS.ipynb|
             examples/notebook/dynas/Transformer_LT_Supernet_NAS.ipynb|
-            neural_compressor/torch/algorithms/fp8_quant/internal/diffusion_evaluation/SR_evaluation/imagenet1000_clsidx_to_labels.txt
+            neural_compressor/torch/algorithms/fp8_quant/internal/diffusion_evaluation/SR_evaluation/imagenet1000_clsidx_to_labels.txt|
+            neural_compressor/evaluation/hf_eval/datasets/cnn_validation.json
           )$

   - repo: https://github.com/astral-sh/ruff-pre-commit
```

README.md

+16-2
````diff
@@ -27,6 +27,7 @@ support AMD CPU, ARM CPU, and NVidia GPU through ONNX Runtime with limited testing
 * Collaborate with cloud marketplaces such as [Google Cloud Platform](https://console.cloud.google.com/marketplace/product/bitnami-launchpad/inc-tensorflow-intel?project=verdant-sensor-286207), [Amazon Web Services](https://aws.amazon.com/marketplace/pp/prodview-yjyh2xmggbmga#pdp-support), and [Azure](https://azuremarketplace.microsoft.com/en-us/marketplace/apps/bitnami.inc-tensorflow-intel), software platforms such as [Alibaba Cloud](https://www.intel.com/content/www/us/en/developer/articles/technical/quantize-ai-by-oneapi-analytics-on-alibaba-cloud.html), [Tencent TACO](https://new.qq.com/rain/a/20221202A00B9S00) and [Microsoft Olive](https://github.com/microsoft/Olive), and open AI ecosystem such as [Hugging Face](https://huggingface.co/blog/intel), [PyTorch](https://pytorch.org/tutorials/recipes/intel_neural_compressor_for_pytorch.html), [ONNX](https://github.com/onnx/models#models), [ONNX Runtime](https://github.com/microsoft/onnxruntime), and [Lightning AI](https://github.com/Lightning-AI/lightning/blob/master/docs/source-pytorch/advanced/post_training_quantization.rst)

 ## What's New
+* [2024/10] [Transformers-like API](./docs/source/3x/transformers_like_api.md) for INT4 inference on Intel CPU and GPU.
 * [2024/07] From 3.0 release, framework extension API is recommended to be used for quantization.
 * [2024/07] Performance optimizations and usability improvements on [client-side](./docs/source/3x/client_quant.md).

@@ -71,7 +72,7 @@ pip install "neural-compressor>=2.3" "transformers>=4.34.0" torch torchvision
 ```
 After successfully installing these packages, try your first quantization program.

-### [FP8 Quantization](./examples/3.x_api/pytorch/cv/fp8_quant/)
+### [FP8 Quantization](./docs/source/3x/PT_FP8Quant.md)
 Following example code demonstrates FP8 Quantization, it is supported by Intel Gaudi2 AI Accelerator.

 To try on Intel Gaudi2, docker image with Gaudi Software Stack is recommended, please refer to following script for environment setup. More details can be found in [Gaudi Guide](https://docs.habana.ai/en/latest/Installation_Guide/Bare_Metal_Fresh_OS.html#launch-docker-image-that-was-built).
@@ -147,7 +148,7 @@ Intel Neural Compressor will convert the model format from auto-gptq to hpu format
 </tr>
 <tr>
   <td colspan="2" align="center"><a href="./docs/source/3x/PT_WeightOnlyQuant.md">Weight-Only Quantization</a></td>
-  <td colspan="2" align="center"><a href="./docs/3x/PT_FP8Quant.md">FP8 Quantization</a></td>
+  <td colspan="2" align="center"><a href="./docs/source/3x/PT_FP8Quant.md">FP8 Quantization</a></td>
   <td colspan="2" align="center"><a href="./docs/source/3x/PT_MXQuant.md">MX Quantization</a></td>
   <td colspan="2" align="center"><a href="./docs/source/3x/PT_MixedPrecision.md">Mixed Precision</a></td>
 </tr>
@@ -164,6 +165,16 @@ Intel Neural Compressor will convert the model format from auto-gptq to hpu format
   <td colspan="2" align="center"><a href="./docs/source/3x/TF_SQ.md">Smooth Quantization</a></td>
 </tr>
 </tbody>
+<thead>
+  <tr>
+    <th colspan="8">Transformers-like APIs</th>
+  </tr>
+</thead>
+<tbody>
+  <tr>
+    <td colspan="8" align="center"><a href="./docs/source/3x/transformers_like_api.md">Overview</a></td>
+  </tr>
+</tbody>
 <thead>
   <tr>
     <th colspan="8">Other Modules</th>
@@ -181,6 +192,9 @@ Intel Neural Compressor will convert the model format from auto-gptq to hpu format
 > From 3.0 release, we recommend to use 3.X API. Compression techniques during training such as QAT, Pruning, Distillation only available in [2.X API](https://github.com/intel/neural-compressor/blob/master/docs/source/2x_user_guide.md) currently.

 ## Selected Publications/Events
+
+* EMNLP'2024: [Optimize Weight Rounding via Signed Gradient Descent for the Quantization of LLMs](https://arxiv.org/abs/2309.05516) (Sep 2024)
+* Blog on Medium: [Quantization on Intel Gaudi Series AI Accelerators](https://medium.com/intel-analytics-software/intel-neural-compressor-v3-0-a-quantization-tool-across-intel-hardware-9856adee6f11) (Aug 2024)
 * Blog by Intel: [Neural Compressor: Boosting AI Model Efficiency](https://community.intel.com/t5/Blogs/Tech-Innovation/Artificial-Intelligence-AI/Neural-Compressor-Boosting-AI-Model-Efficiency/post/1604740) (June 2024)
 * Blog by Intel: [Optimization of Intel AI Solutions for Alibaba Cloud’s Qwen2 Large Language Models](https://www.intel.com/content/www/us/en/developer/articles/technical/intel-ai-solutions-accelerate-alibaba-qwen2-llms.html) (June 2024)
 * Blog by Intel: [Accelerate Meta* Llama 3 with Intel AI Solutions](https://www.intel.com/content/www/us/en/developer/articles/technical/accelerate-meta-llama3-with-intel-ai-solutions.html) (Apr 2024)
````
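For context on the Transformers-like API referenced above, here is a minimal usage sketch. It assumes the `neural_compressor.transformers` entry points described in the linked transformers_like_api.md; the model id and RTN settings are illustrative assumptions, not taken from this commit.

```python
# Hypothetical INT4 weight-only quantization via the transformers-like API.
# AutoModelForCausalLM/RtnConfig follow the doc added in this commit series;
# the model id and bit/group settings below are assumed examples.
from transformers import AutoTokenizer

from neural_compressor.transformers import AutoModelForCausalLM, RtnConfig

model_name = "facebook/opt-125m"  # assumed example model
quant_config = RtnConfig(bits=4, group_size=32)  # 4-bit round-to-nearest WOQ

model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=quant_config)
tokenizer = AutoTokenizer.from_pretrained(model_name)

inputs = tokenizer("Intel Neural Compressor is", return_tensors="pt")
print(tokenizer.decode(model.generate(**inputs, max_new_tokens=16)[0]))
```

The appeal of this style is that quantization folds into `from_pretrained`, so an existing transformers script only needs to pass a `quantization_config`.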

docs/build_docs/source/conf.py

+2
```diff
@@ -34,10 +34,12 @@
     "sphinx.ext.coverage",
     "sphinx.ext.autosummary",
     "sphinx_md",
+    "sphinx_rtd_theme",
     "autoapi.extension",
     "sphinx.ext.napoleon",
     "sphinx.ext.githubpages",
     "sphinx.ext.linkcode",
+    "sphinxcontrib.jquery",
 ]

 autoapi_dirs = ["../../neural_compressor"]
```
+10-6

```diff
@@ -1,6 +1,10 @@
-recommonmark
-sphinx==6.1.1
-sphinx-autoapi
-sphinx-markdown-tables
-sphinx-md
-sphinx_rtd_theme
+recommonmark==0.7.1
+setuptools_scm[toml]==8.1.0
+sphinx==7.3.7
+sphinx-autoapi==3.1.0
+sphinx-autobuild==2024.4.16
+sphinx-markdown-tables==0.0.17
+sphinx-md==0.0.4
+sphinx_rtd_theme==2.0.0
+sphinxcontrib-jquery==4.1
+sphinxemoji==0.3.1
```

docs/build_docs/update_html.py

+23
```diff
@@ -56,11 +56,34 @@ def update_source_url(version, folder_name, index_file):
         f.write(index_buf)


+def update_search(folder):
+    search_file_name = "{}/search.html".format(folder)
+
+    with open(search_file_name, "r") as f:
+        index_buf = f.read()
+    key_str = '<script src="_static/searchtools.js"></script>'
+    version_list = """<!--[if lt IE 9]>
+    <script src="_static/js/html5shiv.min.js"></script>
+    <![endif]-->
+    <script src="_static/jquery.js?v=5d32c60e"></script>
+    <script src="_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
+    <script src="_static/documentation_options.js?v=fc837d61"></script>
+    <script src="_static/doctools.js?v=9a2dae69"></script>
+    <script src="_static/sphinx_highlight.js?v=dc90522c"></script>
+    <script src="_static/js/theme.js"></script>
+    <script src="_static/searchtools.js"></script>"""
+    index_buf = index_buf.replace(key_str, version_list)
+
+    with open(search_file_name, "w") as f:
+        f.write(index_buf)
+
+
 def main(folder, version):
     folder_name = os.path.basename(folder)
     for index_file in glob.glob("{}/**/*.html".format(folder), recursive=True):
         update_version_link(version, folder_name, index_file)
         update_source_url(version, folder_name, index_file)
+    update_search(folder)


 def help(me):
```

docs/3x/PT_FP8Quant.md → docs/source/3x/PT_FP8Quant.md (renamed)

+1-1
```diff
@@ -108,6 +108,6 @@ model = convert(model)
 | Task | Example |
 |----------------------|---------|
 | Computer Vision (CV) | [Link](../../examples/3.x_api/pytorch/cv/fp8_quant/) |
-| Large Language Model (LLM) | [Link](https://github.com/HabanaAI/optimum-habana-fork/tree/habana-main/examples/text-generation#running-with-fp8) |
+| Large Language Model (LLM) | [Link](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation#running-with-fp8) |

 > Note: For LLM, Optimum-habana provides higher performance based on modified modeling files, so here the Link of LLM goes to Optimum-habana, which utilize Intel Neural Compressor for FP8 quantization internally.
```
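The hunk context above shows the doc's final `model = convert(model)` step. As a rough sketch of the calibrate-then-convert flow that PT_FP8Quant.md documents, assuming the 3.x `neural_compressor.torch.quantization` API and an Intel Gaudi (HPU) device; the toy module, config values, and calibration loop are assumptions, not part of this commit.

```python
# Minimal FP8 sketch: measure on calibration data, then convert.
# Runs only on Gaudi HPU; the FP8Config settings here are assumed.
import torch

from neural_compressor.torch.quantization import (
    FP8Config,
    convert,
    finalize_calibration,
    prepare,
)

model = torch.nn.Sequential(torch.nn.Linear(8, 8), torch.nn.ReLU())  # toy model

config = FP8Config(fp8_config="E4M3")  # assumed measurement/quantization settings
model = prepare(model, config)

for _ in range(4):  # feed a few representative batches to collect max-abs stats
    model(torch.randn(2, 8))

finalize_calibration(model)
model = convert(model)
```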

docs/source/3x/PT_WeightOnlyQuant.md

+6-2
````diff
@@ -1,6 +1,7 @@

 PyTorch Weight Only Quantization
 ===============
+
 - [Introduction](#introduction)
 - [Supported Matrix](#supported-matrix)
 - [Usage](#usage)
@@ -14,6 +15,8 @@ PyTorch Weight Only Quantization
   - [HQQ](#hqq)
 - [Specify Quantization Rules](#specify-quantization-rules)
 - [Saving and Loading](#saving-and-loading)
+- [Layer Wise Quantization](#layer-wise-quantization)
+- [Efficient Usage on Client-Side](#efficient-usage-on-client-side)
 - [Examples](#examples)

 ## Introduction
@@ -108,9 +111,10 @@ model = convert(model)
 | model_path (str) | Model path that is used to load state_dict per layer | |
 | use_double_quant (bool) | Enables double quantization | False |
 | act_order (bool) | Whether to sort Hessian's diagonal values to rearrange channel-wise quantization order | False |
-| percdamp (float) | Percentage of Hessian's diagonal values' average, which will be added to Hessian's diagonal to increase numerical stability | 0.01. |
+| percdamp (float) | Percentage of Hessian's diagonal values' average, which will be added to Hessian's diagonal to increase numerical stability | 0.01 |
 | block_size (int) | Execute GPTQ quantization per block, block shape = [C_out, block_size] | 128 |
-| static_groups (bool) | Whether to calculate group wise quantization parameters in advance. This option mitigate actorder's extra computational requirements. | False. |
+| static_groups (bool) | Whether to calculate group wise quantization parameters in advance. This option mitigate actorder's extra computational requirements. | False |
+| true_sequential (bool) | Whether to quantize layers within a transformer block in their original order. This can lead to higher accuracy but slower overall quantization process. | False |

 > **Note:** `model_path` is only used when use_layer_wise=True. `layer-wise` is stay-tuned.

 ``` python
````
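For the `true_sequential` row added above (and the `quant_lm_head` option from the same commit series), a sketch of how these GPTQ options plug into the documented prepare/convert flow; the toy model, calibration loop, and option values are assumptions, not part of this diff.

```python
# GPTQ with the new options: quantize block layers in their original order
# (true_sequential) and leave the LM head in full precision (quant_lm_head=False).
import torch

from neural_compressor.torch.quantization import GPTQConfig, convert, prepare

model = torch.nn.Sequential(torch.nn.Linear(64, 64), torch.nn.Linear(64, 64))  # toy stand-in

config = GPTQConfig(bits=4, group_size=32, true_sequential=True, quant_lm_head=False)
model = prepare(model, config)

for _ in range(8):  # calibration batches feed GPTQ's Hessian statistics
    model(torch.randn(1, 64))

model = convert(model)
```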

docs/source/3x/PyTorch.md

+22-5
````diff
@@ -176,16 +176,21 @@ def load(output_dir="./saved_results", model=None):
   <td class="tg-9wq8"><a href="PT_SmoothQuant.md">link</a></td>
 </tr>
 <tr>
-  <td class="tg-9wq8" rowspan="2">Static Quantization</td>
-  <td class="tg-9wq8" rowspan="2"><a href=https://pytorch.org/docs/master/quantization.html#post-training-static-quantization>Post-traning Static Quantization</a></td>
-  <td class="tg-9wq8">intel-extension-for-pytorch</td>
+  <td class="tg-9wq8" rowspan="3">Static Quantization</td>
+  <td class="tg-9wq8" rowspan="3"><a href=https://pytorch.org/docs/master/quantization.html#post-training-static-quantization>Post-traning Static Quantization</a></td>
+  <td class="tg-9wq8">intel-extension-for-pytorch (INT8)</td>
   <td class="tg-9wq8">&#10004</td>
   <td class="tg-9wq8"><a href="PT_StaticQuant.md">link</a></td>
 </tr>
 <tr>
-  <td class="tg-9wq8"><a href=https://pytorch.org/docs/stable/torch.compiler_deepdive.html>TorchDynamo</a></td>
+  <td class="tg-9wq8"><a href=https://pytorch.org/docs/stable/torch.compiler_deepdive.html>TorchDynamo (INT8)</a></td>
   <td class="tg-9wq8">&#10004</td>
   <td class="tg-9wq8"><a href="PT_StaticQuant.md">link</a></td>
+<tr>
+  <td class="tg-9wq8"><a href=https://docs.habana.ai/en/latest/index.html>Intel Gaudi AI accelerator (FP8)</a></td>
+  <td class="tg-9wq8">&#10004</td>
+  <td class="tg-9wq8"><a href="PT_FP8Quant.md">link</a></td>
+</tr>
 </tr>
 <tr>
   <td class="tg-9wq8">Dynamic Quantization</td>
@@ -240,7 +245,7 @@ Deep Learning</a></td>
 </table>

 2. How to set different configuration for specific op_name or op_type?
-   > INC extends a `set_local` method based on the global configuration object to set custom configuration.
+   > Neural Compressor extends a `set_local` method based on the global configuration object to set custom configuration.

 ```python
 def set_local(self, operator_name_or_list: Union[List, str, Callable], config: BaseConfig) -> BaseConfig:
@@ -259,3 +264,15 @@ Deep Learning</a></td>
     quant_config.set_local(".*mlp.*", RTNConfig(bits=8))  # For layers with "mlp" in their names, set bits=8
     quant_config.set_local("Conv1d", RTNConfig(dtype="fp32"))  # For Conv1d layers, do not quantize them.
 ```
+
+3. How to specify an accelerator?
+
+   > Neural Compressor provides automatic accelerator detection, including HPU, XPU, CUDA, and CPU.
+
+   > The automatically detected accelerator may not be suitable for some special cases, such as poor performance, memory limitations. In such situations, users can override the detected accelerator by setting the environment variable `INC_TARGET_DEVICE`.
+
+   > Usage:
+
+   ```bash
+   export INC_TARGET_DEVICE=cpu
+   ```
````
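Putting the two FAQ answers above together, a self-contained sketch under the same assumptions (`RTNConfig` and `set_local` as documented above, `INC_TARGET_DEVICE` honored before quantization starts); the two-layer model and the layer name "1" are illustrative only.

```python
# Force CPU before Neural Compressor detects an accelerator, then quantize
# with a per-layer override: the second Linear ("1" in this toy nn.Sequential)
# stays in fp32 while the rest is 4-bit RTN-quantized.
import os

os.environ["INC_TARGET_DEVICE"] = "cpu"  # set before any quantization call

import torch

from neural_compressor.torch.quantization import RTNConfig, convert, prepare

model = torch.nn.Sequential(torch.nn.Linear(16, 16), torch.nn.Linear(16, 4))

quant_config = RTNConfig(bits=4)
quant_config.set_local("1", RTNConfig(dtype="fp32"))  # keep layer "1" unquantized

model = prepare(model, quant_config)
model = convert(model)
```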
