intel
diff --git a/‎.azure-pipelines/scripts/fwk_version.sh
+3-3 b/‎.azure-pipelines/scripts/fwk_version.sh
+3-3
diff --git a/‎.azure-pipelines/scripts/install_nc.sh
+10-5 b/‎.azure-pipelines/scripts/install_nc.sh
+10-5
diff --git a/‎.azure-pipelines/scripts/models/env_setup.sh
+6-2 b/‎.azure-pipelines/scripts/models/env_setup.sh
+6-2
diff --git a/‎.azure-pipelines/scripts/models/run_model_trigger_common.sh
+1-1 b/‎.azure-pipelines/scripts/models/run_model_trigger_common.sh
+1-1
diff --git a/‎.azure-pipelines/scripts/ut/3x/coverage.3x_pt
+1 b/‎.azure-pipelines/scripts/ut/3x/coverage.3x_pt
+1
diff --git a/‎.azure-pipelines/scripts/ut/3x/coverage.3x_pt_fp8
+1 b/‎.azure-pipelines/scripts/ut/3x/coverage.3x_pt_fp8
+1
diff --git a/‎.azure-pipelines/scripts/ut/3x/run_3x_pt.sh
+4-1 b/‎.azure-pipelines/scripts/ut/3x/run_3x_pt.sh
+4-1
diff --git a/‎.azure-pipelines/scripts/ut/3x/run_3x_pt_fp8.sh
+3-1 b/‎.azure-pipelines/scripts/ut/3x/run_3x_pt_fp8.sh
+3-1
diff --git a/‎.azure-pipelines/scripts/ut/run_basic_pt_pruning.sh
+2-2 b/‎.azure-pipelines/scripts/ut/run_basic_pt_pruning.sh
+2-2
diff --git a/‎.azure-pipelines/scripts/ut/run_itrex.sh
-43 b/‎.azure-pipelines/scripts/ut/run_itrex.sh
-43
diff --git a/‎.azure-pipelines/template/docker-template.yml
+4-4 b/‎.azure-pipelines/template/docker-template.yml
+4-4
diff --git a/‎.azure-pipelines/ut-basic.yml
+2 b/‎.azure-pipelines/ut-basic.yml
+2
diff --git a/‎.azure-pipelines/ut-itrex.yml
-35 b/‎.azure-pipelines/ut-itrex.yml
-35
diff --git a/‎.github/checkgroup.yml
-13 b/‎.github/checkgroup.yml
-13
diff --git a/‎.pre-commit-config.yaml
+3-2 b/‎.pre-commit-config.yaml
+3-2
diff --git a/‎README.md
+16-2 b/‎README.md
+16-2
diff --git a/‎docs/build_docs/source/conf.py
+2 b/‎docs/build_docs/source/conf.py
+2
diff --git a/‎docs/build_docs/sphinx-requirements.txt
+10-6 b/‎docs/build_docs/sphinx-requirements.txt
+10-6
@@ -2,9 +2,9 @@
 
 echo "export FWs version..."
 export tensorflow_version='2.15.0-official'
-export pytorch_version='2.3.0+cpu'
-export torchvision_version='0.18.0+cpu'
-export ipex_version='2.3.0+cpu'
+export pytorch_version='2.4.0+cpu'
+export torchvision_version='0.19.0'
+export ipex_version='2.4.0+cpu'
 export onnx_version='1.16.0'
 export onnxruntime_version='1.18.0'
 export mxnet_version='1.9.1'
@@ -3,16 +3,21 @@
 echo -e "\n Install Neural Compressor ... "
 cd /neural-compressor
 if [[ $1 = *"3x_pt"* ]]; then
-    if [[ $1 != *"3x_pt_fp8"* ]]; then
+    python -m pip install --no-cache-dir -r requirements_pt.txt
+    if [[ $1 = *"3x_pt_fp8"* ]]; then
+        pip uninstall neural_compressor_3x_pt -y || true
+        python setup.py pt bdist_wheel
+    else
         echo -e "\n Install torch CPU ... "
-        pip install torch==2.3.0 --index-url https://download.pytorch.org/whl/cpu
+        pip install torch==2.4.0 --index-url https://download.pytorch.org/whl/cpu
+        python -m pip install --no-cache-dir -r requirements.txt
+        python setup.py bdist_wheel
     fi
-    python -m pip install --no-cache-dir -r requirements_pt.txt
-    python setup.py pt bdist_wheel
     pip install --no-deps dist/neural_compressor*.whl --force-reinstall
 elif [[ $1 = *"3x_tf"* ]]; then
+    python -m pip install --no-cache-dir -r requirements.txt
     python -m pip install --no-cache-dir -r requirements_tf.txt
-    python setup.py tf bdist_wheel
+    python setup.py bdist_wheel
     pip install dist/neural_compressor*.whl --force-reinstall
 else
     python -m pip install --no-cache-dir -r requirements.txt
 
@@ -51,6 +51,10 @@ SCRIPTS_PATH="/neural-compressor/.azure-pipelines/scripts/models"
 log_dir="/neural-compressor/.azure-pipelines/scripts/models"
 if [[ "${inc_new_api}" == "3x"* ]]; then
     WORK_SOURCE_DIR="/neural-compressor/examples/3.x_api/${framework}"
+    git clone https://github.com/intel/intel-extension-for-transformers.git /itrex
+    cd /itrex
+    pip install -r requirements.txt
+    pip install -v .
 else
     WORK_SOURCE_DIR="/neural-compressor/examples/${framework}"
 fi
@@ -95,8 +99,8 @@ if [[ "${fwk_ver}" != "latest" ]]; then
             pip install intel-tensorflow==${fwk_ver}
         fi
     elif [[ "${framework}" == "pytorch" ]]; then
-        pip install torch==${fwk_ver} -f https://download.pytorch.org/whl/torch_stable.html
-        pip install torchvision==${torch_vision_ver} -f https://download.pytorch.org/whl/torch_stable.html
+        pip install torch==${fwk_ver} --index-url https://download.pytorch.org/whl/cpu
+        pip install torchvision==${torch_vision_ver} --index-url https://download.pytorch.org/whl/cpu
     elif [[ "${framework}" == "onnxrt" ]]; then
         pip install onnx==1.15.0
         pip install onnxruntime==${fwk_ver}
 
@@ -88,7 +88,7 @@ elif [ "${mode}" == "tuning" ]; then
     cd ${WORK_SOURCE_DIR}/${model_src_dir}
     # for int4 models add "--accuracy" to run tuning after quantize
     if [[ "${model}" == *"int4"* ]]; then
-        sed -i "s|--quantize|--quantize --accuracy --int8|g" run_quant.sh
+        sed -i "s|--quantize|--quantize --accuracy --load|g" run_quant.sh
     fi
 
     $BOLD_YELLOW && echo "workspace ${WORK_SOURCE_DIR}/${model_src_dir}" && $RESET
 
@@ -7,6 +7,7 @@ include =
  */neural_compressor/torch/*
 omit =
  */neural_compressor/torch/algorithms/fp8_quant/*
+ */neural_compressor/torch/algorithms/mixed_low_precision/*
  */neural_compressor/torch/amp/*
 exclude_lines =
  pragma: no cover
 
@@ -4,6 +4,7 @@ branch = True
 [report]
 include =
  */neural_compressor/torch/algorithms/fp8_quant/*
+ */neural_compressor/torch/algorithms/mixed_low_precision/*
 exclude_lines =
  pragma: no cover
  raise NotImplementedError
 
@@ -21,7 +21,10 @@ rm -rf torch/quantization/fp8_quant
 LOG_DIR=/neural-compressor/log_dir
 mkdir -p ${LOG_DIR}
 ut_log_name=${LOG_DIR}/ut_3x_pt.log
-pytest --cov="${inc_path}" -vs --disable-warnings --html=report.html --self-contained-html . 2>&1 | tee -a ${ut_log_name}
+
+find . -name "test*.py" | sed "s,\.\/,python -m pytest --cov=\"${inc_path}\" --cov-report term --html=report.html --self-contained-html  --cov-report xml:coverage.xml --cov-append -vs --disable-warnings ,g" > run.sh
+cat run.sh
+bash run.sh 2>&1 | tee ${ut_log_name}
 
 cp report.html ${LOG_DIR}/
 
 
@@ -7,8 +7,9 @@ echo "${test_case}"
 echo "set up UT env..."
 export LD_LIBRARY_PATH=/usr/local/lib/:$LD_LIBRARY_PATH
 sed -i '/^intel_extension_for_pytorch/d' /neural-compressor/test/3x/torch/requirements.txt
+sed -i '/^auto_round/d' /neural-compressor/test/3x/torch/requirements.txt
+cat /neural-compressor/test/3x/torch/requirements.txt
 pip install -r /neural-compressor/test/3x/torch/requirements.txt
-pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.16.0
 pip install pytest-cov
 pip install pytest-html
 pip install pytest-html-merger
@@ -25,6 +26,7 @@ pytest --cov="${inc_path}" -vs --disable-warnings --html=report_1.html --self-co
 pytest --cov="${inc_path}" -vs --disable-warnings --html=report_2.html --self-contained-html torch/quantization/weight_only/test_rtn.py 2>&1 | tee -a ${ut_log_name}
 # pytest --cov="${inc_path}" -vs --disable-warnings --html=report_3.html --self-contained-html torch/quantization/weight_only/test_autoround.py 2>&1 | tee -a ${ut_log_name}
 pytest --cov="${inc_path}" -vs --disable-warnings --html=report_4.html --self-contained-html torch/quantization/fp8_quant 2>&1 | tee -a ${ut_log_name}
+pytest --cov="${inc_path}" -vs --disable-warnings --html=report_5.html --self-contained-html torch/algorithms/fp8_quant 2>&1 | tee -a ${ut_log_name}
 
 mkdir -p report && mv *.html report
 pytest_html_merger -i ./report -o ./report.html
 
@@ -4,9 +4,9 @@ test_case="run basic pt pruning"
 echo "${test_case}"
 
 echo "specify fwk version..."
-export pytorch_version='2.3.0+cpu'
+export pytorch_version='2.4.0+cpu'
 export torchvision_version='0.18.0+cpu'
-export ipex_version='2.3.0+cpu'
+export ipex_version='2.4.0+cpu'
 
 echo "set up UT env..."
 bash /neural-compressor/.azure-pipelines/scripts/ut/env_setup.sh "${test_case}"
 
@@ -36,19 +36,18 @@ steps:
   - ${{ if eq(parameters.dockerConfigName, 'commonDockerConfig') }}:
       - script: |
           rm -fr ${BUILD_SOURCESDIRECTORY} || sudo rm -fr ${BUILD_SOURCESDIRECTORY} || true
-          echo y | docker image prune -a
         displayName: "Clean workspace"
 
       - checkout: self
         clean: true
         displayName: "Checkout out Repo"
+        fetchDepth: 0
 
   - ${{ if eq(parameters.dockerConfigName, 'gitCloneDockerConfig') }}:
       - script: |
           rm -fr ${BUILD_SOURCESDIRECTORY} || sudo rm -fr ${BUILD_SOURCESDIRECTORY} || true
           mkdir ${BUILD_SOURCESDIRECTORY}
           chmod 777 ${BUILD_SOURCESDIRECTORY}
-          echo y | docker image prune -a
         displayName: "Clean workspace"
 
       - checkout: none
@@ -62,6 +61,7 @@ steps:
 
   - ${{ if eq(parameters.imageSource, 'build') }}:
       - script: |
+          docker image prune -a -f
           if [[ ! $(docker images | grep -i ${{ parameters.repoName }}:${{ parameters.repoTag }}) ]]; then
             docker build -f ${BUILD_SOURCESDIRECTORY}/.azure-pipelines/docker/${{parameters.dockerFileName}}.devel -t ${{ parameters.repoName }}:${{ parameters.repoTag }} .
           fi
@@ -74,7 +74,7 @@ steps:
 
   - ${{ if eq(parameters.imageSource, 'pull') }}:
       - script: |
-            docker pull vault.habana.ai/gaudi-docker/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest
+            docker pull vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest
         displayName: "Pull habana docker image"
 
   - script: |
@@ -95,7 +95,7 @@ steps:
             else
                 docker run -dit --disable-content-trust --privileged --name=${{ parameters.containerName }} --shm-size="2g" \
                 --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --net=host --ipc=host \
-                -v ${BUILD_SOURCESDIRECTORY}:/neural-compressor vault.habana.ai/gaudi-docker/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest
+                -v ${BUILD_SOURCESDIRECTORY}:/neural-compressor vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest
             fi
             echo "Show the container list after docker run ... "
             docker ps -a
 
@@ -19,6 +19,8 @@ pr:
       - neural_compressor/torch
       - neural_compressor/tensorflow
       - neural_compressor/onnxrt
+      - neural_compressor/transformers
+      - neural_compressor/evaluation
       - .azure-pipelines/scripts/ut/3x
 
 pool: ICX-16C
 
@@ -78,19 +78,6 @@ subprojects:
       - "UT-Basic (Unit Test other basic case Test other basic case)"
       - "UT-Basic (Unit Test other cases baseline Test other cases baseline)"
 
-  - id: "Unit Tests ITREX workflow"
-    paths:
-      - "neural_compressor/**"
-      - "setup.py"
-      - "requirements.txt"
-      - ".azure-pipelines/scripts/ut/run_itrex.sh"
-      - ".azure-pipelines/ut-itrex.yml"
-      - "!neural_compressor/common/**"
-      - "!neural_compressor/torch/**"
-      - "!neural_compressor/tensorflow/**"
-    checks:
-      - "UT-ITREX"
-
   - id: "Unit Tests 3x-TensorFlow workflow"
     paths:
       - "neural_compressor/common/**"
 
@@ -76,7 +76,7 @@ repos:
           )$
 
   - repo: https://github.com/PyCQA/docformatter
-    rev: v1.7.5
+    rev: 06907d0
     hooks:
       - id: docformatter
         args: [
@@ -129,7 +129,8 @@ repos:
               examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/prompt.json|
               examples/notebook/dynas/ResNet50_Quantiation_Search_Supernet_NAS.ipynb|
               examples/notebook/dynas/Transformer_LT_Supernet_NAS.ipynb|
-              neural_compressor/torch/algorithms/fp8_quant/internal/diffusion_evaluation/SR_evaluation/imagenet1000_clsidx_to_labels.txt
+              neural_compressor/torch/algorithms/fp8_quant/internal/diffusion_evaluation/SR_evaluation/imagenet1000_clsidx_to_labels.txt|
+              neural_compressor/evaluation/hf_eval/datasets/cnn_validation.json
           )$
 
   - repo: https://github.com/astral-sh/ruff-pre-commit
 
@@ -27,6 +27,7 @@ support AMD CPU, ARM CPU, and NVidia GPU through ONNX Runtime with limited testi
 * Collaborate with cloud marketplaces such as [Google Cloud Platform](https://console.cloud.google.com/marketplace/product/bitnami-launchpad/inc-tensorflow-intel?project=verdant-sensor-286207), [Amazon Web Services](https://aws.amazon.com/marketplace/pp/prodview-yjyh2xmggbmga#pdp-support), and [Azure](https://azuremarketplace.microsoft.com/en-us/marketplace/apps/bitnami.inc-tensorflow-intel), software platforms such as [Alibaba Cloud](https://www.intel.com/content/www/us/en/developer/articles/technical/quantize-ai-by-oneapi-analytics-on-alibaba-cloud.html), [Tencent TACO](https://new.qq.com/rain/a/20221202A00B9S00) and [Microsoft Olive](https://github.com/microsoft/Olive), and open AI ecosystem such as [Hugging Face](https://huggingface.co/blog/intel), [PyTorch](https://pytorch.org/tutorials/recipes/intel_neural_compressor_for_pytorch.html), [ONNX](https://github.com/onnx/models#models), [ONNX Runtime](https://github.com/microsoft/onnxruntime), and [Lightning AI](https://github.com/Lightning-AI/lightning/blob/master/docs/source-pytorch/advanced/post_training_quantization.rst)
 
 ## What's New
+* [2024/10] [Transformers-like API](./docs/source/3x/transformers_like_api.md) for INT4 inference on Intel CPU and GPU.
 * [2024/07] From 3.0 release, framework extension API is recommended to be used for quantization.
 * [2024/07] Performance optimizations and usability improvements on [client-side](./docs/source/3x/client_quant.md).
 
@@ -71,7 +72,7 @@ pip install "neural-compressor>=2.3" "transformers>=4.34.0" torch torchvision
 ```
 After successfully installing these packages, try your first quantization program.
 
-### [FP8 Quantization](./examples/3.x_api/pytorch/cv/fp8_quant/)
+### [FP8 Quantization](./docs/source/3x/PT_FP8Quant.md)
 Following example code demonstrates FP8 Quantization, it is supported by Intel Gaudi2 AI Accelerator. 
 
 To try on Intel Gaudi2, docker image with Gaudi Software Stack is recommended, please refer to following script for environment setup. More details can be found in [Gaudi Guide](https://docs.habana.ai/en/latest/Installation_Guide/Bare_Metal_Fresh_OS.html#launch-docker-image-that-was-built).
@@ -147,7 +148,7 @@ Intel Neural Compressor will convert the model format from auto-gptq to hpu form
     </tr>
     <tr>
         <td colspan="2" align="center"><a href="./docs/source/3x/PT_WeightOnlyQuant.md">Weight-Only Quantization</a></td>
-        <td colspan="2" align="center"><a href="./docs/3x/PT_FP8Quant.md">FP8 Quantization</a></td>
+        <td colspan="2" align="center"><a href="./docs/source/3x/PT_FP8Quant.md">FP8 Quantization</a></td>
         <td colspan="2" align="center"><a href="./docs/source/3x/PT_MXQuant.md">MX Quantization</a></td>
         <td colspan="2" align="center"><a href="./docs/source/3x/PT_MixedPrecision.md">Mixed Precision</a></td>
     </tr>
@@ -164,6 +165,16 @@ Intel Neural Compressor will convert the model format from auto-gptq to hpu form
           <td colspan="2" align="center"><a href="./docs/source/3x/TF_SQ.md">Smooth Quantization</a></td>
       </tr>
   </tbody>
+  <thead>
+      <tr>
+        <th colspan="8">Transformers-like APIs</th>
+      </tr>
+  </thead>
+  <tbody>
+      <tr>
+          <td colspan="8" align="center"><a href="./docs/source/3x/transformers_like_api.md">Overview</a></td>
+      </tr>
+  </tbody>
   <thead>
       <tr>
         <th colspan="8">Other Modules</th>
@@ -181,6 +192,9 @@ Intel Neural Compressor will convert the model format from auto-gptq to hpu form
 > From 3.0 release, we recommend to use 3.X API. Compression techniques during training such as QAT, Pruning, Distillation only available in [2.X API](https://github.com/intel/neural-compressor/blob/master/docs/source/2x_user_guide.md) currently.
 
 ## Selected Publications/Events
+
+* EMNLP'2024: [Optimize Weight Rounding via Signed Gradient Descent for the Quantization of LLMs](https://arxiv.org/abs/2309.05516) (Sep 2024)
+* Blog on Medium: [Quantization on Intel Gaudi Series AI Accelerators](https://medium.com/intel-analytics-software/intel-neural-compressor-v3-0-a-quantization-tool-across-intel-hardware-9856adee6f11) (Aug 2024)
 * Blog by Intel: [Neural Compressor: Boosting AI Model Efficiency](https://community.intel.com/t5/Blogs/Tech-Innovation/Artificial-Intelligence-AI/Neural-Compressor-Boosting-AI-Model-Efficiency/post/1604740) (June 2024)
 * Blog by Intel: [Optimization of Intel AI Solutions for Alibaba Cloud’s Qwen2 Large Language Models](https://www.intel.com/content/www/us/en/developer/articles/technical/intel-ai-solutions-accelerate-alibaba-qwen2-llms.html) (June 2024)
 * Blog by Intel: [Accelerate Meta* Llama 3 with Intel AI Solutions](https://www.intel.com/content/www/us/en/developer/articles/technical/accelerate-meta-llama3-with-intel-ai-solutions.html) (Apr 2024)
 
@@ -34,10 +34,12 @@
     "sphinx.ext.coverage",
     "sphinx.ext.autosummary",
     "sphinx_md",
+    "sphinx_rtd_theme",
     "autoapi.extension",
     "sphinx.ext.napoleon",
     "sphinx.ext.githubpages",
     "sphinx.ext.linkcode",
+    "sphinxcontrib.jquery",
 ]
 
 autoapi_dirs = ["../../neural_compressor"]
 
@@ -1,6 +1,10 @@
-recommonmark
-sphinx==6.1.1
-sphinx-autoapi
-sphinx-markdown-tables
-sphinx-md
-sphinx_rtd_theme
+recommonmark==0.7.1
+setuptools_scm[toml]==8.1.0
+sphinx==7.3.7
+sphinx-autoapi==3.1.0
+sphinx-autobuild==2024.4.16
+sphinx-markdown-tables==0.0.17
+sphinx-md==0.0.4
+sphinx_rtd_theme==2.0.0
+sphinxcontrib-jquery==4.1
+sphinxemoji==0.3.1