@@ -87,8 +87,8 @@ def __init__(self, framework_specific_info):
         cfg_yaml_name = "{}.yaml".format(self.__class__.__name__[: -len("Adaptor")].lower())
         self.itex_mode = self.backend == "itex" or cfg_yaml_name == "tensorflow_itex.yaml"

-        if self.itex_mode:
-            self._check_itex()
+        # if self.itex_mode:
+        #     self._check_itex()

         self.query_handler = TensorflowQuery(
             local_config_file=os.path.join(os.path.dirname(__file__), cfg_yaml_name),
@@ -440,6 +440,7 @@ def _tuning_cfg_to_fw(self, tuning_cfg):
             if "activation" in tuning_cfg["op"][each_op_info]:
                 is_asymmetric = tuning_cfg["op"][each_op_info]["activation"]["scheme"] == "asym"
             self.quantize_config["op_wise_config"][op_name] = (is_perchannel, algorithm, is_asymmetric, weight_bit)
+
         self.fp32_ops = fp32_ops
         self.bf16_ops = bf16_ops

@@ -1520,12 +1521,6 @@ def recover_tuned_model(self, model, q_config):

         return converter.convert_without_calib()

-    def diagnosis_helper(self, fp32_model, quan_model, tune_cfg, save_path):
-        """Tensorflow diagnosis helper function."""
-        from neural_compressor.tensorflow.quantization.utils.utility import tf_diagnosis_helper
-
-        return tf_diagnosis_helper(fp32_model, quan_model, tune_cfg, save_path)
-
     def get_output_op_names(self, qmodel):
         """Get the oupur OPs's names."""
         from neural_compressor.tensorflow.quantization.utils.graph_util import GraphAnalyzer
@@ -1711,7 +1706,14 @@ def __init__(self, framework_specific_info):
         super().__init__(framework_specific_info)

     @dump_elapsed_time("Pass quantize model")
-    def quantize(self, tune_cfg, model, data_loader, q_func=None):
+    def quantize(
+        self,
+        quant_config: StaticQuantConfig,
+        model: BaseModel,
+        calib_dataloader: Callable = None,
+        calib_iteration: int = 100,
+        q_func=None,
+    ):
         """Execute the quantize process on the specified model.

         Args:
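For context on the hunks above and below: the entry point now takes a `StaticQuantConfig`, a `BaseModel`, and an explicit calibration dataloader plus iteration count instead of the old `tune_cfg`/`data_loader` pair. A minimal usage sketch under assumed names (the `adaptor`, `fp32_model`, and `my_calib_dataloader` objects, and the default `StaticQuantConfig()` construction, are placeholders not taken from this diff):

```python
# Hypothetical call into the reworked quantize() signature; every name here
# except the keyword arguments is a placeholder for illustration.
quant_config = StaticQuantConfig()         # assumed default static PTQ config
q_model = adaptor.quantize(
    quant_config=quant_config,
    model=fp32_model,                      # a BaseModel wrapping the FP32 TF graph
    calib_dataloader=my_calib_dataloader,  # must expose .batch_size
    calib_iteration=100,                   # sampling size = batch_size * 100
)
```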
@@ -1725,17 +1727,19 @@ def quantize(self, tune_cfg, model, data_loader, q_func=None):
             tf.compat.v1.GraphDef: the quantized model
         """
         assert q_func is None, "quantization aware training mode is not support on tensorflow"
+        self.calib_sampling_size = calib_dataloader.batch_size * calib_iteration
+        tune_cfg = self.parse_quant_config(quant_config, model, calib_iteration)
         self._tuning_cfg_to_fw(tune_cfg)
         logger.debug("Dump quantization configurations:")
         logger.debug(self.quantize_config)
         from neural_compressor.tensorflow.quantization.utils.graph_converter import GraphConverter

-        calib_sampling_size = tune_cfg.get("calib_sampling_size", 1)
-        if isinstance(data_loader, BaseDataLoader):
-            batch_size = data_loader.batch_size
+        self.calib_sampling_size = tune_cfg.get("calib_sampling_size", 1)
+        if isinstance(calib_dataloader, BaseDataLoader):
+            batch_size = calib_dataloader.batch_size
             try:
                 for i in range(batch_size):
-                    if calib_sampling_size % (batch_size - i) == 0:
+                    if self.calib_sampling_size % (batch_size - i) == 0:
                         calib_batch_size = batch_size - i
                         if i != 0:  # pragma: no cover
                             logger.warning(
@@ -1744,17 +1748,18 @@ def quantize(self, tune_cfg, model, data_loader, q_func=None):
                                 "divisible exactly by batch size"
                             )
                         break
-                tmp_iterations = int(math.ceil(calib_sampling_size / calib_batch_size))
-                data_loader.batch(calib_batch_size)
+                tmp_iterations = int(math.ceil(self.calib_sampling_size / calib_batch_size))
+                calib_dataloader.batch(calib_batch_size)
                 self.quantize_config["calib_iteration"] = tmp_iterations
+
                 converted_model = GraphConverter(
                     model,
                     qt_config=self.quantize_config,
                     recipes=self.recipes,
                     int8_sequences=self.op_wise_sequences,
                     fp32_ops=self.fp32_ops,
                     bf16_ops=self.bf16_ops,
-                    data_loader=data_loader,
+                    data_loader=calib_dataloader,
                     calib_func=q_func,
                     itex_mode=self.itex_mode,
                     qdq_enabled=self.qdq_enabled,
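To make the calibration arithmetic in this hunk concrete: the loop walks down from the dataloader's batch size to the largest value that divides `calib_sampling_size` evenly, then derives the iteration count from it. A standalone sketch with made-up numbers (plain Python, no Neural Compressor imports):

```python
import math

# Illustration of the calib batch-size selection above, using invented values:
# a dataloader batch size of 32 and a tuned sampling size of 100.
batch_size = 32
calib_sampling_size = 100

# Walk down from batch_size until the sampling size divides evenly.
for i in range(batch_size):
    if calib_sampling_size % (batch_size - i) == 0:
        calib_batch_size = batch_size - i
        break

tmp_iterations = int(math.ceil(calib_sampling_size / calib_batch_size))
print(calib_batch_size, tmp_iterations)  # 25 4 -> 4 iterations of batch 25 cover 100 samples
```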
@@ -1767,32 +1772,32 @@ def quantize(self, tune_cfg, model, data_loader, q_func=None):

                 batch_size = get_model_input_shape(model)
                 logger.warning(
-                    "Fail to forward with batch size={}, set to {} now.".format(data_loader.batch_size, batch_size)
+                    "Fail to forward with batch size={}, set to {} now.".format(calib_dataloader.batch_size, batch_size)
                 )
-                data_loader.batch(batch_size)
-                self.quantize_config["calib_iteration"] = calib_sampling_size
+                calib_dataloader.batch(batch_size)
+                self.quantize_config["calib_iteration"] = self.calib_sampling_size
                 converted_model = GraphConverter(
                     model,
                     qt_config=self.quantize_config,
                     recipes=self.recipes,
                     int8_sequences=self.op_wise_sequences,
                     fp32_ops=self.fp32_ops,
                     bf16_ops=self.bf16_ops,
-                    data_loader=data_loader,
+                    data_loader=calib_dataloader,
                     itex_mode=self.itex_mode,
                     qdq_enabled=self.qdq_enabled,
                     new_api=self.new_api,
                     performance_only=self.performance_only,
                     use_bf16=self.use_bf16,
                 ).convert()
         else:  # pragma: no cover
-            if hasattr(data_loader, "batch_size") and calib_sampling_size % data_loader.batch_size != 0:
+            if hasattr(calib_dataloader, "batch_size") and self.calib_sampling_size % calib_dataloader.batch_size != 0:
                 iter = self.quantize_config["calib_iteration"]
                 logger.warning(
                     "Please note that calibration sampling size {} "
                     "isn't divisible exactly by batch size {}. "
                     "So the real sampling size is {}.".format(
-                        calib_sampling_size, data_loader.batch_size, data_loader.batch_size * iter
+                        self.calib_sampling_size, calib_dataloader.batch_size, calib_dataloader.batch_size * iter
                     )
                 )
             converted_model = GraphConverter(
@@ -1802,7 +1807,7 @@ def quantize(self, tune_cfg, model, data_loader, q_func=None):
                 int8_sequences=self.op_wise_sequences,
                 fp32_ops=self.fp32_ops,
                 bf16_ops=self.bf16_ops,
-                data_loader=data_loader,
+                data_loader=calib_dataloader,
                 calib_func=q_func,
                 itex_mode=self.itex_mode,
                 qdq_enabled=self.qdq_enabled,
@@ -2438,6 +2443,8 @@ def update_opwise_config(self):
         op_wise_config = {}
         for op_name, op_config in self.quant_config.items():
             op_key_name = (op_name[0], self.unify_op_type_mapping[op_name[1]])
+            if op_key_name not in self.capability["opwise"]:
+                continue
             single_op_cap = self.capability["opwise"][op_key_name][0]
             single_op_config = {"activation": {}}

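The guard added in `update_opwise_config` skips any op whose `(name, unified_type)` key has no entry in the framework's op-wise capability table instead of failing on the lookup. A self-contained sketch of the same filtering pattern, with invented op names and capability entries:

```python
# Invented capability table and quant config; only the filtering pattern
# mirrors the guard added above.
capability_opwise = {
    ("conv1", "conv2d"): [{"activation": {"dtype": "int8"}}],
}
quant_config = {
    ("conv1", "Conv2D"): "minmax",
    ("pool1", "MaxPool"): "minmax",  # no capability entry -> skipped
}
unify_op_type_mapping = {"Conv2D": "conv2d", "MaxPool": "pooling"}

for (name, op_type), algo in quant_config.items():
    op_key_name = (name, unify_op_type_mapping[op_type])
    if op_key_name not in capability_opwise:
        continue  # op is not quantizable per the capability query; leave it alone
    single_op_cap = capability_opwise[op_key_name][0]
    print(name, single_op_cap["activation"]["dtype"])  # conv1 int8
```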