openvinotoolkit · andrey-churkin · Feb 7, 2025
@@ -201,9 +201,6 @@ def transform_fn(data_item):
 calibration_dataset = nncf.Dataset(val_dataset, transform_fn)
 # Step 3: Run the quantization pipeline
 quantized_model = nncf.quantize(model, calibration_dataset)
-# Step 4: Remove auxiliary layers and operations added during the quantization process,
-# resulting in a clean, fully quantized model ready for deployment.
-stripped_model = nncf.strip(quantized_model)
 ```
 
 </details>

@@ -60,11 +60,7 @@ ov_quantized_model = ov.convert_model(quantized_model.cpu(), example_input=dummy
 # To OpenVINO format
 import openvino as ov
 
-# Removes auxiliary layers and operations added during the quantization process,
-# resulting in a clean, fully quantized model ready for deployment.
-stripped_model = nncf.strip(quantized_model)
-
-ov_quantized_model = ov.convert_model(stripped_model)
+ov_quantized_model = ov.convert_model(quantized_model)
 ```
 
 </details>

@@ -144,10 +144,6 @@ def transform_fn(data_item):
 calibration_dataset = nncf.Dataset(val_dataset, transform_fn)
 tf_quantized_model = nncf.quantize(tf_model, calibration_dataset)
 
-# Removes auxiliary layers and operations added during the quantization process,
-# resulting in a clean, fully quantized model ready for deployment.
-tf_quantized_model = nncf.strip(tf_quantized_model)
-
 ###############################################################################
 # Benchmark performance, calculate compression rate and validate accuracy
 

@@ -160,15 +160,11 @@ def transform_fn(data_item):
 # However, training for more than 1 epoch would further improve the quantized model's accuracy.
 tf_quantized_model.fit(train_dataset, epochs=1, verbose=1)
 
-# Removes auxiliary layers and operations added during the quantization process,
-# resulting in a clean, fully quantized model ready for deployment.
-stripped_model = nncf.strip(tf_quantized_model)
-
 ###############################################################################
 # Benchmark performance, calculate compression rate and validate accuracy
 
 ov_model = ov.convert_model(tf_model)
-ov_quantized_model = ov.convert_model(stripped_model)
+ov_quantized_model = ov.convert_model(tf_quantized_model)
 
 fp32_ir_path = ROOT / "mobilenet_v2_fp32.xml"
 ov.save_model(ov_model, fp32_ir_path, compress_to_fp16=False)