@@ -87,8 +87,8 @@ def __init__(self, framework_specific_info):
         cfg_yaml_name = "{}.yaml".format(self.__class__.__name__[: -len("Adaptor")].lower())
         self.itex_mode = self.backend == "itex" or cfg_yaml_name == "tensorflow_itex.yaml"

-        if self.itex_mode:
-            self._check_itex()
+        # if self.itex_mode:
+        #     self._check_itex()

         self.query_handler = TensorflowQuery(
             local_config_file=os.path.join(os.path.dirname(__file__), cfg_yaml_name),
@@ -440,6 +440,7 @@ def _tuning_cfg_to_fw(self, tuning_cfg):
             if "activation" in tuning_cfg["op"][each_op_info]:
                 is_asymmetric = tuning_cfg["op"][each_op_info]["activation"]["scheme"] == "asym"
             self.quantize_config["op_wise_config"][op_name] = (is_perchannel, algorithm, is_asymmetric, weight_bit)
+
         self.fp32_ops = fp32_ops
         self.bf16_ops = bf16_ops

@@ -1520,12 +1521,6 @@ def recover_tuned_model(self, model, q_config):

         return converter.convert_without_calib()

-    def diagnosis_helper(self, fp32_model, quan_model, tune_cfg, save_path):
-        """Tensorflow diagnosis helper function."""
-        from neural_compressor.tensorflow.quantization.utils.utility import tf_diagnosis_helper
-
-        return tf_diagnosis_helper(fp32_model, quan_model, tune_cfg, save_path)
-
     def get_output_op_names(self, qmodel):
         """Get the oupur OPs's names."""
         from neural_compressor.tensorflow.quantization.utils.graph_util import GraphAnalyzer
@@ -1711,7 +1706,14 @@ def __init__(self, framework_specific_info):
         super().__init__(framework_specific_info)

     @dump_elapsed_time("Pass quantize model")
-    def quantize(self, tune_cfg, model, data_loader, q_func=None):
+    def quantize(
+        self,
+        quant_config: StaticQuantConfig,
+        model: BaseModel,
+        calib_dataloader: Callable = None,
+        calib_iteration: int = 100,
+        q_func=None,
+    ):
         """Execute the quantize process on the specified model.

         Args:
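For context on the hunks above and below: the entry point now takes a `StaticQuantConfig`, a `BaseModel`, and an explicit calibration dataloader plus iteration count instead of the old `tune_cfg`/`data_loader` pair. A minimal usage sketch under assumed names (the `adaptor`, `fp32_model`, and `my_calib_dataloader` objects, and the default `StaticQuantConfig()` construction, are placeholders not taken from this diff):

```python
# Hypothetical call into the reworked quantize() signature; every name here
# except the keyword arguments is a placeholder for illustration.
quant_config = StaticQuantConfig()         # assumed default static PTQ config
q_model = adaptor.quantize(
    quant_config=quant_config,
    model=fp32_model,                      # a BaseModel wrapping the FP32 TF graph
    calib_dataloader=my_calib_dataloader,  # must expose .batch_size
    calib_iteration=100,                   # sampling size = batch_size * 100
)
```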
@@ -1725,17 +1727,19 @@ def quantize(self, tune_cfg, model, data_loader, q_func=None):
             tf.compat.v1.GraphDef: the quantized model
         """
         assert q_func is None, "quantization aware training mode is not support on tensorflow"
+        self.calib_sampling_size = calib_dataloader.batch_size * calib_iteration
+        tune_cfg = self.parse_quant_config(quant_config, model, calib_iteration)
         self._tuning_cfg_to_fw(tune_cfg)
         logger.debug("Dump quantization configurations:")
         logger.debug(self.quantize_config)
         from neural_compressor.tensorflow.quantization.utils.graph_converter import GraphConverter

-        calib_sampling_size = tune_cfg.get("calib_sampling_size", 1)
-        if isinstance(data_loader, BaseDataLoader):
-            batch_size = data_loader.batch_size
+        self.calib_sampling_size = tune_cfg.get("calib_sampling_size", 1)
+        if isinstance(calib_dataloader, BaseDataLoader):
+            batch_size = calib_dataloader.batch_size
             try:
                 for i in range(batch_size):
-                    if calib_sampling_size % (batch_size - i) == 0:
+                    if self.calib_sampling_size % (batch_size - i) == 0:
                         calib_batch_size = batch_size - i
                         if i != 0:  # pragma: no cover
                             logger.warning(
@@ -1744,17 +1748,18 @@ def quantize(self, tune_cfg, model, data_loader, q_func=None):
                                 "divisible exactly by batch size"
                             )
                         break
-                tmp_iterations = int(math.ceil(calib_sampling_size / calib_batch_size))
-                data_loader.batch(calib_batch_size)
+                tmp_iterations = int(math.ceil(self.calib_sampling_size / calib_batch_size))
+                calib_dataloader.batch(calib_batch_size)
                 self.quantize_config["calib_iteration"] = tmp_iterations
+
                 converted_model = GraphConverter(
                     model,
                     qt_config=self.quantize_config,
                     recipes=self.recipes,
                     int8_sequences=self.op_wise_sequences,
                     fp32_ops=self.fp32_ops,
                     bf16_ops=self.bf16_ops,
-                    data_loader=data_loader,
+                    data_loader=calib_dataloader,
                     calib_func=q_func,
                     itex_mode=self.itex_mode,
                     qdq_enabled=self.qdq_enabled,
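To make the calibration arithmetic in this hunk concrete: the loop walks down from the dataloader's batch size to the largest value that divides `calib_sampling_size` evenly, then derives the iteration count from it. A standalone sketch with made-up numbers (plain Python, no Neural Compressor imports):

```python
import math

# Illustration of the calib batch-size selection above, using invented values:
# a dataloader batch size of 32 and a tuned sampling size of 100.
batch_size = 32
calib_sampling_size = 100

# Walk down from batch_size until the sampling size divides evenly.
for i in range(batch_size):
    if calib_sampling_size % (batch_size - i) == 0:
        calib_batch_size = batch_size - i
        break

tmp_iterations = int(math.ceil(calib_sampling_size / calib_batch_size))
print(calib_batch_size, tmp_iterations)  # 25 4 -> 4 iterations of batch 25 cover 100 samples
```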
@@ -1767,32 +1772,32 @@ def quantize(self, tune_cfg, model, data_loader, q_func=None):

                 batch_size = get_model_input_shape(model)
                 logger.warning(
-                    "Fail to forward with batch size={}, set to {} now.".format(data_loader.batch_size, batch_size)
+                    "Fail to forward with batch size={}, set to {} now.".format(calib_dataloader.batch_size, batch_size)
                 )
-                data_loader.batch(batch_size)
-                self.quantize_config["calib_iteration"] = calib_sampling_size
+                calib_dataloader.batch(batch_size)
+                self.quantize_config["calib_iteration"] = self.calib_sampling_size
                 converted_model = GraphConverter(
                     model,
                     qt_config=self.quantize_config,
                     recipes=self.recipes,
                     int8_sequences=self.op_wise_sequences,
                     fp32_ops=self.fp32_ops,
                     bf16_ops=self.bf16_ops,
-                    data_loader=data_loader,
+                    data_loader=calib_dataloader,
                     itex_mode=self.itex_mode,
                     qdq_enabled=self.qdq_enabled,
                     new_api=self.new_api,
                     performance_only=self.performance_only,
                     use_bf16=self.use_bf16,
                 ).convert()
         else:  # pragma: no cover
-            if hasattr(data_loader, "batch_size") and calib_sampling_size % data_loader.batch_size != 0:
+            if hasattr(calib_dataloader, "batch_size") and self.calib_sampling_size % calib_dataloader.batch_size != 0:
                 iter = self.quantize_config["calib_iteration"]
                 logger.warning(
                     "Please note that calibration sampling size {} "
                     "isn't divisible exactly by batch size {}. "
                     "So the real sampling size is {}.".format(
-                        calib_sampling_size, data_loader.batch_size, data_loader.batch_size * iter
+                        self.calib_sampling_size, calib_dataloader.batch_size, calib_dataloader.batch_size * iter
                     )
                 )
             converted_model = GraphConverter(
@@ -1802,7 +1807,7 @@ def quantize(self, tune_cfg, model, data_loader, q_func=None):
                 int8_sequences=self.op_wise_sequences,
                 fp32_ops=self.fp32_ops,
                 bf16_ops=self.bf16_ops,
-                data_loader=data_loader,
+                data_loader=calib_dataloader,
                 calib_func=q_func,
                 itex_mode=self.itex_mode,
                 qdq_enabled=self.qdq_enabled,
@@ -2438,6 +2443,8 @@ def update_opwise_config(self):
         op_wise_config = {}
         for op_name, op_config in self.quant_config.items():
             op_key_name = (op_name[0], self.unify_op_type_mapping[op_name[1]])
+            if op_key_name not in self.capability["opwise"]:
+                continue
             single_op_cap = self.capability["opwise"][op_key_name][0]
             single_op_config = {"activation": {}}

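The guard added in `update_opwise_config` skips any op whose `(name, unified_type)` key has no entry in the framework's op-wise capability table instead of failing on the lookup. A self-contained sketch of the same filtering pattern, with invented op names and capability entries:

```python
# Invented capability table and quant config; only the filtering pattern
# mirrors the guard added above.
capability_opwise = {
    ("conv1", "conv2d"): [{"activation": {"dtype": "int8"}}],
}
quant_config = {
    ("conv1", "Conv2D"): "minmax",
    ("pool1", "MaxPool"): "minmax",  # no capability entry -> skipped
}
unify_op_type_mapping = {"Conv2D": "conv2d", "MaxPool": "pooling"}

for (name, op_type), algo in quant_config.items():
    op_key_name = (name, unify_op_type_mapping[op_type])
    if op_key_name not in capability_opwise:
        continue  # op is not quantizable per the capability query; leave it alone
    single_op_cap = capability_opwise[op_key_name][0]
    print(name, single_op_cap["activation"]["dtype"])  # conv1 int8
```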