
Commit 1fa758f (deploy: a5e5f5f; 1 parent: b33f383)
750 files changed (+1760, -871 lines)
@@ -0,0 +1,8 @@
:orphan:

:py:mod:`neural_compressor.torch.algorithms.smooth_quant`
=========================================================

.. py:module:: neural_compressor.torch.algorithms.smooth_quant
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
:orphan:
2+
3+
:py:mod:`neural_compressor.torch.algorithms.smooth_quant.smooth_quant`
4+
======================================================================
5+
6+
.. py:module:: neural_compressor.torch.algorithms.smooth_quant.smooth_quant
7+
8+
9+
Module Contents
10+
---------------
11+
12+
13+
Functions
14+
~~~~~~~~~
15+
16+
.. autoapisummary::
17+
18+
neural_compressor.torch.algorithms.smooth_quant.smooth_quant.smooth_quantize
19+
20+
21+
22+
.. py:function:: smooth_quantize(model, tune_cfg, run_fn, example_inputs, inplace=True)
23+
24+
Execute the quantize process on the specified model.
25+
26+
:param model: a float model to be quantized.
27+
:param tune_cfg: quantization config for ops.
28+
:param run_fn: a calibration function for calibrating the model.
29+
:param example_inputs: used to trace torch model.
30+
:param inplace: whether to carry out model transformations in-place.
31+
32+
:returns: A quantized model.
33+
34+
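The entry point above implements the SmoothQuant idea: per-channel scales migrate activation outliers into the weights without changing the layer output `y = x @ W`. The following is a minimal pure-Python sketch of that equivalence; the helper names are illustrative only, not part of neural_compressor's API.

```python
# Illustrative SmoothQuant transform: dividing each input channel by a
# scale and multiplying the matching weight row by the same scale
# preserves the matmul output. Names here are hypothetical.

def matvec(w, x):
    """y_j = sum_i x_i * w[i][j], weight stored as rows = in-channels."""
    return [sum(x[i] * w[i][j] for i in range(len(x))) for j in range(len(w[0]))]

def apply_smoothing(x, w, scales):
    """Migrate quantization difficulty from activations to weights, per channel."""
    x_s = [xi / s for xi, s in zip(x, scales)]                      # smoothed activations
    w_s = [[wij * s for wij in row] for row, s in zip(w, scales)]   # scaled weights
    return x_s, w_s

x = [8.0, 0.5]                    # one outlier-heavy input channel
w = [[0.25, 0.5], [0.5, 0.25]]    # 2 in-channels x 2 out-channels
x_s, w_s = apply_smoothing(x, w, [4.0, 1.0])

assert matvec(w, x) == matvec(w_s, x_s)   # output is unchanged
```

The values are exact binary fractions, so the equality holds without floating-point tolerance; with arbitrary data a tolerance comparison would be appropriate.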
@@ -0,0 +1,165 @@
:orphan:

:py:mod:`neural_compressor.torch.algorithms.smooth_quant.utility`
=================================================================

.. py:module:: neural_compressor.torch.algorithms.smooth_quant.utility


Module Contents
---------------

Classes
~~~~~~~

.. autoapisummary::

   neural_compressor.torch.algorithms.smooth_quant.utility.TorchSmoothQuant
   neural_compressor.torch.algorithms.smooth_quant.utility.CpuInfo


Functions
~~~~~~~~~

.. autoapisummary::

   neural_compressor.torch.algorithms.smooth_quant.utility.generate_activation_observer
   neural_compressor.torch.algorithms.smooth_quant.utility.check_cfg_and_qconfig
   neural_compressor.torch.algorithms.smooth_quant.utility.get_quantizable_ops_recursively
   neural_compressor.torch.algorithms.smooth_quant.utility.get_module
   neural_compressor.torch.algorithms.smooth_quant.utility.set_module
   neural_compressor.torch.algorithms.smooth_quant.utility.update_sq_scale
   neural_compressor.torch.algorithms.smooth_quant.utility.reshape_scale_as_weight
   neural_compressor.torch.algorithms.smooth_quant.utility.reshape_in_channel_to_last
   neural_compressor.torch.algorithms.smooth_quant.utility.reshape_scale_as_input
   neural_compressor.torch.algorithms.smooth_quant.utility.register_autotune

.. py:function:: generate_activation_observer(scheme, algorithm, smooth_quant=False, smooth_quant_enable=False)

   Helper method to generate an activation observer.

   :param scheme: Quantization scheme to be used.
   :type scheme: str
   :param algorithm: The algorithm used to compute the quantization parameters.
   :type algorithm: str

   :returns: An observer.

.. py:function:: check_cfg_and_qconfig(tune_cfg, cfgs, op_infos_from_cfgs, output_tensor_ids_op_name, smooth_quant=False)

   Check configs and quantization configs.

   :param tune_cfg: dictionary of quantization configuration.
   :type tune_cfg: dict
   :param cfgs: the input configs.
   :type cfgs: dict
   :param op_infos_from_cfgs: op infos from configs.
   :type op_infos_from_cfgs: dict
   :param output_tensor_ids_op_name: dictionary of output tensor op names.
   :type output_tensor_ids_op_name: dict

   :returns: cfgs (dict).

.. py:function:: get_quantizable_ops_recursively(model, example_inputs)

   Get all quantizable ops from the model.

   :param model: input model
   :type model: object
   :param example_inputs: used to trace the torch model.
   :type example_inputs: dict|list|tuple|torch.Tensor

   :returns: quantizable_ops (list): list of tuples of op_name and op_type;
             cfgs (dict): dict of configuration.

.. py:function:: get_module(model, key)

   Get a module from the model by key name.

   :param model: original model
   :type model: torch.nn.Module
   :param key: name of the module to retrieve
   :type key: str

.. py:function:: set_module(model, key, new_module)

   Set a new module into the model by key name.

   :param model: original model
   :type model: torch.nn.Module
   :param key: name of the module to be replaced
   :type key: str
   :param new_module: new module to be inserted
   :type new_module: torch.nn.Module

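Helpers like `get_module`/`set_module` are commonly implemented by walking a dotted key such as `"block.linear"` with `getattr`. Below is a hedged sketch of that pattern, shown on plain namespace objects rather than torch modules; it is not necessarily the library's exact code.

```python
from types import SimpleNamespace

def get_module(model, key):
    """Fetch a submodule by dotted name, e.g. 'block.linear'."""
    obj = model
    for name in key.split("."):
        obj = getattr(obj, name)
    return obj

def set_module(model, key, new_module):
    """Replace the submodule named by a dotted key with new_module."""
    parent_key, _, child = key.rpartition(".")
    parent = get_module(model, parent_key) if parent_key else model
    setattr(parent, child, new_module)

# Usage on a stand-in "model" built from namespaces:
model = SimpleNamespace(block=SimpleNamespace(linear="fp32_linear"))
assert get_module(model, "block.linear") == "fp32_linear"
set_module(model, "block.linear", "int8_linear")
assert get_module(model, "block.linear") == "int8_linear"
```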
.. py:function:: update_sq_scale(ipex_config_path, smoothquant_scale_info)

   Update ipex_config.json with the smoothquant scale info generated by our algorithm.

   :param ipex_config_path: path to the temporary ipex_config.json file.
   :type ipex_config_path: str
   :param smoothquant_scale_info: a dict containing smoothquant scale info.
   :type smoothquant_scale_info: dict

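`update_sq_scale` patches a JSON config on disk. The exact ipex_config.json schema is not shown in this page, so the following is only a sketch of the read-merge-write pattern with a made-up flat layout; function and key names are hypothetical.

```python
import json
import os
import tempfile

def update_config(config_path, scale_info):
    """Read a JSON config, merge in new scale entries, and write it back."""
    with open(config_path) as f:
        cfg = json.load(f)
    # "smoothquant_scales" is an assumed key, used here purely for illustration.
    cfg.setdefault("smoothquant_scales", {}).update(scale_info)
    with open(config_path, "w") as f:
        json.dump(cfg, f, indent=2)

# Usage with a throwaway file standing in for ipex_config.json:
path = os.path.join(tempfile.mkdtemp(), "ipex_config.json")
with open(path, "w") as f:
    json.dump({"q_op_infos": {}}, f)

update_config(path, {"linear1": 0.5})
with open(path) as f:
    cfg_after = json.load(f)
assert cfg_after["smoothquant_scales"]["linear1"] == 0.5
```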
.. py:function:: reshape_scale_as_weight(layer, scale)

   Reshape the scale to match the weight's input-channel dimension
   (the output-channel dimension for depthwise layers).

   :param layer: torch module
   :param scale: original scale
   :return: reshaped scale.

.. py:function:: reshape_in_channel_to_last(layer_name, model)

   Move the input channel to the last dimension.

   :param layer_name: layer name
   :param model: model containing the layer
   :return: The reshaped weight.

.. py:function:: reshape_scale_as_input(layer, scale)

   Reshape the scale to match the input feature's channel dimension.

   :param layer: torch module
   :param scale: original scale
   :return: reshaped scale.

.. py:function:: register_autotune(name)

   Class decorator to register a smoothquant auto-tune subclass.

   :return: the registered class.

.. py:class:: TorchSmoothQuant(model, dataloader=None, example_inputs=None, q_func=None, traced_model=None, scale_sharing=True, record_max_info=False)

   Fake input-channel quantization. For more details, please refer to:

   [1] SmoothQuant: Accurate and Efficient Post-Training Quantization for Large Language Models

   [2] SPIQ: Data-Free Per-Channel Static Input Quantization

   Currently, we only handle layers whose smooth scale can be absorbed; support for other layers will be added later.

   We only support inplace mode, which means the model weights will be changed; you can call the recover function to restore the weights if needed.

.. py:class:: CpuInfo

   Get CPU info.
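Because `TorchSmoothQuant` only supports in-place transformation, recovery implies snapshotting the original weights before smoothing and copying them back on demand. A pure-Python sketch of that record/recover idea follows; the class and method names are illustrative, not the library's API.

```python
import copy

class InplaceSmoother:
    """Scale weights in place, but keep a snapshot so they can be restored."""

    def __init__(self, weights):
        self.weights = weights                   # mutated in place
        self._saved = copy.deepcopy(weights)     # pristine copy for recovery

    def transform(self, scales):
        """Multiply each weight row by its per-channel smoothing scale."""
        for row, s in zip(self.weights, scales):
            for j in range(len(row)):
                row[j] *= s

    def recover(self):
        """Restore the original weights from the snapshot."""
        for row, saved in zip(self.weights, self._saved):
            row[:] = saved

w = [[1.0, 2.0], [3.0, 4.0]]
sm = InplaceSmoother(w)
sm.transform([2.0, 0.5])
assert w == [[2.0, 4.0], [1.5, 2.0]]   # changed in place
sm.recover()
assert w == [[1.0, 2.0], [3.0, 4.0]]   # original weights restored
```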

latest/_sources/autoapi/neural_compressor/torch/algorithms/static_quant/utility/index.rst.txt (+35 -35)
@@ -14,8 +14,8 @@ Classes

 .. autoapisummary::

-   neural_compressor.torch.algorithms.static_quant.utility.TransformerBasedModelBlockPatternDetector
    neural_compressor.torch.algorithms.static_quant.utility.Statistics
+   neural_compressor.torch.algorithms.static_quant.utility.TransformerBasedModelBlockPatternDetector

@@ -24,17 +24,46 @@ Functions

 .. autoapisummary::

+   neural_compressor.torch.algorithms.static_quant.utility.get_quantizable_ops_recursively
+   neural_compressor.torch.algorithms.static_quant.utility.simple_inference
+   neural_compressor.torch.algorithms.static_quant.utility.dump_model_op_stats
    neural_compressor.torch.algorithms.static_quant.utility.get_depth
    neural_compressor.torch.algorithms.static_quant.utility.get_dict_at_depth
    neural_compressor.torch.algorithms.static_quant.utility.get_element_under_depth
    neural_compressor.torch.algorithms.static_quant.utility.paser_cfgs
    neural_compressor.torch.algorithms.static_quant.utility.get_quantizable_ops_from_cfgs
-   neural_compressor.torch.algorithms.static_quant.utility.simple_inference
-   neural_compressor.torch.algorithms.static_quant.utility.dump_model_op_stats
-   neural_compressor.torch.algorithms.static_quant.utility.get_quantizable_ops_recursively


+.. py:function:: get_quantizable_ops_recursively(model, example_inputs)
+
+   Get all quantizable ops from the model.
+
+   :param model: input model
+   :type model: object
+   :param example_inputs: used to trace the torch model.
+   :type example_inputs: dict|list|tuple|torch.Tensor
+
+   :returns: quantizable_ops (list): list of tuples of op_name and op_type;
+             cfgs (dict): dict of configuration.
+
+
+.. py:function:: simple_inference(q_model, example_inputs, iterations=1)
+
+   This function is used for ipex warm-up inference.
+
+
+.. py:function:: dump_model_op_stats(tune_cfg)
+
+   Dump the quantizable ops of the model for the user.
+
+   :param tune_cfg: quantization config
+   :type tune_cfg: dict
+
+   :returns: None
+
 .. py:function:: get_depth(d) -> int

    Query the depth of the dict.
@@ -78,33 +107,10 @@ Functions
    :returns: cfgs (dict).


-.. py:function:: simple_inference(q_model, example_inputs, iterations=1)
-
-   The function is used for ipex warm-up inference.
-
-
-.. py:function:: dump_model_op_stats(tune_cfg)
-
-   This is a function to dump quantizable ops of model to user.
-
-   :param tune_cfg: quantization config
-   :type tune_cfg: dict
-
-   :returns: None
-
-
-.. py:function:: get_quantizable_ops_recursively(model, example_inputs)
-
-   Get all quantizable ops from model.
-
-   :param model: input model
-   :type model: object
-   :param example_inputs: used to trace torch model.
-   :type example_inputs: dict|list|tuple|torch.Tensor
-
-   :returns: list of tuples of op_name and op_type.
-             cfgs (dict): dict of configuration
-   :rtype: quantizable_ops (list)
+.. py:class:: Statistics(data, header, field_names, output_handle=logger.info)
+
+   The statistics printer.


 .. py:class:: TransformerBasedModelBlockPatternDetector(model: torch.nn.Module, pattern_lst: List[List[Union[str, int]]] = BLOCK_PATTERNS)

@@ -113,9 +119,3 @@
    Detect the attention block and FFN block in transformer-based model.


-.. py:class:: Statistics(data, header, field_names, output_handle=logger.info)
-
-   The statistics printer.
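The `Statistics` class above is documented only as "the statistics printer". The sketch below shows what such a fixed-width table printer typically looks like; the constructor shape (`data`, `field_names`, `output_handle`) is borrowed from the signature above, while the function name and formatting details are guessed, not the library's implementation.

```python
def print_statistics(data, field_names, output_handle=print):
    """Render rows as a fixed-width table and emit each line via output_handle."""
    rows = [[str(v) for v in row] for row in data]
    # Column width = widest cell in the column, including the header.
    widths = [max(len(name), *(len(r[i]) for r in rows)) if rows else len(name)
              for i, name in enumerate(field_names)]
    output_handle(" | ".join(n.ljust(w) for n, w in zip(field_names, widths)))
    output_handle("-+-".join("-" * w for w in widths))
    for r in rows:
        output_handle(" | ".join(c.ljust(w) for c, w in zip(r, widths)))

# Usage: collect the rendered lines instead of printing them.
lines = []
print_statistics([["Linear", 12], ["Conv2d", 3]], ["Op type", "Count"],
                 output_handle=lines.append)
assert lines[0].startswith("Op type")
assert "Linear" in lines[2]
```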

latest/autoapi/block_mask/index.html (+1 -1)

@@ -107,7 +107,7 @@
     Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
     <a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
     provided by <a href="https://readthedocs.org">Read the Docs</a>.
-    <jinja2.runtime.BlockReference object at 0x7fd3a8228eb0>
+    <jinja2.runtime.BlockReference object at 0x7f38ecfa9a50>
     <p></p><div><a href='https://www.intel.com/content/www/us/en/privacy/intel-cookie-notice.html' data-cookie-notice='true'>Cookies</a> <a href='https://www.intel.com/content/www/us/en/privacy/intel-privacy-notice.html'>| Privacy</a></div>

latest/autoapi/neural_compressor/adaptor/adaptor/index.html (+1 -1)

@@ -146,7 +146,7 @@ <h3>Functions<a class="headerlink" href="#functions" title="Permalink to this he
     Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
     <a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
     provided by <a href="https://readthedocs.org">Read the Docs</a>.
-    <jinja2.runtime.BlockReference object at 0x7fd3a7726380>
+    <jinja2.runtime.BlockReference object at 0x7f38e9e51720>
     <p></p><div><a href='https://www.intel.com/content/www/us/en/privacy/intel-cookie-notice.html' data-cookie-notice='true'>Cookies</a> <a href='https://www.intel.com/content/www/us/en/privacy/intel-privacy-notice.html'>| Privacy</a></div>

latest/autoapi/neural_compressor/adaptor/index.html (+1 -1)

@@ -217,7 +217,7 @@ <h2>Package Contents<a class="headerlink" href="#package-contents" title="Permal
     Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
     <a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
     provided by <a href="https://readthedocs.org">Read the Docs</a>.
-    <jinja2.runtime.BlockReference object at 0x7fd3a76d36a0>
+    <jinja2.runtime.BlockReference object at 0x7f38ecbfce80>
     <p></p><div><a href='https://www.intel.com/content/www/us/en/privacy/intel-cookie-notice.html' data-cookie-notice='true'>Cookies</a> <a href='https://www.intel.com/content/www/us/en/privacy/intel-privacy-notice.html'>| Privacy</a></div>

latest/autoapi/neural_compressor/adaptor/keras/index.html (+1 -1)

@@ -125,7 +125,7 @@ <h3>Classes<a class="headerlink" href="#classes" title="Permalink to this headin
     Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
     <a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
     provided by <a href="https://readthedocs.org">Read the Docs</a>.
-    <jinja2.runtime.BlockReference object at 0x7fd3a76d21a0>
+    <jinja2.runtime.BlockReference object at 0x7f38e9a9cfd0>
     <p></p><div><a href='https://www.intel.com/content/www/us/en/privacy/intel-cookie-notice.html' data-cookie-notice='true'>Cookies</a> <a href='https://www.intel.com/content/www/us/en/privacy/intel-privacy-notice.html'>| Privacy</a></div>

latest/autoapi/neural_compressor/adaptor/keras_utils/conv2d/index.html (+1 -1)

@@ -106,7 +106,7 @@
     Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
     <a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
     provided by <a href="https://readthedocs.org">Read the Docs</a>.
-    <jinja2.runtime.BlockReference object at 0x7fd3a76d3ac0>
+    <jinja2.runtime.BlockReference object at 0x7f38e9a9e950>
     <p></p><div><a href='https://www.intel.com/content/www/us/en/privacy/intel-cookie-notice.html' data-cookie-notice='true'>Cookies</a> <a href='https://www.intel.com/content/www/us/en/privacy/intel-privacy-notice.html'>| Privacy</a></div>

latest/autoapi/neural_compressor/adaptor/keras_utils/dense/index.html (+1 -1)

@@ -106,7 +106,7 @@
     Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
     <a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
     provided by <a href="https://readthedocs.org">Read the Docs</a>.
-    <jinja2.runtime.BlockReference object at 0x7fd3a76d0340>
+    <jinja2.runtime.BlockReference object at 0x7f38e9e534c0>
     <p></p><div><a href='https://www.intel.com/content/www/us/en/privacy/intel-cookie-notice.html' data-cookie-notice='true'>Cookies</a> <a href='https://www.intel.com/content/www/us/en/privacy/intel-privacy-notice.html'>| Privacy</a></div>

latest/autoapi/neural_compressor/adaptor/keras_utils/depthwise_conv2d/index.html (+1 -1)

@@ -106,7 +106,7 @@
     Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
     <a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
     provided by <a href="https://readthedocs.org">Read the Docs</a>.
-    <jinja2.runtime.BlockReference object at 0x7fd3a73d07f0>
+    <jinja2.runtime.BlockReference object at 0x7f38e9c04640>
     <p></p><div><a href='https://www.intel.com/content/www/us/en/privacy/intel-cookie-notice.html' data-cookie-notice='true'>Cookies</a> <a href='https://www.intel.com/content/www/us/en/privacy/intel-privacy-notice.html'>| Privacy</a></div>

latest/autoapi/neural_compressor/adaptor/keras_utils/index.html (+1 -1)

@@ -106,7 +106,7 @@
     Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
     <a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
     provided by <a href="https://readthedocs.org">Read the Docs</a>.
-    <jinja2.runtime.BlockReference object at 0x7fd3a76d3f70>
+    <jinja2.runtime.BlockReference object at 0x7f38e9e51870>
     <p></p><div><a href='https://www.intel.com/content/www/us/en/privacy/intel-cookie-notice.html' data-cookie-notice='true'>Cookies</a> <a href='https://www.intel.com/content/www/us/en/privacy/intel-privacy-notice.html'>| Privacy</a></div>

latest/autoapi/neural_compressor/adaptor/keras_utils/pool2d/index.html (+1 -1)

@@ -106,7 +106,7 @@
     Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
     <a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
     provided by <a href="https://readthedocs.org">Read the Docs</a>.
-    <jinja2.runtime.BlockReference object at 0x7fd3a76d0cd0>
+    <jinja2.runtime.BlockReference object at 0x7f38e9c07430>
     <p></p><div><a href='https://www.intel.com/content/www/us/en/privacy/intel-cookie-notice.html' data-cookie-notice='true'>Cookies</a> <a href='https://www.intel.com/content/www/us/en/privacy/intel-privacy-notice.html'>| Privacy</a></div>
0 commit comments