Commit 794b276

migrate export to 2x and 3x from deprecated (#1845)
Signed-off-by: xin3he <xin3.he@intel.com>
1 parent 0eced14

15 files changed: +657 -8 lines

neural_compressor/experimental/export/qlinear2qdq.py (+2)

@@ -14,6 +14,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+# pragma: no cover
 """Helper functions to export onnx model from QLinearops to QDQ."""
 from deprecated import deprecated
 

neural_compressor/experimental/export/tf2onnx.py (+2)

@@ -14,6 +14,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+# pragma: no cover
 """Helper functions to export model from TensorFlow to ONNX."""
 
 import re

neural_compressor/experimental/export/torch2onnx.py (+2)

@@ -14,6 +14,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+# pragma: no cover
 """Helper functions to export model from PyTorch/TensorFlow to ONNX."""
 
 import os

neural_compressor/model/onnx_model.py (+1 -1)

@@ -827,7 +827,7 @@ def find_ffn_matmul(self, attention_index, attention_matmul_list, block_len):
     def export(self, save_path, conf):
         """Export Qlinear to QDQ model."""
         from neural_compressor.config import ONNXQlinear2QDQConfig
-        from neural_compressor.experimental.export import onnx_qlinear_to_qdq
+        from neural_compressor.utils.export import onnx_qlinear_to_qdq
 
         if isinstance(conf, ONNXQlinear2QDQConfig):
             add_nodes, remove_nodes, inits = onnx_qlinear_to_qdq(self._model, self._input_name_to_nodes)

neural_compressor/model/tensorflow_model.py (+1 -1)

@@ -1009,7 +1009,7 @@ def export(self, save_path, conf):
                 + "we reset opset_version={} here".format(conf.opset_version)
             )
 
-        from neural_compressor.experimental.export import tf_to_fp32_onnx, tf_to_int8_onnx
+        from neural_compressor.utils.export import tf_to_fp32_onnx, tf_to_int8_onnx
 
         inputs_as_nchw = conf.kwargs.get("inputs_as_nchw", None)
         if conf.dtype == "int8":

neural_compressor/model/torch_model.py (+1 -1)

@@ -418,7 +418,7 @@ def export(
                 "but the torch version found is {}".format(Version("1.12.0"), version)
             )
 
-        from neural_compressor.experimental.export import torch_to_fp32_onnx, torch_to_int8_onnx
+        from neural_compressor.utils.export import torch_to_fp32_onnx, torch_to_int8_onnx
 
         if conf.dtype == "int8":
             torch_to_int8_onnx(

neural_compressor/onnxrt/utils/onnx_model.py (+1 -1)

@@ -648,7 +648,7 @@ def find_ffn_matmul(self, attention_index, attention_matmul_list, block_len):
     def export(self, save_path, conf):
         """Export Qlinear to QDQ model."""
         from neural_compressor.config import ONNXQlinear2QDQConfig
-        from neural_compressor.experimental.export import onnx_qlinear_to_qdq
+        from neural_compressor.utils.export import onnx_qlinear_to_qdq
 
         if isinstance(conf, ONNXQlinear2QDQConfig):
             if len(self._input_name_to_nodes) == 0:

neural_compressor/torch/export/__init__.py (+1 -1)

@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from neural_compressor.torch.export._export import export_model_for_pt2e_quant, export
+from neural_compressor.torch.export.pt2e_export import export_model_for_pt2e_quant, export

neural_compressor/torch/utils/utility.py (+1 -2)

@@ -18,8 +18,7 @@
 import torch
 from typing_extensions import TypeAlias
 
-from neural_compressor.common import logger
-from neural_compressor.common.utils import Mode
+from neural_compressor.common.utils import LazyImport, Mode, logger
 
 OP_NAME_AND_TYPE_TUPLE_TYPE: TypeAlias = Tuple[str, Union[torch.nn.Module, Callable]]
 
neural_compressor/utils/export/__init__.py (+21, new file)

@@ -0,0 +1,21 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2021 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Intel Neural Compressor Export."""
+
+from .torch2onnx import torch_to_fp32_onnx, torch_to_int8_onnx
+from .qlinear2qdq import onnx_qlinear_to_qdq
+from .tf2onnx import tf_to_fp32_onnx, tf_to_int8_onnx
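This new `__init__.py` makes `neural_compressor.utils.export` a drop-in replacement for the deprecated `neural_compressor.experimental.export` namespace: the five exported helpers keep their names and signatures. A minimal sketch of the migration from a caller's perspective (the before/after comments are illustrative, not part of the commit):

# Before: the deprecated 1.x-era namespace (its modules wrap everything
# with the `deprecated` package, so imports emit warnings):
# from neural_compressor.experimental.export import onnx_qlinear_to_qdq

# After this commit, the same names resolve from the 2.x location:
from neural_compressor.utils.export import (
    onnx_qlinear_to_qdq,
    tf_to_fp32_onnx,
    tf_to_int8_onnx,
    torch_to_fp32_onnx,
    torch_to_int8_onnx,
)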
neural_compressor/utils/export/qlinear2qdq.py (+82, new file)

@@ -0,0 +1,82 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2021 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Helper functions to export onnx model from QLinear ops to QDQ."""
+from neural_compressor.adaptor.ox_utils.util import find_by_name
+from neural_compressor.utils import logger
+from neural_compressor.utils.utility import LazyImport
+
+numpy_helper = LazyImport("onnx.numpy_helper")
+
+
+def check_model(model):
+    """Check optype for input model.
+
+    Args:
+        model (ModelProto): onnx model.
+    """
+    has_integerop = False
+    has_qlinearop = False
+    for node in model.graph.node:
+        if node.op_type.endswith("Integer"):
+            has_integerop = True
+        elif node.op_type.startswith("QLinear"):
+            has_qlinearop = True
+        elif node.op_type in ["QAttention", "QGemm", "QEmbedLayerNormalization"]:
+            has_qlinearop = True
+        elif node.op_type in ["Gather"]:
+            input_data = find_by_name(node.input[0], model.graph.initializer)
+            if input_data is not None and numpy_helper.to_array(input_data).dtype in ["int8", "uint8"]:
+                has_qlinearop = True
+    if has_integerop:
+        logger.info("This model has Integer ops, these ops will be skipped.")
+    if has_qlinearop:
+        return True
+    else:
+        logger.info("This model has no QLinear ops, save the original model.")
+        return False
+
+
+def onnx_qlinear_to_qdq(
+    model,
+    input_name_to_nodes,
+):
+    """Export ONNX QLinearops model into QDQ model.
+
+    Args:
+        model (ModelProto): int8 onnx model.
+        input_name_to_nodes (dict): the mapping of tensor name and its destination nodes.
+    """
+    from neural_compressor.adaptor.ox_utils.operators import QOPERATORS
+
+    add_nodes = []
+    remove_nodes = []
+    inits = []
+    if check_model(model):
+        for node in model.graph.node:
+            if node.op_type in QOPERATORS:
+                if node.output[0] not in input_name_to_nodes:
+                    continue
+                children = []
+                for out in node.output:
+                    children.extend(input_name_to_nodes[node.output[0]])
+                converter = QOPERATORS[node.op_type](node, children, model.graph.initializer)
+                done, add_node, init = converter.convert()
+                if done:
+                    add_nodes.extend(add_node)
+                    inits.extend(init)
+                    remove_nodes.append(node)
+    return add_nodes, remove_nodes, inits
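For context, here is a sketch of how `onnx_qlinear_to_qdq` is driven, mirroring the call in `ONNXModel.export` above. The model path and the graph-rebuild step at the end are illustrative assumptions, since `ONNXModel` performs that bookkeeping internally. (Note also that the inner `for out in node.output` loop extends `children` from `input_name_to_nodes[node.output[0]]` rather than `input_name_to_nodes[out]`; this mirrors the pre-migration source verbatim.)

import onnx

from neural_compressor.utils.export import onnx_qlinear_to_qdq

# Hypothetical int8 model in QLinear (QOperator) format.
model = onnx.load("model_qlinear_int8.onnx")

# Build the tensor-name -> consumer-nodes map the helper expects,
# equivalent to ONNXModel._input_name_to_nodes.
input_name_to_nodes = {}
for node in model.graph.node:
    for tensor_name in node.input:
        input_name_to_nodes.setdefault(tensor_name, []).append(node)

add_nodes, remove_nodes, inits = onnx_qlinear_to_qdq(model, input_name_to_nodes)

# Apply the returned edits: drop the converted QLinear nodes, add their
# QDQ replacements, and register any new initializers (scales/zero points).
for node in remove_nodes:
    model.graph.node.remove(node)
model.graph.node.extend(add_nodes)
model.graph.initializer.extend(inits)
onnx.save(model, "model_qdq_int8.onnx")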
neural_compressor/utils/export/tf2onnx.py (+118, new file)

@@ -0,0 +1,118 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2022 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Helper functions to export model from TensorFlow to ONNX."""
+
+import re
+
+from neural_compressor.utils import logger
+from neural_compressor.utils.utility import LazyImport
+
+t2o = LazyImport("tf2onnx")
+
+
+def _split_nodename_and_shape(name):
+    """Split input name with shape into name and shape."""
+    # pattern for a node name
+    inputs = []
+    shapes = {}
+    # input takes in most cases the format name:0, where 0 is the output number
+    # in some cases placeholders don't have a rank which onnx can't handle so we let users override the shape
+    # by appending the same, ie : [1,28,28,3]
+    name_pattern = r"(?:([\w\d/\-\._:]+)(\[[\-\d,]+\])?),?"
+    splits = re.split(name_pattern, name)
+    for i in range(1, len(splits), 3):
+        inputs.append(splits[i] + ":0")
+        if splits[i + 1] is not None:
+            shape = [int(n) for n in splits[i + 1][1:-1].split(",")]
+            shape = [n if n >= 0 else None for n in shape]
+            shapes[splits[i] + ":0"] = shape
+    if not shapes:
+        shapes = None
+    return inputs, shapes
+
+
+def tf_to_fp32_onnx(graph_def, save_path, opset_version=14, input_names=None, output_names=None, inputs_as_nchw=None):
+    """Export FP32 Tensorflow model into FP32 ONNX model using tf2onnx tool.
+
+    Args:
+        graph_def (graph_def to convert): fp32 graph_def.
+        save_path (str): save path of ONNX model.
+        opset_version (int, optional): opset version. Defaults to 14.
+        input_names (list, optional): input names. Defaults to None.
+        output_names (list, optional): output names. Defaults to None.
+        inputs_as_nchw (list, optional): transpose the input. Defaults to None.
+    """
+    shape_override = None
+    if isinstance(input_names, str):
+        input_names, shape_override = _split_nodename_and_shape(input_names)
+    else:
+        input_names[:] = [o + ":0" for o in input_names]
+    output_names[:] = [o + ":0" for o in output_names]
+    t2o.convert.from_graph_def(
+        graph_def=graph_def,
+        input_names=input_names,
+        output_names=output_names,
+        inputs_as_nchw=inputs_as_nchw,
+        shape_override=shape_override,
+        opset=opset_version,
+        output_path=save_path,
+    )
+    info = "The FP32 ONNX Model exported to path: {0}".format(save_path)
+    logger.info("*" * len(info))
+    logger.info(info)
+    logger.info("*" * len(info))
+
+
+def tf_to_int8_onnx(
+    int8_model, save_path, opset_version: int = 14, input_names=None, output_names=None, inputs_as_nchw=None
+):
+    """Export INT8 Tensorflow model into INT8 ONNX model.
+
+    Args:
+        int8_model (tensorflow ITEX QDQ model): int8 model.
+        save_path (str): save path of ONNX model.
+        opset_version (int, optional): opset version. Defaults to 14.
+        input_names (list, optional): input names. Defaults to None.
+        output_names (list, optional): output names. Defaults to None.
+        inputs_as_nchw (list, optional): transpose the input. Defaults to None.
+    """
+    shape_override = None
+    if isinstance(input_names, str):
+        input_names, shape_override = _split_nodename_and_shape(input_names)
+    else:
+        input_names[:] = [o + ":0" for o in input_names]
+    output_names[:] = [o + ":0" for o in output_names]
+    onnx_convert_graph = "./converted_graph.onnx"
+    from neural_compressor.adaptor.tf_utils.tf2onnx_converter import TensorflowQDQToOnnxQDQConverter
+
+    TensorflowQDQToOnnxQDQConverter(
+        int8_model, input_names, output_names, shape_override, inputs_as_nchw, opset_version
+    ).convert(onnx_convert_graph)
+
+    import onnxruntime as ort
+
+    sess_options = ort.SessionOptions()
+    sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
+    sess_options.optimized_model_filepath = save_path
+    import onnx
+
+    model = onnx.load(onnx_convert_graph)
+    ort.InferenceSession(model.SerializeToString(), sess_options)
+    info = "The INT8 ONNX Model is exported to path: {0}".format(save_path)
+    logger.info("*" * len(info))
+    logger.info(info)
+    logger.info("*" * len(info))
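A minimal driver for the FP32 path, assuming a frozen GraphDef on disk; the file and node names are hypothetical. Input and output names are passed without the `:0` suffix because the exporter appends it, and `input_names` may alternatively be a single string with inline shapes (e.g. `"input[1,224,224,3]"`), which `_split_nodename_and_shape` parses into a `shape_override` for placeholders lacking a static rank.

import tensorflow as tf

from neural_compressor.utils.export import tf_to_fp32_onnx

# Hypothetical frozen FP32 graph.
graph_def = tf.compat.v1.GraphDef()
with open("frozen_model.pb", "rb") as f:
    graph_def.ParseFromString(f.read())

tf_to_fp32_onnx(
    graph_def,
    "model_fp32.onnx",
    opset_version=14,
    input_names=["input"],     # rewritten in place to ["input:0"]
    output_names=["softmax"],  # rewritten in place to ["softmax:0"]
)

For the INT8 path, note the design choice at the end of `tf_to_int8_onnx`: it first writes an intermediate `./converted_graph.onnx`, then constructs an onnxruntime `InferenceSession` with `optimized_model_filepath` set, using the session purely as an offline graph-optimization pass that emits the final model to `save_path`.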
