# limitations under the License.

import logging
-import os
from pathlib import Path
from typing import Any, Callable, Dict, Optional, Union

from requests.exceptions import ConnectionError as RequestsConnectionError
-from transformers import AutoConfig, PreTrainedTokenizerBase
+from transformers import AutoConfig, AutoTokenizer, PreTrainedTokenizerBase

from optimum.exporters import TasksManager
-from optimum.exporters.onnx import __main__ as optimum_main
-from optimum.exporters.onnx.base import OnnxConfig, OnnxConfigWithPast
-from optimum.utils import DEFAULT_DUMMY_SHAPES
-from optimum.utils.save_utils import maybe_load_preprocessors, maybe_save_preprocessors
+from optimum.exporters.onnx.base import OnnxConfig
+from optimum.utils.save_utils import maybe_load_preprocessors

from ...intel.utils.import_utils import (
-    is_nncf_available,
    is_openvino_tokenizers_available,
    is_optimum_version,
    is_transformers_version,
)
-from .convert import export_models, export_tokenizer
-from .stateful import ensure_export_task_support_stateful
+from .convert import export_from_model, export_tokenizer


if is_optimum_version(">=", "1.16.0"):
@@ -45,8 +40,6 @@
        "whisper",
    ]

-OV_XML_FILE_NAME = "openvino_model.xml"
-_MAX_UNCOMPRESSED_SIZE = 1e9

logger = logging.getLogger(__name__)

@@ -143,70 +136,11 @@ def main_export(
    >>> main_export("gpt2", output="gpt2_onnx/")
    ```
    """
-    if (
-        compression_option is not None
-        and compression_option != "fp16"
-        and compression_option != "fp32"
-        and not is_nncf_available()
-    ):
-        raise ImportError(
-            f"Compression of the weights to {compression_option} requires nncf, please install it with `pip install nncf`"
-        )
-
-    model_kwargs = model_kwargs or {}
-
-    output = Path(output)
-    if not output.exists():
-        output.mkdir(parents=True)

    original_task = task
    task = TasksManager.map_from_synonym(task)
-
-    # Patch the modules to export of GPTQ models w/o GPU
-    do_gptq_patching = False
-    try:
-        config = AutoConfig.from_pretrained(model_name_or_path, trust_remote_code=trust_remote_code)
-        model_type = config.model_type.replace("_", "-")
-        config_dict = config.to_dict()
-        quantization_config = config_dict.get("quantization_config", None)
-        do_gptq_patching = quantization_config and quantization_config["quant_method"] == "gptq"
-    except Exception:
-        model_type = None
-        pass
-
-    if do_gptq_patching:
-        import torch
-
-        torch.set_default_dtype(torch.float32)
-        orig_cuda_check = torch.cuda.is_available
-        torch.cuda.is_available = lambda: True
-
-        from optimum.gptq import GPTQQuantizer
-
-        orig_post_init_model = GPTQQuantizer.post_init_model
-
-        def post_init_model(self, model):
-            from auto_gptq import exllama_set_max_input_length
-
-            class StoreAttr(object):
-                pass
-
-            model.quantize_config = StoreAttr()
-            model.quantize_config.desc_act = self.desc_act
-            if self.desc_act and not self.disable_exllama and self.max_input_length is not None:
-                model = exllama_set_max_input_length(model, self.max_input_length)
-            return model
-
-        GPTQQuantizer.post_init_model = post_init_model
-
    framework = TasksManager.determine_framework(model_name_or_path, subfolder=subfolder, framework=framework)
-
-    # get the shapes to be used to generate dummy inputs
-    input_shapes = {}
-    for input_name in DEFAULT_DUMMY_SHAPES.keys():
-        input_shapes[input_name] = (
-            kwargs_shapes[input_name] if input_name in kwargs_shapes else DEFAULT_DUMMY_SHAPES[input_name]
-        )
+    library_name = TasksManager.infer_library_from_model(model_name_or_path, subfolder=subfolder)

    if task == "auto":
        try:
@@ -220,9 +154,44 @@ class StoreAttr(object):
                f"The task could not be automatically inferred as this is available only for models hosted on the Hugging Face Hub. Please provide the argument --task with the relevant task from {', '.join(TasksManager.get_all_tasks())}. Detailed error: {e}"
            )

+    if convert_tokenizer and not is_openvino_tokenizers_available():
+        logger.warning(
+            "`convert_tokenizer` requires openvino-tokenizers, please install it with `pip install optimum-intel[openvino-tokenizers]`"
+        )
+        convert_tokenizer = False
+
+    custom_architecture = False
    loading_kwargs = {}
-    if is_transformers_version(">=", "4.36") and model_type in SDPA_ARCHS_ONNX_EXPORT_NOT_SUPPORTED:
-        loading_kwargs["attn_implementation"] = "eager"
+    if library_name == "transformers":
+        config = AutoConfig.from_pretrained(
+            model_name_or_path,
+            subfolder=subfolder,
+            revision=revision,
+            cache_dir=cache_dir,
+            use_auth_token=use_auth_token,
+            local_files_only=local_files_only,
+            force_download=force_download,
+            trust_remote_code=trust_remote_code,
+        )
+        model_type = config.model_type.replace("_", "-")
+
+        if model_type not in TasksManager._SUPPORTED_MODEL_TYPE:
+            custom_architecture = True
+        elif task not in TasksManager.get_supported_tasks_for_model_type(
+            model_type, exporter="onnx", library_name=library_name
+        ):
+            if original_task == "auto":
+                autodetected_message = " (auto-detected)"
+            else:
+                autodetected_message = ""
+            model_tasks = TasksManager.get_supported_tasks_for_model_type(
+                model_type, exporter="onnx", library_name=library_name
+            )
+            raise ValueError(
+                f"Asked to export a {model_type} model for the task {task}{autodetected_message}, but the Optimum OpenVINO exporter only supports the tasks {', '.join(model_tasks.keys())} for {model_type}. Please use a supported task. Please open an issue at https://github.com/huggingface/optimum/issues if you would like the task {task} to be supported in the ONNX export for {model_type}."
+            )
+        if is_transformers_version(">=", "4.36") and model_type in SDPA_ARCHS_ONNX_EXPORT_NOT_SUPPORTED:
+            loading_kwargs["attn_implementation"] = "eager"

    model = TasksManager.get_model_from_task(
        task,
@@ -239,37 +208,35 @@ class StoreAttr(object):
        **loading_kwargs,
    )

-    custom_architecture = False
-    is_stable_diffusion = "stable-diffusion" in task
-    model_type = "stable-diffusion" if is_stable_diffusion else model.config.model_type.replace("_", "-")
-
-    if not is_stable_diffusion:
-        if model_type in TasksManager._UNSUPPORTED_CLI_MODEL_TYPE:
-            raise ValueError(
-                f"{model_type} is not supported yet. Only {TasksManager._SUPPORTED_CLI_MODEL_TYPE} are supported. "
-                f"If you want to support {model_type} please propose a PR or open up an issue."
-            )
-        if model.config.model_type.replace("-", "_") not in TasksManager.get_supported_model_type_for_task(
-            task, exporter="onnx"
-        ):
-            custom_architecture = True
+    needs_pad_token_id = task == "text-classification" and getattr(model.config, "pad_token_id", None) is None

-    if custom_architecture and custom_onnx_configs is None:
-        raise ValueError(
-            "Trying to export a model with a custom architecture, but no custom onnx configuration was passed as `custom_onnx_configs`. Please refer to https://huggingface.co/docs/optimum/main/en/exporters/onnx/usage_guides/export_a_model#custom-export-of-transformers-models for an example on how to export custom models."
-        )
+    if needs_pad_token_id:
+        if pad_token_id is not None:
+            model.config.pad_token_id = pad_token_id
+        else:
+            tok = AutoTokenizer.from_pretrained(model_name_or_path)
+            pad_token_id = getattr(tok, "pad_token_id", None)
+            if pad_token_id is None:
+                raise ValueError(
+                    "Could not infer the pad token id, which is needed in this case, please provide it with the --pad_token_id argument"
+                )
+            model.config.pad_token_id = pad_token_id

-    if custom_architecture and original_task == "auto":
-        raise ValueError(
-            f'Automatic task detection is not supported with custom architectures. Please specify the `task` argument. Suggestion: task="{task}" (or task="{task}-with-past" if the model is decoder-based and supports KV cache)'
-        )
+    if "stable-diffusion" in task:
+        model_type = "stable-diffusion"
+    elif hasattr(model.config, "export_model_type"):
+        model_type = model.config.export_model_type.replace("_", "-")
+    else:
+        model_type = model.config.model_type.replace("_", "-")

    if (
        not custom_architecture
-        and not is_stable_diffusion
-        and task + "-with-past" in TasksManager.get_supported_tasks_for_model_type(model_type, "onnx")
+        and library_name != "diffusers"
+        and task + "-with-past"
+        in TasksManager.get_supported_tasks_for_model_type(model_type, exporter="onnx", library_name=library_name)
    ):
-        if original_task == "auto":  # Make -with-past the default if --task was not explicitely specified
+        # Make -with-past the default if --task was not explicitely specified
+        if original_task == "auto":
            task = task + "-with-past"
        else:
            logger.info(
@@ -286,127 +253,45 @@ class StoreAttr(object):
            possible_synonyms = ""
        logger.info(f"Automatic task detection to {task}{possible_synonyms}.")

-    task_support_stateful = ensure_export_task_support_stateful(task)
-    stateful = stateful and task_support_stateful
-
    preprocessors = maybe_load_preprocessors(
        model_name_or_path, subfolder=subfolder, trust_remote_code=trust_remote_code
    )

-    onnx_config, models_and_onnx_configs = optimum_main._get_submodels_and_onnx_configs(
+    export_from_model(
        model=model,
+        output=output,
        task=task,
-        monolith=False,
-        custom_onnx_configs=custom_onnx_configs if custom_onnx_configs is not None else {},
-        custom_architecture=custom_architecture,
+        compression_option=compression_option,
+        compression_ratio=compression_ratio,
+        stateful=stateful,
+        model_kwargs=model_kwargs,
+        custom_onnx_configs=custom_onnx_configs,
        fn_get_submodels=fn_get_submodels,
        preprocessors=preprocessors,
-        _variant="default",
-        legacy=False,
+        device=device,
+        **kwargs_shapes,
    )

-    if compression_option is None:
-        num_parameters = model.num_parameters() if not is_stable_diffusion else model.unet.num_parameters()
-        if num_parameters >= _MAX_UNCOMPRESSED_SIZE:
-            if is_nncf_available():
-                compression_option = "int8"
-                logger.info("The model weights will be quantized to int8.")
-            else:
-                logger.warning(
-                    "The model will be converted with no weights quantization. Quantization of the weights to int8 requires nncf."
-                    "please install it with `pip install nncf`"
-                )
-
-    if not is_stable_diffusion:
-        needs_pad_token_id = (
-            isinstance(onnx_config, OnnxConfigWithPast)
-            and getattr(model.config, "pad_token_id", None) is None
-            and task in ["text-classification"]
-        )
-
-        tokenizer = next(
-            (preprocessor for preprocessor in preprocessors if isinstance(preprocessor, PreTrainedTokenizerBase)), None
-        )
+    if convert_tokenizer:
+        if library_name != "diffusers":
+            tokenizer = next(
+                (preprocessor for preprocessor in preprocessors if isinstance(preprocessor, PreTrainedTokenizerBase)),
+                None,
+            )

-        if needs_pad_token_id:
-            if pad_token_id is not None:
-                model.config.pad_token_id = pad_token_id
-            elif tokenizer is not None:
+            if tokenizer is not None:
                try:
-                    model.config.pad_token_id = tokenizer.pad_token_id
-                except Exception:
-                    raise ValueError(
-                        "Could not infer the pad token id, which is needed in this case, please provide it with the --pad_token_id argument"
+                    export_tokenizer(tokenizer, output)
+                except Exception as exception:
+                    logger.warning(
+                        "Could not load tokenizer using specified model ID or path. OpenVINO tokenizer/detokenizer "
+                        f"models won't be generated. Exception: {exception}"
                    )
-        # Saving the model config and preprocessor as this is needed sometimes.
-        model.config.save_pretrained(output)
-        generation_config = getattr(model, "generation_config", None)
-        if generation_config is not None:
-            generation_config.save_pretrained(output)
-        maybe_save_preprocessors(model_name_or_path, output)
-
-        if convert_tokenizer and tokenizer is not None and is_openvino_tokenizers_available():
-            try:
-                export_tokenizer(tokenizer, output)
-            except Exception as exception:
-                logger.warning(
-                    "Could not load tokenizer using specified model ID or path. OpenVINO tokenizer/detokenizer "
-                    f"models won't be generated. Exception: {exception}"
-                )
-
-        if model.config.is_encoder_decoder and task.startswith("text-generation"):
-            raise ValueError(
-                f"model.config.is_encoder_decoder is True and task is `{task}`, which are incompatible. If the task was auto-inferred, please fill a bug report"
-                f"at https://github.com/huggingface/optimum, if --task was explicitely passed, make sure you selected the right task for the model,"
-                f" referring to `optimum.exporters.tasks.TaskManager`'s `_TASKS_TO_AUTOMODELS`."
-            )
-
-        files_subpaths = ["openvino_" + model_name + ".xml" for model_name in models_and_onnx_configs.keys()]
-    else:
-        # save the subcomponent configuration
-        for model_name in models_and_onnx_configs:
-            subcomponent = models_and_onnx_configs[model_name][0]
-            if hasattr(subcomponent, "save_config"):
-                subcomponent.save_config(output / model_name)
-            elif hasattr(subcomponent, "config") and hasattr(subcomponent.config, "save_pretrained"):
-                subcomponent.config.save_pretrained(output / model_name)
-
-        files_subpaths = [os.path.join(name_dir, OV_XML_FILE_NAME) for name_dir in models_and_onnx_configs]
-
-        # Saving the additional components needed to perform inference.
-        model.scheduler.save_pretrained(output.joinpath("scheduler"))
-
-        feature_extractor = getattr(model, "feature_extractor", None)
-        if feature_extractor is not None:
-            feature_extractor.save_pretrained(output.joinpath("feature_extractor"))
-
-        tokenizer = getattr(model, "tokenizer", None)
-        if tokenizer is not None:
-            tokenizer.save_pretrained(output.joinpath("tokenizer"))
-            if convert_tokenizer and is_openvino_tokenizers_available():
+        else:
+            tokenizer = getattr(model, "tokenizer", None)
+            if tokenizer is not None:
                export_tokenizer(tokenizer, output)

-        tokenizer_2 = getattr(model, "tokenizer_2", None)
-        if tokenizer_2 is not None:
-            tokenizer_2.save_pretrained(output.joinpath("tokenizer_2"))
-            if convert_tokenizer and is_openvino_tokenizers_available():
-                export_tokenizer(tokenizer, output, suffix="_2")
-
-        model.save_config(output)
-
-    export_models(
-        models_and_onnx_configs=models_and_onnx_configs,
-        output_dir=output,
-        output_names=files_subpaths,
-        input_shapes=input_shapes,
-        device=device,
-        compression_option=compression_option,
-        compression_ratio=compression_ratio,
-        stateful=stateful,
-        model_kwargs=model_kwargs,
-    )
-
-    # Unpatch modules after GPTQ export
-    if do_gptq_patching:
-        torch.cuda.is_available = orig_cuda_check
-        GPTQQuantizer.post_init_model = orig_post_init_model
+            tokenizer_2 = getattr(model, "tokenizer_2", None)
+            if tokenizer_2 is not None:
+                export_tokenizer(tokenizer_2, output, suffix="_2")
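For orientation, a minimal usage sketch of the refactored entry point. It is based only on what is visible in this diff (the docstring example `main_export("gpt2", output="gpt2_onnx/")` and the `task`, `convert_tokenizer`, `compression_option`, and `stateful` arguments handled above); the explicit keyword values shown here are illustrative assumptions, not part of the change itself.

```python
# main_export is defined in the file this diff modifies:
# optimum/exporters/openvino/__main__.py
from optimum.exporters.openvino.__main__ import main_export

# Export GPT-2 to OpenVINO IR. With task="auto" the exporter infers the task and,
# per the hunk above, switches to the "-with-past" variant when the model supports it.
main_export(
    "gpt2",                   # model ID on the Hub or a local path
    output="gpt2_onnx/",      # output directory (from the docstring example)
    task="auto",
    convert_tokenizer=True,   # downgraded to a warning if openvino-tokenizers is missing
    compression_option=None,  # weight compression is now delegated to export_from_model
    stateful=True,
)
```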