@@ -77,7 +77,7 @@ def main_export(
        model_name_or_path (`str`):
            Model ID on huggingface.co or path on disk to the model repository to export.
        output (`Union[str, Path]`):
-            Path indicating the directory where to store the generated ONNX model.
+            Path indicating the directory where to store the generated OpenVINO model.

        > Optional parameters

@@ -161,24 +161,12 @@ def main_export(
        ov_config = OVConfig(quantization_config=q_config)

    original_task = task
-    task = TasksManager.map_from_synonym(task)
+    task = get_relevant_task(task, model_name_or_path)
    framework = TasksManager.determine_framework(model_name_or_path, subfolder=subfolder, framework=framework)
    library_name = TasksManager.infer_library_from_model(
        model_name_or_path, subfolder=subfolder, library_name=library_name
    )

-    if task == "auto":
-        try:
-            task = TasksManager.infer_task_from_model(model_name_or_path)
-        except KeyError as e:
-            raise KeyError(
-                f"The task could not be automatically inferred. Please provide the argument --task with the relevant task from {', '.join(TasksManager.get_all_tasks())}. Detailed error: {e}"
-            )
-        except RequestsConnectionError as e:
-            raise RequestsConnectionError(
-                f"The task could not be automatically inferred as this is available only for models hosted on the Hugging Face Hub. Please provide the argument --task with the relevant task from {', '.join(TasksManager.get_all_tasks())}. Detailed error: {e}"
-            )
-
    if convert_tokenizer and not is_openvino_tokenizers_available():
        logger.warning(
            "`convert_tokenizer` requires openvino-tokenizers, please install it with `pip install optimum-intel[openvino-tokenizers]`"
@@ -263,42 +251,6 @@ class StoreAttr(object):
        **loading_kwargs,
    )

-    # Apply quantization in hybrid mode to Stable Diffusion before export
-    if (
-        library_name == "diffusers"
-        and ov_config
-        and ov_config.quantization_config
-        and ov_config.quantization_config.get("dataset", None)
-    ):
-        class_name = model.__class__.__name__
-        if "LatentConsistencyModelPipeline" in class_name:
-            from optimum.intel import OVLatentConsistencyModelPipeline
-
-            model_cls = OVLatentConsistencyModelPipeline
-        elif "StableDiffusionXLPipeline" in class_name:
-            from optimum.intel import OVStableDiffusionXLPipeline
-
-            model_cls = OVStableDiffusionXLPipeline
-        elif "StableDiffusionPipeline" in class_name:
-            from optimum.intel import OVStableDiffusionPipeline
-
-            model_cls = OVStableDiffusionPipeline
-        else:
-            raise NotImplementedError(f"{class_name} doesn't support quantization in hybrid mode.")
-
-        model = model_cls.from_pretrained(
-            model_id=model_name_or_path,
-            export=True,
-            quantization_config=ov_config.quantization_config,
-            cache_dir=cache_dir,
-            trust_remote_code=trust_remote_code,
-            revision=revision,
-            force_download=force_download,
-            use_auth_token=use_auth_token,
-        )
-        model.save_pretrained(output)
-        return
-
    needs_pad_token_id = task == "text-classification" and getattr(model.config, "pad_token_id", None) is None

    if needs_pad_token_id:
@@ -391,3 +343,40 @@ class StoreAttr(object):
    if do_gptq_patching:
        torch.cuda.is_available = orig_cuda_check
        GPTQQuantizer.post_init_model = orig_post_init_model
+
+
+def get_relevant_task(task, model_name_or_path):
+    relevant_task = TasksManager.map_from_synonym(task)
+    if relevant_task == "auto":
+        try:
+            relevant_task = TasksManager.infer_task_from_model(model_name_or_path)
+        except KeyError as e:
+            raise KeyError(
+                f"The task could not be automatically inferred. Please provide the argument --task with the relevant task from {', '.join(TasksManager.get_all_tasks())}. Detailed error: {e}"
+            )
+        except RequestsConnectionError as e:
+            raise RequestsConnectionError(
+                f"The task could not be automatically inferred as this is available only for models hosted on the Hugging Face Hub. Please provide the argument --task with the relevant task from {', '.join(TasksManager.get_all_tasks())}. Detailed error: {e}"
+            )
+    return relevant_task
+
+
+def export_optimized_diffusion_model(model_name_or_path, output, task, quantization_config):
+    task = get_relevant_task(task, model_name_or_path)
+    if task == "latent-consistency":
+        from optimum.intel import OVLatentConsistencyModelPipeline
+
+        model_cls = OVLatentConsistencyModelPipeline
+    elif task == "stable-diffusion-xl":
+        from optimum.intel import OVStableDiffusionXLPipeline
+
+        model_cls = OVStableDiffusionXLPipeline
+    elif task == "stable-diffusion":
+        from optimum.intel import OVStableDiffusionPipeline
+
+        model_cls = OVStableDiffusionPipeline
+    else:
+        raise NotImplementedError(f"Quantization in hybrid mode isn't supported for {task}.")
+
+    model = model_cls.from_pretrained(model_id=model_name_or_path, quantization_config=quantization_config)
+    model.save_pretrained(output)
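For reference, a minimal usage sketch of the two helpers this diff introduces (not part of the change itself; the import path, model ID, and quantization settings below are illustrative assumptions):

    # Hypothetical example -- the module path and config values are assumptions,
    # not taken from this diff.
    from optimum.exporters.openvino.__main__ import (
        export_optimized_diffusion_model,
        get_relevant_task,
    )

    model_id = "runwayml/stable-diffusion-v1-5"

    # Resolve "auto" (or a task synonym) to a concrete task name such as "stable-diffusion".
    task = get_relevant_task("auto", model_id)

    # Build the matching OV* pipeline with the given quantization config and save it to the output directory.
    export_optimized_diffusion_model(
        model_id,
        output="sd15-openvino-int8",
        task=task,
        quantization_config={"bits": 8, "dataset": "conceptual_captions"},
    )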