Commit 4afbcfd

remove quantization from main_export
committed Apr 4, 2024 · 1 parent e597e13 · commit 4afbcfd

2 files changed: +57 −53 lines changed
 

optimum/commands/export/openvino.py (+18 −3)

@@ -153,7 +153,7 @@ def parse_args(parser: "ArgumentParser"):
         return parse_args_openvino(parser)

     def run(self):
-        from ...exporters.openvino.__main__ import main_export
+        from ...exporters.openvino.__main__ import main_export, get_relevant_task, export_optimized_diffusion_model
         from ...intel.openvino.configuration import _DEFAULT_4BIT_CONFIGS, OVConfig

         if self.args.fp16:
@@ -200,17 +200,32 @@ def run(self):
                 quantization_config["dataset"] = self.args.dataset
             ov_config = OVConfig(quantization_config=quantization_config)

+        library_name = TasksManager.infer_library_from_model(self.args.model)
+        task = get_relevant_task(self.args.task, self.args.model)
+        saved_dir = self.args.output
+
+        if library_name == "diffusers" and ov_config and ov_config.quantization_config.get("dataset"):
+            import tempfile
+            from copy import deepcopy
+            saved_dir = tempfile.mkdtemp()
+            quantization_config = deepcopy(ov_config.quantization_config)
+            ov_config.quantization_config = {}
+
         # TODO : add input shapes
         main_export(
             model_name_or_path=self.args.model,
-            output=self.args.output,
-            task=self.args.task,
+            output=saved_dir,
+            task=task,
             framework=self.args.framework,
             cache_dir=self.args.cache_dir,
             trust_remote_code=self.args.trust_remote_code,
             pad_token_id=self.args.pad_token_id,
             ov_config=ov_config,
             stateful=not self.args.disable_stateful,
             convert_tokenizer=self.args.convert_tokenizer,
+            library_name=library_name,
             # **input_shapes,
         )
+
+        if saved_dir != self.args.output:
+            export_optimized_diffusion_model(saved_dir, self.args.output, task, quantization_config)
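
For context, here is a minimal sketch of the two-stage flow that the reworked run() now performs when a diffusers model is exported with a calibration dataset. The model id, output path, and quantization keys below are illustrative assumptions, not values from this commit: the pipeline is first exported without quantization into a temporary directory, then export_optimized_diffusion_model applies hybrid quantization into the requested output directory.

    # Sketch of the flow run() now implements for diffusers models with a calibration
    # dataset; model id, paths, and quantization keys are illustrative assumptions.
    import tempfile

    from optimum.exporters.openvino.__main__ import (
        export_optimized_diffusion_model,
        get_relevant_task,
        main_export,
    )

    model_id = "runwayml/stable-diffusion-v1-5"  # illustrative model id
    final_output = "sd15_ov_int8"  # illustrative output directory
    quantization_config = {"bits": 8, "dataset": "conceptual_captions"}  # illustrative keys

    task = get_relevant_task("auto", model_id)  # resolves the concrete task, e.g. "stable-diffusion"
    intermediate_dir = tempfile.mkdtemp()

    # Stage 1: plain OpenVINO export; quantization is no longer applied inside main_export.
    main_export(model_name_or_path=model_id, output=intermediate_dir, task=task, library_name="diffusers")

    # Stage 2: hybrid quantization of the exported pipeline into the final output directory.
    export_optimized_diffusion_model(intermediate_dir, final_output, task, quantization_config)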

optimum/exporters/openvino/__main__.py (+39 −50)

@@ -77,7 +77,7 @@ def main_export(
         model_name_or_path (`str`):
             Model ID on huggingface.co or path on disk to the model repository to export.
         output (`Union[str, Path]`):
-            Path indicating the directory where to store the generated ONNX model.
+            Path indicating the directory where to store the generated OpenVINO model.

         > Optional parameters

@@ -161,24 +161,12 @@ def main_export(
         ov_config = OVConfig(quantization_config=q_config)

     original_task = task
-    task = TasksManager.map_from_synonym(task)
+    task = get_relevant_task(task, model_name_or_path)
     framework = TasksManager.determine_framework(model_name_or_path, subfolder=subfolder, framework=framework)
     library_name = TasksManager.infer_library_from_model(
         model_name_or_path, subfolder=subfolder, library_name=library_name
     )

-    if task == "auto":
-        try:
-            task = TasksManager.infer_task_from_model(model_name_or_path)
-        except KeyError as e:
-            raise KeyError(
-                f"The task could not be automatically inferred. Please provide the argument --task with the relevant task from {', '.join(TasksManager.get_all_tasks())}. Detailed error: {e}"
-            )
-        except RequestsConnectionError as e:
-            raise RequestsConnectionError(
-                f"The task could not be automatically inferred as this is available only for models hosted on the Hugging Face Hub. Please provide the argument --task with the relevant task from {', '.join(TasksManager.get_all_tasks())}. Detailed error: {e}"
-            )
-
     if convert_tokenizer and not is_openvino_tokenizers_available():
         logger.warning(
             "`convert_tokenizer` requires openvino-tokenizers, please install it with `pip install optimum-intel[openvino-tokenizers]`"
@@ -263,42 +251,6 @@ class StoreAttr(object):
         **loading_kwargs,
     )

-    # Apply quantization in hybrid mode to Stable Diffusion before export
-    if (
-        library_name == "diffusers"
-        and ov_config
-        and ov_config.quantization_config
-        and ov_config.quantization_config.get("dataset", None)
-    ):
-        class_name = model.__class__.__name__
-        if "LatentConsistencyModelPipeline" in class_name:
-            from optimum.intel import OVLatentConsistencyModelPipeline
-
-            model_cls = OVLatentConsistencyModelPipeline
-        elif "StableDiffusionXLPipeline" in class_name:
-            from optimum.intel import OVStableDiffusionXLPipeline
-
-            model_cls = OVStableDiffusionXLPipeline
-        elif "StableDiffusionPipeline" in class_name:
-            from optimum.intel import OVStableDiffusionPipeline
-
-            model_cls = OVStableDiffusionPipeline
-        else:
-            raise NotImplementedError(f"{class_name} doesn't support quantization in hybrid mode.")
-
-        model = model_cls.from_pretrained(
-            model_id=model_name_or_path,
-            export=True,
-            quantization_config=ov_config.quantization_config,
-            cache_dir=cache_dir,
-            trust_remote_code=trust_remote_code,
-            revision=revision,
-            force_download=force_download,
-            use_auth_token=use_auth_token,
-        )
-        model.save_pretrained(output)
-        return
-
     needs_pad_token_id = task == "text-classification" and getattr(model.config, "pad_token_id", None) is None

     if needs_pad_token_id:
@@ -391,3 +343,40 @@ class StoreAttr(object):
     if do_gptq_patching:
         torch.cuda.is_available = orig_cuda_check
         GPTQQuantizer.post_init_model = orig_post_init_model
+
+
+def get_relevant_task(task, model_name_or_path):
+    relevant_task = TasksManager.map_from_synonym(task)
+    if relevant_task == "auto":
+        try:
+            relevant_task = TasksManager.infer_task_from_model(model_name_or_path)
+        except KeyError as e:
+            raise KeyError(
+                f"The task could not be automatically inferred. Please provide the argument --task with the relevant task from {', '.join(TasksManager.get_all_tasks())}. Detailed error: {e}"
+            )
+        except RequestsConnectionError as e:
+            raise RequestsConnectionError(
+                f"The task could not be automatically inferred as this is available only for models hosted on the Hugging Face Hub. Please provide the argument --task with the relevant task from {', '.join(TasksManager.get_all_tasks())}. Detailed error: {e}"
+            )
+    return relevant_task
+
+
+def export_optimized_diffusion_model(model_name_or_path, output, task, quantization_config):
+    task = get_relevant_task(task, model_name_or_path)
+    if task == "latent-consistency":
+        from optimum.intel import OVLatentConsistencyModelPipeline
+
+        model_cls = OVLatentConsistencyModelPipeline
+    elif task == "stable-diffusion-xl":
+        from optimum.intel import OVStableDiffusionXLPipeline
+
+        model_cls = OVStableDiffusionXLPipeline
+    elif task == "stable-diffusion":
+        from optimum.intel import OVStableDiffusionPipeline
+
+        model_cls = OVStableDiffusionPipeline
+    else:
+        raise NotImplementedError(f"Quantization in hybrid mode isn't supported for {task}.")
+
+    model = model_cls.from_pretrained(model_id=model_name_or_path, quantization_config=quantization_config)
+    model.save_pretrained(output)
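
Once export_optimized_diffusion_model has written the quantized pipeline, it can be loaded back with the matching OV pipeline class. This is standard optimum-intel usage rather than part of this commit; the directory and prompt below are illustrative.

    # Load the quantized pipeline written by export_optimized_diffusion_model
    # (directory and prompt are illustrative); standard optimum-intel usage.
    from optimum.intel import OVStableDiffusionPipeline

    pipe = OVStableDiffusionPipeline.from_pretrained("sd15_ov_int8")
    image = pipe("a photo of an astronaut riding a horse").images[0]
    image.save("astronaut.png")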
