Skip to content

Commit d7b1e1d

Browse files
Add hybrid quantization for Flux model (#1060)
* Add hybrid quantization for Flux model
* Update optimum/intel/openvino/quantization.py

Co-authored-by: Nikita Savelyev <nikita.savelyev@intel.com>

---------

Co-authored-by: Nikita Savelyev <nikita.savelyev@intel.com>
1 parent f6b73d0 commit d7b1e1d

File tree

4 files changed

+10
-1
lines changed

4 files changed

+10
-1
lines changed

optimum/commands/export/openvino.py

+4
Original file line number | Diff line number | Diff line change
@@ -354,6 +354,10 @@ def run(self):
354354
from optimum.intel import OVStableDiffusion3Pipeline
355355

356356
model_cls = OVStableDiffusion3Pipeline
357+
elif class_name == "FluxPipeline":
358+
from optimum.intel import OVFluxPipeline
359+
360+
model_cls = OVFluxPipeline
357361
else:
358362
raise NotImplementedError(f"Quantization in hybrid mode isn't supported for class {class_name}.")
359363

optimum/intel/openvino/quantization.py

+3 -1
Original file line number | Diff line number | Diff line change
@@ -1150,7 +1150,9 @@ def _hybrid_quantization(
11501150

11511151
wc_config = copy.deepcopy(quantization_config)
11521152
wc_config.ignored_scope = wc_config.ignored_scope or {}
1153-
wc_config.ignored_scope["types"] = wc_config.ignored_scope.get("types", []) + ["Convolution"]
1153+
1154+
wc_ignored_types = ["Convolution"] if any(op.get_type_name() == "Convolution" for op in model.get_ops()) else []
1155+
wc_config.ignored_scope["types"] = wc_config.ignored_scope.get("types", []) + wc_ignored_types
11541156
compressed_model = _weight_only_quantization(model, wc_config, **kwargs)
11551157

11561158
ptq_ignored_scope = quantization_config.get_ignored_scope_instance()

tests/openvino/test_exporters_cli.py

+1
Original file line number | Diff line number | Diff line change
@@ -105,6 +105,7 @@ class OVCLIExportTestCase(unittest.TestCase):
105105

106106
if is_transformers_version(">=", "4.45"):
107107
SUPPORTED_SD_HYBRID_ARCHITECTURES.append(("stable-diffusion-3", 9, 65))
108+
SUPPORTED_SD_HYBRID_ARCHITECTURES.append(("flux", 7, 56))
108109

109110
TEST_4BIT_CONFIGURATIONS = [
110111
("text-generation-with-past", "opt125m", "int4 --sym --group-size 128", {"int8": 4, "int4": 72}),

tests/openvino/test_quantization.py

+2
Original file line number | Diff line number | Diff line change
@@ -43,6 +43,7 @@
4343

4444
from optimum.intel import (
4545
OVConfig,
46+
OVFluxPipeline,
4647
OVLatentConsistencyModelPipeline,
4748
OVModelForAudioClassification,
4849
OVModelForCausalLM,
@@ -491,6 +492,7 @@ class OVWeightCompressionTest(unittest.TestCase):
491492
SUPPORTED_ARCHITECTURES_WITH_HYBRID_QUANTIZATION.extend(
492493
[
493494
(OVStableDiffusion3Pipeline, "stable-diffusion-3", 9, 65),
495+
(OVFluxPipeline, "flux", 7, 56),
494496
]
495497
)
496498

0 commit comments

Comments
 (0)