Skip to content

Commit 58aec63

Browse files
authored
Add support for FluxFill inpainting pipeline (#1095)
* add support for FluxFill inpainting pipeline * add tests * register dummy model class * enable CLI export tests
1 parent 124e4ca commit 58aec63

10 files changed

+106
-28
lines changed

optimum/exporters/openvino/convert.py

+26-13
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
_torch_version,
4444
_transformers_version,
4545
compare_versions,
46+
is_diffusers_version,
4647
is_openvino_tokenizers_version,
4748
is_tokenizers_version,
4849
is_transformers_version,
@@ -988,24 +989,36 @@ def _get_submodels_and_export_configs(
988989
def get_diffusion_models_for_export_ext(
989990
pipeline: "DiffusionPipeline", int_dtype: str = "int64", float_dtype: str = "fp32", exporter: str = "openvino"
990991
):
991-
try:
992-
from diffusers import (
993-
StableDiffusion3Img2ImgPipeline,
994-
StableDiffusion3InpaintPipeline,
995-
StableDiffusion3Pipeline,
996-
)
992+
if is_diffusers_version(">=", "0.29.0"):
993+
from diffusers import StableDiffusion3Img2ImgPipeline, StableDiffusion3Pipeline
997994

998-
is_sd3 = isinstance(
999-
pipeline, (StableDiffusion3Pipeline, StableDiffusion3InpaintPipeline, StableDiffusion3Img2ImgPipeline)
1000-
)
1001-
except ImportError:
995+
sd3_pipes = [StableDiffusion3Pipeline, StableDiffusion3Img2ImgPipeline]
996+
if is_diffusers_version(">=", "0.30.0"):
997+
from diffusers import StableDiffusion3InpaintPipeline
998+
999+
sd3_pipes.append(StableDiffusion3InpaintPipeline)
1000+
1001+
is_sd3 = isinstance(pipeline, tuple(sd3_pipes))
1002+
else:
10021003
is_sd3 = False
10031004

1004-
try:
1005+
if is_diffusers_version(">=", "0.30.0"):
10051006
from diffusers import FluxPipeline
10061007

1007-
is_flux = isinstance(pipeline, FluxPipeline)
1008-
except ImportError:
1008+
flux_pipes = [FluxPipeline]
1009+
1010+
if is_diffusers_version(">=", "0.31.0"):
1011+
from diffusers import FluxImg2ImgPipeline, FluxInpaintPipeline
1012+
1013+
flux_pipes.extend([FluxPipeline, FluxImg2ImgPipeline, FluxInpaintPipeline])
1014+
1015+
if is_diffusers_version(">=", "0.32.0"):
1016+
from diffusers import FluxFillPipeline
1017+
1018+
flux_pipes.append(FluxFillPipeline)
1019+
1020+
is_flux = isinstance(pipeline, tuple(flux_pipes))
1021+
else:
10091022
is_flux = False
10101023

10111024
if not is_sd3 and not is_flux:

optimum/exporters/openvino/model_configs.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,12 @@
5656
)
5757
from optimum.utils.normalized_config import NormalizedConfig, NormalizedTextConfig, NormalizedVisionConfig
5858

59-
from ...intel.utils.import_utils import _transformers_version, is_diffusers_version, is_transformers_version
59+
from ...intel.utils.import_utils import (
60+
_transformers_version,
61+
is_diffusers_available,
62+
is_diffusers_version,
63+
is_transformers_version,
64+
)
6065
from .model_patcher import (
6166
AquilaModelPatcher,
6267
ArcticModelPatcher,
@@ -119,6 +124,10 @@ def init_model_configs():
119124
"image-text-to-text"
120125
] = TasksManager._TRANSFORMERS_TASKS_TO_MODEL_LOADERS["text-generation"]
121126

127+
if is_diffusers_available() and "fill" not in TasksManager._DIFFUSERS_TASKS_TO_MODEL_LOADERS:
128+
TasksManager._DIFFUSERS_TASKS_TO_MODEL_LOADERS["fill"] = "FluxFillPipeline"
129+
TasksManager._DIFFUSERS_TASKS_TO_MODEL_MAPPINGS["fill"] = {"flux": "FluxFillPipeline"}
130+
122131
supported_model_types = [
123132
"_SUPPORTED_MODEL_TYPE",
124133
"_DIFFUSERS_SUPPORTED_MODEL_TYPE",

optimum/intel/__init__.py

+2
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@
126126
"OVFluxPipeline",
127127
"OVFluxImg2ImgPipeline",
128128
"OVFluxInpaintPipeline",
129+
"OVFluxFillPipeline",
129130
"OVPipelineForImage2Image",
130131
"OVPipelineForText2Image",
131132
"OVPipelineForInpainting",
@@ -148,6 +149,7 @@
148149
"OVFluxPipeline",
149150
"OVFluxImg2ImgPipeline",
150151
"OVFluxInpaintPipeline",
152+
"OVFluxFillPipeline",
151153
"OVPipelineForImage2Image",
152154
"OVPipelineForText2Image",
153155
"OVPipelineForInpainting",

optimum/intel/openvino/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@
8282
if is_diffusers_available():
8383
from .modeling_diffusion import (
8484
OVDiffusionPipeline,
85+
OVFluxFillPipeline,
8586
OVFluxImg2ImgPipeline,
8687
OVFluxInpaintPipeline,
8788
OVFluxPipeline,

optimum/intel/openvino/modeling_diffusion.py

+17-2
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,11 @@
101101
FluxImg2ImgPipeline = object
102102
FluxInpaintPipeline = object
103103

104+
if is_diffusers_version(">=", "0.32.0"):
105+
from diffusers import FluxFillPipeline
106+
else:
107+
FluxFillPipeline = object
108+
104109

105110
DIFFUSION_MODEL_TRANSFORMER_SUBFOLDER = "transformer"
106111
DIFFUSION_MODEL_TEXT_ENCODER_3_SUBFOLDER = "text_encoder_3"
@@ -1458,17 +1463,23 @@ class OVFluxPipeline(OVDiffusionPipeline, OVTextualInversionLoaderMixin, FluxPip
14581463

14591464

14601465
class OVFluxImg2ImgPipeline(OVDiffusionPipeline, OVTextualInversionLoaderMixin, FluxImg2ImgPipeline):
1461-
main_input_name = "prompt"
1466+
main_input_name = "image"
14621467
export_feature = "image-to-image"
14631468
auto_model_class = FluxImg2ImgPipeline
14641469

14651470

14661471
class OVFluxInpaintPipeline(OVDiffusionPipeline, OVTextualInversionLoaderMixin, FluxInpaintPipeline):
1467-
main_input_name = "prompt"
1472+
main_input_name = "image"
14681473
export_feature = "inpainting"
14691474
auto_model_class = FluxInpaintPipeline
14701475

14711476

1477+
class OVFluxFillPipeline(OVDiffusionPipeline, OVTextualInversionLoaderMixin, FluxFillPipeline):
1478+
main_input_name = "image"
1479+
export_feature = "inpainting"
1480+
auto_model_class = FluxFillPipeline
1481+
1482+
14721483
SUPPORTED_OV_PIPELINES = [
14731484
OVStableDiffusionPipeline,
14741485
OVStableDiffusionImg2ImgPipeline,
@@ -1537,6 +1548,10 @@ def _get_ov_class(pipeline_class_name: str, throw_error_if_not_exist: bool = Tru
15371548
OV_INPAINT_PIPELINES_MAPPING["flux"] = OVFluxInpaintPipeline
15381549
OV_IMAGE2IMAGE_PIPELINES_MAPPING["flux"] = OVFluxImg2ImgPipeline
15391550

1551+
if is_diffusers_version(">=", "0.32.0"):
1552+
OV_INPAINT_PIPELINES_MAPPING["flux-fill"] = OVFluxFillPipeline
1553+
SUPPORTED_OV_PIPELINES.append(OVFluxFillPipeline)
1554+
15401555
SUPPORTED_OV_PIPELINES_MAPPINGS = [
15411556
OV_TEXT2IMAGE_PIPELINES_MAPPING,
15421557
OV_IMAGE2IMAGE_PIPELINES_MAPPING,

optimum/intel/openvino/utils.py

+1
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@
126126
"stable-diffusion-xl": "OVStableDiffusionXLPipeline",
127127
"stable-diffusion-3": "OVStableDiffusion3Pipeline",
128128
"flux": "OVFluxPipeline",
129+
"flux-fill": "OVFluxFillPipeline",
129130
"pix2struct": "OVModelForPix2Struct",
130131
"latent-consistency": "OVLatentConsistencyModelPipeline",
131132
"open_clip_text": "OVModelOpenCLIPText",

optimum/intel/utils/dummy_openvino_and_diffusers_objects.py

+11
Original file line numberDiff line numberDiff line change
@@ -211,3 +211,14 @@ def __init__(self, *args, **kwargs):
211211
@classmethod
212212
def from_pretrained(cls, *args, **kwargs):
213213
requires_backends(cls, ["openvino", "diffusers"])
214+
215+
216+
class OVFluxFillPipeline(metaclass=DummyObject):
217+
_backends = ["openvino", "diffusers"]
218+
219+
def __init__(self, *args, **kwargs):
220+
requires_backends(self, ["openvino", "diffusers"])
221+
222+
@classmethod
223+
def from_pretrained(cls, *args, **kwargs):
224+
requires_backends(cls, ["openvino", "diffusers"])

tests/openvino/test_diffusion.py

+31-11
Original file line numberDiff line numberDiff line change
@@ -667,13 +667,14 @@ class OVPipelineForInpaintingTest(unittest.TestCase):
667667
if is_transformers_version(">=", "4.40.0"):
668668
SUPPORTED_ARCHITECTURES.append("stable-diffusion-3")
669669
SUPPORTED_ARCHITECTURES.append("flux")
670+
SUPPORTED_ARCHITECTURES.append("flux-fill")
670671

671672
AUTOMODEL_CLASS = AutoPipelineForInpainting
672673
OVMODEL_CLASS = OVPipelineForInpainting
673674

674675
TASK = "inpainting"
675676

676-
def generate_inputs(self, height=128, width=128, batch_size=1, channel=3, input_type="pil"):
677+
def generate_inputs(self, height=128, width=128, batch_size=1, channel=3, input_type="pil", model_arch=""):
677678
inputs = _generate_prompts(batch_size=batch_size)
678679

679680
inputs["image"] = _generate_images(
@@ -683,7 +684,8 @@ def generate_inputs(self, height=128, width=128, batch_size=1, channel=3, input_
683684
height=height, width=width, batch_size=batch_size, channel=1, input_type=input_type
684685
)
685686

686-
inputs["strength"] = 0.75
687+
if model_arch != "flux-fill":
688+
inputs["strength"] = 0.75
687689
inputs["height"] = height
688690
inputs["width"] = width
689691

@@ -699,7 +701,12 @@ def test_load_vanilla_model_which_is_not_supported(self):
699701
@parameterized.expand(SUPPORTED_ARCHITECTURES)
700702
@require_diffusers
701703
def test_ov_pipeline_class_dispatch(self, model_arch: str):
702-
auto_pipeline = self.AUTOMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch])
704+
if model_arch != "flux-fill":
705+
auto_pipeline = self.AUTOMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch])
706+
else:
707+
from diffusers import FluxFillPipeline
708+
709+
auto_pipeline = FluxFillPipeline.from_pretrained(MODEL_NAMES[model_arch])
703710
ov_pipeline = self.OVMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch])
704711

705712
self.assertEqual(ov_pipeline.auto_model_class, auto_pipeline.__class__)
@@ -713,7 +720,9 @@ def test_num_images_per_prompt(self, model_arch: str):
713720
for height in [64, 128]:
714721
for width in [64, 128]:
715722
for num_images_per_prompt in [1, 3]:
716-
inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size)
723+
inputs = self.generate_inputs(
724+
height=height, width=width, batch_size=batch_size, model_arch=model_arch
725+
)
717726
outputs = pipeline(**inputs, num_images_per_prompt=num_images_per_prompt).images
718727
self.assertEqual(outputs.shape, (batch_size * num_images_per_prompt, height, width, 3))
719728

@@ -752,7 +761,9 @@ def test_shape(self, model_arch: str):
752761
height, width, batch_size = 128, 64, 1
753762

754763
for input_type in ["pil", "np", "pt"]:
755-
inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size, input_type=input_type)
764+
inputs = self.generate_inputs(
765+
height=height, width=width, batch_size=batch_size, input_type=input_type, model_arch=model_arch
766+
)
756767

757768
for output_type in ["pil", "np", "pt", "latent"]:
758769
inputs["output_type"] = output_type
@@ -764,7 +775,7 @@ def test_shape(self, model_arch: str):
764775
elif output_type == "pt":
765776
self.assertEqual(outputs.shape, (batch_size, 3, height, width))
766777
else:
767-
if model_arch != "flux":
778+
if not model_arch.startswith("flux"):
768779
out_channels = (
769780
pipeline.unet.config.out_channels
770781
if pipeline.unet is not None
@@ -782,17 +793,26 @@ def test_shape(self, model_arch: str):
782793
else:
783794
packed_height = height // pipeline.vae_scale_factor // 2
784795
packed_width = width // pipeline.vae_scale_factor // 2
785-
channels = pipeline.transformer.config.in_channels
796+
channels = (
797+
pipeline.transformer.config.in_channels
798+
if model_arch != "flux-fill"
799+
else pipeline.transformer.out_channels
800+
)
786801
self.assertEqual(outputs.shape, (batch_size, packed_height * packed_width, channels))
787802

788803
@parameterized.expand(SUPPORTED_ARCHITECTURES)
789804
@require_diffusers
790805
def test_compare_to_diffusers_pipeline(self, model_arch: str):
791806
ov_pipeline = self.OVMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch])
792-
diffusers_pipeline = self.AUTOMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch])
807+
if model_arch != "flux-fill":
808+
diffusers_pipeline = self.AUTOMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch])
809+
else:
810+
from diffusers import FluxFillPipeline
811+
812+
diffusers_pipeline = FluxFillPipeline.from_pretrained(MODEL_NAMES[model_arch])
793813

794814
height, width, batch_size = 64, 64, 1
795-
inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size)
815+
inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size, model_arch=model_arch)
796816

797817
for output_type in ["latent", "np", "pt"]:
798818
inputs["output_type"] = output_type
@@ -804,7 +824,7 @@ def test_compare_to_diffusers_pipeline(self, model_arch: str):
804824

805825
# test generation when the input resolution is not divisible by 64
806826
height, width, batch_size = 96, 96, 1
807-
inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size)
827+
inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size, model_arch=model_arch)
808828

809829
for output_type in ["latent", "np", "pt"]:
810830
inputs["output_type"] = output_type
@@ -820,7 +840,7 @@ def test_image_reproducibility(self, model_arch: str):
820840
pipeline = self.OVMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch])
821841

822842
height, width, batch_size = 64, 64, 1
823-
inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size)
843+
inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size, model_arch=model_arch)
824844

825845
for generator_framework in ["np", "pt"]:
826846
ov_outputs_1 = pipeline(**inputs, generator=get_generator(generator_framework, SEED))

tests/openvino/test_exporters_cli.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727

2828
from optimum.exporters.openvino.__main__ import main_export
2929
from optimum.intel import ( # noqa
30+
OVFluxFillPipeline,
3031
OVFluxPipeline,
3132
OVLatentConsistencyModelPipeline,
3233
OVModelForAudioClassification,
@@ -82,7 +83,9 @@ class OVCLIExportTestCase(unittest.TestCase):
8283
]
8384

8485
if is_transformers_version(">=", "4.45"):
85-
SUPPORTED_ARCHITECTURES.extend([("text-to-image", "stable-diffusion-3"), ("text-to-image", "flux")])
86+
SUPPORTED_ARCHITECTURES.extend(
87+
[("text-to-image", "stable-diffusion-3"), ("text-to-image", "flux"), ("inpainting", "flux-fill")]
88+
)
8689
EXPECTED_NUMBER_OF_TOKENIZER_MODELS = {
8790
"gpt2": 2 if is_tokenizers_version("<", "0.20") or is_openvino_version(">=", "2024.5") else 0,
8891
"t5": 0, # no .model file in the repository
@@ -97,6 +100,7 @@ class OVCLIExportTestCase(unittest.TestCase):
97100
"stable-diffusion-xl": 4 if is_tokenizers_version("<", "0.20") or is_openvino_version(">=", "2024.5") else 0,
98101
"stable-diffusion-3": 6 if is_tokenizers_version("<", "0.20") or is_openvino_version(">=", "2024.5") else 2,
99102
"flux": 4 if is_tokenizers_version("<", "0.20") or is_openvino_version(">=", "2024.5") else 0,
103+
"flux-fill": 2 if is_tokenizers_version("<", "0.20") or is_openvino_version(">=", "2024.5") else 0,
100104
"llava": 2 if is_tokenizers_version("<", "0.20") or is_openvino_version(">=", "2024.5") else 0,
101105
}
102106

tests/openvino/utils_tests.py

+2
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@
6666
"falcon-40b": "katuni4ka/tiny-random-falcon-40b",
6767
"flaubert": "hf-internal-testing/tiny-random-flaubert",
6868
"flux": "katuni4ka/tiny-random-flux",
69+
"flux-fill": "katuni4ka/tiny-random-flux-fill",
6970
"gpt_bigcode": "hf-internal-testing/tiny-random-GPTBigCodeModel",
7071
"gpt2": "hf-internal-testing/tiny-random-gpt2",
7172
"gpt_neo": "hf-internal-testing/tiny-random-GPTNeoModel",
@@ -193,6 +194,7 @@
193194
"open-clip": (20, 28),
194195
"stable-diffusion-3": (66, 42, 58, 30),
195196
"flux": (56, 24, 28, 64),
197+
"flux-fill": (56, 24, 28, 64),
196198
"llava": (30, 9, 1),
197199
"llava_next": (30, 9, 1),
198200
"minicpmv": (30, 26, 1, 6),

0 commit comments

Comments
 (0)