
Commit d7035b5

mvafin, eaidova, and rkazants authored
Load diffusers in native FP16/BF16 precision to reduce the memory usage (#1033)
* Load diffusers in native FP16/BF16 precision to reduce the memory usage
* Apply suggestions from code review
  Co-authored-by: Ekaterina Aidova <ekaterina.aidova@intel.com>
* Fix code
* Update optimum/exporters/openvino/__main__.py
  Co-authored-by: Roman Kazantsev <roman.kazantsev@intel.com>
* Find first floating point tensor instead of first tensor
* Fix style
* Update optimum/exporters/openvino/__main__.py
* Update optimum/exporters/openvino/__main__.py
* Check if safetensors available
* Fix style
* Extract code in the utils function

---------

Co-authored-by: Ekaterina Aidova <ekaterina.aidova@intel.com>
Co-authored-by: Roman Kazantsev <roman.kazantsev@intel.com>
1 parent eacf098 commit d7035b5
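The saving comes from skipping the default FP32 upcast when a diffusers checkpoint is already stored in 16-bit. A back-of-the-envelope sketch of the difference, assuming a hypothetical parameter count on the order of a large diffusion UNet or transformer:

import torch

# Hypothetical parameter count, roughly the scale of a large diffusion UNet/transformer.
n_params = 2_600_000_000

bytes_fp16 = n_params * torch.finfo(torch.float16).bits // 8
bytes_fp32 = n_params * torch.finfo(torch.float32).bits // 8
print(f"native FP16 weights: ~{bytes_fp16 / 1e9:.1f} GB; upcast to FP32: ~{bytes_fp32 / 1e9:.1f} GB")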

File tree: 2 files changed, +55 -1 lines changed

optimum/exporters/openvino/__main__.py (+19 -1)
@@ -41,7 +41,12 @@
 )
 from optimum.utils.save_utils import maybe_load_preprocessors

-from .utils import _MAX_UNCOMPRESSED_SIZE, MULTI_MODAL_TEXT_GENERATION_MODELS, clear_class_registry
+from .utils import (
+    _MAX_UNCOMPRESSED_SIZE,
+    MULTI_MODAL_TEXT_GENERATION_MODELS,
+    clear_class_registry,
+    deduce_diffusers_dtype,
+)


 FORCE_ATTN_MODEL_CLASSES = {"phi3-v": "eager"}
@@ -332,6 +337,19 @@ class StoreAttr(object):
             return model

         GPTQQuantizer.post_init_model = post_init_model
+    elif library_name == "diffusers" and is_openvino_version(">=", "2024.6"):
+        dtype = deduce_diffusers_dtype(
+            model_name_or_path,
+            revision=revision,
+            cache_dir=cache_dir,
+            token=token,
+            local_files_only=local_files_only,
+            force_download=force_download,
+            trust_remote_code=trust_remote_code,
+        )
+        if dtype in [torch.float16, torch.bfloat16]:
+            loading_kwargs["torch_dtype"] = dtype
+            patch_16bit = True

     if library_name == "open_clip":
         model = _OpenClipForZeroShotImageClassification.from_pretrained(model_name_or_path, cache_dir=cache_dir)
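The deduced dtype is forwarded through loading_kwargs["torch_dtype"], which is the standard diffusers mechanism for keeping a checkpoint in its stored precision instead of upcasting to FP32. A minimal standalone sketch of that effect, assuming a pipeline whose weights are stored in FP16 (the model id is only an example):

import torch
from diffusers import DiffusionPipeline

# Example id; any diffusers pipeline with a 16-bit checkpoint behaves the same way.
model_id = "stabilityai/stable-diffusion-xl-base-1.0"

# Without torch_dtype, diffusers upcasts the weights to FP32 while loading.
# Passing the checkpoint's native dtype keeps the tensors in 16-bit, roughly halving host memory.
pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
print(next(pipe.unet.parameters()).dtype)  # torch.float16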

optimum/exporters/openvino/utils.py (+36)
@@ -26,6 +26,7 @@
 from optimum.exporters import TasksManager
 from optimum.exporters.onnx.base import OnnxConfig
 from optimum.intel.utils import is_transformers_version
+from optimum.intel.utils.import_utils import is_safetensors_available
 from optimum.utils import is_diffusers_available
 from optimum.utils.save_utils import maybe_save_preprocessors

@@ -232,6 +233,41 @@ def save_config(config, save_dir):
     config.to_json_file(output_config_file, use_diff=True)


+def deduce_diffusers_dtype(model_name_or_path, **loading_kwargs):
+    dtype = None
+    if is_safetensors_available():
+        if Path(model_name_or_path).is_dir():
+            path = Path(model_name_or_path)
+        else:
+            from diffusers import DiffusionPipeline
+
+            path = DiffusionPipeline.download(model_name_or_path, **loading_kwargs)
+        model_part_name = None
+        if (path / "transformer").is_dir():
+            model_part_name = "transformer"
+        elif (path / "unet").is_dir():
+            model_part_name = "unet"
+        if model_part_name:
+            directory = path / model_part_name
+            safetensors_files = [
+                filename for filename in directory.glob("*.safetensors") if len(filename.suffixes) == 1
+            ]
+            safetensors_file = None
+            if len(safetensors_files) > 0:
+                safetensors_file = safetensors_files.pop(0)
+            if safetensors_file:
+                from safetensors import safe_open
+
+                with safe_open(safetensors_file, framework="pt", device="cpu") as f:
+                    if len(f.keys()) > 0:
+                        for key in f.keys():
+                            tensor = f.get_tensor(key)
+                            if tensor.dtype.is_floating_point:
+                                dtype = tensor.dtype
+                                break
+    return dtype
+
+
 def save_preprocessors(
     preprocessors: List, config: PretrainedConfig, output: Union[str, Path], trust_remote_code: bool
 ):
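The helper opens the first single-suffix *.safetensors file it finds under transformer/ (or, failing that, unet/) and reads tensors until it hits a floating-point one, so the stored precision can be detected without constructing the whole pipeline. A rough usage sketch, assuming an already-downloaded pipeline directory (the path is illustrative):

import torch
from optimum.exporters.openvino.utils import deduce_diffusers_dtype

# Illustrative local path: a diffusers pipeline directory containing unet/ or transformer/.
pipeline_dir = "/models/stable-diffusion-xl-base-1.0"

dtype = deduce_diffusers_dtype(pipeline_dir)
if dtype in (torch.float16, torch.bfloat16):
    print(f"Checkpoint stored in {dtype}; the export can skip the FP32 upcast.")
else:
    print("No 16-bit floating-point checkpoint detected; the default loading path applies.")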
