Skip to content

Commit bcc4665

Browse files
Removed saving to temporary directory; added core property handling for OVModelForCausalLM
1 parent 12dc672 commit bcc4665

File tree

3 files changed

+17
-11
lines changed

3 files changed

+17
-11
lines changed

optimum/commands/export/openvino.py

+5 -11
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,6 @@
1515

1616
import logging
1717
import sys
18-
import tempfile
1918
from pathlib import Path
2019
from typing import TYPE_CHECKING, Optional
2120

@@ -357,17 +356,12 @@ def run(self):
357356
if quantize_after_export:
358357
from optimum.intel import OVModelForCausalLM, OVQuantizer
359358

359+
# TODO: remove disabling mmap once OV is updated to 2024.3
360360
model = OVModelForCausalLM.from_pretrained(
361-
self.args.output, trust_remote_code=self.args.trust_remote_code
361+
self.args.output, trust_remote_code=self.args.trust_remote_code, ov_config={"ENABLE_MMAP": "NO"}
362362
)
363363
quantizer = OVQuantizer(model)
364364
quantization_config.tokenizer = quantization_config.tokenizer or str(self.args.output)
365-
# TODO: set save_directory=self.args.output once OV is updated to 2024.3
366-
quantizer.quantize(ov_config=OVConfig(quantization_config=quantization_config))
367-
with tempfile.TemporaryDirectory() as temp_dir:
368-
import shutil
369-
370-
model.save_pretrained(temp_dir)
371-
ov_config.save_pretrained(self.args.output)
372-
shutil.copy(f"{temp_dir}/openvino_model.xml", f"{self.args.output}/openvino_model.xml")
373-
shutil.copy(f"{temp_dir}/openvino_model.bin", f"{self.args.output}/openvino_model.bin")
365+
quantizer.quantize(
366+
ov_config=OVConfig(quantization_config=quantization_config), save_directory=self.args.output
367+
)

optimum/intel/openvino/modeling_base.py

+5
Original file line number | Diff line number | Diff line change
@@ -103,6 +103,7 @@ def __init__(
103103
def load_model(
104104
file_name: Union[str, Path],
105105
quantization_config: Union[OVWeightQuantizationConfig, Dict] = None,
106+
ov_core_properties: Optional[Dict] = None,
106107
):
107108
"""
108109
Loads the model.
@@ -112,6 +113,8 @@ def load_model(
112113
The path of the model ONNX or XML file.
113114
quantization_config (`OVWeightQuantizationConfig` or `Dict`, *optional*):
114115
Quantization config to apply after model is loaded.
116+
ov_core_properties (`Dict`, *optional*):
117+
OpenVINO core properties to set before model loading.
115118
"""
116119

117120
def fix_op_names_duplicates(model: openvino.runtime.Model):
@@ -128,6 +131,8 @@ def fix_op_names_duplicates(model: openvino.runtime.Model):
128131

129132
if isinstance(file_name, str):
130133
file_name = Path(file_name)
134+
if ov_core_properties:
135+
core.set_property(ov_core_properties)
131136
model = core.read_model(file_name) if not file_name.suffix == ".onnx" else convert_model(file_name)
132137
if file_name.suffix == ".onnx":
133138
model = fix_op_names_duplicates(model) # should be called during model conversion to IR

optimum/intel/openvino/modeling_decoder.py

+7
Original file line number | Diff line number | Diff line change
@@ -748,9 +748,16 @@ def _from_pretrained(
748748

749749
load_in_4bit = quantization_config.bits == 4 if quantization_config else False
750750

751+
ov_config = kwargs.get("ov_config", None)
752+
ov_core_properties = {}
753+
if ov_config and "ENABLE_MMAP" in ov_config:
754+
ov_core_properties["ENABLE_MMAP"] = ov_config["ENABLE_MMAP"]
755+
del ov_config["ENABLE_MMAP"]
756+
751757
model = cls.load_model(
752758
model_cache_path,
753759
quantization_config=None if load_in_4bit else quantization_config,
760+
ov_core_properties=ov_core_properties,
754761
)
755762

756763
model_type = config.model_type.replace("_", "-")

0 commit comments

Comments
 (0)