File tree: 3 files changed, +17 -10 lines changed
3 files changed +17
-10
lines changed Original file line number Diff line number Diff line change @@ -357,17 +357,12 @@ def run(self):
357
357
if quantize_after_export :
358
358
from optimum .intel import OVModelForCausalLM , OVQuantizer
359
359
360
+ # TODO: stop disabling mmap (drop the ENABLE_MMAP override) once OV is updated to 2024.3
360
361
model = OVModelForCausalLM .from_pretrained (
361
- self .args .output , trust_remote_code = self .args .trust_remote_code
362
+ self .args .output , trust_remote_code = self .args .trust_remote_code , ov_config = { "ENABLE_MMAP" : "NO" }
362
363
)
363
364
quantizer = OVQuantizer (model )
364
365
quantization_config .tokenizer = quantization_config .tokenizer or str (self .args .output )
365
- # TODO: set save_directory=self.args.output once OV is updated to 2024.3
366
- quantizer .quantize (ov_config = OVConfig (quantization_config = quantization_config ))
367
- with tempfile .TemporaryDirectory () as temp_dir :
368
- import shutil
369
-
370
- model .save_pretrained (temp_dir )
371
- ov_config .save_pretrained (self .args .output )
372
- shutil .copy (f"{ temp_dir } /openvino_model.xml" , f"{ self .args .output } /openvino_model.xml" )
373
- shutil .copy (f"{ temp_dir } /openvino_model.bin" , f"{ self .args .output } /openvino_model.bin" )
366
+ quantizer .quantize (
367
+ ov_config = OVConfig (quantization_config = quantization_config ), save_directory = self .args .output
368
+ )
Original file line number Diff line number Diff line change @@ -103,6 +103,7 @@ def __init__(
103
103
def load_model (
104
104
file_name : Union [str , Path ],
105
105
quantization_config : Union [OVWeightQuantizationConfig , Dict ] = None ,
106
+ ov_core_properties : Optional [Dict ] = None ,
106
107
):
107
108
"""
108
109
Loads the model.
@@ -112,6 +113,8 @@ def load_model(
112
113
The path of the model ONNX or XML file.
113
114
quantization_config (`OVWeightQuantizationConfig` or `Dict`, *optional*):
114
115
Quantization config to apply after model is loaded.
116
+ ov_core_properties (`Dict`, *optional*):
117
+ OpenVINO core properties (e.g. `{"ENABLE_MMAP": "NO"}`) applied with `core.set_property` before the model is read.
115
118
"""
116
119
117
120
def fix_op_names_duplicates (model : openvino .runtime .Model ):
@@ -128,6 +131,8 @@ def fix_op_names_duplicates(model: openvino.runtime.Model):
128
131
129
132
if isinstance (file_name , str ):
130
133
file_name = Path (file_name )
134
+ if ov_core_properties :
135
+ core .set_property (ov_core_properties )
131
136
model = core .read_model (file_name ) if not file_name .suffix == ".onnx" else convert_model (file_name )
132
137
if file_name .suffix == ".onnx" :
133
138
model = fix_op_names_duplicates (model ) # should be called during model conversion to IR
Original file line number Diff line number Diff line change @@ -748,9 +748,16 @@ def _from_pretrained(
748
748
749
749
load_in_4bit = quantization_config .bits == 4 if quantization_config else False
750
750
751
+ ov_config = kwargs .get ("ov_config" , None )
752
+ ov_core_properties = {}
753
+ if ov_config and "ENABLE_MMAP" in ov_config :
754
+ ov_core_properties ["ENABLE_MMAP" ] = ov_config ["ENABLE_MMAP" ]
755
+ del ov_config ["ENABLE_MMAP" ]
756
+
751
757
model = cls .load_model (
752
758
model_cache_path ,
753
759
quantization_config = None if load_in_4bit else quantization_config ,
760
+ ov_core_properties = ov_core_properties ,
754
761
)
755
762
756
763
model_type = config .model_type .replace ("_" , "-" )
You can’t perform that action at this time.
0 commit comments