@@ -87,15 +87,25 @@ def __init__(
87
87
compile : bool = True ,
88
88
ov_config : Optional [Dict [str , str ]] = None ,
89
89
model_save_dir : Optional [Union [str , Path , TemporaryDirectory ]] = None ,
90
+ quantization_config : Optional [Union [OVWeightQuantizationConfig , Dict ]] = None ,
90
91
** kwargs ,
91
92
):
92
93
self ._internal_dict = config
93
94
self ._device = device .upper ()
94
95
self .is_dynamic = dynamic_shapes
95
96
self .ov_config = ov_config if ov_config is not None else {}
96
- self ._model_save_dir = (
97
- Path (model_save_dir .name ) if isinstance (model_save_dir , TemporaryDirectory ) else model_save_dir
98
- )
97
+
98
+ # This attribute keeps a reference to the temporary directory alive, since garbage collecting it
99
+ # would end up removing the directory containing the underlying OpenVINO model
100
+ self ._model_save_dir_tempdirectory_instance = None
101
+ if isinstance (model_save_dir , TemporaryDirectory ):
102
+ self ._model_save_dir_tempdirectory_instance = model_save_dir
103
+ self ._model_save_dir = Path (model_save_dir .name )
104
+ elif isinstance (model_save_dir , str ):
105
+ self ._model_save_dir = Path (model_save_dir )
106
+ else :
107
+ self ._model_save_dir = model_save_dir
108
+
99
109
self .vae_decoder = OVModelVaeDecoder (vae_decoder , self )
100
110
self .unet = OVModelUnet (unet , self )
101
111
self .text_encoder = OVModelTextEncoder (text_encoder , self ) if text_encoder is not None else None
@@ -140,6 +150,10 @@ def __init__(
140
150
141
151
self ._internal_dict .pop ("vae" , None )
142
152
153
+ self ._openvino_config = None
154
+ if quantization_config :
155
+ self ._openvino_config = OVConfig (quantization_config = quantization_config )
156
+
143
157
def _save_pretrained (self , save_directory : Union [str , Path ]):
144
158
"""
145
159
Saves the model to the OpenVINO IR format so that it can be re-loaded using the
@@ -177,6 +191,8 @@ def _save_pretrained(self, save_directory: Union[str, Path]):
177
191
if self .tokenizer_2 is not None :
178
192
self .tokenizer_2 .save_pretrained (save_directory / "tokenizer_2" )
179
193
194
+ self ._save_openvino_config (save_directory )
195
+
180
196
@classmethod
181
197
def _from_pretrained (
182
198
cls ,
@@ -257,10 +273,7 @@ def _from_pretrained(
257
273
else :
258
274
kwargs [name ] = load_method (new_model_save_dir )
259
275
260
- # Give default quantization config if not provided and load_in_8bit=True
261
- if load_in_8bit :
262
- quantization_config = quantization_config or {"bits" : 8 }
263
-
276
+ quantization_config = cls ._prepare_weight_quantization_config (quantization_config , load_in_8bit )
264
277
unet = cls .load_model (
265
278
new_model_save_dir / DIFFUSION_MODEL_UNET_SUBFOLDER / unet_file_name , quantization_config
266
279
)
@@ -278,7 +291,14 @@ def _from_pretrained(
278
291
if model_save_dir is None :
279
292
model_save_dir = new_model_save_dir
280
293
281
- return cls (unet = unet , config = config , model_save_dir = model_save_dir , ** components , ** kwargs )
294
+ return cls (
295
+ unet = unet ,
296
+ config = config ,
297
+ model_save_dir = model_save_dir ,
298
+ quantization_config = quantization_config ,
299
+ ** components ,
300
+ ** kwargs ,
301
+ )
282
302
283
303
@classmethod
284
304
def _from_transformers (
0 commit comments