16
16
17
17
from ...exporters .openvino import main_export
18
18
from ...exporters .openvino .stateful import ensure_stateful_is_available
19
+ from .. import OVQuantizer
19
20
from .configuration import OVConfig , OVWeightQuantizationConfig
20
21
from .modeling_base import OVBaseModel , OVModelPart
21
22
from .modeling_decoder import CausalLMOutputWithPast , OVModelForCausalLM
@@ -178,6 +179,7 @@ def __init__(self, model: ov.Model, parent_model: OVBaseModel) -> None:
178
179
]
179
180
180
181
def forward (self , pixel_values , ** kwargs ):
182
+ self ._compile ()
181
183
result = self .request ({"pixel_values" : pixel_values })
182
184
last_hidden_state = result [0 ]
183
185
hidden_states = None
@@ -221,7 +223,7 @@ def __init__(
221
223
self .ov_config = {} if ov_config is None else {** ov_config }
222
224
self .preprocessors = kwargs .get ("preprocessors" , [])
223
225
self .lm_model = language_model
224
- self .text_embdings_model = text_embeddings
226
+ self .text_embeddings_model = text_embeddings
225
227
self .vision_embeddings_model = vision_embeddings
226
228
self ._supports_cache_class = False
227
229
self .main_input_name = "input_ids"
@@ -238,13 +240,13 @@ def __init__(
238
240
self ._set_ov_config_parameters ()
239
241
self .language_model = OVModelWithEmbedForCausalLM (
240
242
self .lm_model ,
241
- self .text_embdings_model ,
243
+ self .text_embeddings_model ,
242
244
config = config ,
243
245
deivce = device ,
244
246
ov_config = ov_config ,
245
247
model_save_dir = model_save_dir ,
246
248
quantization_config = quantization_config ,
247
- compile = not self ._compile_only ,
249
+ compile = not self ._compile_only and enable_compilation ,
248
250
compile_only = self ._compile_only ,
249
251
)
250
252
self .vision_embeddings = OVVisionEmbedding (self .vision_embeddings_model , self )
@@ -264,6 +266,18 @@ def __init__(
264
266
except AttributeError :
265
267
pass
266
268
269
def clear_requests(self):
    """Drop the inference requests held by this model's sub-components.

    Delegates to ``self.language_model.clear_requests()`` and then sets
    ``request`` to ``None`` on the vision embeddings component and on every
    component named in ``self.additional_parts``.

    Raises:
        ValueError: If the model was created in ``compile_only`` mode.
    """
    if self._compile_only:
        raise ValueError(
            "`clear_requests()` is not supported with `compile_only` mode, please intialize model without this option"
        )

    self.language_model.clear_requests()

    # Look parts up with a ``None`` default: an optional part that was never
    # attached is skipped the same way as one explicitly set to ``None``,
    # instead of aborting the whole reset with an AttributeError.
    components = [self.vision_embeddings]
    components.extend(getattr(self, part, None) for part in self.additional_parts)

    for component in components:
        if component is not None:
            # NOTE(review): presumably the component re-creates its request
            # lazily on the next call (forward() invokes `_compile()` first) - confirm.
            component.request = None
280
+
267
281
def compile (self ):
268
282
self .language_model .compile ()
269
283
self .vision_embeddings ._compile ()
@@ -281,11 +295,11 @@ def _save_pretrained(self, save_directory: Union[str, Path]):
281
295
save_directory (`str` or `Path`):
282
296
The directory where to save the model files.
283
297
"""
284
- src_files = [self .lm_model , self .text_embdings_model , self .vision_embeddings_model ]
298
+ src_files = [self .lm_model , self .text_embeddings_model , self .vision_embeddings_model ]
285
299
dst_file_names = [
286
300
"openvino_language_model.xml" ,
287
301
"openvino_text_embeddings_model.xml" ,
288
- "openvino_vision_embeddings .xml" ,
302
+ "openvino_vision_embeddings_model .xml" ,
289
303
]
290
304
for part in self .additional_parts :
291
305
model = getattr (self , f"{ part } _model" , None )
@@ -364,26 +378,18 @@ def _from_pretrained(
364
378
raise ValueError ("You cannot use both `use_auth_token` and `token` arguments at the same time." )
365
379
token = use_auth_token
366
380
367
- model_cls = MODEL_TYPE_TO_CLS_MAPPING [config .model_type ]
368
-
369
- quantization_config = model_cls ._prepare_weight_quantization_config (quantization_config , load_in_8bit )
370
- compile_only = kwargs .get ("compile_only" , False )
371
-
372
- # Load model from a local directory
373
- if os .path .isdir (model_id ):
374
- model_save_dir = Path (model_id )
375
381
model_file_names = {
376
382
"language_model" : "openvino_language_model.xml" ,
377
383
"text_embeddings" : "openvino_text_embeddings_model.xml" ,
378
384
"vision_embeddings" : "openvino_vision_embeddings_model.xml" ,
379
385
}
380
386
387
+ model_cls = MODEL_TYPE_TO_CLS_MAPPING [config .model_type ]
381
388
for part in model_cls .additional_parts :
382
389
model_file_names [part ] = f"openvino_{ part } _model.xml"
383
- model_cls = MODEL_TYPE_TO_CLS_MAPPING [config .model_type ]
384
- quantization_config = model_cls ._prepare_weight_quantization_config (quantization_config , load_in_8bit )
385
390
compile_only = kwargs .get ("compile_only" , False )
386
391
if os .path .isdir (model_id ):
392
+ # Load model from a local directory
387
393
model_save_dir = Path (model_id )
388
394
file_names = {k : os .path .join (model_id , model_file_names [k ]) for k in model_file_names }
389
395
else :
@@ -401,11 +407,11 @@ def _from_pretrained(
401
407
file_names [name ] = model_cache_path
402
408
model_save_dir = Path (model_cache_path ).parent
403
409
if not compile_only :
404
- language_model = model_cls .load_model (file_names ["language_model" ], quantization_config )
405
- text_embeddings = model_cls .load_model (file_names ["text_embeddings" ], quantization_config )
406
- vision_embeddings = model_cls .load_model (file_names ["vision_embeddings" ], quantization_config )
410
+ language_model = model_cls .load_model (file_names ["language_model" ])
411
+ text_embeddings = model_cls .load_model (file_names ["text_embeddings" ])
412
+ vision_embeddings = model_cls .load_model (file_names ["vision_embeddings" ])
407
413
for part in model_cls .additional_parts :
408
- kwargs [part ] = model_cls .load_model (file_names [part ], quantization_config )
414
+ kwargs [part ] = model_cls .load_model (file_names [part ])
409
415
else :
410
416
language_model = model_cls ._compile_model (
411
417
file_names ["language_model" ],
@@ -445,7 +451,12 @@ def _from_pretrained(
445
451
except Exception :
446
452
pass
447
453
448
- return model_cls (
454
+ quantization_config = model_cls ._prepare_weight_quantization_config (quantization_config , load_in_8bit )
455
+ to_quantize = not compile_only and quantization_config is not None
456
+ if to_quantize :
457
+ kwargs ["compile" ] = False
458
+
459
+ model = model_cls (
449
460
language_model = language_model ,
450
461
text_embeddings = text_embeddings ,
451
462
vision_embeddings = vision_embeddings ,
@@ -455,6 +466,11 @@ def _from_pretrained(
455
466
** kwargs ,
456
467
)
457
468
469
+ if to_quantize :
470
+ OVQuantizer (model ).quantize (ov_config = OVConfig (quantization_config = quantization_config ))
471
+
472
+ return model
473
+
458
474
@classmethod
459
475
def _from_transformers (
460
476
cls ,
@@ -533,8 +549,8 @@ def half(self):
533
549
"""
534
550
apply_moc_transformations (self .lm_model , cf = False )
535
551
compress_model_transformation (self .lm_model )
536
- apply_moc_transformations (self .text_embdings_model , cf = False )
537
- compress_model_transformation (self .text_embdings_model )
552
+ apply_moc_transformations (self .text_embeddings_model , cf = False )
553
+ compress_model_transformation (self .text_embeddings_model )
538
554
apply_moc_transformations (self .vision_embeddings_model , cf = False )
539
555
compress_model_transformation (self .vision_embeddings_model )
540
556
for part in self .additional_parts :
0 commit comments