@@ -272,6 +272,7 @@ def _from_pretrained(
272
272
subfolder = subfolder ,
273
273
)
274
274
file_names [name ] = model_cache_path
275
+ decoder_with_past = cls .load_model (file_names ["decoder_with_past" ], quantization_config )
275
276
else :
276
277
encoder = cls ._compile_model (
277
278
file_names ["encoder" ], kwargs .get ("device" , "CPU" ), kwargs .get ("ov_config" ), model_save_dir
@@ -280,6 +281,20 @@ def _from_pretrained(
280
281
file_names ["decoder" ], kwargs .get ("device" , "CPU" ), kwargs .get ("ov_config" ), model_save_dir
281
282
)
282
283
if use_cache and not model_has_state (decoder ):
284
+ model_file_names ["decoder_with_past" ] = decoder_with_past_file_name
285
+ model_file_names ["decoder_with_past_bin" ] = decoder_with_past_file_name .replace (".xml" , ".bin" )
286
+ for name in ["decoder_with_past" , "decoder_with_past_bin" ]:
287
+ model_cache_path = hf_hub_download (
288
+ repo_id = model_id ,
289
+ filename = model_file_names [name ],
290
+ token = token ,
291
+ revision = revision ,
292
+ cache_dir = cache_dir ,
293
+ force_download = force_download ,
294
+ local_files_only = local_files_only ,
295
+ subfolder = subfolder ,
296
+ )
297
+ file_names [name ] = model_cache_path
283
298
decoder_with_past = cls ._compile_model (
284
299
file_names ["decoder_with_past" ],
285
300
kwargs .get ("device" , "CPU" ),
0 commit comments