From cc9ae9ef05db670b736b8497b33c216a319e1d33 Mon Sep 17 00:00:00 2001
From: Vasily Shamporov
Date: Wed, 13 Mar 2024 11:29:58 +0100
Subject: [PATCH 1/3] Add code path for LLAMA_CPP plugins to load models directly from file

---
 src/inference/src/dev/core_impl.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/inference/src/dev/core_impl.cpp b/src/inference/src/dev/core_impl.cpp
index 63e5a747819c96..5a354b08addebd 100644
--- a/src/inference/src/dev/core_impl.cpp
+++ b/src/inference/src/dev/core_impl.cpp
@@ -786,8 +786,12 @@ ov::SoPtr ov::CoreImpl::compile_model(const std::string& mod
     ov::SoPtr compiled_model;
 
     auto cacheManager = coreConfig.get_cache_config_for_device(plugin, parsed._config)._cacheManager;
+
+    if (plugin.get_name().find("LLAMA_CPP") != std::string::npos) {
+        compiled_model = plugin.compile_model(model_path, parsed._config);
+    }
+    else if (cacheManager && device_supports_model_caching(plugin) && !is_proxy_device(plugin)) {
     // Skip caching for proxy plugin. HW plugin will load network from the cache
-    if (cacheManager && device_supports_model_caching(plugin) && !is_proxy_device(plugin)) {
         CacheContent cacheContent{cacheManager, model_path};
         cacheContent.blobId = ov::ModelCache::compute_hash(model_path, create_compile_config(plugin, parsed._config));
         std::unique_ptr lock = cacheGuard.get_hash_lock(cacheContent.blobId);

From 234d72fe65ddd5fadcbebe7d77643dcff91b7d2a Mon Sep 17 00:00:00 2001
From: Vasily Shamporov
Date: Wed, 13 Mar 2024 15:43:44 +0100
Subject: [PATCH 2/3] Merge the last two ifs instead

---
 src/inference/src/dev/core_impl.cpp | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/src/inference/src/dev/core_impl.cpp b/src/inference/src/dev/core_impl.cpp
index 5a354b08addebd..5d0e32c6b96046 100644
--- a/src/inference/src/dev/core_impl.cpp
+++ b/src/inference/src/dev/core_impl.cpp
@@ -787,10 +787,7 @@ ov::SoPtr ov::CoreImpl::compile_model(const std::string& mod
 
     auto cacheManager = coreConfig.get_cache_config_for_device(plugin, parsed._config)._cacheManager;
 
-    if (plugin.get_name().find("LLAMA_CPP") != std::string::npos) {
-        compiled_model = plugin.compile_model(model_path, parsed._config);
-    }
-    else if (cacheManager && device_supports_model_caching(plugin) && !is_proxy_device(plugin)) {
+    if (cacheManager && device_supports_model_caching(plugin) && !is_proxy_device(plugin)) {
     // Skip caching for proxy plugin. HW plugin will load network from the cache
         CacheContent cacheContent{cacheManager, model_path};
         cacheContent.blobId = ov::ModelCache::compute_hash(model_path, create_compile_config(plugin, parsed._config));
@@ -800,13 +797,8 @@ ov::SoPtr ov::CoreImpl::compile_model(const std::string& mod
             auto model = read_model(model_path, std::string{});
             return compile_model_and_cache(plugin, model, parsed._config, {}, cacheContent);
         });
-    } else if (cacheManager) {
-        // this code path is enabled for AUTO / MULTI / BATCH / PROXY devices which don't support
-        // import / export explicitly, but can redirect this functionality to actual HW plugin
-        compiled_model = plugin.compile_model(model_path, parsed._config);
     } else {
-        auto model = read_model(model_path, std::string());
-        compiled_model = plugin.compile_model(model, parsed._config);
+        compiled_model = plugin.compile_model(model_path, parsed._config);
     }
     return compiled_model;
 }

From 36bc65a194e62aebfcb784d818ebf424f6cffa82 Mon Sep 17 00:00:00 2001
From: Vasily Shamporov
Date: Wed, 13 Mar 2024 16:59:36 +0100
Subject: [PATCH 3/3] Fix code style

---
 src/inference/src/dev/core_impl.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/inference/src/dev/core_impl.cpp b/src/inference/src/dev/core_impl.cpp
index 5d0e32c6b96046..42b91e95b95e48 100644
--- a/src/inference/src/dev/core_impl.cpp
+++ b/src/inference/src/dev/core_impl.cpp
@@ -788,7 +788,7 @@ ov::SoPtr ov::CoreImpl::compile_model(const std::string& mod
     auto cacheManager = coreConfig.get_cache_config_for_device(plugin, parsed._config)._cacheManager;
 
     if (cacheManager && device_supports_model_caching(plugin) && !is_proxy_device(plugin)) {
-    // Skip caching for proxy plugin. HW plugin will load network from the cache
+        // Skip caching for proxy plugin. HW plugin will load network from the cache
         CacheContent cacheContent{cacheManager, model_path};
         cacheContent.blobId = ov::ModelCache::compute_hash(model_path, create_compile_config(plugin, parsed._config));
         std::unique_ptr lock = cacheGuard.get_hash_lock(cacheContent.blobId);