From cc9ae9ef05db670b736b8497b33c216a319e1d33 Mon Sep 17 00:00:00 2001
From: Vasily Shamporov
Date: Wed, 13 Mar 2024 11:29:58 +0100
Subject: [PATCH 1/3] Add code path for LLAMA_CPP plugins to load models directly from file

---
 src/inference/src/dev/core_impl.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/inference/src/dev/core_impl.cpp b/src/inference/src/dev/core_impl.cpp
index 63e5a747819c96..5a354b08addebd 100644
--- a/src/inference/src/dev/core_impl.cpp
+++ b/src/inference/src/dev/core_impl.cpp
@@ -786,8 +786,12 @@ ov::SoPtr ov::CoreImpl::compile_model(const std::string& mod
     ov::SoPtr compiled_model;
 
     auto cacheManager = coreConfig.get_cache_config_for_device(plugin, parsed._config)._cacheManager;
+
+    if (plugin.get_name().find("LLAMA_CPP") != std::string::npos) {
+        compiled_model = plugin.compile_model(model_path, parsed._config);
+    }
+    else if (cacheManager && device_supports_model_caching(plugin) && !is_proxy_device(plugin)) {
     // Skip caching for proxy plugin. HW plugin will load network from the cache
-    if (cacheManager && device_supports_model_caching(plugin) && !is_proxy_device(plugin)) {
         CacheContent cacheContent{cacheManager, model_path};
         cacheContent.blobId = ov::ModelCache::compute_hash(model_path, create_compile_config(plugin, parsed._config));
         std::unique_ptr lock = cacheGuard.get_hash_lock(cacheContent.blobId);

From 234d72fe65ddd5fadcbebe7d77643dcff91b7d2a Mon Sep 17 00:00:00 2001
From: Vasily Shamporov
Date: Wed, 13 Mar 2024 15:43:44 +0100
Subject: [PATCH 2/3] Merge the last two ifs instead

---
 src/inference/src/dev/core_impl.cpp | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/src/inference/src/dev/core_impl.cpp b/src/inference/src/dev/core_impl.cpp
index 5a354b08addebd..5d0e32c6b96046 100644
--- a/src/inference/src/dev/core_impl.cpp
+++ b/src/inference/src/dev/core_impl.cpp
@@ -787,10 +787,7 @@ ov::SoPtr ov::CoreImpl::compile_model(const std::string& mod
 
     auto cacheManager = coreConfig.get_cache_config_for_device(plugin, parsed._config)._cacheManager;
 
-    if (plugin.get_name().find("LLAMA_CPP") != std::string::npos) {
-        compiled_model = plugin.compile_model(model_path, parsed._config);
-    }
-    else if (cacheManager && device_supports_model_caching(plugin) && !is_proxy_device(plugin)) {
+    if (cacheManager && device_supports_model_caching(plugin) && !is_proxy_device(plugin)) {
     // Skip caching for proxy plugin. HW plugin will load network from the cache
         CacheContent cacheContent{cacheManager, model_path};
         cacheContent.blobId = ov::ModelCache::compute_hash(model_path, create_compile_config(plugin, parsed._config));
@@ -800,13 +797,8 @@ ov::SoPtr ov::CoreImpl::compile_model(const std::string& mod
             auto model = read_model(model_path, std::string{});
             return compile_model_and_cache(plugin, model, parsed._config, {}, cacheContent);
         });
-    } else if (cacheManager) {
-        // this code path is enabled for AUTO / MULTI / BATCH / PROXY devices which don't support
-        // import / export explicitly, but can redirect this functionality to actual HW plugin
-        compiled_model = plugin.compile_model(model_path, parsed._config);
     } else {
-        auto model = read_model(model_path, std::string());
-        compiled_model = plugin.compile_model(model, parsed._config);
+        compiled_model = plugin.compile_model(model_path, parsed._config);
     }
     return compiled_model;
 }

From 36bc65a194e62aebfcb784d818ebf424f6cffa82 Mon Sep 17 00:00:00 2001
From: Vasily Shamporov
Date: Wed, 13 Mar 2024 16:59:36 +0100
Subject: [PATCH 3/3] Fix code style

---
 src/inference/src/dev/core_impl.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/inference/src/dev/core_impl.cpp b/src/inference/src/dev/core_impl.cpp
index 5d0e32c6b96046..42b91e95b95e48 100644
--- a/src/inference/src/dev/core_impl.cpp
+++ b/src/inference/src/dev/core_impl.cpp
@@ -788,7 +788,7 @@ ov::SoPtr ov::CoreImpl::compile_model(const std::string& mod
     auto cacheManager = coreConfig.get_cache_config_for_device(plugin, parsed._config)._cacheManager;
 
     if (cacheManager && device_supports_model_caching(plugin) && !is_proxy_device(plugin)) {
-    // Skip caching for proxy plugin. HW plugin will load network from the cache
+        // Skip caching for proxy plugin. HW plugin will load network from the cache
         CacheContent cacheContent{cacheManager, model_path};
         cacheContent.blobId = ov::ModelCache::compute_hash(model_path, create_compile_config(plugin, parsed._config));
         std::unique_ptr lock = cacheGuard.get_hash_lock(cacheContent.blobId);