Commit 4aacf40

Add code path for LLAMA_CPP plugins to load models directly from file
1 parent 99ce7c0 commit 4aacf40

1 file changed, +5 -1 lines changed

src/inference/src/dev/core_impl.cpp

@@ -786,8 +786,12 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::compile_model(const std::string& mod
     ov::SoPtr<ov::ICompiledModel> compiled_model;
 
     auto cacheManager = coreConfig.get_cache_config_for_device(plugin, parsed._config)._cacheManager;
+
+    if (plugin.get_name().find("LLAMA_CPP") != std::string::npos) {
+        compiled_model = plugin.compile_model(model_path, parsed._config);
+    }
+    else if (cacheManager && device_supports_model_caching(plugin) && !is_proxy_device(plugin)) {
         // Skip caching for proxy plugin. HW plugin will load network from the cache
-        if (cacheManager && device_supports_model_caching(plugin) && !is_proxy_device(plugin)) {
         CacheContent cacheContent{cacheManager, model_path};
         cacheContent.blobId = ov::ModelCache::compute_hash(model_path, create_compile_config(plugin, parsed._config));
         std::unique_ptr<CacheGuardEntry> lock = cacheGuard.get_hash_lock(cacheContent.blobId);
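With this change, compile_model() short-circuits the model-caching path whenever the resolved plugin's name contains "LLAMA_CPP": the model file path is handed straight to the plugin instead of being hashed and looked up in the compiled-model cache. From the application side the call looks the same as for any other device. A minimal sketch of how the new path would be exercised, assuming a llama.cpp plugin registered under a device name containing "LLAMA_CPP" and a model file the plugin can parse on its own (the "model.gguf" path and device name here are illustrative assumptions, not taken from the commit):

    #include <openvino/openvino.hpp>
    #include <iostream>

    int main() {
        ov::Core core;

        // Compile directly from the file. With the commit above, CoreImpl
        // skips cache hashing for LLAMA_CPP plugins and forwards the raw
        // path to plugin.compile_model(model_path, config).
        // "model.gguf" and the "LLAMA_CPP" device name are assumptions
        // made for this example.
        ov::CompiledModel compiled = core.compile_model("model.gguf", "LLAMA_CPP");

        std::cout << "inputs: " << compiled.inputs().size() << std::endl;
        return 0;
    }

One consequence of the substring match on plugin.get_name() is that models for such plugins bypass OpenVINO's model cache entirely, presumably because the model file is already in the plugin's native format and caching a re-exported blob would add nothing.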
