Commit 4aacf40

Add code path for LLAMA_CPP plugins to load models directly from file
1 parent 99ce7c0 commit 4aacf40

1 file changed, +5 -1 lines changed

src/inference/src/dev/core_impl.cpp

@@ -786,8 +786,12 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::compile_model(const std::string& mod
     ov::SoPtr<ov::ICompiledModel> compiled_model;
 
     auto cacheManager = coreConfig.get_cache_config_for_device(plugin, parsed._config)._cacheManager;
+
+    if (plugin.get_name().find("LLAMA_CPP") != std::string::npos) {
+        compiled_model = plugin.compile_model(model_path, parsed._config);
+    }
+    else if (cacheManager && device_supports_model_caching(plugin) && !is_proxy_device(plugin)) {
         // Skip caching for proxy plugin. HW plugin will load network from the cache
-        if (cacheManager && device_supports_model_caching(plugin) && !is_proxy_device(plugin)) {
         CacheContent cacheContent{cacheManager, model_path};
         cacheContent.blobId = ov::ModelCache::compute_hash(model_path, create_compile_config(plugin, parsed._config));
         std::unique_ptr<CacheGuardEntry> lock = cacheGuard.get_hash_lock(cacheContent.blobId);
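With this change, compile_model() short-circuits the model-caching path whenever the resolved plugin's name contains "LLAMA_CPP": the model file path is handed straight to the plugin instead of being hashed and looked up in the compiled-model cache. From the application side the call looks the same as for any other device. A minimal sketch of how the new path would be exercised, assuming a llama.cpp plugin registered under a device name containing "LLAMA_CPP" and a model file the plugin can parse on its own (the "model.gguf" path and device name here are illustrative assumptions, not taken from the commit):

    #include <openvino/openvino.hpp>
    #include <iostream>

    int main() {
        ov::Core core;

        // Compile directly from the file. With the commit above, CoreImpl
        // skips cache hashing for LLAMA_CPP plugins and forwards the raw
        // path to plugin.compile_model(model_path, config).
        // "model.gguf" and the "LLAMA_CPP" device name are assumptions
        // made for this example.
        ov::CompiledModel compiled = core.compile_model("model.gguf", "LLAMA_CPP");

        std::cout << "inputs: " << compiled.inputs().size() << std::endl;
        return 0;
    }

One consequence of the substring match on plugin.get_name() is that models for such plugins bypass OpenVINO's model cache entirely, presumably because the model file is already in the plugin's native format and caching a re-exported blob would add nothing.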
