From cd3452b3cf9d246250367d20de828ba7b1894edb Mon Sep 17 00:00:00 2001 From: wenyi5608 <93560477+wenyi5608@users.noreply.github.com> Date: Wed, 5 Mar 2025 15:43:40 +0800 Subject: [PATCH 01/49] Update continuous_batching_pipeline.hpp for remove adapters --- src/cpp/include/openvino/genai/continuous_batching_pipeline.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/cpp/include/openvino/genai/continuous_batching_pipeline.hpp b/src/cpp/include/openvino/genai/continuous_batching_pipeline.hpp index d161b7b29b..630f5a0883 100644 --- a/src/cpp/include/openvino/genai/continuous_batching_pipeline.hpp +++ b/src/cpp/include/openvino/genai/continuous_batching_pipeline.hpp @@ -156,5 +156,7 @@ class OPENVINO_GENAI_EXPORTS ContinuousBatchingPipeline { * @brief finish chat and clear kv cache. */ void finish_chat(); + + void remove_adapters(const ov::AnyMap& config_map); }; } From 58d79283c3a95f391bc25fb36fedcaaf1f02dc73 Mon Sep 17 00:00:00 2001 From: wenyi5608 <93560477+wenyi5608@users.noreply.github.com> Date: Wed, 5 Mar 2025 15:47:15 +0800 Subject: [PATCH 02/49] Update llm_pipeline.hpp remove adapters --- src/cpp/include/openvino/genai/llm_pipeline.hpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/cpp/include/openvino/genai/llm_pipeline.hpp b/src/cpp/include/openvino/genai/llm_pipeline.hpp index 00dda872f9..c6a8ed4c38 100644 --- a/src/cpp/include/openvino/genai/llm_pipeline.hpp +++ b/src/cpp/include/openvino/genai/llm_pipeline.hpp @@ -274,6 +274,18 @@ class OPENVINO_GENAI_EXPORTS LLMPipeline { * Turns off keeping KV cache between generate calls. */ void finish_chat(); + + /** + * @brief remove LoRA adapters. + * @param properties optional plugin properties, ov::genai::adapters property for LoRA adapters. + */ + template ::value, bool>::type = true> + util::StringAny remove_adapters(Properties&&... properties) { + remove_adapters(AnyMap{std::forward(properties)...}); + } + + void remove_adapters(const ov::AnyMap& config_map); private: std::unique_ptr m_pimpl; }; From 913086f7881a06503a79b9225f50214c545f9264 Mon Sep 17 00:00:00 2001 From: wenyi5608 <93560477+wenyi5608@users.noreply.github.com> Date: Wed, 5 Mar 2025 15:49:37 +0800 Subject: [PATCH 03/49] Update lora_adapter.hpp remove adapters --- src/cpp/include/openvino/genai/lora_adapter.hpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/cpp/include/openvino/genai/lora_adapter.hpp b/src/cpp/include/openvino/genai/lora_adapter.hpp index 754553fc43..36bca58175 100644 --- a/src/cpp/include/openvino/genai/lora_adapter.hpp +++ b/src/cpp/include/openvino/genai/lora_adapter.hpp @@ -193,6 +193,9 @@ class OPENVINO_GENAI_EXPORTS AdapterController { // Apply adapters configured in the current config set last time, or set and use new config given as optional `config` argument void apply(ov::InferRequest request, const std::optional& config = std::nullopt); + // Remove LoRA adapters + void remove_adapters(const std::optional& config); + // Returns true if a given name is one of the state names created by this adapter controller for dynamic LoRA // Helps to distinguish LoRA states from other states (e.g. KV cache state) in the model for a partial state reset. bool has_state_name(const std::string& name); From e10844ddfb18561a48562fce822005397a085b49 Mon Sep 17 00:00:00 2001 From: wenyi5608 <93560477+wenyi5608@users.noreply.github.com> Date: Wed, 5 Mar 2025 15:54:42 +0800 Subject: [PATCH 04/49] Update continuous_batching_adapter.hpp remove adapters --- src/cpp/src/continuous_batching_adapter.hpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/cpp/src/continuous_batching_adapter.hpp b/src/cpp/src/continuous_batching_adapter.hpp index 29fb9b0463..82ce8b2317 100644 --- a/src/cpp/src/continuous_batching_adapter.hpp +++ b/src/cpp/src/continuous_batching_adapter.hpp @@ -209,6 +209,10 @@ class ContinuousBatchingAdapter final : public LLMPipelineImplBase { void finish_chat() override { m_impl->finish_chat(); }; + + void remove_adapters(const ov::AnyMap& plugin_config) override { + m_impl->remove_adapters(plugin_config); + }; }; } // namespace ov::genai From 63a260c12ff66fcfc0d679ec9188d96f0805179d Mon Sep 17 00:00:00 2001 From: wenyi5608 <93560477+wenyi5608@users.noreply.github.com> Date: Wed, 5 Mar 2025 16:07:53 +0800 Subject: [PATCH 05/49] Update continuous_batching_impl.cpp remove adapters --- src/cpp/src/continuous_batching_impl.cpp | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/cpp/src/continuous_batching_impl.cpp b/src/cpp/src/continuous_batching_impl.cpp index 4b77a48eb9..a417dac295 100644 --- a/src/cpp/src/continuous_batching_impl.cpp +++ b/src/cpp/src/continuous_batching_impl.cpp @@ -150,10 +150,11 @@ void ContinuousBatchingPipeline::ContinuousBatchingImpl::initialize_pipeline( const ov::AnyMap& properties, const std::vector& kv_cache_config) { // apply LoRA - auto filtered_properties = extract_adapters_from_properties(properties, &m_generation_config.adapters); - if (m_generation_config.adapters) { - m_generation_config.adapters->set_tensor_name_prefix("base_model.model.model."); - m_adapter_controller = AdapterController(model, *m_generation_config.adapters, device); // TODO: Make the prefix name configurable + std::optional adapters; + auto filtered_properties = extract_adapters_from_properties(properties, &adapters); + if (adapters) { + adapters->set_tensor_name_prefix("base_model.model.model."); + m_adapter_controller = AdapterController(model, *adapters, device); // TODO: Make the prefix name configurable } // Extract sampler_num_threads property if exists and remove it from properties size_t sampler_num_threads = std::thread::hardware_concurrency(); @@ -429,6 +430,12 @@ void ContinuousBatchingPipeline::ContinuousBatchingImpl::set_adapters(const std: } } +void ContinuousBatchingPipeline::ContinuousBatchingImpl::remove_adapters(const std::optional& adapters) { + if (m_adapter_controller) { + m_adapter_controller->remove_adapters(adapters); + } +} + std::vector ContinuousBatchingPipeline::ContinuousBatchingImpl::generate(const std::vector& input_ids, const std::vector& sampling_params, From dc1e4455fef7255e3e5709806fe4ac4fad718f02 Mon Sep 17 00:00:00 2001 From: wenyi5608 <93560477+wenyi5608@users.noreply.github.com> Date: Wed, 5 Mar 2025 16:09:24 +0800 Subject: [PATCH 06/49] Update continuous_batching_impl.hpp remove adapters --- src/cpp/src/continuous_batching_impl.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/cpp/src/continuous_batching_impl.hpp b/src/cpp/src/continuous_batching_impl.hpp index 0b015f7976..f930b35e9a 100644 --- a/src/cpp/src/continuous_batching_impl.hpp +++ b/src/cpp/src/continuous_batching_impl.hpp @@ -139,5 +139,7 @@ class ContinuousBatchingPipeline::ContinuousBatchingImpl : public ContinuousBatc * Updates LoRA adapters for current generation call */ void set_adapters(const std::optional& adapters); + + void remove_adapters(const std::optional& adapters) override; }; } // namespace ov::genai From a61e9663a894c514ad047744955fc35ed3cf5206 Mon Sep 17 00:00:00 2001 From: wenyi5608 <93560477+wenyi5608@users.noreply.github.com> Date: Wed, 5 Mar 2025 16:11:39 +0800 Subject: [PATCH 07/49] Update continuous_batching_pipeline.cpp --- src/cpp/src/continuous_batching_pipeline.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/cpp/src/continuous_batching_pipeline.cpp b/src/cpp/src/continuous_batching_pipeline.cpp index 4efd477211..b4d2fe4bde 100644 --- a/src/cpp/src/continuous_batching_pipeline.cpp +++ b/src/cpp/src/continuous_batching_pipeline.cpp @@ -237,3 +237,7 @@ void ContinuousBatchingPipeline::start_chat(const std::string& system_message) { void ContinuousBatchingPipeline::finish_chat() { m_impl->finish_chat(); }; + +void ContinuousBatchingPipeline::remove_adapters(const ov::AnyMap& plugin_config){ + m_impl->remove_adapters(plugin_config); +}; From 9d03f491e7b81baee1ae9fedd1f9ce76538880ed Mon Sep 17 00:00:00 2001 From: wenyi5608 <93560477+wenyi5608@users.noreply.github.com> Date: Wed, 5 Mar 2025 16:15:31 +0800 Subject: [PATCH 08/49] Update icontinuous_batching.cpp remove adapters --- src/cpp/src/icontinuous_batching.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/cpp/src/icontinuous_batching.cpp b/src/cpp/src/icontinuous_batching.cpp index 21b659804f..37edbcabb3 100644 --- a/src/cpp/src/icontinuous_batching.cpp +++ b/src/cpp/src/icontinuous_batching.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 #include "icontinuous_batching.hpp" +#include "lora_helper.hpp" namespace ov::genai { @@ -39,6 +40,12 @@ void ContinuousBatchingPipeline::IContinuousBatchingPipeline::finish_chat() { m_history.clear(); }; +void ContinuousBatchingPipeline::IContinuousBatchingPipeline::remove_adapters(const ov::AnyMap& plugin_config) { + std::optional adapters; + auto filtered_properties = extract_adapters_from_properties(plugin_config, &adapters); + remove_adapters(adapters); +}; + std::vector ContinuousBatchingPipeline::IContinuousBatchingPipeline::generate( const std::vector& prompts, From d180c877677d1b758313a1881118abe86cf2c114 Mon Sep 17 00:00:00 2001 From: wenyi5608 <93560477+wenyi5608@users.noreply.github.com> Date: Wed, 5 Mar 2025 16:17:24 +0800 Subject: [PATCH 09/49] Update icontinuous_batching.hpp remove adapters --- src/cpp/src/icontinuous_batching.hpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/cpp/src/icontinuous_batching.hpp b/src/cpp/src/icontinuous_batching.hpp index a1700c9c31..949f0e2b46 100644 --- a/src/cpp/src/icontinuous_batching.hpp +++ b/src/cpp/src/icontinuous_batching.hpp @@ -133,5 +133,9 @@ class ContinuousBatchingPipeline::IContinuousBatchingPipeline { * Ends chat */ void finish_chat(); + + void remove_adapters(const ov::AnyMap& plugin_config); + + virtual void remove_adapters(const std::optional& config) = 0; }; -} \ No newline at end of file +} From 3fe9c56634b27ec70cff81fd83f712f668e2e3b6 Mon Sep 17 00:00:00 2001 From: wenyi5608 <93560477+wenyi5608@users.noreply.github.com> Date: Wed, 5 Mar 2025 16:22:24 +0800 Subject: [PATCH 10/49] Update llm_pipeline.cpp remove adapters --- src/cpp/src/llm_pipeline.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/cpp/src/llm_pipeline.cpp b/src/cpp/src/llm_pipeline.cpp index d06a5b06e7..5790bb1dcd 100644 --- a/src/cpp/src/llm_pipeline.cpp +++ b/src/cpp/src/llm_pipeline.cpp @@ -298,6 +298,10 @@ void ov::genai::LLMPipeline::finish_chat() { m_pimpl->finish_chat(); } +void ov::genai::LLMPipeline::remove_adapters(const ov::AnyMap& config_map) { + m_pimpl->remove_adapters(config_map); +} + void ov::genai::LLMPipeline::set_generation_config(const GenerationConfig& config) { m_pimpl->set_generation_config(config); } From c15f23d9ec58887f478656eea336d42959dc9a30 Mon Sep 17 00:00:00 2001 From: wenyi5608 <93560477+wenyi5608@users.noreply.github.com> Date: Wed, 5 Mar 2025 16:24:38 +0800 Subject: [PATCH 11/49] Update llm_pipeline_base.hpp remove adapters --- src/cpp/src/llm_pipeline_base.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/cpp/src/llm_pipeline_base.hpp b/src/cpp/src/llm_pipeline_base.hpp index 8f849e89c4..35c411ac43 100644 --- a/src/cpp/src/llm_pipeline_base.hpp +++ b/src/cpp/src/llm_pipeline_base.hpp @@ -54,6 +54,8 @@ class LLMPipelineImplBase { virtual void start_chat(const std::string& system_message) = 0; virtual void finish_chat() = 0; + virtual void remove_adapters(const ov::AnyMap& plugin_config) = 0; + virtual ~LLMPipelineImplBase() = default; void save_load_time(std::chrono::steady_clock::time_point start_time) { From 57f75cf8b06ead12bf5153bb61cc558de7f2f754 Mon Sep 17 00:00:00 2001 From: wenyi5608 <93560477+wenyi5608@users.noreply.github.com> Date: Wed, 5 Mar 2025 16:27:56 +0800 Subject: [PATCH 12/49] Update llm_pipeline_stateful.cpp remove adapters --- src/cpp/src/llm_pipeline_stateful.cpp | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/cpp/src/llm_pipeline_stateful.cpp b/src/cpp/src/llm_pipeline_stateful.cpp index 5ca13269b8..b17a06c895 100644 --- a/src/cpp/src/llm_pipeline_stateful.cpp +++ b/src/cpp/src/llm_pipeline_stateful.cpp @@ -62,10 +62,11 @@ StatefulLLMPipeline::StatefulLLMPipeline( if (!m_use_full_chat_history) m_kv_history_trim_manager.kv_cache_seq_length_axis = kv_pos.seq_len; - auto filtered_properties = extract_adapters_from_properties(properties, &m_generation_config.adapters); - if (m_generation_config.adapters) { - m_generation_config.adapters->set_tensor_name_prefix("base_model.model."); - m_adapter_controller = AdapterController(model, *m_generation_config.adapters, device); // TODO: Make the prefix name configurable + std::optional adapters; + auto filtered_properties = extract_adapters_from_properties(properties, &adapters); + if (adapters) { + adapters->set_tensor_name_prefix("base_model.model."); + m_adapter_controller = AdapterController(model, *adapters, device); // TODO: Make the prefix name configurable } ov::CompiledModel compiled_model; if (m_is_npu) { @@ -414,4 +415,12 @@ void StatefulLLMPipeline::finish_chat() { } } +void StatefulLLMPipeline::remove_adapters(const ov::AnyMap& plugin_config) { + std::optional adapters; + auto filtered_properties = extract_adapters_from_properties(plugin_config, &adapters); + + if (m_adapter_controller) { + m_adapter_controller->remove_adapters(adapters); + } +} } // namespace ov::genai From 4fd3200252b0d2ae11f176658ee1a80523fd873b Mon Sep 17 00:00:00 2001 From: wenyi5608 <93560477+wenyi5608@users.noreply.github.com> Date: Wed, 5 Mar 2025 16:29:18 +0800 Subject: [PATCH 13/49] Update llm_pipeline_stateful.hpp remove adapters --- src/cpp/src/llm_pipeline_stateful.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/cpp/src/llm_pipeline_stateful.hpp b/src/cpp/src/llm_pipeline_stateful.hpp index 3558c4c1f3..cabb96e5b5 100644 --- a/src/cpp/src/llm_pipeline_stateful.hpp +++ b/src/cpp/src/llm_pipeline_stateful.hpp @@ -79,6 +79,8 @@ class StatefulLLMPipeline final : public LLMPipelineImplBase { void start_chat(const std::string& system_message) override; void finish_chat() override; + + void remove_adapters(const ov::AnyMap& plugin_config) override; }; } // namespace ov::genai From 205279528bdf7a2cd2e9cd2051aef2a9e0b82670 Mon Sep 17 00:00:00 2001 From: wenyi5608 <93560477+wenyi5608@users.noreply.github.com> Date: Wed, 5 Mar 2025 16:31:41 +0800 Subject: [PATCH 14/49] Update llm_pipeline_static.cpp remove adapters --- src/cpp/src/llm_pipeline_static.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/cpp/src/llm_pipeline_static.cpp b/src/cpp/src/llm_pipeline_static.cpp index 2af32642cc..3a4d57d8e9 100644 --- a/src/cpp/src/llm_pipeline_static.cpp +++ b/src/cpp/src/llm_pipeline_static.cpp @@ -14,6 +14,7 @@ #include "openvino/genai/text_streamer.hpp" #include +#include "lora_helper.hpp" namespace { @@ -346,6 +347,14 @@ void StatefulLLMPipeline::finish_chat() { m_history.clear(); }; +void StatefulLLMPipeline::remove_adapters(const ov::AnyMap& plugin_config) { + std::optional adapters; + auto filtered_properties = extract_adapters_from_properties(plugin_config, &adapters); + + if (m_adapter_controller) { + m_adapter_controller->remove_adapters(adapters); + } +}; std::unique_ptr LLMPipelineFactory::create(const std::filesystem::path& models_path, From 8c35a3fc49091d7caa1d7219b9ed20ee99302c21 Mon Sep 17 00:00:00 2001 From: wenyi5608 <93560477+wenyi5608@users.noreply.github.com> Date: Wed, 5 Mar 2025 16:33:04 +0800 Subject: [PATCH 15/49] Update llm_pipeline_static.hpp remove adapters --- src/cpp/src/llm_pipeline_static.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/cpp/src/llm_pipeline_static.hpp b/src/cpp/src/llm_pipeline_static.hpp index 45275153f4..8ed3c11b76 100644 --- a/src/cpp/src/llm_pipeline_static.hpp +++ b/src/cpp/src/llm_pipeline_static.hpp @@ -57,6 +57,7 @@ class StatefulLLMPipeline : public LLMPipelineImplBase { void start_chat(const std::string& system_message) override; void finish_chat() override; + void remove_adapters(const ov::AnyMap& plugin_config) override; private: uint32_t m_max_prompt_len = 0u; From 78f4be734591dcc152f425a9069eeab7c2cba8ad Mon Sep 17 00:00:00 2001 From: wenyi5608 <93560477+wenyi5608@users.noreply.github.com> Date: Wed, 5 Mar 2025 16:59:37 +0800 Subject: [PATCH 16/49] Update lora_adapter.cpp remove adapters --- src/cpp/src/lora_adapter.cpp | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/src/cpp/src/lora_adapter.cpp b/src/cpp/src/lora_adapter.cpp index db0f6fd0ca..88e2bddc16 100644 --- a/src/cpp/src/lora_adapter.cpp +++ b/src/cpp/src/lora_adapter.cpp @@ -1067,6 +1067,27 @@ struct AdapterControllerImpl { } } } + void remove_adapters(const AdapterConfig& config) { + const auto &adapters1 = current_config.get_adapters(), adapters2 = config.get_adapters(); + + if (adapters2.size() > 0) { + if (adapters1.size() > 0) { + // if current adpater need to remove, remove from current_config.adapters + for (const auto& adapter1 : adapters1) { + for (const auto& adapter2 : adapters2) { + if (adapter1 == adapter2) { + current_config.remove(adapter1); + need_full_apply = true; + } + } + } + } + + for (const auto& adapter2 : adapters2) { + adapter2.~Adapter(); + } + } + } bool has_state_name(const std::string& name) { return variable_names.count(name); @@ -1382,6 +1403,12 @@ void AdapterController::apply(ov::InferRequest request, const std::optional& config) { + OPENVINO_ASSERT(m_pimpl || !config || !*config, "Adapters are removed."); + if (m_pimpl) { + m_pimpl->remove_adapters(*config); + } +} bool AdapterController::has_state_name(const std::string& name) { return m_pimpl->has_state_name(name); From 2b076fcaa812b95c8e0be45e304f1379785d886c Mon Sep 17 00:00:00 2001 From: wenyi5608 <93560477+wenyi5608@users.noreply.github.com> Date: Wed, 5 Mar 2025 17:09:58 +0800 Subject: [PATCH 17/49] Update prompt_lookup_impl.cpp remove adapters --- src/cpp/src/prompt_lookup/prompt_lookup_impl.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/cpp/src/prompt_lookup/prompt_lookup_impl.cpp b/src/cpp/src/prompt_lookup/prompt_lookup_impl.cpp index 03d41cc462..254d667312 100644 --- a/src/cpp/src/prompt_lookup/prompt_lookup_impl.cpp +++ b/src/cpp/src/prompt_lookup/prompt_lookup_impl.cpp @@ -90,6 +90,10 @@ void ContinuousBatchingPipeline::PromptLookupImpl::step() { } } +void ContinuousBatchingPipeline::PromptLookupImpl::remove_adapters(const std::optional& adapters) { + m_pipeline->remove_adapters(adapters); +} + std::vector ContinuousBatchingPipeline::PromptLookupImpl::generate(const std::vector& input_ids, const std::vector& sampling_params, From 837070bb424c3ac121fa888cef9d7f3ab6c91611 Mon Sep 17 00:00:00 2001 From: wenyi5608 <93560477+wenyi5608@users.noreply.github.com> Date: Wed, 5 Mar 2025 17:11:19 +0800 Subject: [PATCH 18/49] Update prompt_lookup_impl.hpp remove adapters --- src/cpp/src/prompt_lookup/prompt_lookup_impl.hpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/cpp/src/prompt_lookup/prompt_lookup_impl.hpp b/src/cpp/src/prompt_lookup/prompt_lookup_impl.hpp index 0535931d81..02156a150e 100644 --- a/src/cpp/src/prompt_lookup/prompt_lookup_impl.hpp +++ b/src/cpp/src/prompt_lookup/prompt_lookup_impl.hpp @@ -47,6 +47,8 @@ class ContinuousBatchingPipeline::PromptLookupImpl : public ContinuousBatchingPi const StreamerVariant& streamer) override; SpeculativeDecodingMetrics get_metrics(); + + void remove_adapters(const std::optional& adapters) override; }; -} \ No newline at end of file +} From 6e146763fcdf1d13dde39f846f92d5cc32a6fe64 Mon Sep 17 00:00:00 2001 From: wenyi5608 <93560477+wenyi5608@users.noreply.github.com> Date: Wed, 5 Mar 2025 17:13:32 +0800 Subject: [PATCH 19/49] Update speculative_decoding_impl.cpp remove adapters --- .../src/speculative_decoding/speculative_decoding_impl.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/cpp/src/speculative_decoding/speculative_decoding_impl.cpp b/src/cpp/src/speculative_decoding/speculative_decoding_impl.cpp index 8d3303cdb5..de276ae4d8 100644 --- a/src/cpp/src/speculative_decoding/speculative_decoding_impl.cpp +++ b/src/cpp/src/speculative_decoding/speculative_decoding_impl.cpp @@ -217,6 +217,11 @@ void ContinuousBatchingPipeline::SpeculativeDecodingImpl::step() { step_timer.end(); } +void ContinuousBatchingPipeline::SpeculativeDecodingImpl::remove_adapters(const std::optional& adapters) { + m_main_pipeline->remove_adapters(adapters); + m_draft_pipeline->remove_adapters(adapters); +} + std::vector ContinuousBatchingPipeline::SpeculativeDecodingImpl::generate(const std::vector& input_ids, const std::vector& sampling_params, From 972527e42d1ae03326d341356bccbb3a606bda2b Mon Sep 17 00:00:00 2001 From: wenyi5608 <93560477+wenyi5608@users.noreply.github.com> Date: Wed, 5 Mar 2025 17:14:30 +0800 Subject: [PATCH 20/49] Update speculative_decoding_impl.hpp remove adapters --- .../src/speculative_decoding/speculative_decoding_impl.hpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/cpp/src/speculative_decoding/speculative_decoding_impl.hpp b/src/cpp/src/speculative_decoding/speculative_decoding_impl.hpp index 4023519287..bc5f3c6707 100644 --- a/src/cpp/src/speculative_decoding/speculative_decoding_impl.hpp +++ b/src/cpp/src/speculative_decoding/speculative_decoding_impl.hpp @@ -69,6 +69,8 @@ class ContinuousBatchingPipeline::SpeculativeDecodingImpl : public ContinuousBat const StreamerVariant& streamer) override; SpeculativeDecodingMetrics get_speculative_decoding_metrics(); + + void remove_adapters(const std::optional& adapters) override; }; -} \ No newline at end of file +} From 42e5978dbe1de116ff631d53ef110e103383909a Mon Sep 17 00:00:00 2001 From: wenyi5608 <93560477+wenyi5608@users.noreply.github.com> Date: Fri, 7 Mar 2025 11:46:23 +0800 Subject: [PATCH 21/49] Update continuous_batching_impl.cpp remove adapters --- src/cpp/src/continuous_batching_impl.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/cpp/src/continuous_batching_impl.cpp b/src/cpp/src/continuous_batching_impl.cpp index a417dac295..4a7c0069e2 100644 --- a/src/cpp/src/continuous_batching_impl.cpp +++ b/src/cpp/src/continuous_batching_impl.cpp @@ -150,11 +150,10 @@ void ContinuousBatchingPipeline::ContinuousBatchingImpl::initialize_pipeline( const ov::AnyMap& properties, const std::vector& kv_cache_config) { // apply LoRA - std::optional adapters; - auto filtered_properties = extract_adapters_from_properties(properties, &adapters); - if (adapters) { - adapters->set_tensor_name_prefix("base_model.model.model."); - m_adapter_controller = AdapterController(model, *adapters, device); // TODO: Make the prefix name configurable + auto filtered_properties = extract_adapters_from_properties(properties, &m_generation_config.adapters); + if (m_generation_config.adapters) { + m_generation_config.adapters->set_tensor_name_prefix("base_model.model.model."); + m_adapter_controller = AdapterController(model, *m_generation_config.adapters, device); // TODO: Make the prefix name configurable } // Extract sampler_num_threads property if exists and remove it from properties size_t sampler_num_threads = std::thread::hardware_concurrency(); From d5cbea930245e769d769db6e963412ac4019f6b3 Mon Sep 17 00:00:00 2001 From: wenyi5608 <93560477+wenyi5608@users.noreply.github.com> Date: Fri, 7 Mar 2025 11:50:35 +0800 Subject: [PATCH 22/49] Update llm_pipeline_stateful.cpp remove adapters --- src/cpp/src/llm_pipeline_stateful.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/cpp/src/llm_pipeline_stateful.cpp b/src/cpp/src/llm_pipeline_stateful.cpp index b17a06c895..5872859908 100644 --- a/src/cpp/src/llm_pipeline_stateful.cpp +++ b/src/cpp/src/llm_pipeline_stateful.cpp @@ -62,11 +62,10 @@ StatefulLLMPipeline::StatefulLLMPipeline( if (!m_use_full_chat_history) m_kv_history_trim_manager.kv_cache_seq_length_axis = kv_pos.seq_len; - std::optional adapters; - auto filtered_properties = extract_adapters_from_properties(properties, &adapters); - if (adapters) { - adapters->set_tensor_name_prefix("base_model.model."); - m_adapter_controller = AdapterController(model, *adapters, device); // TODO: Make the prefix name configurable + auto filtered_properties = extract_adapters_from_properties(properties, &m_generation_config.adapters); + if (m_generation_config.adapters) { + m_generation_config.adapters->set_tensor_name_prefix("base_model.model."); + m_adapter_controller = AdapterController(model, *m_generation_config.adapters, device); // TODO: Make the prefix name configurable } ov::CompiledModel compiled_model; if (m_is_npu) { From f5b1b10f7057a7ac8fe84f7273675ef254b9621d Mon Sep 17 00:00:00 2001 From: wenyi5608 <93560477+wenyi5608@users.noreply.github.com> Date: Sat, 8 Mar 2025 09:09:10 +0800 Subject: [PATCH 23/49] Update lora_adapter.cpp --- src/cpp/src/lora_adapter.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/cpp/src/lora_adapter.cpp b/src/cpp/src/lora_adapter.cpp index 88e2bddc16..57c4f220eb 100644 --- a/src/cpp/src/lora_adapter.cpp +++ b/src/cpp/src/lora_adapter.cpp @@ -1082,10 +1082,6 @@ struct AdapterControllerImpl { } } } - - for (const auto& adapter2 : adapters2) { - adapter2.~Adapter(); - } } } From bfa970e3b2cbc8c6d35014d015066e10a6c21dba Mon Sep 17 00:00:00 2001 From: wenyi5608 <93560477+wenyi5608@users.noreply.github.com> Date: Mon, 17 Mar 2025 15:25:57 +0800 Subject: [PATCH 24/49] Update continuous_batching_pipeline.hpp --- src/cpp/include/openvino/genai/continuous_batching_pipeline.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/cpp/include/openvino/genai/continuous_batching_pipeline.hpp b/src/cpp/include/openvino/genai/continuous_batching_pipeline.hpp index 630f5a0883..d161b7b29b 100644 --- a/src/cpp/include/openvino/genai/continuous_batching_pipeline.hpp +++ b/src/cpp/include/openvino/genai/continuous_batching_pipeline.hpp @@ -156,7 +156,5 @@ class OPENVINO_GENAI_EXPORTS ContinuousBatchingPipeline { * @brief finish chat and clear kv cache. */ void finish_chat(); - - void remove_adapters(const ov::AnyMap& config_map); }; } From 31e21a84cbaf85e46bfcca8d132ee2c3c54981e6 Mon Sep 17 00:00:00 2001 From: wenyi5608 <93560477+wenyi5608@users.noreply.github.com> Date: Mon, 17 Mar 2025 15:27:01 +0800 Subject: [PATCH 25/49] Update llm_pipeline.hpp --- src/cpp/include/openvino/genai/llm_pipeline.hpp | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/src/cpp/include/openvino/genai/llm_pipeline.hpp b/src/cpp/include/openvino/genai/llm_pipeline.hpp index c6a8ed4c38..009f08138b 100644 --- a/src/cpp/include/openvino/genai/llm_pipeline.hpp +++ b/src/cpp/include/openvino/genai/llm_pipeline.hpp @@ -275,17 +275,6 @@ class OPENVINO_GENAI_EXPORTS LLMPipeline { */ void finish_chat(); - /** - * @brief remove LoRA adapters. - * @param properties optional plugin properties, ov::genai::adapters property for LoRA adapters. - */ - template ::value, bool>::type = true> - util::StringAny remove_adapters(Properties&&... properties) { - remove_adapters(AnyMap{std::forward(properties)...}); - } - - void remove_adapters(const ov::AnyMap& config_map); private: std::unique_ptr m_pimpl; }; From 4a5e357415cacbde7c70a4c0bb401f30a1d61fe8 Mon Sep 17 00:00:00 2001 From: wenyi5608 <93560477+wenyi5608@users.noreply.github.com> Date: Mon, 17 Mar 2025 15:28:28 +0800 Subject: [PATCH 26/49] Update lora_adapter.hpp --- src/cpp/include/openvino/genai/lora_adapter.hpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/cpp/include/openvino/genai/lora_adapter.hpp b/src/cpp/include/openvino/genai/lora_adapter.hpp index 36bca58175..754553fc43 100644 --- a/src/cpp/include/openvino/genai/lora_adapter.hpp +++ b/src/cpp/include/openvino/genai/lora_adapter.hpp @@ -193,9 +193,6 @@ class OPENVINO_GENAI_EXPORTS AdapterController { // Apply adapters configured in the current config set last time, or set and use new config given as optional `config` argument void apply(ov::InferRequest request, const std::optional& config = std::nullopt); - // Remove LoRA adapters - void remove_adapters(const std::optional& config); - // Returns true if a given name is one of the state names created by this adapter controller for dynamic LoRA // Helps to distinguish LoRA states from other states (e.g. KV cache state) in the model for a partial state reset. bool has_state_name(const std::string& name); From 5236d204cf0fc7b22643e6cf55b9446fb9c4cd7e Mon Sep 17 00:00:00 2001 From: wenyi5608 <93560477+wenyi5608@users.noreply.github.com> Date: Mon, 17 Mar 2025 15:30:25 +0800 Subject: [PATCH 27/49] Update continuous_batching_adapter.hpp --- src/cpp/src/continuous_batching_adapter.hpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/cpp/src/continuous_batching_adapter.hpp b/src/cpp/src/continuous_batching_adapter.hpp index 82ce8b2317..29fb9b0463 100644 --- a/src/cpp/src/continuous_batching_adapter.hpp +++ b/src/cpp/src/continuous_batching_adapter.hpp @@ -209,10 +209,6 @@ class ContinuousBatchingAdapter final : public LLMPipelineImplBase { void finish_chat() override { m_impl->finish_chat(); }; - - void remove_adapters(const ov::AnyMap& plugin_config) override { - m_impl->remove_adapters(plugin_config); - }; }; } // namespace ov::genai From 29779d17a3bd0d3f4d2085d2ed8bce6f47ce0baf Mon Sep 17 00:00:00 2001 From: wenyi5608 <93560477+wenyi5608@users.noreply.github.com> Date: Mon, 17 Mar 2025 15:31:57 +0800 Subject: [PATCH 28/49] Update continuous_batching_impl.cpp --- src/cpp/src/continuous_batching_impl.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/cpp/src/continuous_batching_impl.cpp b/src/cpp/src/continuous_batching_impl.cpp index 4a7c0069e2..4b77a48eb9 100644 --- a/src/cpp/src/continuous_batching_impl.cpp +++ b/src/cpp/src/continuous_batching_impl.cpp @@ -429,12 +429,6 @@ void ContinuousBatchingPipeline::ContinuousBatchingImpl::set_adapters(const std: } } -void ContinuousBatchingPipeline::ContinuousBatchingImpl::remove_adapters(const std::optional& adapters) { - if (m_adapter_controller) { - m_adapter_controller->remove_adapters(adapters); - } -} - std::vector ContinuousBatchingPipeline::ContinuousBatchingImpl::generate(const std::vector& input_ids, const std::vector& sampling_params, From 6dd2df1783d4d6f4de44e2174db66e058c096c20 Mon Sep 17 00:00:00 2001 From: wenyi5608 <93560477+wenyi5608@users.noreply.github.com> Date: Mon, 17 Mar 2025 15:39:29 +0800 Subject: [PATCH 29/49] Update continuous_batching_impl.hpp --- src/cpp/src/continuous_batching_impl.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/cpp/src/continuous_batching_impl.hpp b/src/cpp/src/continuous_batching_impl.hpp index f930b35e9a..0b015f7976 100644 --- a/src/cpp/src/continuous_batching_impl.hpp +++ b/src/cpp/src/continuous_batching_impl.hpp @@ -139,7 +139,5 @@ class ContinuousBatchingPipeline::ContinuousBatchingImpl : public ContinuousBatc * Updates LoRA adapters for current generation call */ void set_adapters(const std::optional& adapters); - - void remove_adapters(const std::optional& adapters) override; }; } // namespace ov::genai From f9e23d38a4849c8fc352ccf1aab12a65f05509e6 Mon Sep 17 00:00:00 2001 From: wenyi5608 <93560477+wenyi5608@users.noreply.github.com> Date: Mon, 17 Mar 2025 15:40:50 +0800 Subject: [PATCH 30/49] Update continuous_batching_pipeline.cpp --- src/cpp/src/continuous_batching_pipeline.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/cpp/src/continuous_batching_pipeline.cpp b/src/cpp/src/continuous_batching_pipeline.cpp index b4d2fe4bde..4efd477211 100644 --- a/src/cpp/src/continuous_batching_pipeline.cpp +++ b/src/cpp/src/continuous_batching_pipeline.cpp @@ -237,7 +237,3 @@ void ContinuousBatchingPipeline::start_chat(const std::string& system_message) { void ContinuousBatchingPipeline::finish_chat() { m_impl->finish_chat(); }; - -void ContinuousBatchingPipeline::remove_adapters(const ov::AnyMap& plugin_config){ - m_impl->remove_adapters(plugin_config); -}; From be4b388c60d6f37a0ec7c5521058deaab15dc537 Mon Sep 17 00:00:00 2001 From: wenyi5608 <93560477+wenyi5608@users.noreply.github.com> Date: Mon, 17 Mar 2025 15:43:22 +0800 Subject: [PATCH 31/49] Update icontinuous_batching.cpp --- src/cpp/src/icontinuous_batching.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/cpp/src/icontinuous_batching.cpp b/src/cpp/src/icontinuous_batching.cpp index 37edbcabb3..b63527091c 100644 --- a/src/cpp/src/icontinuous_batching.cpp +++ b/src/cpp/src/icontinuous_batching.cpp @@ -40,12 +40,6 @@ void ContinuousBatchingPipeline::IContinuousBatchingPipeline::finish_chat() { m_history.clear(); }; -void ContinuousBatchingPipeline::IContinuousBatchingPipeline::remove_adapters(const ov::AnyMap& plugin_config) { - std::optional adapters; - auto filtered_properties = extract_adapters_from_properties(plugin_config, &adapters); - remove_adapters(adapters); -}; - std::vector ContinuousBatchingPipeline::IContinuousBatchingPipeline::generate( const std::vector& prompts, From a0c053b1f095d76a7b3a88c57d39a4f671568d05 Mon Sep 17 00:00:00 2001 From: wenyi5608 <93560477+wenyi5608@users.noreply.github.com> Date: Mon, 17 Mar 2025 15:44:16 +0800 Subject: [PATCH 32/49] Update icontinuous_batching.hpp --- src/cpp/src/icontinuous_batching.hpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/cpp/src/icontinuous_batching.hpp b/src/cpp/src/icontinuous_batching.hpp index 949f0e2b46..229bd2ae8c 100644 --- a/src/cpp/src/icontinuous_batching.hpp +++ b/src/cpp/src/icontinuous_batching.hpp @@ -133,9 +133,5 @@ class ContinuousBatchingPipeline::IContinuousBatchingPipeline { * Ends chat */ void finish_chat(); - - void remove_adapters(const ov::AnyMap& plugin_config); - - virtual void remove_adapters(const std::optional& config) = 0; }; } From aeff485d5ac4e17a09689ebbc94fe88f194e88d4 Mon Sep 17 00:00:00 2001 From: wenyi5608 <93560477+wenyi5608@users.noreply.github.com> Date: Mon, 17 Mar 2025 15:45:03 +0800 Subject: [PATCH 33/49] Update llm_pipeline.cpp --- src/cpp/src/llm_pipeline.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/cpp/src/llm_pipeline.cpp b/src/cpp/src/llm_pipeline.cpp index 5790bb1dcd..d06a5b06e7 100644 --- a/src/cpp/src/llm_pipeline.cpp +++ b/src/cpp/src/llm_pipeline.cpp @@ -298,10 +298,6 @@ void ov::genai::LLMPipeline::finish_chat() { m_pimpl->finish_chat(); } -void ov::genai::LLMPipeline::remove_adapters(const ov::AnyMap& config_map) { - m_pimpl->remove_adapters(config_map); -} - void ov::genai::LLMPipeline::set_generation_config(const GenerationConfig& config) { m_pimpl->set_generation_config(config); } From 56d55615f532ae6b995a2f49568f89ba58de86dd Mon Sep 17 00:00:00 2001 From: wenyi5608 <93560477+wenyi5608@users.noreply.github.com> Date: Mon, 17 Mar 2025 15:45:39 +0800 Subject: [PATCH 34/49] Update llm_pipeline_base.hpp --- src/cpp/src/llm_pipeline_base.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/cpp/src/llm_pipeline_base.hpp b/src/cpp/src/llm_pipeline_base.hpp index 35c411ac43..8f849e89c4 100644 --- a/src/cpp/src/llm_pipeline_base.hpp +++ b/src/cpp/src/llm_pipeline_base.hpp @@ -54,8 +54,6 @@ class LLMPipelineImplBase { virtual void start_chat(const std::string& system_message) = 0; virtual void finish_chat() = 0; - virtual void remove_adapters(const ov::AnyMap& plugin_config) = 0; - virtual ~LLMPipelineImplBase() = default; void save_load_time(std::chrono::steady_clock::time_point start_time) { From c3a0db05bf8cb2dd7dfeebeaa55d5480c03fb74b Mon Sep 17 00:00:00 2001 From: wenyi5608 <93560477+wenyi5608@users.noreply.github.com> Date: Mon, 17 Mar 2025 15:46:52 +0800 Subject: [PATCH 35/49] Update llm_pipeline_stateful.cpp --- src/cpp/src/llm_pipeline_stateful.cpp | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/cpp/src/llm_pipeline_stateful.cpp b/src/cpp/src/llm_pipeline_stateful.cpp index 5872859908..5ca13269b8 100644 --- a/src/cpp/src/llm_pipeline_stateful.cpp +++ b/src/cpp/src/llm_pipeline_stateful.cpp @@ -414,12 +414,4 @@ void StatefulLLMPipeline::finish_chat() { } } -void StatefulLLMPipeline::remove_adapters(const ov::AnyMap& plugin_config) { - std::optional adapters; - auto filtered_properties = extract_adapters_from_properties(plugin_config, &adapters); - - if (m_adapter_controller) { - m_adapter_controller->remove_adapters(adapters); - } -} } // namespace ov::genai From 5b025376a6d218924b1b0ee18bf5dd1acf4ac170 Mon Sep 17 00:00:00 2001 From: wenyi5608 <93560477+wenyi5608@users.noreply.github.com> Date: Mon, 17 Mar 2025 15:47:33 +0800 Subject: [PATCH 36/49] Update llm_pipeline_stateful.hpp --- src/cpp/src/llm_pipeline_stateful.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/cpp/src/llm_pipeline_stateful.hpp b/src/cpp/src/llm_pipeline_stateful.hpp index cabb96e5b5..3558c4c1f3 100644 --- a/src/cpp/src/llm_pipeline_stateful.hpp +++ b/src/cpp/src/llm_pipeline_stateful.hpp @@ -79,8 +79,6 @@ class StatefulLLMPipeline final : public LLMPipelineImplBase { void start_chat(const std::string& system_message) override; void finish_chat() override; - - void remove_adapters(const ov::AnyMap& plugin_config) override; }; } // namespace ov::genai From 7de647a4ff210e68ca3c733c5441ca2a28593656 Mon Sep 17 00:00:00 2001 From: wenyi5608 <93560477+wenyi5608@users.noreply.github.com> Date: Mon, 17 Mar 2025 15:48:08 +0800 Subject: [PATCH 37/49] Update llm_pipeline_static.cpp --- src/cpp/src/llm_pipeline_static.cpp | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/cpp/src/llm_pipeline_static.cpp b/src/cpp/src/llm_pipeline_static.cpp index 3a4d57d8e9..e93ae7ed9c 100644 --- a/src/cpp/src/llm_pipeline_static.cpp +++ b/src/cpp/src/llm_pipeline_static.cpp @@ -347,15 +347,6 @@ void StatefulLLMPipeline::finish_chat() { m_history.clear(); }; -void StatefulLLMPipeline::remove_adapters(const ov::AnyMap& plugin_config) { - std::optional adapters; - auto filtered_properties = extract_adapters_from_properties(plugin_config, &adapters); - - if (m_adapter_controller) { - m_adapter_controller->remove_adapters(adapters); - } -}; - std::unique_ptr LLMPipelineFactory::create(const std::filesystem::path& models_path, const ov::AnyMap& config) { From 8b4bf868cbba92b83f3b99f240babd7b23151fec Mon Sep 17 00:00:00 2001 From: wenyi5608 <93560477+wenyi5608@users.noreply.github.com> Date: Mon, 17 Mar 2025 15:48:46 +0800 Subject: [PATCH 38/49] Update llm_pipeline_static.hpp --- src/cpp/src/llm_pipeline_static.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/cpp/src/llm_pipeline_static.hpp b/src/cpp/src/llm_pipeline_static.hpp index 8ed3c11b76..45275153f4 100644 --- a/src/cpp/src/llm_pipeline_static.hpp +++ b/src/cpp/src/llm_pipeline_static.hpp @@ -57,7 +57,6 @@ class StatefulLLMPipeline : public LLMPipelineImplBase { void start_chat(const std::string& system_message) override; void finish_chat() override; - void remove_adapters(const ov::AnyMap& plugin_config) override; private: uint32_t m_max_prompt_len = 0u; From 352a82b82932309cff3cbf0b62aab55bf40a7a16 Mon Sep 17 00:00:00 2001 From: wenyi5608 <93560477+wenyi5608@users.noreply.github.com> Date: Mon, 17 Mar 2025 15:55:34 +0800 Subject: [PATCH 39/49] Update lora_adapter.cpp --- src/cpp/src/lora_adapter.cpp | 58 +++++++++++++++--------------------- 1 file changed, 24 insertions(+), 34 deletions(-) diff --git a/src/cpp/src/lora_adapter.cpp b/src/cpp/src/lora_adapter.cpp index 57c4f220eb..beecd93e2d 100644 --- a/src/cpp/src/lora_adapter.cpp +++ b/src/cpp/src/lora_adapter.cpp @@ -115,19 +115,14 @@ struct AutoSafetensor: public safetensors_File { } }; - -// Reads a file with a given filename expecting Safetensors file format. -// The data is read to a solid memory block and the function returns a map of OV Constants allocated on top of that block. // The key in the map is a tensor name and the Constant uses a region of memory from the memory block. // Each Constant holds a shared pointer to the block in the runtime info. // The memory block will be deallocated when the last Constant is destroyed. -ConstantMap read_safetensors(const std::filesystem::path& filename) { - auto buffer = read_file_helper(filename); +ConstantMap safetensors_init(const ov::Tensor & safetensor) { AutoSafetensor safe_tensors_file{}; - OPENVINO_ASSERT( - safetensors_file_init(&(*buffer)[0], buffer->size(), &safe_tensors_file) == nullptr, - "Cannot parse ", filename, " as a Safetensors file format. Safetensors file format is supported only" + OPENVINO_ASSERT(safetensors_file_init(safetensor.data(), safetensor.get_byte_size(), &safe_tensors_file) == nullptr, + "Cannot parse safetensor as a Safetensors file format. Safetensors file format is supported only" ); ConstantMap tensors; @@ -139,18 +134,30 @@ ConstantMap read_safetensors(const std::filesystem::path& filename) { OPENVINO_ASSERT( ov::shape_size(shape) <= tensor.end_offset_bytes - tensor.begin_offset_bytes, - "Tensor shape ", ov::shape_size(shape), " for tensor \"", name, "\" from Safetensors file \"", filename, "\" doesn't match the expected tensor size ", + "Tensor shape ", ov::shape_size(shape), " for tensor \"", name, "\" from Safetensors file \"", "safetensor", "\" doesn't match the expected tensor size ", tensor.end_offset_bytes - tensor.begin_offset_bytes); auto type = safetensors_to_ov_element_type(tensor.dtype); auto constant = std::make_shared(type, shape, ptr, nullptr); // wraps existing memory, no ownership - constant->get_rt_info()["__safetensors_buffer_holder"] = buffer; // to automatically deallocate underlying memory buffer when last constant that holds it is destroyed + constant->get_rt_info()["__safetensors_buffer_holder"] = safetensor; // to automatically deallocate underlying memory buffer when last constant that holds it is destroyed tensors[name] = constant; } return tensors; } +// Reads a file with a given filename expecting Safetensors file format. +// The file data is mmaped to tensor. +ConstantMap read_safetensors(const std::filesystem::path& filename) { + auto safetensor = ov::read_tensor_data(filename); + + return safetensors_init(safetensor); +} + +// Reads a safetensor and creates a constantmap from the memory. +ConstantMap read_safetensors(const ov::Tensor& safetensor) { + return safetensors_init(safetensor); +} // Default LoRA tensor name patterns observed in the existing LoRA adapters, captures the prefix that should correspond to a layer name in the base model LoRAPartsParser default_lora_patterns () { @@ -847,6 +854,9 @@ class SafetensorsAdapterImpl : public AdapterImpl { SafetensorsAdapterImpl(const std::filesystem::path& path) : tensors(group_lora_tensors(read_safetensors(path), default_lora_patterns())) {} + SafetensorsAdapterImpl(const ov::Tensor& safetensor) + : tensors(group_lora_tensors(read_safetensors(safetensor), default_lora_patterns())) {} + const LoRATensors& get_tensors() const override { return tensors; } @@ -923,6 +933,10 @@ Adapter::Adapter(const std::filesystem::path& path) : } +Adapter::Adapter(const ov::Tensor& safetensor) : + m_pimpl(std::make_shared(safetensor)) { +} + bool operator== (const Adapter& a, const Adapter& b) { return a.m_pimpl->eq(b.m_pimpl.get()); } @@ -1067,23 +1081,6 @@ struct AdapterControllerImpl { } } } - void remove_adapters(const AdapterConfig& config) { - const auto &adapters1 = current_config.get_adapters(), adapters2 = config.get_adapters(); - - if (adapters2.size() > 0) { - if (adapters1.size() > 0) { - // if current adpater need to remove, remove from current_config.adapters - for (const auto& adapter1 : adapters1) { - for (const auto& adapter2 : adapters2) { - if (adapter1 == adapter2) { - current_config.remove(adapter1); - need_full_apply = true; - } - } - } - } - } - } bool has_state_name(const std::string& name) { return variable_names.count(name); @@ -1399,13 +1396,6 @@ void AdapterController::apply(ov::InferRequest request, const std::optional& config) { - OPENVINO_ASSERT(m_pimpl || !config || !*config, "Adapters are removed."); - if (m_pimpl) { - m_pimpl->remove_adapters(*config); - } -} - bool AdapterController::has_state_name(const std::string& name) { return m_pimpl->has_state_name(name); } From 2a2e3fea20825e3e17bb7ce7f85b7e41f1baea50 Mon Sep 17 00:00:00 2001 From: wenyi5608 <93560477+wenyi5608@users.noreply.github.com> Date: Mon, 17 Mar 2025 15:57:07 +0800 Subject: [PATCH 40/49] Update prompt_lookup_impl.cpp --- src/cpp/src/prompt_lookup/prompt_lookup_impl.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/cpp/src/prompt_lookup/prompt_lookup_impl.cpp b/src/cpp/src/prompt_lookup/prompt_lookup_impl.cpp index 254d667312..03d41cc462 100644 --- a/src/cpp/src/prompt_lookup/prompt_lookup_impl.cpp +++ b/src/cpp/src/prompt_lookup/prompt_lookup_impl.cpp @@ -90,10 +90,6 @@ void ContinuousBatchingPipeline::PromptLookupImpl::step() { } } -void ContinuousBatchingPipeline::PromptLookupImpl::remove_adapters(const std::optional& adapters) { - m_pipeline->remove_adapters(adapters); -} - std::vector ContinuousBatchingPipeline::PromptLookupImpl::generate(const std::vector& input_ids, const std::vector& sampling_params, From 7cb3fbbb2d0d29c927c2c9137733bcdea9234e71 Mon Sep 17 00:00:00 2001 From: wenyi5608 <93560477+wenyi5608@users.noreply.github.com> Date: Mon, 17 Mar 2025 15:57:43 +0800 Subject: [PATCH 41/49] Update prompt_lookup_impl.hpp --- src/cpp/src/prompt_lookup/prompt_lookup_impl.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/cpp/src/prompt_lookup/prompt_lookup_impl.hpp b/src/cpp/src/prompt_lookup/prompt_lookup_impl.hpp index 02156a150e..1393cecc13 100644 --- a/src/cpp/src/prompt_lookup/prompt_lookup_impl.hpp +++ b/src/cpp/src/prompt_lookup/prompt_lookup_impl.hpp @@ -47,8 +47,6 @@ class ContinuousBatchingPipeline::PromptLookupImpl : public ContinuousBatchingPi const StreamerVariant& streamer) override; SpeculativeDecodingMetrics get_metrics(); - - void remove_adapters(const std::optional& adapters) override; }; } From cd5337d480fbdd38fb6bd08bb89eb563233b8411 Mon Sep 17 00:00:00 2001 From: wenyi5608 <93560477+wenyi5608@users.noreply.github.com> Date: Mon, 17 Mar 2025 15:58:49 +0800 Subject: [PATCH 42/49] Update speculative_decoding_impl.cpp --- .../src/speculative_decoding/speculative_decoding_impl.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/cpp/src/speculative_decoding/speculative_decoding_impl.cpp b/src/cpp/src/speculative_decoding/speculative_decoding_impl.cpp index de276ae4d8..8d3303cdb5 100644 --- a/src/cpp/src/speculative_decoding/speculative_decoding_impl.cpp +++ b/src/cpp/src/speculative_decoding/speculative_decoding_impl.cpp @@ -217,11 +217,6 @@ void ContinuousBatchingPipeline::SpeculativeDecodingImpl::step() { step_timer.end(); } -void ContinuousBatchingPipeline::SpeculativeDecodingImpl::remove_adapters(const std::optional& adapters) { - m_main_pipeline->remove_adapters(adapters); - m_draft_pipeline->remove_adapters(adapters); -} - std::vector ContinuousBatchingPipeline::SpeculativeDecodingImpl::generate(const std::vector& input_ids, const std::vector& sampling_params, From b41810520be192b21eaa2faf80cac160a24e6e7d Mon Sep 17 00:00:00 2001 From: wenyi5608 <93560477+wenyi5608@users.noreply.github.com> Date: Mon, 17 Mar 2025 15:59:15 +0800 Subject: [PATCH 43/49] Update speculative_decoding_impl.hpp --- src/cpp/src/speculative_decoding/speculative_decoding_impl.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/cpp/src/speculative_decoding/speculative_decoding_impl.hpp b/src/cpp/src/speculative_decoding/speculative_decoding_impl.hpp index bc5f3c6707..56626dae48 100644 --- a/src/cpp/src/speculative_decoding/speculative_decoding_impl.hpp +++ b/src/cpp/src/speculative_decoding/speculative_decoding_impl.hpp @@ -69,8 +69,6 @@ class ContinuousBatchingPipeline::SpeculativeDecodingImpl : public ContinuousBat const StreamerVariant& streamer) override; SpeculativeDecodingMetrics get_speculative_decoding_metrics(); - - void remove_adapters(const std::optional& adapters) override; }; } From 4abb13566d549ae5beb4c89d7f36db2eea345a96 Mon Sep 17 00:00:00 2001 From: wenyi5608 <93560477+wenyi5608@users.noreply.github.com> Date: Mon, 17 Mar 2025 16:09:21 +0800 Subject: [PATCH 44/49] Update lora_adapter.hpp --- src/cpp/include/openvino/genai/lora_adapter.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/cpp/include/openvino/genai/lora_adapter.hpp b/src/cpp/include/openvino/genai/lora_adapter.hpp index 754553fc43..b7bfcf01d1 100644 --- a/src/cpp/include/openvino/genai/lora_adapter.hpp +++ b/src/cpp/include/openvino/genai/lora_adapter.hpp @@ -37,6 +37,7 @@ class OPENVINO_GENAI_EXPORTS Adapter { Adapter(const std::shared_ptr& pimpl); public: explicit Adapter(const std::filesystem::path& path); + explicit Adapter(const ov::Tensor& safetensor); Adapter() = default; operator bool() const { From 5d90e7aa9874a1888f8b8b2f4a8a63be2e2120b8 Mon Sep 17 00:00:00 2001 From: wenyi5608 <93560477+wenyi5608@users.noreply.github.com> Date: Wed, 19 Mar 2025 11:30:20 +0800 Subject: [PATCH 45/49] Update icontinuous_batching.cpp --- src/cpp/src/icontinuous_batching.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/cpp/src/icontinuous_batching.cpp b/src/cpp/src/icontinuous_batching.cpp index b63527091c..21b659804f 100644 --- a/src/cpp/src/icontinuous_batching.cpp +++ b/src/cpp/src/icontinuous_batching.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 #include "icontinuous_batching.hpp" -#include "lora_helper.hpp" namespace ov::genai { From 7b9896817e663040c861ccc1bfc1b00d9e5fcf2e Mon Sep 17 00:00:00 2001 From: wenyi5608 <93560477+wenyi5608@users.noreply.github.com> Date: Wed, 19 Mar 2025 11:31:28 +0800 Subject: [PATCH 46/49] Update llm_pipeline_static.cpp --- src/cpp/src/llm_pipeline_static.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/cpp/src/llm_pipeline_static.cpp b/src/cpp/src/llm_pipeline_static.cpp index e93ae7ed9c..ecd2f1b258 100644 --- a/src/cpp/src/llm_pipeline_static.cpp +++ b/src/cpp/src/llm_pipeline_static.cpp @@ -14,7 +14,6 @@ #include "openvino/genai/text_streamer.hpp" #include -#include "lora_helper.hpp" namespace { From 4d3ff3021b4d5a9d5e8d05cdd519109ab7fc59ab Mon Sep 17 00:00:00 2001 From: wenyi5608 <93560477+wenyi5608@users.noreply.github.com> Date: Wed, 19 Mar 2025 11:37:17 +0800 Subject: [PATCH 47/49] Update lora_adapter.cpp --- src/cpp/src/lora_adapter.cpp | 39 +++--------------------------------- 1 file changed, 3 insertions(+), 36 deletions(-) diff --git a/src/cpp/src/lora_adapter.cpp b/src/cpp/src/lora_adapter.cpp index beecd93e2d..1e4bfb711f 100644 --- a/src/cpp/src/lora_adapter.cpp +++ b/src/cpp/src/lora_adapter.cpp @@ -60,8 +60,6 @@ using namespace ov::op; using namespace ov::genai::utils; // FIXME: Use ov::AlignedBuffer instead of std::vector. ov::AlignedBuffer is not available in public OV API -using Buffer = std::vector; -using BufferPtr = std::shared_ptr; using ConstantVector = std::vector>; @@ -69,25 +67,6 @@ using ConstantVector = std::vector>; using LoRANode = LoRAParts>; using LoRAPartsParser = LoRAParts(const std::string& name)>>; - -// Read binary file to memory. -BufferPtr read_file_helper(const std::filesystem::path& filename) { - std::ifstream file(filename, std::ios::binary | std::ios::ate); - OPENVINO_ASSERT(file.is_open(), "Cannot open file with LoRA weights: ", filename); - - size_t filesize = file.tellg(); - auto buffer = std::make_shared(filesize); - file.seekg(0, std::ios::beg); - // TODO: Use mmapped AlignedBuffer as ov::Core::read_model can do, necessary functionality is not available in public OV API. - // LoRA files do not usually have huge size in comparison to the base models, but it can vary depending on adapter, - // and using mmap will help to optimize memory consumption and could be critical - // when the application at the edge of available memory that is not really uncommon for applications dealing with LLMs. - file.read(&(*buffer)[0], filesize); - - return buffer; -} - - // Converts Safetensors element type to OV element type. Only part of the types are supported. ov::element::Type safetensors_to_ov_element_type (int dtype) { switch(dtype) { @@ -102,10 +81,8 @@ ov::element::Type safetensors_to_ov_element_type (int dtype) { } } - using ConstantMap = std::map>; - // Safetensor file parser that deallocates temporary buffers automatically. // Drop-in replacement for the third party safetensors_File struct. struct AutoSafetensor: public safetensors_File { @@ -118,7 +95,7 @@ struct AutoSafetensor: public safetensors_File { // The key in the map is a tensor name and the Constant uses a region of memory from the memory block. // Each Constant holds a shared pointer to the block in the runtime info. // The memory block will be deallocated when the last Constant is destroyed. -ConstantMap safetensors_init(const ov::Tensor & safetensor) { +ConstantMap safetensor_to_constant_map(const ov::Tensor& safetensor) { AutoSafetensor safe_tensors_file{}; OPENVINO_ASSERT(safetensors_file_init(safetensor.data(), safetensor.get_byte_size(), &safe_tensors_file) == nullptr, @@ -132,11 +109,6 @@ ConstantMap safetensors_init(const ov::Tensor & safetensor) { ov::Shape shape(tensor.shape, tensor.shape + tensor.n_dimensions); void* ptr = tensor.ptr; // FIXME: needs a non-constant pointer because Tensor doesn't accept a constant pointer - OPENVINO_ASSERT( - ov::shape_size(shape) <= tensor.end_offset_bytes - tensor.begin_offset_bytes, - "Tensor shape ", ov::shape_size(shape), " for tensor \"", name, "\" from Safetensors file \"", "safetensor", "\" doesn't match the expected tensor size ", - tensor.end_offset_bytes - tensor.begin_offset_bytes); - auto type = safetensors_to_ov_element_type(tensor.dtype); auto constant = std::make_shared(type, shape, ptr, nullptr); // wraps existing memory, no ownership @@ -151,12 +123,7 @@ ConstantMap safetensors_init(const ov::Tensor & safetensor) { ConstantMap read_safetensors(const std::filesystem::path& filename) { auto safetensor = ov::read_tensor_data(filename); - return safetensors_init(safetensor); -} - -// Reads a safetensor and creates a constantmap from the memory. -ConstantMap read_safetensors(const ov::Tensor& safetensor) { - return safetensors_init(safetensor); + return safetensor_to_constant_map(safetensor); } // Default LoRA tensor name patterns observed in the existing LoRA adapters, captures the prefix that should correspond to a layer name in the base model @@ -855,7 +822,7 @@ class SafetensorsAdapterImpl : public AdapterImpl { tensors(group_lora_tensors(read_safetensors(path), default_lora_patterns())) {} SafetensorsAdapterImpl(const ov::Tensor& safetensor) - : tensors(group_lora_tensors(read_safetensors(safetensor), default_lora_patterns())) {} + : tensors(group_lora_tensors(safetensor_to_constant_map(safetensor), default_lora_patterns())) {} const LoRATensors& get_tensors() const override { return tensors; From 1e5411d6813c2cd64ffa1242d336b033f298e959 Mon Sep 17 00:00:00 2001 From: wenyi5608 <93560477+wenyi5608@users.noreply.github.com> Date: Wed, 19 Mar 2025 11:38:40 +0800 Subject: [PATCH 48/49] Update py_openvino_genai.pyi --- src/python/openvino_genai/py_openvino_genai.pyi | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi index 0ed1f12caa..42528cf03e 100644 --- a/src/python/openvino_genai/py_openvino_genai.pyi +++ b/src/python/openvino_genai/py_openvino_genai.pyi @@ -21,6 +21,12 @@ class Adapter: Immutable LoRA Adapter that carries the adaptation matrices and serves as unique adapter identifier. path (os.PathLike): Path to adapter file in safetensors format. """ + @typing.overload + def __init__(self, safetensor: openvino._pyopenvino.Tensor) -> None: + """ + Immutable LoRA Adapter that carries the adaptation matrices and serves as unique adapter identifier. + safetensor (ov.Tensor): Pre-read LoRA Adapter safetensor. + """ class AdapterConfig: """ Adapter config that defines a combination of LoRA adapters with blending parameters. From de4c06a54ae2079ef8af9cb5358ee2863dca9ab7 Mon Sep 17 00:00:00 2001 From: wenyi5608 <93560477+wenyi5608@users.noreply.github.com> Date: Wed, 19 Mar 2025 11:39:47 +0800 Subject: [PATCH 49/49] Update py_lora_adapter.cpp --- src/python/py_lora_adapter.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/python/py_lora_adapter.cpp b/src/python/py_lora_adapter.cpp index 54ea6cf0b9..cc795bd1cc 100644 --- a/src/python/py_lora_adapter.cpp +++ b/src/python/py_lora_adapter.cpp @@ -25,6 +25,16 @@ void init_lora_adapter(py::module_& m) { Immutable LoRA Adapter that carries the adaptation matrices and serves as unique adapter identifier. path (os.PathLike): Path to adapter file in safetensors format. )") + .def(py::init([]( + const ov::Tensor& safetensor + ) { + return ov::genai::Adapter(safetensor); + }), + py::arg("safetensor"), "ov::Tensor with pre-read LoRA Adapter safetensor", + R"( + Immutable LoRA Adapter that carries the adaptation matrices and serves as unique adapter identifier. + safetensor (ov.Tensor): Pre-read LoRA Adapter safetensor. + )") .def( "__bool__", [](ov::genai::Adapter& self