Skip to content

Commit 6f160e0

Browse files
CB: drop profiling as it drops performance (openvinotoolkit#1280)
1 parent f59a638 commit 6f160e0

6 files changed

+8
-26
lines changed

src/cpp/src/continuous_batching_impl.cpp

+2-20
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ ContinuousBatchingPipeline::ContinuousBatchingImpl::ContinuousBatchingImpl(
2121

2222
ov::Core core;
2323

24-
auto [core_properties, compile_properties] = utils::split_core_complile_config(properties);
24+
auto [core_properties, compile_properties] = utils::split_core_compile_config(properties);
2525
core.set_property(core_properties);
2626

2727
// The model can be compiled for GPU as well
@@ -57,7 +57,7 @@ void ContinuousBatchingPipeline::ContinuousBatchingImpl::init(
5757
}
5858

5959
SchedulerConfig updated_config = scheduler_config;
60-
// update KV number in scheduler config
60+
// update KV blocks number in scheduler config
6161
if (scheduler_config.num_kv_blocks != device_config.get_num_kv_blocks()) {
6262
updated_config.num_kv_blocks = device_config.get_num_kv_blocks();
6363
}
@@ -166,24 +166,6 @@ void ContinuousBatchingPipeline::ContinuousBatchingImpl::step() {
166166
timer.start();
167167
logits = m_model_runner->forward(m_requests, scheduler_output);
168168
timer.end();
169-
170-
ov::InferRequest infer_request = m_model_runner->get_infer_request();
171-
ov::CompiledModel compiled_model = infer_request.get_compiled_model();
172-
const bool is_profiling_enabled = compiled_model.get_property(ov::enable_profiling);
173-
174-
// collect detailed statistic
175-
if (is_profiling_enabled) {
176-
std::vector<ov::ProfilingInfo> profiling_info = m_model_runner->get_infer_request().get_profiling_info();
177-
for (const ov::ProfilingInfo& info : profiling_info) {
178-
double current_time = info.real_time.count();
179-
if (info.node_type == "PagedAttentionExtension") {
180-
m_perf.m_paged_attention_time_ms += current_time;
181-
} else if (info.node_type == "FullyConnected") {
182-
m_perf.m_matmul_time_ms += current_time;
183-
}
184-
m_perf.m_infer_total_ms += current_time;
185-
}
186-
}
187169
}
188170

189171
#ifdef DEBUG_CACHE_STATE_DUMP

src/cpp/src/llm_pipeline.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -63,15 +63,15 @@ class StatefulLLMPipeline final : public LLMPipelineImplBase {
6363
{
6464
ov::Core core;
6565
if (auto filtered_plugin_config = extract_adapters_from_properties(plugin_config, &m_generation_config.adapters)) {
66-
auto [core_plugin_config, compile_plugin_config] = ov::genai::utils::split_core_complile_config(*filtered_plugin_config);
66+
auto [core_plugin_config, compile_plugin_config] = ov::genai::utils::split_core_compile_config(*filtered_plugin_config);
6767
core.set_property(core_plugin_config);
6868
auto model = core.read_model(models_path / "openvino_model.xml");
6969
m_generation_config.adapters->set_tensor_name_prefix("base_model.model.model.");
7070
m_adapter_controller = AdapterController(model, *m_generation_config.adapters, device); // TODO: Make the prefix name configurable
7171
utils::slice_matmul_statefull_model(model);
7272
m_model_runner = core.compile_model(model, device, compile_plugin_config).create_infer_request();
7373
} else {
74-
auto [core_plugin_config, compile_plugin_config] = ov::genai::utils::split_core_complile_config(plugin_config);
74+
auto [core_plugin_config, compile_plugin_config] = ov::genai::utils::split_core_compile_config(plugin_config);
7575
core.set_property(core_plugin_config);
7676
auto model = core.read_model(models_path / "openvino_model.xml");
7777
utils::slice_matmul_statefull_model(model);

src/cpp/src/speculative_decoding/speculative_decoding_impl.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ ContinuousBatchingPipeline::SpeculativeDecodingImpl::SpeculativeDecodingImpl(
3131
const ov::genai::ModelDesc draft_model_desc,
3232
const ov::AnyMap& tokenizer_properties) {
3333
ov::Core core;
34-
auto [core_properties, compile_properties] = ov::genai::utils::split_core_complile_config(main_properties);
34+
auto [core_properties, compile_properties] = ov::genai::utils::split_core_compile_config(main_properties);
3535
core.set_property(core_properties);
3636

3737
std::filesystem::path openvino_model_name = "openvino_model.xml",

src/cpp/src/utils.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,7 @@ ProcessorConfig from_any_map(
203203
* Some plugin options, such as `ENABLE_MMAP`, are not supported by the `core.compile` function.
204204
* Move these options to the `core.set_property` config instead.
205205
*/
206-
std::pair<ov::AnyMap, ov::AnyMap> split_core_complile_config(const ov::AnyMap& properties) {
206+
std::pair<ov::AnyMap, ov::AnyMap> split_core_compile_config(const ov::AnyMap& properties) {
207207
const std::vector<std::string> unsupported_by_compile_properties{"ENABLE_MMAP"};
208208
ov::AnyMap core_properties;
209209
ov::AnyMap compile_properties{properties};

src/cpp/src/utils.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ ProcessorConfig from_any_map(
7878
const ProcessorConfig& initial
7979
);
8080

81-
std::pair<ov::AnyMap, ov::AnyMap> split_core_complile_config(const ov::AnyMap& properties);
81+
std::pair<ov::AnyMap, ov::AnyMap> split_core_compile_config(const ov::AnyMap& properties);
8282

8383
ov::genai::TokenizedInputs subtract_chat_tokenized_inputs(const ov::genai::TokenizedInputs& minuend, const ov::genai::TokenizedInputs& subtrahend);
8484

src/cpp/src/whisper_pipeline.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ class WhisperPipeline::WhisperPipelineStatefulImpl : public WhisperPipeline::Whi
5353
const ov::AnyMap& properties)
5454
: WhisperPipelineImplBase{models_path} {
5555
ov::Core core = utils::singleton_core();
56-
auto [core_properties, compile_properties] = ov::genai::utils::split_core_complile_config(properties);
56+
auto [core_properties, compile_properties] = ov::genai::utils::split_core_compile_config(properties);
5757
core.set_property(core_properties);
5858

5959
m_models.encoder =

0 commit comments

Comments
 (0)