@@ -21,7 +21,7 @@ ContinuousBatchingPipeline::ContinuousBatchingImpl::ContinuousBatchingImpl(
21
21
22
22
ov::Core core;
23
23
24
- auto [core_properties, compile_properties] = utils::split_core_complile_config (properties);
24
+ auto [core_properties, compile_properties] = utils::split_core_compile_config (properties);
25
25
core.set_property (core_properties);
26
26
27
27
// The model can be compiled for GPU as well
@@ -57,7 +57,7 @@ void ContinuousBatchingPipeline::ContinuousBatchingImpl::init(
57
57
}
58
58
59
59
SchedulerConfig updated_config = scheduler_config;
60
- // update KV number in scheduler config
60
+ // update KV blocks number in scheduler config
61
61
if (scheduler_config.num_kv_blocks != device_config.get_num_kv_blocks ()) {
62
62
updated_config.num_kv_blocks = device_config.get_num_kv_blocks ();
63
63
}
@@ -166,24 +166,6 @@ void ContinuousBatchingPipeline::ContinuousBatchingImpl::step() {
166
166
timer.start ();
167
167
logits = m_model_runner->forward (m_requests, scheduler_output);
168
168
timer.end ();
169
-
170
- ov::InferRequest infer_request = m_model_runner->get_infer_request ();
171
- ov::CompiledModel compiled_model = infer_request.get_compiled_model ();
172
- const bool is_profiling_enabled = compiled_model.get_property (ov::enable_profiling);
173
-
174
- // collect detailed statistic
175
- if (is_profiling_enabled) {
176
- std::vector<ov::ProfilingInfo> profiling_info = m_model_runner->get_infer_request ().get_profiling_info ();
177
- for (const ov::ProfilingInfo& info : profiling_info) {
178
- double current_time = info.real_time .count ();
179
- if (info.node_type == " PagedAttentionExtension" ) {
180
- m_perf.m_paged_attention_time_ms += current_time;
181
- } else if (info.node_type == " FullyConnected" ) {
182
- m_perf.m_matmul_time_ms += current_time;
183
- }
184
- m_perf.m_infer_total_ms += current_time;
185
- }
186
- }
187
169
}
188
170
189
171
#ifdef DEBUG_CACHE_STATE_DUMP
0 commit comments