@@ -1244,6 +1244,39 @@ event::ptr primitive_inst::execute(const std::vector<event::ptr>& events) {
1244
1244
GPU_DEBUG_TRACE_DETAIL << " - inputs[" << i << " ] : " << _deps[i].first ->id () << std::endl;
1245
1245
}
1246
1246
GPU_DEBUG_TRACE_DETAIL << " -----------------------------------------------------------------" << std::endl;
1247
+
1248
+ std::vector<std::string> print_ids = {" pagedattentionextension:PagedAttentionExtension_606" ,
1249
+ " gemm:MatMul_112999" ,
1250
+ " softmax:Softmax_113002" ,
1251
+ " gemm:__module.model.layers.0.self_attn/aten::transpose/Transpose_3" ,
1252
+ /* BATCHED chatglm3 fp32 */
1253
+ " matmul:MatMul_113004" ,
1254
+ " add:Add_113006" ,
1255
+ " softmax:Softmax_113007" ,
1256
+ " matmul:__module.model.layers.0.self_attn/aten::scaled_dot_product_attention/ScaledDotProductAttention" ,
1257
+ " transpose:__module.model.layers.0.self_attn/aten::transpose/Transpose_3" ,
1258
+ /* Batched open_llama-7b fp32 + INT8 */
1259
+ " matmul:MatMul_158917" ,
1260
+ " add:Add_158919" ,
1261
+ " softmax:Softmax_158920" ,
1262
+ " matmul:__module.model.layers.0.self_attn/aten::scaled_dot_product_attention/ScaledDotProductAttention" ,
1263
+ /* open llama FP32_INT4 */
1264
+ };
1265
+
1266
+ if (_impl_params->desc ->type_string () == " paged_attention" ||
1267
+ _impl_params->desc ->type_string () == " softmax" ||
1268
+ _impl_params->desc ->type_string () == " gemm" ||
1269
+ _impl_params->desc ->type_string () == " eltwise" ||
1270
+ _impl_params->desc ->type_string () == " add" ||
1271
+ _impl_params->desc ->type_string () == " transpose" )
1272
+ print_ids.push_back (id ());
1273
+
1274
+ if (std::find (print_ids.begin (), print_ids.end (), id ()) != print_ids.end () && get_network ().get_config ().get_property (ov::enable_profiling)) {
1275
+ GPU_DEBUG_INFO << " Execute " << id () << " (type: " << _impl_params->desc ->type_string () << " ) " << std::endl;
1276
+ for (size_t i = 0 ; i < _deps.size (); ++i) {
1277
+ GPU_DEBUG_INFO << " - inputs[" << i << " ] : " << _deps[i].first ->id () << " - " << _deps[i].first ->get_output_layout (0 ).to_short_string () << std::endl;
1278
+ }
1279
+ }
1247
1280
bool need_args_update = false ;
1248
1281
_mem_changed = false ;
1249
1282
const auto orig_outputs = _outputs;
@@ -1400,14 +1433,15 @@ event::ptr primitive_inst::execute(const std::vector<event::ptr>& events) {
1400
1433
GPU_DEBUG_PROFILED_STAGE (instrumentation::pipeline_stage::inference);
1401
1434
auto ev = _impl->execute (dependencies, *this );
1402
1435
1403
- GPU_DEBUG_IF (!debug_config-> dump_profiling_data . empty ( )) {
1436
+ if ( std::find (print_ids. begin (), print_ids. end (), id ()) != print_ids. end () && get_network (). get_config (). get_property (ov::enable_profiling )) {
1404
1437
get_network ().get_stream ().wait_for_events ({ev});
1405
1438
1406
1439
if (ev != nullptr ) {
1407
1440
auto profiling_info = ev->get_profiling_info ();
1408
1441
for (const auto &interval : profiling_info) {
1409
1442
if (interval.stage == cldnn::instrumentation::profiling_stage::executing) {
1410
- GPU_DEBUG_CODE (stage_prof.set_custom_stage_duration (interval.value ->value ()));
1443
+ auto time_res0 = std::chrono::duration_cast<std::chrono::microseconds>(interval.value ->value ()).count ();
1444
+ GPU_DEBUG_INFO << id () << " performace time = " << time_res0 << " mcs\n " ;
1411
1445
}
1412
1446
}
1413
1447
}
0 commit comments