Skip to content

Commit e4f31b8

Browse files
llandwerlin-intelMarge Bot
authored and
Marge Bot
committed
intel/ds: rework RT tracepoints
That way we can identify individual dispatches within each step. Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Michael Cheng <michael.cheng@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33684>
1 parent 31c5c38 commit e4f31b8

7 files changed

+70
-111
lines changed

src/intel/ds/intel_driver_ds.h

+3-5
Original file line numberDiff line numberDiff line change
@@ -73,12 +73,10 @@ enum intel_ds_tracepoint_flags {
7373
*/
7474
INTEL_DS_TRACEPOINT_FLAG_END_CS = BITFIELD_BIT(1),
7575
/**
76-
* Whether this tracepoint's timestamp is recorded on the compute pipeline
77-
* or from top of pipe if there was no dispatch (useful for acceleration
78-
* structure builds where the runtime might choose to not emit anything for
79-
* a number of reasons).
76+
* Whether this tracepoint doesn't generate a timestamp but instead repeats
77+
* the last one.
8078
*/
81-
INTEL_DS_TRACEPOINT_FLAG_END_CS_OR_NOOP = BITFIELD_BIT(2),
79+
INTEL_DS_TRACEPOINT_FLAG_REPEAST_LAST = BITFIELD_BIT(2),
8280
};
8381

8482
/* Convert internal driver PIPE_CONTROL stall bits to intel_ds_stall_flag. */

src/intel/ds/intel_tracepoints.py

+11-12
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ def define_tracepoints(args):
4747

4848
def begin_end_tp(name, tp_args=[], tp_struct=None, tp_print=None,
4949
tp_default_enabled=True, end_pipelined=True,
50-
compute=False, maybe_compute=False,
50+
compute=False, repeat_last=False,
5151
need_cs_param=False):
5252
global intel_default_tps
5353
if tp_default_enabled:
@@ -69,8 +69,8 @@ def begin_end_tp(name, tp_args=[], tp_struct=None, tp_print=None,
6969
if end_pipelined:
7070
if compute:
7171
tp_flags.append('INTEL_DS_TRACEPOINT_FLAG_END_CS')
72-
elif maybe_compute:
73-
tp_flags.append('INTEL_DS_TRACEPOINT_FLAG_END_CS_OR_NOOP')
72+
elif repeat_last:
73+
tp_flags.append('INTEL_DS_TRACEPOINT_FLAG_REPEAST_LAST')
7474
else:
7575
tp_flags.append('INTEL_DS_TRACEPOINT_FLAG_END_OF_PIPE')
7676
Tracepoint('intel_end_{0}'.format(name),
@@ -229,15 +229,14 @@ def begin_end_tp(name, tp_args=[], tp_struct=None, tp_print=None,
229229
tp_args=[Arg(type='uint32_t', var='count', c_format='%u'),],
230230
need_cs_param=True)
231231

232-
rt_args = [Arg(type='uint32_t', var='cs_hash', c_format='%u')]
233-
begin_end_tp('as_build', tp_args=rt_args)
234-
begin_end_tp('as_build_leaves', tp_args=rt_args, maybe_compute=True)
235-
begin_end_tp('as_morton_generate', tp_args=rt_args, maybe_compute=True)
236-
begin_end_tp('as_morton_sort', tp_args=rt_args, maybe_compute=True)
237-
begin_end_tp('as_lbvh_build_internal', tp_args=rt_args, maybe_compute=True)
238-
begin_end_tp('as_ploc_build_internal', tp_args=rt_args, maybe_compute=True)
239-
begin_end_tp('as_encode', tp_args=rt_args, maybe_compute=True)
240-
begin_end_tp('as_copy', tp_args=rt_args, maybe_compute=True)
232+
begin_end_tp('as_build')
233+
begin_end_tp('as_build_leaves', repeat_last=True)
234+
begin_end_tp('as_morton_generate', repeat_last=True)
235+
begin_end_tp('as_morton_sort', repeat_last=True)
236+
begin_end_tp('as_lbvh_build_internal', repeat_last=True)
237+
begin_end_tp('as_ploc_build_internal', repeat_last=True)
238+
begin_end_tp('as_encode', repeat_last=True)
239+
begin_end_tp('as_copy', repeat_last=True)
241240

242241
begin_end_tp('rays',
243242
tp_args=[Arg(type='uint32_t', var='group_x', c_format='%u'),

src/intel/vulkan/anv_private.h

+5-1
Original file line numberDiff line numberDiff line change
@@ -1010,6 +1010,7 @@ enum anv_timestamp_capture_type {
10101010
ANV_TIMESTAMP_CAPTURE_AT_CS_STALL,
10111011
ANV_TIMESTAMP_REWRITE_COMPUTE_WALKER,
10121012
ANV_TIMESTAMP_REWRITE_INDIRECT_DISPATCH,
1013+
ANV_TIMESTAMP_REPEAT_LAST,
10131014
};
10141015

10151016
struct anv_physical_device {
@@ -6532,10 +6533,13 @@ struct anv_utrace_submit {
65326533
struct anv_state_stream general_state_stream;
65336534

65346535
/* Last fully read 64bit timestamp (used to rebuild the upper bits of 32bit
6535-
* timestamps)
6536+
* timestamps). The timestamp is not scaled to the CPU time domain.
65366537
*/
65376538
uint64_t last_full_timestamp;
65386539

6540+
/* Last timestamp, not scaled to the CPU time domain */
6541+
uint64_t last_timestamp;
6542+
65396543
/* Memcpy state tracking (only used for timestamp copies on render engine) */
65406544
struct anv_memcpy_state memcpy_state;
65416545

src/intel/vulkan/anv_utrace.c

+25-29
Original file line numberDiff line numberDiff line change
@@ -351,11 +351,10 @@ anv_utrace_record_ts(struct u_trace *ut, void *cs,
351351
const bool is_end_compute =
352352
cs == NULL &&
353353
(flags & INTEL_DS_TRACEPOINT_FLAG_END_CS);
354-
const bool is_end_compute_or_noop =
355-
cs == NULL &&
356-
(flags & INTEL_DS_TRACEPOINT_FLAG_END_CS_OR_NOOP);
357354
enum anv_timestamp_capture_type capture_type;
358-
if (is_end_compute) {
355+
if (flags & INTEL_DS_TRACEPOINT_FLAG_REPEAST_LAST) {
356+
capture_type = ANV_TIMESTAMP_REPEAT_LAST;
357+
} else if (is_end_compute) {
359358
assert(device->info->verx10 < 125 ||
360359
!is_end_compute ||
361360
cmd_buffer->state.last_indirect_dispatch != NULL ||
@@ -366,15 +365,6 @@ anv_utrace_record_ts(struct u_trace *ut, void *cs,
366365
ANV_TIMESTAMP_REWRITE_INDIRECT_DISPATCH :
367366
ANV_TIMESTAMP_REWRITE_COMPUTE_WALKER) :
368367
ANV_TIMESTAMP_CAPTURE_END_OF_PIPE;
369-
} else if (is_end_compute_or_noop) {
370-
capture_type =
371-
device->info->verx10 >= 125 ?
372-
(cmd_buffer->state.last_indirect_dispatch != NULL ?
373-
ANV_TIMESTAMP_REWRITE_INDIRECT_DISPATCH :
374-
(cmd_buffer->state.last_compute_walker != NULL ?
375-
ANV_TIMESTAMP_REWRITE_COMPUTE_WALKER :
376-
ANV_TIMESTAMP_CAPTURE_TOP_OF_PIPE)) :
377-
ANV_TIMESTAMP_CAPTURE_TOP_OF_PIPE;
378368
} else {
379369
capture_type = (flags & INTEL_DS_TRACEPOINT_FLAG_END_CS) ?
380370
ANV_TIMESTAMP_CAPTURE_END_OF_PIPE :
@@ -418,6 +408,11 @@ anv_utrace_read_ts(struct u_trace_context *utctx,
418408
assert(result == VK_SUCCESS);
419409
}
420410

411+
if (flags & INTEL_DS_TRACEPOINT_FLAG_REPEAST_LAST) {
412+
return intel_device_info_timebase_scale(device->info,
413+
submit->last_timestamp);
414+
}
415+
421416
assert(offset_B % sizeof(union anv_utrace_timestamp) == 0);
422417
union anv_utrace_timestamp *ts =
423418
(union anv_utrace_timestamp *)(bo->map + offset_B);
@@ -426,31 +421,32 @@ anv_utrace_read_ts(struct u_trace_context *utctx,
426421
if (ts->timestamp == U_TRACE_NO_TIMESTAMP)
427422
return U_TRACE_NO_TIMESTAMP;
428423

424+
uint64_t timestamp;
425+
429426
/* Detect a 16/32 bytes timestamp write */
430427
if (ts->gfx20_postsync_data[1] != 0 ||
431428
ts->gfx20_postsync_data[2] != 0 ||
432429
ts->gfx20_postsync_data[3] != 0) {
433430
if (device->info->ver >= 20) {
434-
return intel_device_info_timebase_scale(device->info,
435-
ts->gfx20_postsync_data[3]);
431+
timestamp = ts->gfx20_postsync_data[3];
432+
} else {
433+
/* The timestamp written by COMPUTE_WALKER::PostSync only has 32bits.
434+
* We need to rebuild the full 64bits using the previous timestamp.
435+
* We assume that utrace is reading the timestamp in order. Anyway
436+
* timestamp rollover on 32bits in a few minutes so in most cases
437+
* that should be correct.
438+
*/
439+
timestamp =
440+
(submit->last_full_timestamp & 0xffffffff00000000) |
441+
(uint64_t) ts->gfx125_postsync_data[3];
436442
}
437-
438-
/* The timestamp written by COMPUTE_WALKER::PostSync only has 32bits. We
439-
* need to rebuild the full 64bits using the previous timestamp. We
440-
* assume that utrace is reading the timestamp in order. Anyway
441-
* timestamps roll over on 32bits in a few minutes so in most cases that
442-
* should be correct.
443-
*/
444-
uint64_t timestamp =
445-
(submit->last_full_timestamp & 0xffffffff00000000) |
446-
(uint64_t) ts->gfx125_postsync_data[3];
447-
448-
return intel_device_info_timebase_scale(device->info, timestamp);
443+
} else {
444+
submit->last_full_timestamp = timestamp = ts->timestamp;
449445
}
450446

451-
submit->last_full_timestamp = ts->timestamp;
447+
submit->last_timestamp = timestamp;
452448

453-
return intel_device_info_timebase_scale(device->info, ts->timestamp);
449+
return intel_device_info_timebase_scale(device->info, timestamp);
454450
}
455451

456452
static void

src/intel/vulkan/genX_acceleration_structure.c

+10-43
Original file line numberDiff line numberDiff line change
@@ -72,41 +72,29 @@ static void
7272
end_debug_marker(VkCommandBuffer commandBuffer)
7373
{
7474
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
75-
struct anv_cmd_compute_state *comp_state = &cmd_buffer->state.compute;
76-
struct anv_compute_pipeline *pipeline =
77-
anv_pipeline_to_compute(comp_state->base.pipeline);
78-
const struct brw_cs_prog_data *cs_prog_data =
79-
brw_cs_prog_data_const(pipeline->cs->prog_data);
8075

8176
cmd_buffer->state.rt.debug_marker_count--;
8277
switch (cmd_buffer->state.rt.debug_markers[cmd_buffer->state.rt.debug_marker_count]) {
8378
case VK_ACCELERATION_STRUCTURE_BUILD_STEP_TOP:
84-
trace_intel_end_as_build(&cmd_buffer->trace,
85-
cs_prog_data->base.source_hash);
79+
trace_intel_end_as_build(&cmd_buffer->trace);
8680
break;
8781
case VK_ACCELERATION_STRUCTURE_BUILD_STEP_BUILD_LEAVES:
88-
trace_intel_end_as_build_leaves(&cmd_buffer->trace,
89-
cs_prog_data->base.source_hash);
82+
trace_intel_end_as_build_leaves(&cmd_buffer->trace);
9083
break;
9184
case VK_ACCELERATION_STRUCTURE_BUILD_STEP_MORTON_GENERATE:
92-
trace_intel_end_as_morton_generate(&cmd_buffer->trace,
93-
cs_prog_data->base.source_hash);
85+
trace_intel_end_as_morton_generate(&cmd_buffer->trace);
9486
break;
9587
case VK_ACCELERATION_STRUCTURE_BUILD_STEP_MORTON_SORT:
96-
trace_intel_end_as_morton_sort(&cmd_buffer->trace,
97-
cs_prog_data->base.source_hash);
88+
trace_intel_end_as_morton_sort(&cmd_buffer->trace);
9889
break;
9990
case VK_ACCELERATION_STRUCTURE_BUILD_STEP_LBVH_BUILD_INTERNAL:
100-
trace_intel_end_as_lbvh_build_internal(&cmd_buffer->trace,
101-
cs_prog_data->base.source_hash);
91+
trace_intel_end_as_lbvh_build_internal(&cmd_buffer->trace);
10292
break;
10393
case VK_ACCELERATION_STRUCTURE_BUILD_STEP_PLOC_BUILD_INTERNAL:
104-
trace_intel_end_as_ploc_build_internal(&cmd_buffer->trace,
105-
cs_prog_data->base.source_hash);
94+
trace_intel_end_as_ploc_build_internal(&cmd_buffer->trace);
10695
break;
10796
case VK_ACCELERATION_STRUCTURE_BUILD_STEP_ENCODE:
108-
trace_intel_end_as_encode(&cmd_buffer->trace,
109-
cs_prog_data->base.source_hash);
97+
trace_intel_end_as_encode(&cmd_buffer->trace);
11098
break;
11199
default:
112100
unreachable("Invalid build step");
@@ -725,12 +713,6 @@ genX(CmdCopyAccelerationStructureKHR)(
725713
return;
726714
}
727715

728-
ANV_FROM_HANDLE(anv_pipeline, anv_pipeline, pipeline);
729-
struct anv_compute_pipeline *compute_pipeline =
730-
anv_pipeline_to_compute(anv_pipeline);
731-
const struct brw_cs_prog_data *cs_prog_data =
732-
brw_cs_prog_data_const(compute_pipeline->cs->prog_data);
733-
734716
struct anv_cmd_saved_state saved;
735717
anv_cmd_buffer_save_state(cmd_buffer,
736718
ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE |
@@ -773,8 +755,7 @@ genX(CmdCopyAccelerationStructureKHR)(
773755

774756
anv_cmd_buffer_restore_state(cmd_buffer, &saved);
775757

776-
trace_intel_end_as_copy(&cmd_buffer->trace,
777-
cs_prog_data->base.source_hash);
758+
trace_intel_end_as_copy(&cmd_buffer->trace);
778759
}
779760

780761
void
@@ -799,12 +780,6 @@ genX(CmdCopyAccelerationStructureToMemoryKHR)(
799780
return;
800781
}
801782

802-
ANV_FROM_HANDLE(anv_pipeline, anv_pipeline, pipeline);
803-
struct anv_compute_pipeline *compute_pipeline =
804-
anv_pipeline_to_compute(anv_pipeline);
805-
const struct brw_cs_prog_data *cs_prog_data =
806-
brw_cs_prog_data_const(compute_pipeline->cs->prog_data);
807-
808783
struct anv_cmd_saved_state saved;
809784
anv_cmd_buffer_save_state(cmd_buffer,
810785
ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE |
@@ -851,8 +826,7 @@ genX(CmdCopyAccelerationStructureToMemoryKHR)(
851826

852827
anv_cmd_buffer_restore_state(cmd_buffer, &saved);
853828

854-
trace_intel_end_as_copy(&cmd_buffer->trace,
855-
cs_prog_data->base.source_hash);
829+
trace_intel_end_as_copy(&cmd_buffer->trace);
856830
}
857831

858832
void
@@ -876,12 +850,6 @@ genX(CmdCopyMemoryToAccelerationStructureKHR)(
876850
return;
877851
}
878852

879-
ANV_FROM_HANDLE(anv_pipeline, anv_pipeline, pipeline);
880-
struct anv_compute_pipeline *compute_pipeline =
881-
anv_pipeline_to_compute(anv_pipeline);
882-
const struct brw_cs_prog_data *cs_prog_data =
883-
brw_cs_prog_data_const(compute_pipeline->cs->prog_data);
884-
885853
struct anv_cmd_saved_state saved;
886854
anv_cmd_buffer_save_state(cmd_buffer,
887855
ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE |
@@ -911,8 +879,7 @@ genX(CmdCopyMemoryToAccelerationStructureKHR)(
911879
vk_common_CmdDispatch(commandBuffer, 512, 1, 1);
912880
anv_cmd_buffer_restore_state(cmd_buffer, &saved);
913881

914-
trace_intel_end_as_copy(&cmd_buffer->trace,
915-
cs_prog_data->base.source_hash);
882+
trace_intel_end_as_copy(&cmd_buffer->trace);
916883
}
917884

918885
void

src/intel/vulkan/genX_cmd_buffer.c

+4
Original file line numberDiff line numberDiff line change
@@ -6269,6 +6269,10 @@ void genX(cmd_emit_timestamp)(struct anv_batch *batch,
62696269
}
62706270
#endif
62716271

6272+
case ANV_TIMESTAMP_REPEAT_LAST:
6273+
/* Noop */
6274+
break;
6275+
62726276
default:
62736277
unreachable("invalid");
62746278
}

src/intel/vulkan/genX_cmd_compute.c

+12-21
Original file line numberDiff line numberDiff line change
@@ -626,8 +626,7 @@ void genX(CmdDispatchBase)(
626626
prog_data->local_size[0] * prog_data->local_size[1] *
627627
prog_data->local_size[2]);
628628

629-
if (cmd_buffer->state.rt.debug_marker_count == 0)
630-
trace_intel_begin_compute(&cmd_buffer->trace);
629+
trace_intel_begin_compute(&cmd_buffer->trace);
631630

632631
cmd_buffer_flush_compute_state(cmd_buffer);
633632

@@ -639,11 +638,9 @@ void genX(CmdDispatchBase)(
639638
groupCountX, groupCountY, groupCountZ,
640639
false);
641640

642-
if (cmd_buffer->state.rt.debug_marker_count == 0) {
643-
trace_intel_end_compute(&cmd_buffer->trace,
644-
groupCountX, groupCountY, groupCountZ,
645-
prog_data->base.source_hash);
646-
}
641+
trace_intel_end_compute(&cmd_buffer->trace,
642+
groupCountX, groupCountY, groupCountZ,
643+
prog_data->base.source_hash);
647644
}
648645

649646
static void
@@ -686,8 +683,7 @@ emit_unaligned_cs_walker(
686683
prog_data->local_size[0] * prog_data->local_size[1] *
687684
prog_data->local_size[2]);
688685

689-
if (cmd_buffer->state.rt.debug_marker_count == 0)
690-
trace_intel_begin_compute(&cmd_buffer->trace);
686+
trace_intel_begin_compute(&cmd_buffer->trace);
691687

692688
assert(!prog_data->uses_num_work_groups);
693689
genX(cmd_buffer_flush_compute_state)(cmd_buffer);
@@ -700,11 +696,9 @@ emit_unaligned_cs_walker(
700696
dispatch, groupCountX, groupCountY, groupCountZ);
701697
#endif
702698

703-
if (cmd_buffer->state.rt.debug_marker_count == 0) {
704-
trace_intel_end_compute(&cmd_buffer->trace,
705-
groupCountX, groupCountY, groupCountZ,
706-
prog_data->base.source_hash);
707-
}
699+
trace_intel_end_compute(&cmd_buffer->trace,
700+
groupCountX, groupCountY, groupCountZ,
701+
prog_data->base.source_hash);
708702
}
709703

710704
/*
@@ -795,8 +789,7 @@ genX(cmd_buffer_dispatch_indirect)(struct anv_cmd_buffer *cmd_buffer,
795789
"compute indirect",
796790
0);
797791

798-
if (cmd_buffer->state.rt.debug_marker_count == 0)
799-
trace_intel_begin_compute_indirect(&cmd_buffer->trace);
792+
trace_intel_begin_compute_indirect(&cmd_buffer->trace);
800793

801794
cmd_buffer_flush_compute_state(cmd_buffer);
802795

@@ -806,11 +799,9 @@ genX(cmd_buffer_dispatch_indirect)(struct anv_cmd_buffer *cmd_buffer,
806799
emit_cs_walker(cmd_buffer, pipeline, prog_data, dispatch, indirect_addr, 0,
807800
0, 0, is_unaligned_size_x);
808801

809-
if (cmd_buffer->state.rt.debug_marker_count == 0) {
810-
trace_intel_end_compute_indirect(&cmd_buffer->trace,
811-
anv_address_utrace(indirect_addr),
812-
prog_data->base.source_hash);
813-
}
802+
trace_intel_end_compute_indirect(&cmd_buffer->trace,
803+
anv_address_utrace(indirect_addr),
804+
prog_data->base.source_hash);
814805
}
815806

816807
void genX(CmdDispatchIndirect)(

0 commit comments

Comments
 (0)