Skip to content

Commit ebce524

Browse files
committed
[GPU] Remove unused PagedAttention inputs causing set_arg error in case of zero buffer
1 parent d757efd commit ebce524

File tree

3 files changed

+11
-29
lines changed

3 files changed

+11
-29
lines changed

src/plugins/intel_gpu/src/graph/impls/ocl/paged_attention.cpp

-22
Original file line numberDiff line numberDiff line change
@@ -214,12 +214,6 @@ struct paged_attention_impl : multi_stage_primitive<paged_attention> {
214214
if (desc->has_alibi) {
215215
args.inputs.push_back(instance.alibi_memory_ptr());
216216
}
217-
218-
if (desc->has_rotated_blocks) {
219-
args.inputs.push_back(instance.rotated_block_indices_memory_ptr());
220-
args.inputs.push_back(instance.rotation_deltas_memory_ptr());
221-
args.inputs.push_back(instance.rotation_trig_lut_memory_ptr());
222-
}
223217
} else if (kernel_idx == 2 || kernel_idx == 3) {
224218
// Finalization kernel or mixed stage finalization kernel
225219
args.inputs = { instance.past_lens_memory_ptr() };
@@ -687,10 +681,6 @@ struct paged_attention_impl : multi_stage_primitive<paged_attention> {
687681
if (has_alibi)
688682
inputs_number++;
689683

690-
const auto has_rotation = impl_param.input_layouts.size() == 16;
691-
if (has_rotation)
692-
inputs_number += 3;
693-
694684
auto input_idx = 0;
695685
params.inputs.resize(inputs_number);
696686
params.inputs[input_idx++] = query_tensor;
@@ -709,12 +699,6 @@ struct paged_attention_impl : multi_stage_primitive<paged_attention> {
709699
if (has_alibi)
710700
params.inputs[input_idx++] = alibi_tensor;
711701

712-
if (has_rotation) {
713-
params.inputs[input_idx++] = input_tensors[13];
714-
params.inputs[input_idx++] = input_tensors[14];
715-
params.inputs[input_idx++] = input_tensors[15];
716-
}
717-
718702
if (has_scores_output) {
719703
params.outputs.resize(2);
720704
params.outputs[1] = convert_data_tensor(impl_param.get_output_layout(1));
@@ -752,12 +736,6 @@ struct paged_attention_impl : multi_stage_primitive<paged_attention> {
752736
if (has_alibi)
753737
in_tensor_to_offset_map.insert({input_idx++, in_offsets_map.at(11)});
754738

755-
if (has_rotation) {
756-
in_tensor_to_offset_map.insert({input_idx++, in_offsets_map.at(13)});
757-
in_tensor_to_offset_map.insert({input_idx++, in_offsets_map.at(14)});
758-
in_tensor_to_offset_map.insert({input_idx++, in_offsets_map.at(15)});
759-
}
760-
761739
if (has_scores_output)
762740
out_tensor_to_offset_map.insert({1, out_offsets_map.at(1)});
763741

src/plugins/intel_gpu/src/kernel_selector/cl_kernels/pa_sdpa_opt.cl

-6
Original file line numberDiff line numberDiff line change
@@ -43,12 +43,6 @@ KERNEL(pa_sdpa_opt)(
4343
#if HAS_ALIBI
4444
const __global ALIBI_INPUT_TYPE* alibi_slopes,
4545
#endif
46-
47-
#if HAS_ROTATED_BLOCKS
48-
const __global INPUT7_TYPE* rotated_block_indices,
49-
const __global INPUT8_TYPE* rotation_deltas,
50-
const __global INPUT9_TYPE* rotation_trig_lut,
51-
#endif
5246
__global OUTPUT_TYPE* output,
5347
#if PAGED_ATTENTION_SCORES_OUTPUT
5448
__global SOFTMAX_ACCUMULATOR_TYPE* softmax_results,

src/plugins/intel_gpu/tests/unit/test_cases/paged_attention_gpu_test.cpp

+11 −1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include "test_utils.h"
66
#include "random_generator.hpp"
77

8+
#include <intel_gpu/primitives/activation.hpp>
89
#include <intel_gpu/primitives/data.hpp>
910
#include <intel_gpu/primitives/eltwise.hpp>
1011
#include <intel_gpu/primitives/input_layout.hpp>
@@ -306,6 +307,12 @@ struct PagedAttentionManager {
306307
auto layout = mem->get_layout();
307308
layout.set_partial_shape(ov::PartialShape{ max_context_len[0], head_size });
308309

310+
if (rotated_block_indices.empty()) {
311+
auto empty_layout = mem->get_layout();
312+
empty_layout.set_partial_shape(ov::PartialShape{ 0, head_size });
313+
return test_engine.reinterpret_buffer(*mem, empty_layout);
314+
}
315+
309316
return test_engine.reinterpret_buffer(*mem, layout);
310317
}
311318

@@ -741,7 +748,7 @@ struct PagedAttentionTest : public ::testing::TestWithParam<T> {
741748
if (p.rotation_config.apply_rotation) {
742749
pa_inputs.push_back(input_info("rotated_block_indices"));
743750
pa_inputs.push_back(input_info("rotation_deltas"));
744-
pa_inputs.push_back(input_info("rotation_trig_lut"));
751+
pa_inputs.push_back(input_info("rotation_trig_lut_modified"));
745752
}
746753

747754
auto pa_prim = paged_attention("paged_attention", pa_inputs);
@@ -782,6 +789,9 @@ struct PagedAttentionTest : public ::testing::TestWithParam<T> {
782789
topology.add(input_layout("rotated_block_indices", rotated_block_indices_layout));
783790
topology.add(input_layout("rotation_deltas", rotation_deltas_layout));
784791
topology.add(input_layout("rotation_trig_lut", rotation_trig_lut_layout));
792+
793+
// add dummy activation operation to simulate an empty PA `rotation_trig_lut` buffer for shapes like [0, head_size]
794+
topology.add(activation("rotation_trig_lut_modified", input_info("rotation_trig_lut"), activation_func::none));
785795
}
786796

787797
ExecutionConfig config = get_test_default_config(get_test_engine());

0 commit comments

Comments (0)