
Commit 669f038

Re-enable oneDNN gemm and FC and disable manual input preparation for the prompt stage

1 parent e72d245
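
In short: layout_optimizer.cpp drops the unconditional return of impl_types::ocl for fully_connected and gemm nodes, so the oneDNN implementations can be selected again whenever use_onednn_impls is enabled; and both paged_attention call sites force is_prefill_stage to false, so the prompt (prefill) stage no longer takes its manual input-preparation and shape-info path and instead follows the regular non-prefill path.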

3 files changed (+2, -1 lines)

src/plugins/intel_gpu/src/graph/impls/ocl/paged_attention.cpp (+1 line)

@@ -93,6 +93,7 @@ struct paged_attention_impl : multi_stage_primitive<paged_attention> {
     auto is_prefill_memory = instance.input_memory_ptr(5);
     mem_lock<uint8_t, mem_lock_type::read> is_prefill_memory_lock(is_prefill_memory, service_stream);
     bool is_prefill_stage = is_prefill_memory_lock[0];
+    is_prefill_stage = false;
 
     if (!is_prefill_stage) {
         args.inputs = { instance.input_memory_ptr(0), /* query */
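
With the flag forced to false, the non-prefill branch is always taken; a minimal sketch of the effective control flow in this impl (names taken from the hunk above; the remaining inputs are elided), not the actual file contents:

    // Effective behavior while the override is in place:
    bool is_prefill_stage = false;                        // value read from input 5 is ignored
    if (!is_prefill_stage) {                              // always true now
        args.inputs = { instance.input_memory_ptr(0),     /* query */
                        /* ... remaining non-prefill inputs ... */ };
    }
    // the else-branch with the manual prompt-stage (prefill) input preparation is never reached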

src/plugins/intel_gpu/src/graph/layout_optimizer.cpp (-1 line)

@@ -1690,7 +1690,6 @@ impl_types layout_optimizer::get_preferred_impl_type(program_node& node, format
         }
         // TODO: uncomment this code when onednn gemm implementations will have real perf improvements vs cldnn
     } else if (node.is_type<fully_connected>() || node.is_type<gemm>()) {
-        return impl_types::ocl;
         if (!_optimization_attributes.use_onednn_impls)
             return impl_types::ocl;
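
Removing the unconditional return makes the checks below it reachable again for fully_connected and gemm nodes; a rough sketch of the resulting selection logic, assuming the later per-node checks (outside this hunk) prefer impl_types::onednn when nothing forces OCL:

    // Sketch of the preference logic for fully_connected / gemm after this commit:
    } else if (node.is_type<fully_connected>() || node.is_type<gemm>()) {
        if (!_optimization_attributes.use_onednn_impls)
            return impl_types::ocl;   // oneDNN disabled for this program -> keep the cldnn/OCL kernels
        // ... further checks (not shown in this hunk) may still fall back to
        // impl_types::ocl; otherwise the oneDNN implementation is preferred ...
    }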

src/plugins/intel_gpu/src/graph/paged_attention.cpp (+1 line)

@@ -45,6 +45,7 @@ void paged_attention_inst::update_shape_info_tensor(const kernel_impl_params& pa
     auto is_prefill_memory = this->input_memory_ptr(5);
     mem_lock<uint8_t, mem_lock_type::read> is_prefill_memory_lock(is_prefill_memory, service_stream);
     bool is_prefill_stage = is_prefill_memory_lock[0];
+    is_prefill_stage = false;
     if (!is_prefill_stage) {
         parent::update_shape_info_tensor(params);
     } else {
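
Since the flag is forced to false here as well, the override effectively delegates straight to the base class; a sketch of the effective behavior (assuming the parameter is named params, as the call above suggests), not the actual file contents:

    // Effective behavior of update_shape_info_tensor while the flag is forced to false:
    void paged_attention_inst::update_shape_info_tensor(const kernel_impl_params& params) {
        parent::update_shape_info_tensor(params);   // the prefill-specific else-branch is never taken
    }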
