@@ -163,6 +163,33 @@ struct paged_attention_impl : multi_stage_primitive<paged_attention> {
163
163
config.x_block_size = desc->x_block_size ;
164
164
config.max_context_len = 1 ;
165
165
166
+ if (!impl_param.is_dynamic ()) {
167
+ auto query_shape = impl_param.get_input_layout (0 ).get_shape ();
168
+ auto key_cache_shape = impl_param.get_input_layout (3 ).get_shape ();
169
+ auto value_cache_shape = impl_param.get_input_layout (4 ).get_shape ();
170
+
171
+ auto actual_head_size = value_cache_shape[2 ];
172
+ auto actual_heads_num = query_shape[2 ] / actual_head_size;
173
+ auto actual_kv_heads_num = value_cache_shape[1 ];
174
+ auto actual_block_size = value_cache_shape[3 ];
175
+ auto actual_x_block_size = key_cache_shape[4 ];
176
+
177
+ bool valid_params = config.head_size == actual_head_size &&
178
+ config.heads_num == actual_heads_num &&
179
+ config.kv_heads_num == actual_kv_heads_num &&
180
+ config.block_size == actual_block_size &&
181
+ config.x_block_size == actual_x_block_size;
182
+
183
+ OPENVINO_ASSERT (valid_params, " [GPU] Got unexpected parameters for PA operation. " ,
184
+ " Currently they need to be specified explicitly (this should be fixed soon by PA model conversion improvement). " ,
185
+ " Please use the following environment variables for proper PA configuration: " ,
186
+ " PA_HEAD_SIZE=" , actual_head_size, " " ,
187
+ " PA_HEADS_NUM=" , actual_heads_num, " " ,
188
+ " PA_KV_HEADS_NUM=" , actual_kv_heads_num, " " ,
189
+ " PA_BLOCK_SIZE=" , actual_block_size, " " ,
190
+ " PA_X_BLOCK_SIZE=" , actual_x_block_size);
191
+ }
192
+
166
193
const size_t simd_size = 16 ;
167
194
OPENVINO_ASSERT (config.head_size % simd_size == 0 , " [GPU] Head size is expected to be divisible by 16" );
168
195
0 commit comments