Skip to content

Commit 51906fe

Browse files
authored Apr 1, 2024
[GPU] Fix gemm_tiled_opt kernel bug for tile_n_size 32 (openvinotoolkit#23776)
### Details:
- Fixed a crash and an accuracy issue for tile_n_size 32 with transposed input for static shapes
- Extended the gemm_tiled_opt test to cover a wider variety of combinations and added more test cases

### Tickets:
- 137358
1 parent 91d922b commit 51906fe

File tree

2 files changed

+109
-139
lines changed

2 files changed

+109
-139
lines changed
 

‎src/plugins/intel_gpu/src/kernel_selector/cl_kernels/gemm_tiled_opt.cl

+10-6
Original file line numberDiff line numberDiff line change
@@ -412,16 +412,12 @@ KERNEL(gemm_tiled_opt)(
412412
c_tile[dot_id] = mad((INPUT0_TYPE)(sub_group_broadcast(a_read[subtile_k_id], simd_local_id)),
413413
b_tile[subtile_k_id * SIMD_WIDTH + simd_local_id], c_tile[dot_id]);
414414
#else // TILE_K > SIMD_WIDTH
415-
#if IS_DYNAMIC && B_VEC_SIZE > 1
416-
#if TRANSPOSE_INPUT1 == TRANSPOSE_Y_LAST
415+
#if B_VEC_SIZE > 1 && TRANSPOSE_INPUT1 == TRANSPOSE_Y_LAST
417416
MAKE_VECTOR_TYPE(INPUT1_TYPE, B_VEC_SIZE) b_tile_tmp;
418417
unroll_for (uint b_elem = 0; b_elem < B_VEC_SIZE; ++b_elem) {
419418
b_tile_tmp[b_elem] = b_tile[b_elem][simd_local_id];
420419
}
421420
c_tile[dot_id] = mad((INPUT0_TYPE)(sub_group_broadcast(a_read, simd_local_id)), b_tile_tmp, c_tile[dot_id]);
422-
#else
423-
c_tile[dot_id] = mad((INPUT0_TYPE)(sub_group_broadcast(a_read, simd_local_id)), b_tile[simd_local_id], c_tile[dot_id]);
424-
#endif
425421
#else
426422
c_tile[dot_id] = mad((INPUT0_TYPE)(sub_group_broadcast(a_read, simd_local_id)), b_tile[simd_local_id], c_tile[dot_id]);
427423
#endif
@@ -464,7 +460,15 @@ KERNEL(gemm_tiled_opt)(
464460
// Tile C calculation for TN, TT cases
465461
unroll_for (uint dot_id = 0; dot_id < tile_m_iterations; dot_id++) {
466462
unroll_for (uint simd_local_id = 0; simd_local_id < SIMD_WIDTH; simd_local_id++) {
467-
c_tile[dot_id] = mad((INPUT0_TYPE)(sub_group_broadcast(a_tile[dot_id], simd_local_id)), b_tile[simd_local_id], c_tile[dot_id]);
463+
#if B_VEC_SIZE > 1 && TRANSPOSE_INPUT1 == TRANSPOSE_Y_LAST
464+
MAKE_VECTOR_TYPE(INPUT1_TYPE, B_VEC_SIZE) b_tile_tmp;
465+
unroll_for (uint b_elem = 0; b_elem < B_VEC_SIZE; ++b_elem) {
466+
b_tile_tmp[b_elem] = b_tile[b_elem][simd_local_id];
467+
}
468+
c_tile[dot_id] = mad((INPUT0_TYPE)(sub_group_broadcast(a_tile[dot_id], simd_local_id)), b_tile_tmp, c_tile[dot_id]);
469+
#else
470+
c_tile[dot_id] = mad((INPUT0_TYPE)(sub_group_broadcast(a_tile[dot_id], simd_local_id)), b_tile[simd_local_id], c_tile[dot_id]);
471+
#endif
468472
}
469473
} // Tile C calculation for TN, TT cases end
470474
#endif // !TRANSPOSE_INPUT0

0 commit comments

Comments
 (0)