@@ -927,7 +927,7 @@ class gemm_gpu_tests: public ::testing::Test {
927
927
ov::Shape ref_input1_broadcasted_shape;
928
928
ov::Shape ref_input1_shape;
929
929
ov::Shape ref_output_shape;
930
-
930
+
931
931
ref_input0_shape = { BATCH_SIZE, 16 , M_SIZE, K_SIZE };
932
932
ref_input1_broadcasted_shape = { N_SIZE, BATCH_SIZE, 16 , K_SIZE };
933
933
ref_input1_shape = { BATCH_SIZE, 16 , K_SIZE, N_SIZE };
@@ -1063,7 +1063,7 @@ class gemm_gpu_tests: public ::testing::Test {
1063
1063
ov::Shape ref_input1_reshaped_shape;
1064
1064
ov::Shape ref_input1_shape;
1065
1065
ov::Shape ref_output_shape;
1066
-
1066
+
1067
1067
ref_input0_shape = { BATCH_SIZE, 32 , M_SIZE, K_SIZE };
1068
1068
ref_input1_broadcasted_shape = { N_SIZE, BATCH_SIZE, 2 , 16 , K_SIZE };
1069
1069
ref_input1_reshaped_shape = { N_SIZE, BATCH_SIZE, 32 , K_SIZE };
@@ -1313,16 +1313,22 @@ class gemm_gpu_tests: public ::testing::Test {
1313
1313
output_shape_default = { M_SIZE, N_SIZE };
1314
1314
} else if (num_dims == 3 ) {
1315
1315
input0_shape_default = { BATCH_SIZE, M_SIZE, K_SIZE };
1316
- input1_shape_default = { BATCH_SIZE, K_SIZE, N_SIZE };
1316
+ input1_shape_default = { BATCH_SIZE, K_SIZE, N_SIZE };
1317
1317
output_shape_default = { BATCH_SIZE, M_SIZE, N_SIZE };
1318
1318
} else if (num_dims == 4 ) {
1319
1319
input0_shape_default = { BATCH_SIZE, 1 , M_SIZE, K_SIZE};
1320
- input1_shape_default = { BATCH_SIZE, 1 , K_SIZE, N_SIZE};
1320
+ input1_shape_default = { BATCH_SIZE, 1 , K_SIZE, N_SIZE};
1321
1321
output_shape_default = { BATCH_SIZE, 1 , M_SIZE, N_SIZE };
1322
1322
}
1323
1323
}
1324
1324
1325
- void test_transpose_matmul_f32 (size_t num_dims, bool is_input_dynamic, bool is_caching_test, std::vector<size_t > BMKN, std::vector<int64_t > input0_order, std::vector<int64_t > input1_order) {
1325
+ void test_transpose_matmul_f32 (size_t num_dims,
1326
+ bool is_input_dynamic,
1327
+ bool is_caching_test,
1328
+ std::vector<size_t > BMKN,
1329
+ std::vector<int64_t > input0_order,
1330
+ std::vector<int64_t > input1_order,
1331
+ std::vector<int64_t > output_order = {}) {
1326
1332
tests::random_generator rg;
1327
1333
rg.set_seed (GET_SUITE_NAME);
1328
1334
@@ -1337,6 +1343,7 @@ class gemm_gpu_tests: public ::testing::Test {
1337
1343
set_default_shapes (num_dims, BMKN, input0_shape_default, input1_shape_default, output_shape_default);
1338
1344
ov::Shape input0_shape (input0_shape_default.size ());
1339
1345
ov::Shape input1_shape (input1_shape_default.size ());
1346
+ ov::Shape output_shape (output_shape_default.size ());
1340
1347
1341
1348
for (size_t dim = 0 ; dim < input0_shape_default.size (); ++dim) {
1342
1349
input0_shape[input0_order[dim]] = input0_shape_default[dim];
@@ -1346,6 +1353,12 @@ class gemm_gpu_tests: public ::testing::Test {
1346
1353
input1_shape[input1_order[dim]] = input1_shape_default[dim];
1347
1354
}
1348
1355
1356
+ if (!output_order.empty ()) {
1357
+ for (size_t dim = 0 ; dim < output_shape_default.size (); ++dim) {
1358
+ output_shape[output_order[dim]] = output_shape_default[dim];
1359
+ }
1360
+ }
1361
+
1349
1362
if (is_input_dynamic) {
1350
1363
input0_layout = layout{ov::PartialShape::dynamic (input0_shape.size ()), data_types::f32, format::bfyx};
1351
1364
input1_layout = layout{ov::PartialShape::dynamic (input1_shape.size ()), data_types::f32, format::bfyx};
@@ -1366,7 +1379,7 @@ class gemm_gpu_tests: public ::testing::Test {
1366
1379
topology topology;
1367
1380
topology.add (input_layout (" input0" , input0_layout),
1368
1381
input_layout (" input1" , input1_layout),
1369
- gemm (" gemm" , { input_info (" input0" ), input_info (" input1" ) }, data_types::f32, {}, {}, {}, {}, input0_order, input1_order)
1382
+ gemm (" gemm" , { input_info (" input0" ), input_info (" input1" ) }, data_types::f32, {}, {}, {}, {}, input0_order, input1_order, output_order )
1370
1383
);
1371
1384
1372
1385
ExecutionConfig config = get_test_default_config (engine);
@@ -1415,6 +1428,19 @@ class gemm_gpu_tests: public ::testing::Test {
1415
1428
false ,
1416
1429
false );
1417
1430
1431
+ if (!output_order.empty ()) {
1432
+ std::vector<float > out_data_transposed (ov::shape_size (output_shape_default));
1433
+
1434
+ ov::reference::transpose ((const char *)(ref_out_data.data ()),
1435
+ (char *)(out_data_transposed.data ()),
1436
+ output_shape_default,
1437
+ sizeof (float ),
1438
+ output_order,
1439
+ output_shape);
1440
+
1441
+ ref_out_data = out_data_transposed;
1442
+ }
1443
+
1418
1444
ASSERT_EQ (output_ptr.size (), ref_out_data.size ());
1419
1445
1420
1446
const auto abs_error = 0.0001 ;
@@ -1614,6 +1640,10 @@ TEST_F(gemm_gpu_tests, transpose_matmul_dynamic_4d_f32) {
1614
1640
this ->test_transpose_matmul_f32 (4 , true , false , /* BMKN*/ {19 , 37 , 23 , 29 }, /* input0_order*/ {0 , 2 , 3 , 1 }, /* input1_order*/ {1 , 2 , 3 , 0 });
1615
1641
}
1616
1642
1643
+ TEST_F (gemm_gpu_tests, transpose_matmul_dynamic_4d_f32_n_tile_32_output_ylast) {
1644
+ this ->test_transpose_matmul_f32 (4 , true , false , /* BMKN*/ {1 , 128 , 1 , 9 }, /* input0_order*/ {0 , 1 , 2 , 3 }, /* input1_order*/ {0 , 1 , 2 , 3 }, /* output_order*/ {0 , 1 , 3 , 2 });
1645
+ }
1646
+
1617
1647
TEST_F(gemm_gpu_tests, transpose_matmul_static_4d_f16) {
    // 4D static-shape f16 matmul with transposed input orders
    // (no caching, no dynamic shapes).
    const std::vector<size_t> bmkn = {19, 37, 23, 29};
    this->test_transpose_matmul_f16(4,
                                    /*is_input_dynamic*/ false,
                                    /*is_caching_test*/ false,
                                    bmkn,
                                    /*input0_order*/ {0, 2, 3, 1},
                                    /*input1_order*/ {1, 2, 3, 0});
}
0 commit comments