10 | 10 | #include <intel_gpu/primitives/reshape.hpp>
11 | 11 | #include <intel_gpu/primitives/permute.hpp>
12 | 12 | #include <intel_gpu/primitives/reorder.hpp>
| 13 | +#include <intel_gpu/primitives/eltwise.hpp> |
13 | 14 |
14 | 15 | #include <cstddef>
15 | 16 |
@@ -423,3 +424,58 @@ TEST(depth_to_space_fp32_gpu, d1822_bs2_depth_first) {
423 | 424 | TEST(export_import_depth_to_space_fp32_gpu, d1822_bs2_depth_first) {
424 | 425 | test_depth_to_space_fp32_gpu_d1822_bs2_depth_first<float>(true);
425 | 426 | }
| 427 | + |
| 428 | +static void test_depth_to_space_fp16_input_fp32_output(bool is_caching_test) { |
| 429 | + auto& engine = get_test_engine(); |
| 430 | + |
| 431 | + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 4, 5 } }); |
| 432 | + auto weights = engine.allocate_memory({ data_types::f16, format::bfyx, { 1, 1, 3, 2 } }); |
| 433 | + |
| 434 | + size_t block_size = 1; |
| 435 | + |
| 436 | + set_values(input, { |
| 437 | + 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, |
| 438 | + 2.0f, 2.0f, 3.0f, 4.0f, 6.0f, |
| 439 | + 3.0f, 3.0f, 3.0f, 5.0f, 1.0f, |
| 440 | + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f |
| 441 | + }); |
| 442 | + set_values(weights, { |
| 443 | + ov::float16(1.0f), ov::float16(2.0f), ov::float16(1.0f), |
| 444 | + ov::float16(2.0f), ov::float16(1.0f), ov::float16(2.0f) |
| 445 | + }); |
| 446 | + |
| 447 | + // Build a topology that previously caused a kernel build failure because the input and output data types differ. |
| 448 | + topology topology; |
| 449 | + topology.add(cldnn::input_layout("input", input->get_layout())); |
| 450 | + topology.add(cldnn::data("weights", weights)); |
| 451 | + topology.add(cldnn::reorder("reorder_input", input_info("input"), cldnn::layout(data_types::f16, format::byxf, { 1, 1, 4, 5 }))); |
| 452 | + topology.add(cldnn::convolution("conv", input_info("reorder_input"), "weights", "", 1, { 2, 1 }, {1, 1}, {0, 0}, {0, 0}, false)); |
| 453 | + topology.add(cldnn::depth_to_space("depth_to_space", input_info("conv"), block_size, depth_to_space_mode::depth_first)); |
| 454 | + topology.add(cldnn::activation("activate", input_info("depth_to_space"), cldnn::activation_func::relu_negative_slope, {0.25f, 0.f})); |
| 455 | + topology.add(cldnn::reorder("convert:output", input_info("activate"), format::any, data_types::f32, {}, reorder_mean_mode::subtract, padding(), true)); |
| 456 | + topology.add(cldnn::reorder("result:output/sink_port_0", input_info("convert:output"), format::bfyx, data_types::f32, {}, reorder_mean_mode::subtract, padding(), false)); |
| 457 | + |
| 458 | + ExecutionConfig config = get_test_default_config(engine); |
| 459 | + config.set_property(ov::intel_gpu::optimize_data(true)); |
| 460 | + |
| 461 | + cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); |
| 462 | + |
| 463 | + network->set_input_data("input", input); |
| 464 | + |
| 465 | + auto outputs = network->execute(); |
| 466 | + |
| 467 | + auto output = outputs.at("result:output/sink_port_0").get_memory(); |
| 468 | + cldnn::mem_lock<float> output_ptr(output, get_test_stream()); |
| 469 | + |
| 470 | + std::vector<float> expected_results = { |
| 471 | + 24.0f, 24.0f, 32.0f, 28.0f |
| 472 | + }; |
| 473 | + |
| 474 | + for (size_t i = 0; i < expected_results.size(); ++i) { |
| 475 | + ASSERT_EQ(expected_results[i], output_ptr[i]); |
| 476 | + } |
| 477 | +} |
| 478 | + |
| 479 | +TEST(depth_to_space_gpu, fp16_input_fp32_output) { |
| 480 | + test_depth_to_space_fp16_input_fp32_output(false); |
| 481 | +} |
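
Reviewer note: the expected values 24, 24, 32, 28 come entirely from the convolution, since block_size = 1 makes the depth_to_space a no-op and relu_negative_slope leaves positive values unchanged. Below is a minimal standalone sketch (not part of the patch) that recomputes them on the host; it assumes stride {2, 1} is interpreted as (stride_y = 2, stride_x = 1) and that set_values fills the 1x1x4x5 bfyx input with x innermost, i.e. as a 5-row by 4-column grid.

// Standalone reference check (not part of the patch): recompute the conv output
// on the CPU under the assumptions stated above.
#include <cstddef>
#include <iostream>
#include <vector>

int main() {
    // 5 rows (y) x 4 columns (x), x innermost, matching the set_values order.
    const float in[5][4] = {
        {1, 2, 3, 4},
        {5, 2, 2, 3},
        {4, 6, 3, 3},
        {3, 5, 1, 1},
        {1, 1, 1, 1},
    };
    // 2 rows (y) x 3 columns (x) kernel, same fill order.
    const float w[2][3] = {
        {1, 2, 1},
        {2, 1, 2},
    };
    const size_t stride_y = 2, stride_x = 1;
    // Output spatial size: ((5 - 2) / 2 + 1) x ((4 - 3) / 1 + 1) = 2 x 2.
    std::vector<float> out;
    for (size_t oy = 0; oy < 2; ++oy) {
        for (size_t ox = 0; ox < 2; ++ox) {
            float acc = 0.f;
            for (size_t ky = 0; ky < 2; ++ky)
                for (size_t kx = 0; kx < 3; ++kx)
                    acc += in[oy * stride_y + ky][ox * stride_x + kx] * w[ky][kx];
            out.push_back(acc);
        }
    }
    for (float v : out)
        std::cout << v << ' ';   // prints: 24 24 32 28
    std::cout << '\n';
}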