Commit f494c1b

[GPU] Fix crop primitive execution with dynamic paddings input
1 parent cfbc998 commit f494c1b

2 files changed (+89 −2 lines)

src/plugins/intel_gpu/src/graph/impls/ocl/crop.cpp

+13 −1
@@ -55,7 +55,19 @@ struct crop_impl : typed_primitive_impl_ocl<crop> {
         }
 
         update_shapes(*_kernel_data.params, impl_param);
-        auto runtime_offset = convert_data_tensor(impl_param.get_input_layout(), impl_param.input_offsets[0]).GetFirstElementOffset();
+
+        // Reset input_layout padding as the offset configured by crop should affect only "data"
+        // area and shouldn't depend on input_layout paddings.
+        // For example, for an input shape like: [1, 32, 128 (pad_before=512, pad_after=0), 8]
+        // with crop_axis=2 and split_lengths = {64, 64},
+        // runtime_offset should be set in terms of [1, 32, 128, 8] shape, as the kernel reads data
+        // using "input[GET_INDEX(INPUT, order) + runtime_offset]", where GET_INDEX already reflects input
+        // data paddings.
+        // So crop.out0's runtime_offset=0 and crop.out1's runtime_offset=512.
+        auto input_layout = impl_param.get_input_layout();
+        input_layout.data_padding = padding();
+
+        auto runtime_offset = convert_data_tensor(input_layout, impl_param.input_offsets[0]).GetFirstElementOffset();
         kernel_selector::ScalarDescriptor s;
         s.t = kernel_selector::ScalarDescriptor::Types::UINT32;
         s.v.u32 = static_cast<uint32_t>(runtime_offset);
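
For readers following the comment in the hunk above, the sketch below illustrates the arithmetic it describes: once the offset is computed on the padding-free shape, each crop output's runtime_offset is just the prefix sum of split_lengths along the crop axis multiplied by the element count of the trailing dimensions, which yields the 0 and 512 mentioned in the comment. This is a standalone, illustrative program rather than plugin code; the variable names (inner, start) are made up, and it assumes a contiguous bfyx element ordering.

// Illustrative only: hypothetical helper names, not OpenVINO API.
#include <cstddef>
#include <functional>
#include <iostream>
#include <numeric>
#include <vector>

int main() {
    std::vector<size_t> shape = {1, 32, 128, 8};   // unpadded input shape (bfyx)
    std::vector<size_t> split_lengths = {64, 64};  // VariadicSplit lengths
    size_t axis = 2;                               // crop along Y

    // Number of elements per step along the crop axis (here: x_size = 8).
    size_t inner = std::accumulate(shape.begin() + axis + 1, shape.end(),
                                   size_t{1}, std::multiplies<size_t>());

    size_t start = 0;
    for (size_t i = 0; i < split_lengths.size(); ++i) {
        std::cout << "crop.out" << i << " runtime_offset = " << start * inner << "\n";
        start += split_lengths[i];
    }
    // Prints 0 for crop.out0 and 512 for crop.out1, matching the comment above.
    // Per that comment, computing the offset on the padded layout would have
    // added the padding a second time, since GET_INDEX in the kernel already
    // accounts for input data paddings.
}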

src/plugins/intel_gpu/tests/unit/test_cases/crop_gpu_test.cpp

+76 −1
@@ -97,7 +97,6 @@ TEST(crop_gpu, basic_in2x2x2x3_crop_all) {
     auto output = outputs.at("crop").get_memory();
     cldnn::mem_lock<float> output_ptr(output, get_test_stream());
 
-    printf("Results:\n");
     for (int b = 0; b < crop_batch_num; ++b) { //B
         for (int f = 0; f < crop_feature_num; ++f) { //F
             for (int y = 0; y < crop_y_size; ++y) { //Y
@@ -1477,6 +1476,82 @@ TEST(crop_gpu, dynamic_in1x4x1x1_varaidic_split) {
         ASSERT_EQ(output_ptr_2[i], out2[i]);
 }
 
+TEST(crop_gpu, dynamic_input_padding_varaidic_split) {
+    tests::random_generator rg(GET_SUITE_NAME);
+    auto& engine = get_test_engine();
+
+    auto batch_num = 1;
+    auto feature_num = 4;
+    auto y_size = 128;
+    auto x_size = 4;
+
+    auto axis = 2;
+    auto input_y_pad_before = 64;
+    auto input_y_pad_after = 32;
+
+    auto input_dyn_layout = layout{ ov::PartialShape{-1, feature_num, y_size, x_size}, data_types::f32, format::bfyx };
+    input_dyn_layout.data_padding._dynamic_dims_mask[axis] = 1;
+
+    auto input_actual_layout = layout{ ov::PartialShape{batch_num, feature_num, y_size, x_size}, data_types::f32, format::bfyx };
+    input_actual_layout.data_padding._lower_size[axis] = input_y_pad_before;
+    input_actual_layout.data_padding._upper_size[axis] = input_y_pad_after;
+
+    auto input_mem = engine.allocate_memory(input_actual_layout);
+    auto axis_mem = engine.allocate_memory({ {}, data_types::i64, format::bfyx });
+    auto splits_length_mem = engine.allocate_memory({ {2}, data_types::i64, format::bfyx });
+
+    auto elements_count = input_mem->size() / sizeof(float);
+    auto input_data = rg.generate_random_1d<float>(elements_count, -10, 10);
+
+    cldnn::crop_ngraph_op_mode op_mode = cldnn::crop_ngraph_op_mode::variadic_split;
+    topology topology;
+    topology.add(input_layout("input", input_dyn_layout));
+    topology.add(data("axis", axis_mem));
+    topology.add(data("splits_length", splits_length_mem));
+    topology.add(crop("variadic_split.out0", { input_info("input"), input_info("axis"), input_info("splits_length") }, tensor(1), tensor(0), op_mode, 0, axis));
+    topology.add(crop("variadic_split.out1", { input_info("input"), input_info("axis"), input_info("splits_length") }, tensor(1), tensor(0), op_mode, 1, axis));
+
+    std::vector<int64_t> splits_vec = { 64, 64 };
+
+    set_values(input_mem, input_data);
+    set_values(splits_length_mem, splits_vec);
+    set_values<int64_t>(axis_mem, {axis});
+
+    ExecutionConfig config = get_test_default_config(engine);
+    config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
+    config.set_property(ov::intel_gpu::optimize_data(true));
+    config.set_property(ov::intel_gpu::custom_outputs(topology.get_primitives_ids()));
+
+    network network(engine, topology, config);
+    network.set_input_data("input", input_mem);
+
+    auto check_output = [&](size_t output_idx, cldnn::network_output output) {
+        auto y_start = std::accumulate(splits_vec.begin(), splits_vec.begin() + output_idx, 0);
+        auto y_size_output = splits_vec[output_idx];
+
+        auto output_layout = output.get_layout();
+        auto output_mem = output.get_memory();
+        cldnn::mem_lock<float> output_ptr(output_mem, get_test_stream());
+        for (size_t b = 0; b < static_cast<size_t>(batch_num); b++) {
+            for (size_t f = 0; f < static_cast<size_t>(feature_num); f++) {
+                for (size_t y = 0; y < static_cast<size_t>(y_size_output); y++) {
+                    for (size_t x = 0; x < static_cast<size_t>(x_size); x++) {
+                        auto input_offset = input_actual_layout.get_linear_offset(cldnn::tensor(b, f, x, y + y_start, 0, 0));
+                        auto output_offset = output_layout.get_linear_offset(cldnn::tensor(b, f, x, y, 0, 0));
+
+                        ASSERT_EQ(input_data[input_offset], output_ptr[output_offset]);
+                    }
+                }
+            }
+        }
+    };
+
+    auto outputs = network.execute();
+
+    check_output(0, outputs.at("variadic_split.out0"));
+    check_output(1, outputs.at("variadic_split.out1"));
+}
+
 TEST(crop_gpu, static_split_batch) {
     auto& engine = get_test_engine();
 
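To exercise just the new coverage locally, the standard GoogleTest name filter applies. Assuming the GPU plugin unit tests are built into a binary named ov_gpu_unit_tests (the exact target name may differ between builds), the invocation would look like:

./ov_gpu_unit_tests --gtest_filter=crop_gpu.dynamic_input_padding_varaidic_split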