Skip to content

Commit 34d16b8

Browse files
author
Vladimir Paramuzov
authored
[GPU] Move tuning cache loading to kernel selector (openvinotoolkit#15112)
* [GPU] Move tuning cache loading to kernel selector. Remove tuning modes
* [GPU] Removed kernel runner
1 parent c1a9152 commit 34d16b8

38 files changed

+77
-1089
lines changed

src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp

-8
Original file line number | Diff line number | Diff line change
@@ -17,10 +17,6 @@
1717
#include <utility>
1818
#include <set>
1919

20-
namespace kernel_selector {
21-
class TuningCache;
22-
} // namespace kernel_selector
23-
2420
namespace cldnn {
2521

2622
struct topology;
@@ -248,9 +244,6 @@ struct program {
248244
kernel::ptr get_kernel(kernel_id id);
249245
kernels_cache& get_kernels_cache() const;
250246

251-
void load_tuning_cache();
252-
std::shared_ptr<kernel_selector::TuningCache> get_tuning_cache() const { return tuning_cache; }
253-
254247
// returns {-1, -1} if it failed to estimate by allocating given batch size
255248
std::pair<int64_t/*const alloc*/, int64_t/*general alloc*/> get_estimated_device_mem_usage();
256249

@@ -270,7 +263,6 @@ struct program {
270263
std::vector<program_node*> outputs;
271264
nodes_ordering processing_order;
272265
std::unique_ptr<pass_manager> pm;
273-
std::shared_ptr<kernel_selector::TuningCache> tuning_cache;
274266
bool is_body_program;
275267
int8_t is_subgroup_local_block_io_supported;
276268

src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp

-35
Original file line number | Diff line number | Diff line change
@@ -48,41 +48,6 @@ static constexpr Property<bool, PropertyMutability::RW> partial_build_program{"G
4848
static constexpr Property<bool, PropertyMutability::RW> allow_new_shape_infer{"GPU_ALLOW_NEW_SHAPE_INFER"};
4949
static constexpr Property<std::string, PropertyMutability::RW> dump_graphs{"GPU_DUMP_GRAPHS"};
5050
static constexpr Property<std::vector<std::string>, PropertyMutability::RW> custom_outputs{"GPU_CUSTOM_OUTPUTS"};
51-
52-
/// @brief Tuning mode.
53-
enum class TuningMode {
54-
/// @brief Tuning is disabled.
55-
tuning_disabled,
56-
57-
/// @brief Tuning using the cached data (no on-line tuning for non-existing data).
58-
tuning_use_cache,
59-
60-
/// @brief Tuning using the cached data if exist, tune and update cache otherwise.
61-
tuning_tune_and_cache,
62-
63-
/// @brief Tuning using the cached data and update tasks.
64-
/// @details Performs updating tasks like removal of invalid caches, promoting to new format, etc.
65-
/// No tuning for non-existing data.
66-
tuning_use_and_update,
67-
68-
/// @brief Retune the cache data even if it exists.
69-
tuning_retune_and_cache
70-
};
71-
72-
struct TuningConfig {
73-
TuningMode mode;
74-
std::string cache_file_path;
75-
76-
TuningConfig() : mode(TuningMode::tuning_disabled), cache_file_path("") {}
77-
};
78-
79-
inline std::ostream& operator<<(std::ostream& os, const TuningConfig& val) {
80-
os << val.cache_file_path;
81-
return os;
82-
}
83-
84-
static constexpr Property<TuningConfig, PropertyMutability::RW> tuning_config{"GPU_TUNING_CONFIG"};
85-
8651
static constexpr Property<ImplForcingMap, PropertyMutability::RW> force_implementations{"GPU_FORCE_IMPLEMENTATIONS"};
8752
static constexpr Property<std::string, PropertyMutability::RW> config_file{"CONFIG_FILE"};
8853
static constexpr Property<bool, PropertyMutability::RW> enable_lp_transformations{"LP_TRANSFORMS_MODE"};

src/plugins/intel_gpu/src/graph/impls/ocl/arg_max_min.cpp

-1
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,6 @@
99
#include "kernel_selector_helper.h"
1010
#include "arg_max_min/arg_max_min_kernel_selector.h"
1111
#include "arg_max_min/arg_max_min_kernel_base.h"
12-
#include "kernel_runner.h"
1312

1413
namespace cldnn {
1514
namespace ocl {

src/plugins/intel_gpu/src/graph/impls/ocl/binary_convolution.cpp

-9
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,6 @@
88
#include "impls/implementation_map.hpp"
99
#include "intel_gpu/runtime/error_handler.hpp"
1010
#include "kernel_selector_helper.h"
11-
#include "kernel_runner.h"
1211
#include "kernel_selector/kernels/binary_convolution/binary_convolution_kernel_selector.h"
1312
#include "kernel_selector/kernels/binary_convolution/binary_convolution_params.h"
1413
#include <algorithm>
@@ -75,14 +74,6 @@ struct binary_convolution_impl : typed_primitive_impl_ocl<binary_convolution> {
7574
uint32_t dilation_x = dilation.size() >= 1 ? dilation[dilation.size() - 1] : 1;
7675
params.dilation = {dilation_x, dilation_y, dilation_z};
7776

78-
const auto& tuning_config = impl_param.get_program().get_config().get_property(ov::intel_gpu::tuning_config);
79-
80-
if (tuning_config.mode == ov::intel_gpu::TuningMode::tuning_tune_and_cache ||
81-
tuning_config.mode == ov::intel_gpu::TuningMode::tuning_retune_and_cache) {
82-
optional_params.tuningParams.runner =
83-
std::make_shared<gpu::kernel_runner>(impl_param.get_program().get_engine(), impl_param.get_program().get_id(), true);
84-
}
85-
8677
return {params, optional_params};
8778
}
8879
};

src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp

-9
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,6 @@
88
#include "impls/implementation_map.hpp"
99
#include "intel_gpu/runtime/error_handler.hpp"
1010
#include "kernel_selector_helper.h"
11-
#include "kernel_runner.h"
1211
#include "convolution/convolution_kernel_selector.h"
1312
#include "convolution/convolution_params.h"
1413
#include <algorithm>
@@ -166,14 +165,6 @@ struct convolution_impl : typed_primitive_impl_ocl<convolution> {
166165

167166
auto& kernel_selector = kernel_selector::convolution_kernel_selector::Instance();
168167

169-
const auto& tuning_config = impl_param.get_program().get_config().get_property(ov::intel_gpu::tuning_config);
170-
171-
if (tuning_config.mode == ov::intel_gpu::TuningMode::tuning_tune_and_cache ||
172-
tuning_config.mode == ov::intel_gpu::TuningMode::tuning_retune_and_cache) {
173-
conv_optional_params.tuningParams.runner =
174-
std::make_shared<gpu::kernel_runner>(arg.get_program().get_engine(), arg.get_program().get_id(), true, true);
175-
}
176-
177168
auto best_kernel = kernel_selector.get_best_kernel(conv_params, conv_optional_params);
178169

179170
return make_unique<convolution_impl>(best_kernel);

src/plugins/intel_gpu/src/graph/impls/ocl/deformable_convolution.cpp

-1
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,6 @@
77
#include "impls/implementation_map.hpp"
88
#include "intel_gpu/runtime/error_handler.hpp"
99
#include "kernel_selector_helper.h"
10-
#include "kernel_runner.h"
1110
#include "convolution/convolution_kernel_selector.h"
1211
#include "convolution/convolution_params.h"
1312
#include <algorithm>

src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp

-2
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,6 @@
1010
#include "fully_connected/fully_connected_params.h"
1111

1212
#include "intel_gpu/runtime/error_handler.hpp"
13-
#include "kernel_runner.h"
1413

1514
#include "intel_gpu/primitives/reorder.hpp"
1615
#include "intel_gpu/primitives/input_layout.hpp"
@@ -119,7 +118,6 @@ struct fully_connected_impl : typed_primitive_impl_ocl<fully_connected> {
119118
params.quantization = kernel_selector::QuantizationType::NONE;
120119
}
121120

122-
optional_params.tuningParams.runner = std::make_shared<gpu::kernel_runner>(progam.get_engine(), progam.get_id(), true);
123121
return {params, optional_params};
124122
}
125123

src/plugins/intel_gpu/src/graph/include/kernel_selector_helper.h

-3
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,6 @@ using namespace cldnn;
2828

2929
namespace cldnn {
3030
enum class data_types : size_t;
31-
enum class tuning_mode;
3231
struct format;
3332
struct layout;
3433
struct program;
@@ -65,7 +64,6 @@ using softmax_dim = kernel_selector::SoftmaxDim;
6564
using mean_subtruct_mode = kernel_selector::MeanSubtractMode;
6665
using mean_op = kernel_selector::MeanOp;
6766
using concat_axis = kernel_selector::ConcatAxis;
68-
using tuning_mode = kernel_selector::TuningMode;
6967
using sample_type = kernel_selector::ResampleType;
7068
using coordinate_transformation_mode = kernel_selector::CoordinateTransformationMode;
7169
using nearest_mode = kernel_selector::NearestMode;
@@ -101,7 +99,6 @@ kernel_selector::data_layout to_data_layout(format f);
10199
cldnn::format from_data_layout(kernel_selector::data_layout l);
102100
kernel_selector::weights_layout to_weights_layout(format f, bool is_grouped);
103101
cldnn::format::type from_weights_layout(kernel_selector::weights_layout l);
104-
kernel_selector::tuning_mode to_tuning_mode(ov::intel_gpu::TuningMode mode);
105102
kernel_selector::data_tensor convert_data_tensor(const layout& l, const tensor view_offset = tensor {});
106103
kernel_selector::weights_tensor convert_weights_tensor(const layout& l, bool is_grouped = false);
107104
layout from_weights_tensor(const kernel_selector::weights_tensor& t);

0 commit comments

Comments
 (0)