@@ -50,77 +50,84 @@ struct TrtDeleter
50
50
51
51
template <typename T>
52
52
using TrtUniquePtr = std::unique_ptr<T, TrtDeleter<T>>;
53
- using BatchConfig = std::array<int32_t , 3 >;
54
53
55
- struct BuildConfig
56
- {
57
- // type for calibration
58
- std::string calib_type_str;
54
// Precision types selectable through BuildConfig::precision.
enum PrecisionType {
  FP32 = 0,
  FP16 = 1,
  INT8 = 2
};
59
56
60
- // DLA core ID that the process uses
61
- int dla_core_id ;
57
// Calibration types selectable through BuildConfig::calibration.
enum CalibrationType {
  ENTROPY = 0,
  LEGACY = 1,
  PERCENTILE = 2,
  MINMAX = 3
};
62
59
63
- // flag for partial quantization in first layer
64
- bool quantize_first_layer; // For partial quantization
60
/**
 * @brief Minimum / optimum / maximum values of one batch-like dimension.
 *
 * The three values are stored as given; no ordering check (min <= opt <= max) is performed here.
 * NOTE(review): presumably these feed a TensorRT optimization profile (min/opt/max shapes) —
 * confirm against the builder implementation.
 */
struct BatchOptConfig
{
  /**
   * @brief Construct a new BatchOptConfig for a static shape inference.
   *
   * All of `k_min`, `k_opt` and `k_max` are set to the same value.
   *
   * @param value The value assigned to the minimum, optimum and maximum.
   */
  explicit constexpr BatchOptConfig(const int32_t value) noexcept
  : k_min(value), k_opt(value), k_max(value)
  {
  }

  /**
   * @brief Construct a new BatchOptConfig for a dynamic shape inference.
   *
   * @param k_min The minimum value.
   * @param k_opt The optimum value.
   * @param k_max The maximum value.
   */
  constexpr BatchOptConfig(const int32_t k_min, const int32_t k_opt, const int32_t k_max) noexcept
  : k_min(k_min), k_opt(k_opt), k_max(k_max)
  {
  }

  int32_t k_min;  // minimum value
  int32_t k_opt;  // optimum value
  int32_t k_max;  // maximum value
};  // struct BatchOptConfig
68
83
69
- // flag for per-layer profiler using IProfiler
70
- bool profile_per_layer;
84
+ struct BuildConfig
85
+ {
86
+ // type of precision
87
+ PrecisionType precision;
71
88
72
- // clip value for implicit quantization
73
- double clip_value; // For implicit quantization
89
+ // type for calibration
90
+ CalibrationType calibration;
74
91
75
- // Supported calibration type
76
- const std::array<std::string, 4 > valid_calib_type = { " Entropy " , " Legacy " , " Percentile " , " MinMax " } ;
92
+ BatchOptConfig batch_target;
93
+ BatchOptConfig batch_agent ;
77
94
78
95
/* *
79
96
* @brief Construct a new instance with default configurations.
80
- *
81
97
*/
82
98
BuildConfig ()
83
- : calib_type_str(" MinMax" ),
84
- dla_core_id (-1 ),
85
- quantize_first_layer(false ),
86
- quantize_last_layer(false ),
87
- profile_per_layer(false ),
88
- clip_value(0.0 )
99
+ : precision(PrecisionType::FP32),
100
+ calibration (CalibrationType::MINMAX),
101
+ batch_target(1 , 10 , 20 ),
102
+ batch_agent(1 , 30 , 50 ),
103
+ is_dynamic_(false )
89
104
{
90
105
}
91
106
92
107
/* *
93
- * @brief Construct a new instance with custom configurations .
108
+ * @brief Construct a new build config .
94
109
*
95
- * @param calib_type_str The name of calibration type which must be selected from [Entropy,
96
- * MinMax].
97
- * @param dla_core_id DLA core ID used by the process.
98
- * @param quantize_first_layer The flag whether to quantize first layer.
99
- * @param quantize_last_layer The flag whether to quantize last layer.
100
- * @param profile_per_layer The flag to profile per-layer in IProfiler.
101
- * @param clip_value The value to be clipped in quantization implicitly.
110
+ * @param is_dynamic
111
+ * @param precision
112
+ * @param calibration
102
113
*/
103
- explicit BuildConfig (
104
- const std::string & calib_type_str , const int dla_core_id = - 1 ,
105
- const bool quantize_first_layer = false , const bool quantize_last_layer = false ,
106
- const bool profile_per_layer = false , const double clip_value = 0.0 )
107
- : calib_type_str(calib_type_str),
108
- dla_core_id(dla_core_id ),
109
- quantize_first_layer(quantize_first_layer ),
110
- quantize_last_layer(quantize_last_layer ),
111
- profile_per_layer(profile_per_layer ),
112
- clip_value(clip_value )
114
+ BuildConfig (
115
+ const bool is_dynamic , const PrecisionType & precision = PrecisionType::FP32 ,
116
+ const CalibrationType & calibration = CalibrationType::MINMAX ,
117
+ const BatchOptConfig & batch_target = BatchOptConfig( 1 , 10 , 20 ),
118
+ const BatchOptConfig & batch_agent = BatchOptConfig( 1 , 30 , 50 ))
119
+ : precision(precision ),
120
+ calibration(calibration ),
121
+ batch_target(batch_target ),
122
+ batch_agent(batch_agent ),
123
+ is_dynamic_(is_dynamic )
113
124
{
114
- if (
115
- std::find (valid_calib_type.begin (), valid_calib_type.end (), calib_type_str) ==
116
- valid_calib_type.end ()) {
117
- std::stringstream message;
118
- message << " Invalid calibration type was specified: " << calib_type_str << std::endl
119
- << " Valid value is one of: [Entropy, (Legacy | Percentile), MinMax]" << std::endl
120
- << " Default calibration type will be used: MinMax" << std::endl;
121
- std::cerr << message.str ();
122
- }
123
125
}
126
+
127
+ bool is_dynamic () const { return is_dynamic_; }
128
+
129
+ private:
130
+ bool is_dynamic_;
124
131
}; // struct BuildConfig
125
132
126
133
class MTRBuilder
@@ -130,15 +137,12 @@ class MTRBuilder
130
137
* @brief Construct a new instance.
131
138
*
132
139
* @param model_path Path to engine or onnx file.
133
- * @param precision The name of precision type.
134
- * @param batch_config The configuration of min/opt/max batch.
135
- * @param max_workspace_size The max workspace size.
136
140
* @param build_config The configuration of build.
141
+ * @param max_workspace_size The max workspace size.
137
142
*/
138
143
MTRBuilder (
139
- const std::string & model_path, const std::string & precision,
140
- const BatchConfig & batch_config = {1 , 1 , 1 }, const size_t max_workspace_size = (1ULL << 30 ),
141
- const BuildConfig & build_config = BuildConfig());
144
+ const std::string & model_path, const BuildConfig & build_config = BuildConfig(),
145
+ const size_t max_workspace_size = (1ULL << 63 ));
142
146
143
147
/* *
144
148
* @brief Destroy the instance.
@@ -158,6 +162,12 @@ class MTRBuilder
158
162
*/
159
163
bool isInitialized () const ;
160
164
165
+ // Return true if the model supports dynamic shape inference.
166
+ bool isDynamic () const ;
167
+
168
+ // Set the binding dimensions at the specified index for dynamic shape inference.
169
+ bool setBindingDimensions (int index, nvinfer1::Dims dimensions);
170
+
161
171
/* *
162
172
* @brief A wrapper of `nvinfer1::IExecuteContext::enqueueV2`.
163
173
*
@@ -178,6 +188,9 @@ class MTRBuilder
178
188
*/
179
189
bool loadEngine (const std::string & filepath);
180
190
191
+ // Create a cache path of engine file.
192
+ fs::path createEngineCachePath () const ;
193
+
181
194
/* *
182
195
* @brief Build engine from onnx file.
183
196
*
@@ -194,8 +207,6 @@ class MTRBuilder
194
207
TrtUniquePtr<nvinfer1::IExecutionContext> context_;
195
208
196
209
fs::path model_filepath_;
197
- std::string precision_;
198
- BatchConfig batch_config_;
199
210
size_t max_workspace_size_;
200
211
std::unique_ptr<const BuildConfig> build_config_;
201
212
0 commit comments