@@ -190,6 +190,13 @@ void CuDNNConvolutionLayer<Dtype>::Reshape(
190
190
// workspace among all algorithms (requires an initial call
191
191
// to FindEx with workspace size 0).
192
192
workspace_bytes = workspace_limit_bytes * MAX_WORKSPACE_RATIO;
193
+ // Sometimes, closer to zero, the memory info we hold might diverge from
194
+ // reality. If try_reserve fails, it updates the info internally and
195
+ // we have to re-evaluate the workspace size.
196
+ if (!WORKSPACE.try_reserve (workspace_bytes)) {
197
+ GPUMemory::GetInfo (&workspace_limit_bytes, &total_memory);
198
+ workspace_bytes = workspace_limit_bytes * MAX_WORKSPACE_RATIO;
199
+ }
193
200
// Avoid seeking for an algorithm in subsequent iterations
194
201
use_algo_seeker_ = false ;
195
202
}
@@ -203,7 +210,8 @@ void CuDNNConvolutionLayer<Dtype>::Reshape(
203
210
this ->GetConvAlgo (bottom, top, workspace_bytes);
204
211
break ;
205
212
case ConvolutionParameter_CuDNNConvolutionAlgorithmSeeker_FINDEX:
206
- this ->FindExConvAlgo (bottom, top, workspace_bytes);
213
+ WORKSPACE.reserve (workspace_bytes);
214
+ this ->FindExConvAlgo (bottom, top);
207
215
break ;
208
216
default :
209
217
LOG (ERROR) << " Wrong value for cudnn_convolution_algo_seeker" ;
@@ -275,8 +283,7 @@ void CuDNNConvolutionLayer<Dtype>::GetConvAlgo(
275
283
template <typename Dtype>
276
284
void CuDNNConvolutionLayer<Dtype>::FindExConvAlgo(
277
285
const vector<Blob<Dtype>*>& bottom,
278
- const vector<Blob<Dtype>*>& top,
279
- const size_t workspace_bytes) {
286
+ const vector<Blob<Dtype>*>& top) {
280
287
281
288
// Number of algorithms we want to consider
282
289
// Since we only consider one algorithm (the fastest), set this to 1
@@ -293,7 +300,6 @@ void CuDNNConvolutionLayer<Dtype>::FindExConvAlgo(
293
300
void *tmp_weights;
294
301
const int tmp_weights_size = sizeof (Dtype) * weight_offset_;
295
302
GPUMemory::allocate (&tmp_weights, tmp_weights_size);
296
- WORKSPACE.reserve (workspace_bytes);
297
303
298
304
for (int i = 0 ; i < bottom.size (); i++) {
299
305
// Find forward algorithm
@@ -463,6 +469,12 @@ void CuDNNConvolutionLayer<Dtype>::UpdateWorkspaceDemand(int size) {
463
469
WORKSPACE_SIZE = workspace_bwd_data_sizes_[i];
464
470
}
465
471
}
472
+ // We might grab too much before calling Get/FindEx.
473
+ // Reserve only the amount needed.
474
+ if (WORKSPACE_SIZE < WORKSPACE.size ()) {
475
+ WORKSPACE.release ();
476
+ WORKSPACE.reserve (WORKSPACE_SIZE);
477
+ } // else: reserve in Fwd/Bwd calls
466
478
}
467
479
468
480
template <typename Dtype>
0 commit comments