Commit ef423a4

Hotfix resolving meminfo divergence corner case
1 parent 4e36e90 commit ef423a4

2 files changed (+17 −6 lines)

include/caffe/layers/cudnn_conv_layer.hpp (+1 −2)

@@ -87,8 +87,7 @@ class CuDNNConvolutionLayer : public ConvolutionLayer<Dtype> {
   bool use_modest_workspace_;
 #if CUDNN_VERSION_MIN(5, 0, 0)
   void FindExConvAlgo(const vector<Blob<Dtype>*>& bottom,
-                      const vector<Blob<Dtype>*>& top,
-                      const size_t workspace_bytes);
+                      const vector<Blob<Dtype>*>& top);
 #endif
   void GetConvAlgo(const vector<Blob<Dtype>*>& bottom,
                    const vector<Blob<Dtype>*>& top,

src/caffe/layers/cudnn_conv_layer.cpp (+16 −4)

@@ -190,6 +190,13 @@ void CuDNNConvolutionLayer<Dtype>::Reshape(
         // workspace among all algorithms (requires an initial call
         // to FindEx with workspace size 0).
         workspace_bytes = workspace_limit_bytes * MAX_WORKSPACE_RATIO;
+        // Sometimes, when free memory is close to zero, the cached memory
+        // info may diverge from reality. If try_reserve fails, it updates
+        // the info internally and we have to re-evaluate the workspace size.
+        if (!WORKSPACE.try_reserve(workspace_bytes)) {
+          GPUMemory::GetInfo(&workspace_limit_bytes, &total_memory);
+          workspace_bytes = workspace_limit_bytes * MAX_WORKSPACE_RATIO;
+        }
         // Avoid seeking for an algorithm in subsequent iterations
         use_algo_seeker_ = false;
       }

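Note: the retry above can be exercised in isolation. The following is a minimal, self-contained sketch of the same control flow; FakeWorkspace, FakeGetInfo, the byte figures, and the 0.9 ratio are stand-ins invented for illustration, not the GPUMemory API the patch actually uses.

// Minimal sketch of the retry pattern in the hunk above. Only the control
// flow mirrors the change; the types are invented stand-ins.
#include <cstddef>
#include <iostream>

static const double MAX_WORKSPACE_RATIO = 0.9;  // assumed ratio, illustration only

static size_t cached_free = 1024;  // stale, optimistic memory info
static size_t actual_free = 512;   // what can really be reserved

struct FakeWorkspace {
  // Returns false when the request cannot be satisfied and, like the
  // try_reserve the patch's comment describes, refreshes the cached info
  // as a side effect.
  bool try_reserve(size_t bytes) {
    if (bytes > actual_free) {
      cached_free = actual_free;
      return false;
    }
    return true;
  }
};

static void FakeGetInfo(size_t* free_bytes, size_t* total_bytes) {
  *free_bytes = cached_free;
  *total_bytes = 2048;
}

int main() {
  FakeWorkspace workspace;
  size_t workspace_limit_bytes = 0, total_memory = 0;
  FakeGetInfo(&workspace_limit_bytes, &total_memory);
  size_t workspace_bytes = workspace_limit_bytes * MAX_WORKSPACE_RATIO;
  // Same shape as the hunk: a failed optimistic reservation corrects the
  // cached info, so query it again and shrink the request.
  if (!workspace.try_reserve(workspace_bytes)) {
    FakeGetInfo(&workspace_limit_bytes, &total_memory);
    workspace_bytes = workspace_limit_bytes * MAX_WORKSPACE_RATIO;
  }
  std::cout << "workspace_bytes settled at " << workspace_bytes << " bytes\n";
  return 0;
}
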
@@ -203,7 +210,8 @@ void CuDNNConvolutionLayer<Dtype>::Reshape(
         this->GetConvAlgo(bottom, top, workspace_bytes);
         break;
       case ConvolutionParameter_CuDNNConvolutionAlgorithmSeeker_FINDEX:
-        this->FindExConvAlgo(bottom, top, workspace_bytes);
+        WORKSPACE.reserve(workspace_bytes);
+        this->FindExConvAlgo(bottom, top);
         break;
       default:
         LOG(ERROR) << "Wrong value for cudnn_convolution_algo_seeker";

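Note: together with the removal of the in-function reserve further down, this hunk hoists the workspace reservation to the call site, so the seeker no longer needs a byte count and simply uses whatever the caller reserved. A short illustrative sketch of that call-order change follows; Workspace, find_ex_conv_algo, and the sizes are hypothetical stand-ins, not the patch's actual types.

#include <cstddef>
#include <iostream>
#include <vector>

// Illustrative stand-in for a reserved scratch buffer.
struct Workspace {
  std::vector<unsigned char> buf;
  void reserve(size_t bytes) { buf.resize(bytes); }
  size_t size() const { return buf.size(); }
};

// After the change: the seeker takes no byte count; it works with whatever
// the caller reserved (mirrors FindExConvAlgo dropping workspace_bytes).
static void find_ex_conv_algo(Workspace& workspace) {
  std::cout << "seeking with a " << workspace.size() << "-byte workspace\n";
}

int main() {
  Workspace workspace;
  const size_t workspace_bytes = 64 << 20;  // 64 MiB, arbitrary for the demo
  // Caller-side order introduced by the hunk: reserve first, then seek.
  workspace.reserve(workspace_bytes);
  find_ex_conv_algo(workspace);
  return 0;
}
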
@@ -275,8 +283,7 @@ void CuDNNConvolutionLayer<Dtype>::GetConvAlgo(
 template <typename Dtype>
 void CuDNNConvolutionLayer<Dtype>::FindExConvAlgo(
     const vector<Blob<Dtype>*>& bottom,
-    const vector<Blob<Dtype>*>& top,
-    const size_t workspace_bytes) {
+    const vector<Blob<Dtype>*>& top) {

   // Number of algorithms we want to consider
   // Since we only consider one algorithm (the fastest), set this to 1

@@ -293,7 +300,6 @@ void CuDNNConvolutionLayer<Dtype>::FindExConvAlgo(
   void *tmp_weights;
   const int tmp_weights_size = sizeof(Dtype) * weight_offset_;
   GPUMemory::allocate(&tmp_weights, tmp_weights_size);
-  WORKSPACE.reserve(workspace_bytes);

   for (int i = 0; i < bottom.size(); i++) {
     // Find forward algorithm

@@ -463,6 +469,12 @@ void CuDNNConvolutionLayer<Dtype>::UpdateWorkspaceDemand(int size) {
       WORKSPACE_SIZE = workspace_bwd_data_sizes_[i];
     }
   }
+  // We might grab too much before calling Get/FindEx.
+  // Reserve only the amount needed.
+  if (WORKSPACE_SIZE < WORKSPACE.size()) {
+    WORKSPACE.release();
+    WORKSPACE.reserve(WORKSPACE_SIZE);
+  }  // else: reserve in Fwd/Bwd calls
 }

 template <typename Dtype>
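Note: the trimming added in the last hunk can be read as a shrink-to-fit step: if the bytes actually demanded by the chosen algorithms are less than what was grabbed up front, drop the big reservation and take only what is needed. Below is a self-contained sketch of that shape; the Workspace class and the sizes are hypothetical, not the patch's GPUMemory code.

#include <cstddef>
#include <iostream>
#include <vector>

// Illustrative stand-in for the reserved scratch buffer.
struct Workspace {
  std::vector<unsigned char> buf;
  void reserve(size_t bytes) { buf.resize(bytes); }
  void release() { buf.clear(); buf.shrink_to_fit(); }
  size_t size() const { return buf.size(); }
};

int main() {
  Workspace workspace;
  workspace.reserve(256 << 20);      // grabbed generously before Get/FindEx
  const size_t demanded = 48 << 20;  // what the chosen algorithms turned out to need

  // Same shape as the hunk: shrink only if we hold more than is demanded;
  // otherwise later Forward/Backward calls grow the reservation as needed.
  if (demanded < workspace.size()) {
    workspace.release();
    workspace.reserve(demanded);
  }
  std::cout << "workspace now holds " << workspace.size() << " bytes\n";
  return 0;
}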
