Skip to content

Commit 4ff8e07

Browse files
committed
Merge pull request #158 from pooyadavoodi/caffe-0.15
Use cuDNN routine FindEx to find the best algorithm.
2 parents 846f3e4 + 4248b1f commit 4ff8e07

File tree

5 files changed

+377
-57
lines changed

5 files changed

+377
-57
lines changed

include/caffe/layers/cudnn_conv_layer.hpp

+19 -2
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@ class CuDNNConvolutionLayer : public ConvolutionLayer<Dtype> {
3434
public:
3535
explicit CuDNNConvolutionLayer(const LayerParameter& param)
3636
: ConvolutionLayer<Dtype>(param), handles_setup_(false),
37-
backward_passed_ctr_(0) {}
37+
use_algo_seeker_(true), use_modest_workspace_(true) {}
3838
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
3939
const vector<Blob<Dtype>*>& top);
4040
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
@@ -65,7 +65,24 @@ class CuDNNConvolutionLayer : public ConvolutionLayer<Dtype> {
6565
size_t *workspace_bwd_data_sizes_;
6666
size_t *workspace_bwd_filter_sizes_;
6767
GPUMemory::Workspace workspace;
68-
int backward_passed_ctr_;
68+
69+
private:
70+
bool use_algo_seeker_;
71+
bool use_modest_workspace_;
72+
void FindExConvAlgo(const vector<Blob<Dtype>*>& bottom,
73+
const vector<Blob<Dtype>*>& top,
74+
const size_t workspace_bytes);
75+
void GetConvAlgo(const vector<Blob<Dtype>*>& bottom,
76+
const vector<Blob<Dtype>*>& top,
77+
const size_t workspace_bytes);
78+
79+
vector<cudnnTensorDescriptor_t> cached_bottom_descs_;
80+
vector<cudnnConvolutionDescriptor_t> cached_conv_descs_;
81+
bool IsBottomDescChanged(const vector<Blob<Dtype>*>& bottom);
82+
bool IsConvDescChanged(const vector<Blob<Dtype>*>& bottom);
83+
84+
bool use_reshape_;
85+
bool initialized_cached_descs_;
6986
};
7087
#endif
7188

0 commit comments

Comments (0)