Skip to content

Commit c9eda39

Browse files
committed
CUB Memory Manager + cuDNN v4 and v5 support
1 parent 5b5b438 commit c9eda39

Some content is hidden

Large commits have some of their content hidden by default. Use the search box below to locate content that may be hidden.

48 files changed

+1194
-319
lines changed

.travis.yml

+3
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@ env:
88
- WITH_CUDA=true WITH_CMAKE=true WITH_IO=true
99
- WITH_CUDA=false WITH_CMAKE=false WITH_IO=false
1010
- WITH_CUDA=false WITH_CMAKE=true WITH_IO=false PYTHON_VERSION=3
11+
# Currently there is no way to install cudnn via apt-get. Uncomment when it's available.
12+
# - WITH_CUDA=true WITH_CMAKE=false WITH_IO=true WITH_CUDNN=true
13+
# - WITH_CUDA=true WITH_CMAKE=true WITH_IO=true WITH_CUDNN=true
1114

1215
language: cpp
1316

CMakeLists.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,8 @@ configure_file(cmake/Templates/caffe_config.h.in "${PROJECT_BINARY_DIR}/caffe_co
6464

6565
# ---[ Includes
6666
set(Caffe_INCLUDE_DIR ${PROJECT_SOURCE_DIR}/include)
67-
include_directories(${Caffe_INCLUDE_DIR} ${PROJECT_BINARY_DIR})
67+
set(THIRDPARTY_DIR ${PROJECT_SOURCE_DIR}/3rdparty)
68+
include_directories(${Caffe_INCLUDE_DIR} ${PROJECT_BINARY_DIR} ${THIRDPARTY_DIR})
6869
include_directories(BEFORE src) # This is needed for gtest.
6970

7071
# ---[ Subdirectories

Makefile

+15-4
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,19 @@ $(error $(CONFIG_FILE) not found. See $(CONFIG_FILE).example.)
77
endif
88
include $(CONFIG_FILE)
99

10+
# Rectify input parameters
11+
ifeq ($(CPU_ONLY),1)
12+
USE_CUDNN=0
13+
endif
14+
15+
PROJECT_DIR=$(PWD)
16+
1017
BUILD_DIR_LINK := $(BUILD_DIR)
1118
ifeq ($(RELEASE_BUILD_DIR),)
12-
RELEASE_BUILD_DIR := .$(BUILD_DIR)_release
19+
RELEASE_BUILD_DIR := $(PROJECT_DIR)/.$(BUILD_DIR)_release
1320
endif
1421
ifeq ($(DEBUG_BUILD_DIR),)
15-
DEBUG_BUILD_DIR := .$(BUILD_DIR)_debug
22+
DEBUG_BUILD_DIR := $(PROJECT_DIR)/.$(BUILD_DIR)_debug
1623
endif
1724

1825
DEBUG ?= 0
@@ -24,6 +31,8 @@ else
2431
OTHER_BUILD_DIR := $(DEBUG_BUILD_DIR)
2532
endif
2633

34+
THIRDPARTY_DIR=$(PROJECT_DIR)/3rdparty
35+
2736
# All of the directories containing code.
2837
SRC_DIRS := $(shell find * -type d -exec bash -c "find {} -maxdepth 1 \
2938
\( -name '*.cpp' -o -name '*.proto' \) | grep -q ." \; -print)
@@ -171,7 +180,7 @@ ifneq ("$(wildcard $(CUDA_DIR)/lib64)","")
171180
endif
172181
CUDA_LIB_DIR += $(CUDA_DIR)/lib
173182

174-
INCLUDE_DIRS += $(BUILD_INCLUDE_DIR) ./src ./include
183+
INCLUDE_DIRS += $(BUILD_INCLUDE_DIR) ./src ./include $(THIRDPARTY_DIR)
175184
ifneq ($(CPU_ONLY), 1)
176185
INCLUDE_DIRS += $(CUDA_INCLUDE_DIR)
177186
LIBRARY_DIRS += $(CUDA_LIB_DIR)
@@ -325,6 +334,8 @@ endif
325334
# cuDNN acceleration configuration.
326335
ifeq ($(USE_CUDNN), 1)
327336
LIBRARIES += cudnn
337+
INCLUDE_DIRS += ${CUDNN_DIR}/include
338+
LIBRARY_DIRS += ${CUDNN_DIR}/install/cuda/lib64
328339
COMMON_FLAGS += -DUSE_CUDNN
329340
endif
330341

@@ -440,7 +451,7 @@ endif
440451
# Define build targets
441452
##############################
442453
.PHONY: all lib test clean docs linecount lint lintclean tools examples $(DIST_ALIASES) \
443-
py mat py$(PROJECT) mat$(PROJECT) proto runtest \
454+
py mat py$(PROJECT) mat$(PROJECT) thirdparty proto runtest \
444455
superclean supercleanlist supercleanfiles warn everything
445456

446457
all: lib tools examples

Makefile.config.example

+1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
# Contributions simplifying and improving our build system are welcome!
33

44
# cuDNN acceleration switch (uncomment to build with cuDNN).
5+
# cuDNN version 4 or higher is required.
56
# USE_CUDNN := 1
67

78
# CPU-only switch (uncomment to build without GPU support).

include/caffe/common.hpp

+6
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,9 @@ class Caffe {
136136
inline static curandGenerator_t curand_generator() {
137137
return Get().curand_generator_;
138138
}
139+
#ifdef USE_CUDNN
140+
inline static cudnnHandle_t cudnn_handle() { return Get().cudnn_handle_; }
141+
#endif
139142
#endif
140143

141144
// Returns the mode: running on CPU or GPU.
@@ -168,6 +171,9 @@ class Caffe {
168171
#ifndef CPU_ONLY
169172
cublasHandle_t cublas_handle_;
170173
curandGenerator_t curand_generator_;
174+
#ifdef USE_CUDNN
175+
cudnnHandle_t cudnn_handle_;
176+
#endif
171177
#endif
172178
shared_ptr<RNG> random_generator_;
173179

include/caffe/layer.hpp

+20-1
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,8 @@ class Layer {
3838
* layer.
3939
*/
4040
explicit Layer(const LayerParameter& param)
41-
: layer_param_(param), is_shared_(false) {
41+
: layer_param_(param), is_shared_(false),
42+
forward_passed_(false), backward_passed_(false) {
4243
// Set phase and copy blobs (if there are any).
4344
phase_ = param.phase();
4445
if (layer_param_.blobs_size() > 0) {
@@ -316,6 +317,21 @@ class Layer {
316317
param_propagate_down_[param_id] = value;
317318
}
318319

320+
bool IsForwardPassed() const {
321+
return forward_passed_;
322+
}
323+
324+
void ForwardPassed(bool passed) {
325+
forward_passed_ = passed;
326+
}
327+
328+
bool IsBackwardPassed() const {
329+
return backward_passed_;
330+
}
331+
332+
void BackwardPassed(bool passed) {
333+
backward_passed_ = passed;
334+
}
319335

320336
protected:
321337
/** The protobuf that stores the layer parameters */
@@ -431,6 +447,9 @@ class Layer {
431447
/** Whether this layer is actually shared by other nets*/
432448
bool is_shared_;
433449

450+
bool forward_passed_;
451+
bool backward_passed_;
452+
434453
/** The mutex for sequential forward if this layer is shared */
435454
shared_ptr<boost::mutex> forward_mutex_;
436455

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
#ifndef CAFFE_CUDNN_BATCH_NORM_LAYER_HPP_
#define CAFFE_CUDNN_BATCH_NORM_LAYER_HPP_

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"

#include "caffe/layers/batch_norm_layer.hpp"

namespace caffe {

#ifdef USE_CUDNN
/**
 * @brief cuDNN-accelerated batch normalization layer.
 *
 * Derives from BatchNormLayer and overrides only the GPU paths, so the CPU
 * implementation and the layer's parameter handling come from the base class.
 * The whole class is compiled only when USE_CUDNN is defined.
 * NOTE(review): cuDNN types (cudnnTensorDescriptor_t, cudnnBatchNormMode_t)
 * are not included directly here — presumably they arrive transitively via
 * "caffe/layer.hpp"; confirm against the project's include graph.
 */
template <typename Dtype>
class CuDNNBatchNormLayer : public BatchNormLayer<Dtype> {
 public:
  // Descriptors are created lazily in LayerSetUp, hence handles_setup_(false).
  // epsilon_ defaults to 1e-4; cuDNN requires epsilon >= CUDNN_BN_MIN_EPSILON.
  explicit CuDNNBatchNormLayer(const LayerParameter& param)
      : BatchNormLayer<Dtype>(param), epsilon_(1e-4), handles_setup_(false) {}
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  // Destructor must release the cuDNN descriptors created during setup.
  virtual ~CuDNNBatchNormLayer();

 protected:
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

  // cuDNN descriptors / handles
  cudnnTensorDescriptor_t bottom_desc_, top_desc_;
  // Shared descriptor for the scale/bias and running mean/variance tensors,
  // as required by the cudnnBatchNormalization* API.
  cudnnTensorDescriptor_t scale_bias_mean_var_desc_;
  // Batch-norm mode (e.g. per-activation vs. spatial); set during setup.
  cudnnBatchNormMode_t mode_;

  // Numerical-stability constant passed to the cuDNN batch-norm calls.
  double epsilon_;
  // Per-batch saved statistics; names suggest the mean and inverse variance
  // cached by the forward pass for reuse in backward — TODO confirm in .cu.
  Blob<Dtype> save_mean_, save_inv_var_;
  // True once the cuDNN descriptors have been created (guards teardown).
  bool handles_setup_;
};
#endif

}  // namespace caffe

#endif  // CAFFE_CUDNN_BATCH_NORM_LAYER_HPP_

include/caffe/layers/cudnn_conv_layer.hpp

+6-6
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@
88
#include "caffe/proto/caffe.pb.h"
99

1010
#include "caffe/layers/conv_layer.hpp"
11+
#ifndef CPU_ONLY
12+
#include "caffe/util/gpu_memory.hpp"
13+
#endif
1114

1215
namespace caffe {
1316

@@ -44,8 +47,6 @@ class CuDNNConvolutionLayer : public ConvolutionLayer<Dtype> {
4447
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
4548

4649
bool handles_setup_;
47-
cudnnHandle_t* handle_;
48-
cudaStream_t* stream_;
4950

5051
// algorithms for forward and backwards convolutions
5152
cudnnConvolutionFwdAlgo_t *fwd_algo_;
@@ -56,14 +57,13 @@ class CuDNNConvolutionLayer : public ConvolutionLayer<Dtype> {
5657
cudnnTensorDescriptor_t bias_desc_;
5758
cudnnFilterDescriptor_t filter_desc_;
5859
vector<cudnnConvolutionDescriptor_t> conv_descs_;
59-
int bottom_offset_, top_offset_, bias_offset_;
60+
61+
int bottom_offset_, top_offset_, weight_offset_, bias_offset_;
6062

6163
size_t *workspace_fwd_sizes_;
6264
size_t *workspace_bwd_data_sizes_;
6365
size_t *workspace_bwd_filter_sizes_;
64-
size_t workspaceSizeInBytes; // size of underlying storage
65-
void *workspaceData; // underlying storage
66-
void **workspace; // aliases into workspaceData
66+
gpu_memory::buffer workspace;
6767
};
6868
#endif
6969

include/caffe/layers/cudnn_lcn_layer.hpp

+6-5
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@
99

1010
#include "caffe/layers/lrn_layer.hpp"
1111
#include "caffe/layers/power_layer.hpp"
12+
#ifndef CPU_ONLY
13+
#include "caffe/util/gpu_memory.hpp"
14+
#endif
1215

1316
namespace caffe {
1417

@@ -17,8 +20,7 @@ template <typename Dtype>
1720
class CuDNNLCNLayer : public LRNLayer<Dtype> {
1821
public:
1922
explicit CuDNNLCNLayer(const LayerParameter& param)
20-
: LRNLayer<Dtype>(param), handles_setup_(false), tempDataSize(0),
21-
tempData1(NULL), tempData2(NULL) {}
23+
: LRNLayer<Dtype>(param), handles_setup_(false), tempDataSize_(0) {}
2224
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
2325
const vector<Blob<Dtype>*>& top);
2426
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
@@ -32,15 +34,14 @@ class CuDNNLCNLayer : public LRNLayer<Dtype> {
3234
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
3335

3436
bool handles_setup_;
35-
cudnnHandle_t handle_;
3637
cudnnLRNDescriptor_t norm_desc_;
3738
cudnnTensorDescriptor_t bottom_desc_, top_desc_;
3839

3940
int size_, pre_pad_;
4041
Dtype alpha_, beta_, k_;
4142

42-
size_t tempDataSize;
43-
void *tempData1, *tempData2;
43+
size_t tempDataSize_;
44+
gpu_memory::buffer temp1_, temp2_;
4445
};
4546
#endif
4647

include/caffe/layers/cudnn_relu_layer.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,9 @@ class CuDNNReLULayer : public ReLULayer<Dtype> {
3434
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
3535

3636
bool handles_setup_;
37-
cudnnHandle_t handle_;
3837
cudnnTensorDescriptor_t bottom_desc_;
3938
cudnnTensorDescriptor_t top_desc_;
39+
cudnnActivationDescriptor_t activ_desc_;
4040
};
4141
#endif
4242

include/caffe/layers/cudnn_sigmoid_layer.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,9 @@ class CuDNNSigmoidLayer : public SigmoidLayer<Dtype> {
3434
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
3535

3636
bool handles_setup_;
37-
cudnnHandle_t handle_;
3837
cudnnTensorDescriptor_t bottom_desc_;
3938
cudnnTensorDescriptor_t top_desc_;
39+
cudnnActivationDescriptor_t activ_desc_;
4040
};
4141
#endif
4242

include/caffe/layers/cudnn_tanh_layer.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,9 @@ class CuDNNTanHLayer : public TanHLayer<Dtype> {
3434
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
3535

3636
bool handles_setup_;
37-
cudnnHandle_t handle_;
3837
cudnnTensorDescriptor_t bottom_desc_;
3938
cudnnTensorDescriptor_t top_desc_;
39+
cudnnActivationDescriptor_t activ_desc_;
4040
};
4141
#endif
4242

include/caffe/parallel.hpp

+2
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,8 @@ class GPUParams : public Params<Dtype> {
5757
using Params<Dtype>::size_;
5858
using Params<Dtype>::data_;
5959
using Params<Dtype>::diff_;
60+
private:
61+
int buffer_device_;
6062
};
6163

6264
class DevicePair {

include/caffe/util/cudnn.hpp

+16-4
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
#include "caffe/proto/caffe.pb.h"
99

1010
#define CUDNN_VERSION_MIN(major, minor, patch) \
11-
(CUDNN_VERSION >= (major * 1000 + minor * 100 + patch))
11+
(CUDNN_VERSION >= (major * 1000 + minor * 100 + patch))
1212

1313
#define CUDNN_CHECK(condition) \
1414
do { \
@@ -91,8 +91,13 @@ template <typename Dtype>
9191
inline void createFilterDesc(cudnnFilterDescriptor_t* desc,
9292
int n, int c, int h, int w) {
9393
CUDNN_CHECK(cudnnCreateFilterDescriptor(desc));
94+
#if CUDNN_VERSION_MIN(5, 0, 0)
9495
CUDNN_CHECK(cudnnSetFilter4dDescriptor(*desc, dataType<Dtype>::type,
95-
n, c, h, w));
96+
CUDNN_TENSOR_NCHW, n, c, h, w));
97+
#else
98+
CUDNN_CHECK(cudnnSetFilter4dDescriptor_v4(*desc, dataType<Dtype>::type,
99+
CUDNN_TENSOR_NCHW, n, c, h, w));
100+
#endif
96101
}
97102

98103
template <typename Dtype>
@@ -123,8 +128,15 @@ inline void createPoolingDesc(cudnnPoolingDescriptor_t* pool_desc,
123128
LOG(FATAL) << "Unknown pooling method.";
124129
}
125130
CUDNN_CHECK(cudnnCreatePoolingDescriptor(pool_desc));
126-
CUDNN_CHECK(cudnnSetPooling2dDescriptor(*pool_desc, *mode, h, w,
127-
pad_h, pad_w, stride_h, stride_w));
131+
#if CUDNN_VERSION_MIN(5, 0, 0)
132+
CUDNN_CHECK(cudnnSetPooling2dDescriptor(*pool_desc, *mode,
133+
CUDNN_PROPAGATE_NAN, h, w,
134+
pad_h, pad_w, stride_h, stride_w));
135+
#else
136+
CUDNN_CHECK(cudnnSetPooling2dDescriptor_v4(*pool_desc, *mode,
137+
CUDNN_PROPAGATE_NAN, h, w,
138+
pad_h, pad_w, stride_h, stride_w));
139+
#endif
128140
}
129141

130142
} // namespace cudnn

0 commit comments

Comments
 (0)