5
5
6
6
namespace caffe2 {
7
7
namespace {
8
- class ElementwiseRTCFunction
9
- : public CudaRTCFunction<ElementwiseRTCFunction> {
8
+ class ElementwiseRTCFunction : public CudaRTCFunction <ElementwiseRTCFunction> {
10
9
public:
11
10
ElementwiseRTCFunction () : CudaRTCFunction(), name_(GetUniqueName()) {}
12
11
@@ -22,22 +21,21 @@ class ElementwiseRTCFunction
22
21
string name_;
23
22
};
24
23
25
- template <>
24
+ template <>
26
25
string ElementwiseRTCFunction::GetSource (
27
- int input_size, int output_size,
26
+ int input_size,
27
+ int output_size,
28
28
const string command_string) {
29
29
std::stringstream ss;
30
- ss << " extern \" C\" __global__ void " << name_ <<
31
- " (const size_t nthreads, \n " ;
30
+ ss << " extern \" C\" __global__ void " << name_
31
+ << " (const size_t nthreads, \n " ;
32
32
// Insert the parameter list.
33
33
int remain_params = input_size + output_size;
34
34
for (int i = 0 ; i < input_size; ++i) {
35
- ss << " const float* in" << i
36
- << ((remain_params--) ? " , \n " : " " );
35
+ ss << " const float* in" << i << ((remain_params--) ? " , \n " : " " );
37
36
}
38
37
for (int i = 0 ; i < output_size; ++i) {
39
- ss << " float* out" << i
40
- << ((remain_params--) ? " , \n " : " " );
38
+ ss << " float* out" << i << ((remain_params--) ? " , \n " : " " );
41
39
}
42
40
ss << " ) {\n "
43
41
" for (int index = blockIdx.x * blockDim.x + threadIdx.x;\n "
@@ -46,7 +44,7 @@ string ElementwiseRTCFunction::GetSource(
46
44
<< " }\n }" ;
47
45
return ss.str ();
48
46
}
49
- } // namespace
47
+ } // namespace
50
48
51
49
/* *
52
50
* A GPU operator that can generate limited elementwise operations.
@@ -75,17 +73,17 @@ class ElementwiseRTCOp final : public Operator<CUDAContext> {
75
73
public:
76
74
ElementwiseRTCOp (const OperatorDef& operator_def, Workspace* ws)
77
75
: Operator<CUDAContext>(operator_def, ws) {
78
- const string src = OperatorBase::GetSingleArgument<string>(
79
- " rtc_src" , " " );
76
+ const string src = OperatorBase::GetSingleArgument<string>(" rtc_src" , " " );
80
77
CAFFE_ENFORCE (src.size (), " Op should have a non-zero source code size." );
81
78
func_.Compile (InputSize (), OutputSize (), src);
82
79
}
83
80
// Nothing to release by hand here; members clean up through their own
// destructors.
~ElementwiseRTCOp() override = default;
84
81
85
82
bool RunOnDevice () override {
86
- static_assert (sizeof (void *) == sizeof (size_t ),
87
- " The argbuffer relies on the assumption that void* and "
88
- " size_t have the same size." );
83
+ static_assert (
84
+ sizeof (void *) == sizeof (size_t ),
85
+ " The argbuffer relies on the assumption that void* and "
86
+ " size_t have the same size." );
89
87
vector<size_t > argBuffer_vec (InputSize () + OutputSize () + 1 );
90
88
size_t * argBuffer = argBuffer_vec.data ();
91
89
CAFFE_ENFORCE (
@@ -102,10 +100,11 @@ class ElementwiseRTCOp final : public Operator<CUDAContext> {
102
100
}
103
101
size_t argBufferSize = sizeof (argBuffer);
104
102
void * config[] = {
105
- CU_LAUNCH_PARAM_BUFFER_POINTER, argBuffer,
106
- CU_LAUNCH_PARAM_BUFFER_SIZE, &argBufferSize,
107
- CU_LAUNCH_PARAM_END
108
- };
103
+ CU_LAUNCH_PARAM_BUFFER_POINTER,
104
+ argBuffer,
105
+ CU_LAUNCH_PARAM_BUFFER_SIZE,
106
+ &argBufferSize,
107
+ CU_LAUNCH_PARAM_END};
109
108
func_.LaunchEx (
110
109
CAFFE_GET_BLOCKS (Input (0 ).numel ()),
111
110
1 ,
@@ -127,4 +126,4 @@ namespace {
127
126
// Registration of ElementwiseRTCOp via the project's registration macro, with
// the arguments ElementwiseRTC and NVRTC. NOTE(review): presumably these are
// the operator name and the engine name respectively — confirm against the
// macro definition.
REGISTER_CUDA_OPERATOR_WITH_ENGINE (ElementwiseRTC, NVRTC, ElementwiseRTCOp);
128
127
}
129
128
130
- } // namespace caffe2
129
+ } // namespace caffe2
0 commit comments