Skip to content

Commit bd3db01

Browse files
Wei Wei, pytorchmergebot
Wei Wei
authored and committed Mar 4, 2022
Update fbcode symlinks for mkl-dnn ideep 2.5.2
Summary: as titled
Test Plan: buck test caffe2/test:nn
Reviewed By: VitalyFedyunin, luciang
Differential Revision: D34285331
fbshipit-source-id: 5144b3ae1dce02e995d1d633443fb660c57df101
(cherry picked from commit 61f1255)

File tree

3 files changed

+30
-5
lines changed

3 files changed

+30
-5
lines changed
 

‎caffe2/ideep/operators/order_switch_ops.cc

+8
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,10 @@ class IDEEPNHWC2NCHWOp final : public IDEEPOperator {
2222
// Thus, for iDEEP tensor, the shapes of NCHW and NHWC are identical.
2323
Y->init({X.get_dims(), X.get_data_type(), iformat::nchw});
2424
Y->feed_from(X);
25+
// NOTE: This op is only used in the quantization path. Setting a scale
26+
// distinguishes its output from fp32-path activations (which always return
27+
// NCHW format even if the ideep tensor has NHWC format) when converting to numpy memory.
28+
Y->set_scale({1.0});
2529
return true;
2630
}
2731

@@ -48,6 +52,10 @@ class IDEEPNCHW2NHWCOp final : public IDEEPOperator {
4852
// Thus, for iDEEP tensor, the shapes of NCHW and NHWC are identical.
4953
Y->init({X.get_dims(), X.get_data_type(), iformat::nhwc});
5054
Y->feed_from(X);
55+
// NOTE: This op is only used in the quantization path. Setting a scale
56+
// distinguishes its output from fp32-path activations (which always return
57+
// NCHW format even if the ideep tensor has NHWC format) when converting to numpy memory.
58+
Y->set_scale({1.0});
5159
return true;
5260
}
5361

‎caffe2/ideep/operators/utility_ops.cc

+5-2
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
#include "caffe2/operators/utility_ops.h"
1+
#include "caffe2/operators/utility_ops.h"
22
#include "caffe2/core/operator.h"
33
#include "caffe2/ideep/ideep_utils.h"
44

@@ -64,7 +64,10 @@ class CopyIDEEPToCPUOp final : public IDEEPOperator {
6464
}
6565
auto* Y =
6666
OperatorBase::OutputTensor(0, dims, at::dtype<float>().device(CPU));
67-
X.to_public(Y->template mutable_data<float>());
67+
itensor temp_ten(
68+
X.get_desc().to_default_format(),
69+
Y->template mutable_data<float>());
70+
X.reorder_to(temp_ten);
6871
} else {
6972
CAFFE_THROW("Unsupported ideep type: ",
7073
static_cast<int>(X.get_data_type()));

‎caffe2/python/pybind_state_ideep.cc

+17-3
Original file line number | Diff line number | Diff line change
@@ -65,10 +65,19 @@ class IDeepFetcher : public BlobFetcherBase {
6565
numpy_type != -1,
6666
"Unsupported ideep memory data type? This usually should not happen "
6767
"since ideep memory usually only do float and double.");
68-
itensor::dims dims = atensor.get_public_format_dims();
68+
itensor::dims dims;
69+
bool need_reorder = atensor.need_reorder();
70+
if (atensor.get_data_type() == idtype::f32 && !atensor.has_scale()) {
71+
// The FP32 path only supports NCHW-format output, so if atensor
72+
// has NHWC format, we need to reorder it to NCHW format.
73+
dims = atensor.get_dims();
74+
need_reorder = need_reorder || atensor.get_desc().is_nhwc();
75+
} else {
76+
dims = atensor.get_public_format_dims();
77+
}
6978
std::vector<npy_intp> npy_dims(dims.begin(), dims.end());
7079

71-
result.copied = force_copy || atensor.need_reorder();
80+
result.copied = force_copy || need_reorder;
7281
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
7382
void* outPtr;
7483
if (result.copied) {
@@ -87,7 +96,12 @@ class IDeepFetcher : public BlobFetcherBase {
8796
}
8897

8998
if (result.copied) {
90-
atensor.to_public(outPtr);
99+
if (atensor.get_data_type() == idtype::f32 && !atensor.has_scale()) {
100+
itensor temp_ten(atensor.get_desc().to_default_format(), outPtr);
101+
atensor.reorder_to(temp_ten);
102+
} else {
103+
atensor.to_public(outPtr);
104+
}
91105
}
92106

93107
return result;

0 commit comments

Comments
 (0)
Please sign in to comment.