16
16
#include < ngraph/runtime/reference/tan.hpp>
17
17
#include < ngraph/runtime/reference/tanh.hpp>
18
18
#include " arm_converter/arm_converter.hpp"
19
+ #include < neon_mathfun.h>
20
+ #include < cmath>
19
21
20
22
namespace ArmPlugin {
23
+ namespace external {
24
+ void func_neon_f32 (const float *arg, float *out, size_t count,
25
+ const std::function<float32x4_t (float32x4_t )>& neon_func,
26
+ const std::function<float(float )>& ref_func) {
27
+ const size_t count_div = count - count % 4 ;
28
+ for (size_t i = 0 ; i < count_div; i += 4 ) {
29
+ float32x4_t elem = vld1q_f32 (arg + i);
30
+ float32x4_t res = neon_func (elem);
31
+ vst1q_f32 (out + i, res);
32
+ }
33
+ for (size_t i = count_div; i < count; ++i) {
34
+ out[i] = ref_func (arg[i]);
35
+ }
36
+ }
37
+
38
+ template <typename T>
39
+ void acos_neon_f32 (const float * arg, float * out, size_t count) {
40
+ func_neon_f32 (arg, out, count, acos_ps, [&](float x) { return std::acos (x); });
41
+ }
42
+
43
+ template <typename T>
44
+ void asin_neon_f32 (const float * arg, float * out, size_t count) {
45
+ func_neon_f32 (arg, out, count, asin_ps, [&](float x) { return std::asin (x); });
46
+ }
47
+
48
+ template <typename T>
49
+ void tan_neon_f32 (const float * arg, float * out, size_t count) {
50
+ func_neon_f32 (arg, out, count, tan_ps, [&](float x) { return std::tan (x); });
51
+ }
52
+
53
+ template <typename T>
54
+ void cos_neon_f32 (const float * arg, float * out, size_t count) {
55
+ func_neon_f32 (arg, out, count, cos_ps, [&](float x) { return std::cos (x); });
56
+ }
57
+
58
+ } // namespace external
59
+
21
60
// Creates the conversion for an Acos node.
//
// When input 0 has element type f32 the conversion is built around
// external::acos_neon_f32; for every other element type it is built around
// ngraph::runtime::reference::acos. In both cases the kernel receives the
// node's input 0, output 0 and the element count of output 0's shape.
template<> Converter::Conversion::Ptr Converter::Convert(const opset::Acos& node) {
    auto make = [&] (auto kernel) {
        return this->MakeConversion(kernel,
                                    node.input(0),
                                    node.output(0),
                                    ngraph::shape_size(node.get_output_shape(0)));
    };

    const bool isF32Input = node.input(0).get_element_type() == ngraph::element::f32;
    if (!isF32Input) {
        // Non-f32 inputs keep using the reference implementation.
        return CallSwitch(AP_WRAP(make, ngraph::runtime::reference::acos), node.input(0), floatTypes);
    }
    return CallSwitch(AP_WRAP(make, external::acos_neon_f32), node.input(0), floatTypes);
}
29
74
30
75
template <> Converter::Conversion::Ptr Converter::Convert (const opset::Acosh& node) {
@@ -40,9 +85,15 @@ template<> Converter::Conversion::Ptr Converter::Convert(const opset::Asin& node
40
85
auto make = [&] (auto refFunction) {
41
86
return this ->MakeConversion (refFunction, node.input (0 ), node.output (0 ), ngraph::shape_size (node.get_output_shape (0 )));
42
87
};
43
- return CallSwitch (
44
- AP_WRAP (make, ngraph::runtime::reference::asin ),
45
- node.input (0 ), floatTypes);
88
+ if (node.input (0 ).get_element_type () == ngraph::element::f32) {
89
+ return CallSwitch (
90
+ AP_WRAP (make, external::asin_neon_f32),
91
+ node.input (0 ), floatTypes);
92
+ } else {
93
+ return CallSwitch (
94
+ AP_WRAP (make, ngraph::runtime::reference::asin ),
95
+ node.input (0 ), floatTypes);
96
+ }
46
97
}
47
98
48
99
template <> Converter::Conversion::Ptr Converter::Convert (const opset::Asinh& node) {
@@ -76,9 +127,15 @@ template<> Converter::Conversion::Ptr Converter::Convert(const opset::Cos& node)
76
127
auto make = [&] (auto refFunction) {
77
128
return this ->MakeConversion (refFunction, node.input (0 ), node.output (0 ), ngraph::shape_size (node.get_output_shape (0 )));
78
129
};
79
- return CallSwitch (
80
- AP_WRAP (make, ngraph::runtime::reference::cos ),
81
- node.input (0 ), floatTypes);
130
+ if (node.input (0 ).get_element_type () == ngraph::element::f32) {
131
+ return CallSwitch (
132
+ AP_WRAP (make, external::cos_neon_f32),
133
+ node.input (0 ), floatTypes);
134
+ } else {
135
+ return CallSwitch (
136
+ AP_WRAP (make, ngraph::runtime::reference::cos ),
137
+ node.input (0 ), floatTypes);
138
+ }
82
139
}
83
140
84
141
template <> Converter::Conversion::Ptr Converter::Convert (const opset::Cosh& node) {
@@ -107,9 +164,15 @@ template<> Converter::Conversion::Ptr Converter::Convert(const opset::Tan& node)
107
164
auto make = [&] (auto refFunction) {
108
165
return this ->MakeConversion (refFunction, node.input (0 ), node.output (0 ), ngraph::shape_size (node.get_output_shape (0 )));
109
166
};
110
- return CallSwitch (
111
- AP_WRAP (make, ngraph::runtime::reference::tan ),
112
- node.input (0 ), floatTypes);
167
+ if (node.input (0 ).get_element_type () == ngraph::element::f32) {
168
+ return CallSwitch (
169
+ AP_WRAP (make, external::tan_neon_f32),
170
+ node.input (0 ), floatTypes);
171
+ } else {
172
+ return CallSwitch (
173
+ AP_WRAP (make, ngraph::runtime::reference::tan ),
174
+ node.input (0 ), floatTypes);
175
+ }
113
176
}
114
177
115
178
template <> Converter::Conversion::Ptr Converter::Convert (const opset::Erf& node) {
0 commit comments