Skip to content

Commit 5a2a732

Browse files
authored
[ARM plugin] Add external neon kernels (#489)
1 parent 3c1eaf2 commit 5a2a732

File tree

8 files changed

+560
-13
lines changed

8 files changed

+560
-13
lines changed

modules/arm_plugin/src/CMakeLists.txt

+3
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,10 @@ target_include_directories(${TARGET_NAME} PRIVATE
3838
"${CMAKE_CURRENT_SOURCE_DIR}"
3939
"${IE_MAIN_ARM_PLUGIN_SOURCE_DIR}/include")
4040

41+
target_include_directories(${TARGET_NAME} INTERFACE
42+
"${IE_MAIN_ARM_PLUGIN_SOURCE_DIR}/thirdparty/external_kernels/ncnn")
4143
target_link_libraries(${TARGET_NAME} PRIVATE
44+
openvino_arm_cpu_external_kernels
4245
openvino_arm_cpu_transformations
4346
openvino_arm_cpu_opset
4447
arm_compute::arm_compute

modules/arm_plugin/src/arm_converter/arm_converter_arithmetic_unary.cpp

+75-12
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,60 @@
1616
#include <ngraph/runtime/reference/tan.hpp>
1717
#include <ngraph/runtime/reference/tanh.hpp>
1818
#include "arm_converter/arm_converter.hpp"
19+
#include <neon_mathfun.h>
20+
#include <cmath>
1921

2022
namespace ArmPlugin {
23+
namespace external {
24+
// Applies a unary function element-wise to `count` floats read from `arg` and writes the results to `out`.
// Full groups of four elements are processed with `neon_func`; the remaining `count % 4` elements
// are processed one by one with `ref_func`.
void func_neon_f32(const float *arg, float *out, size_t count,
25+
const std::function<float32x4_t(float32x4_t)>& neon_func,
26+
const std::function<float(float)>& ref_func) {
27+
// Largest multiple of 4 that does not exceed count: the portion handled four lanes at a time.
const size_t count_div = count - count % 4;
28+
for (size_t i = 0; i < count_div; i += 4) {
29+
float32x4_t elem = vld1q_f32(arg + i);
30+
float32x4_t res = neon_func(elem);
31+
vst1q_f32(out + i, res);
32+
}
33+
// Scalar tail for the elements that do not fill a whole group of four.
for (size_t i = count_div; i < count; ++i) {
34+
out[i] = ref_func(arg[i]);
35+
}
36+
}
37+
38+
// Element-wise arccosine over `count` floats: forwards to func_neon_f32 with acos_ps as the
// four-lane routine and std::acos for the scalar tail.
// acos_ps is presumably provided by neon_mathfun.h — confirm. The template parameter T is not used in the body.
template <typename T>
39+
void acos_neon_f32(const float* arg, float* out, size_t count) {
40+
func_neon_f32(arg, out, count, acos_ps, [&](float x) { return std::acos(x); });
41+
}
42+
43+
// Element-wise arcsine over `count` floats: forwards to func_neon_f32 with asin_ps as the
// four-lane routine and std::asin for the scalar tail.
// asin_ps is presumably provided by neon_mathfun.h — confirm. The template parameter T is not used in the body.
template <typename T>
44+
void asin_neon_f32(const float* arg, float* out, size_t count) {
45+
func_neon_f32(arg, out, count, asin_ps, [&](float x) { return std::asin(x); });
46+
}
47+
48+
// Element-wise tangent over `count` floats: forwards to func_neon_f32 with tan_ps as the
// four-lane routine and std::tan for the scalar tail.
// tan_ps is presumably provided by neon_mathfun.h — confirm. The template parameter T is not used in the body.
template <typename T>
49+
void tan_neon_f32(const float* arg, float* out, size_t count) {
50+
func_neon_f32(arg, out, count, tan_ps, [&](float x) { return std::tan(x); });
51+
}
52+
53+
// Element-wise cosine over `count` floats: forwards to func_neon_f32 with cos_ps as the
// four-lane routine and std::cos for the scalar tail.
// cos_ps is presumably provided by neon_mathfun.h — confirm. The template parameter T is not used in the body.
template <typename T>
54+
void cos_neon_f32(const float* arg, float* out, size_t count) {
55+
func_neon_f32(arg, out, count, cos_ps, [&](float x) { return std::cos(x); });
56+
}
57+
58+
} // namespace external
59+
2160
// Builds the conversion for an Acos node. In the added lines of this hunk, an f32 input selects
// external::acos_neon_f32, while any other element type keeps ngraph::runtime::reference::acos.
template<> Converter::Conversion::Ptr Converter::Convert(const opset::Acos& node) {
2261
// Wraps the chosen kernel into a conversion over input 0 and output 0, passing the output element count.
auto make = [&] (auto refFunction) {
2362
return this->MakeConversion(refFunction, node.input(0), node.output(0), ngraph::shape_size(node.get_output_shape(0)));
2463
};
25-
return CallSwitch(
26-
AP_WRAP(make, ngraph::runtime::reference::acos),
27-
node.input(0), floatTypes);
64+
// NOTE(review): floatTypes is still passed in the f32 branch, so presumably the wrapper is
// instantiated for every type in that list even though only f32 reaches it — confirm.
if (node.input(0).get_element_type() == ngraph::element::f32) {
65+
return CallSwitch(
66+
AP_WRAP(make, external::acos_neon_f32),
67+
node.input(0), floatTypes);
68+
} else {
69+
return CallSwitch(
70+
AP_WRAP(make, ngraph::runtime::reference::acos),
71+
node.input(0), floatTypes);
72+
}
2873
}
2974

3075
template<> Converter::Conversion::Ptr Converter::Convert(const opset::Acosh& node) {
@@ -40,9 +85,15 @@ template<> Converter::Conversion::Ptr Converter::Convert(const opset::Asin& node
4085
auto make = [&] (auto refFunction) {
4186
return this->MakeConversion(refFunction, node.input(0), node.output(0), ngraph::shape_size(node.get_output_shape(0)));
4287
};
43-
return CallSwitch(
44-
AP_WRAP(make, ngraph::runtime::reference::asin),
45-
node.input(0), floatTypes);
88+
if (node.input(0).get_element_type() == ngraph::element::f32) {
89+
return CallSwitch(
90+
AP_WRAP(make, external::asin_neon_f32),
91+
node.input(0), floatTypes);
92+
} else {
93+
return CallSwitch(
94+
AP_WRAP(make, ngraph::runtime::reference::asin),
95+
node.input(0), floatTypes);
96+
}
4697
}
4798

4899
template<> Converter::Conversion::Ptr Converter::Convert(const opset::Asinh& node) {
@@ -76,9 +127,15 @@ template<> Converter::Conversion::Ptr Converter::Convert(const opset::Cos& node)
76127
auto make = [&] (auto refFunction) {
77128
return this->MakeConversion(refFunction, node.input(0), node.output(0), ngraph::shape_size(node.get_output_shape(0)));
78129
};
79-
return CallSwitch(
80-
AP_WRAP(make, ngraph::runtime::reference::cos),
81-
node.input(0), floatTypes);
130+
if (node.input(0).get_element_type() == ngraph::element::f32) {
131+
return CallSwitch(
132+
AP_WRAP(make, external::cos_neon_f32),
133+
node.input(0), floatTypes);
134+
} else {
135+
return CallSwitch(
136+
AP_WRAP(make, ngraph::runtime::reference::cos),
137+
node.input(0), floatTypes);
138+
}
82139
}
83140

84141
template<> Converter::Conversion::Ptr Converter::Convert(const opset::Cosh& node) {
@@ -107,9 +164,15 @@ template<> Converter::Conversion::Ptr Converter::Convert(const opset::Tan& node)
107164
auto make = [&] (auto refFunction) {
108165
return this->MakeConversion(refFunction, node.input(0), node.output(0), ngraph::shape_size(node.get_output_shape(0)));
109166
};
110-
return CallSwitch(
111-
AP_WRAP(make, ngraph::runtime::reference::tan),
112-
node.input(0), floatTypes);
167+
if (node.input(0).get_element_type() == ngraph::element::f32) {
168+
return CallSwitch(
169+
AP_WRAP(make, external::tan_neon_f32),
170+
node.input(0), floatTypes);
171+
} else {
172+
return CallSwitch(
173+
AP_WRAP(make, ngraph::runtime::reference::tan),
174+
node.input(0), floatTypes);
175+
}
113176
}
114177

115178
template<> Converter::Conversion::Ptr Converter::Convert(const opset::Erf& node) {

modules/arm_plugin/third-party-programs.txt

+25-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ terms are listed below.
1414

1515
-------------------------------------------------------------
1616

17-
1. ComputeLibrary
17+
ComputeLibrary
1818
Copyright (c) 2017-2022 Arm Limited
1919

2020
MIT License
@@ -38,3 +38,27 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
3838
SOFTWARE.
3939

4040
-------------------------------------------------------------
41+
42+
neon_mathfun.h
43+
44+
/* Copyright (C) 2011 Julien Pommier
45+
*
46+
* This software is provided 'as-is', without any express or implied
47+
* warranty. In no event will the authors be held liable for any damages
48+
* arising from the use of this software.
49+
*
50+
* Permission is granted to anyone to use this software for any purpose,
51+
* including commercial applications, and to alter it and redistribute it
52+
* freely, subject to the following restrictions:
53+
*
54+
* 1. The origin of this software must not be misrepresented; you must not
55+
* claim that you wrote the original software. If you use this software
56+
* in a product, an acknowledgment in the product documentation would be
57+
* appreciated but is not required.
58+
* 2. Altered source versions must be plainly marked as such, and must not be
59+
* misrepresented as being the original software.
60+
* 3. This notice may not be removed or altered from any source distribution.
61+
*
62+
* (this is the zlib license)
63+
*/
64+

modules/arm_plugin/thirdparty/CMakeLists.txt

+2
Original file line numberDiff line numberDiff line change
@@ -248,3 +248,5 @@ elseif(NOT TARGET arm_compute_static_libs)
248248
set_target_properties(arm_compute::half PROPERTIES
249249
INTERFACE_INCLUDE_DIRECTORIES ${CMAKE_CURRENT_SOURCE_DIR}/ComputeLibrary/include)
250250
endif()
251+
252+
add_subdirectory(external_kernels)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# Copyright (C) 2020-2022 Intel Corporation
2+
# SPDX-License-Identifier: Apache-2.0
3+
#
4+
5+
# Name of the target that exposes the external kernel headers.
set(TARGET_NAME "openvino_arm_cpu_external_kernels")
6+
7+
# INTERFACE library: it has no sources of its own and only carries usage requirements for consumers.
add_library(${TARGET_NAME} INTERFACE)
8+
9+
# Targets that link this library get the ncnn directory on their include path when building from this tree.
target_include_directories(${TARGET_NAME} INTERFACE
10+
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/ncnn>)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# List of external files that are used in [OpenVINO ARM plugin](https://github.com/openvinotoolkit/openvino_contrib/tree/master/modules/arm_plugin)
2+
3+
| **Original link to file** | **Original repo** |
4+
|----------------------------------------------------------------------------------------------|-------------------|
5+
| [ncnn/neon_mathfun.h](https://github.com/Tencent/ncnn/blob/98e35ded36fc6ada6738d442d0811a3ac6fb46c2/src/layer/arm/neon_mathfun.h) | [NCNN](https://github.com/Tencent/ncnn) |

0 commit comments

Comments
 (0)