
Commit 2ac2b50

[CLANG_FORMAT] Enable clang-format for TPP adaptation source code (openvinotoolkit#28522)
1 parent 775ae1a commit 2ac2b50

31 files changed: +464 -396 lines changed

.github/workflows/code_style.yml  +8 -4

@@ -24,9 +24,11 @@ jobs:
           sudo apt update
           sudo apt --assume-yes install clang-format-15

-      # Run cmake with -DENABLE_PROFILING_ITT=ON -DSELECTIVE_BUILD=COLLECT in order to enable codestyle check for ITT collector
+      # Run cmake with extra options to cover as much source code as possible:
+      # - -DENABLE_PROFILING_ITT=ON -DSELECTIVE_BUILD=COLLECT to enable codestyle check for ITT collector
+      # - -DENABLE_SNIPPETS_LIBXSMM_TPP to cover snippets TPP adaptation
       - name: CMake configure
-        run: cmake -DENABLE_PYTHON=ON -DENABLE_TESTS=ON -DENABLE_PROFILING_ITT=ON -DSELECTIVE_BUILD=COLLECT -B build
+        run: cmake -DENABLE_PYTHON=ON -DENABLE_TESTS=ON -DENABLE_PROFILING_ITT=ON -DSELECTIVE_BUILD=COLLECT -DENABLE_SNIPPETS_LIBXSMM_TPP=ON -B build

       - name: Create code style diff
         run: cmake --build build --target clang_format_fix_all -j8

@@ -54,9 +56,11 @@ jobs:
           sudo apt update
           sudo apt --assume-yes install binutils-aarch64-linux-gnu gcc-aarch64-linux-gnu g++-aarch64-linux-gnu scons clang-format-15

-      # Run cmake with -DENABLE_PROFILING_ITT=ON -DSELECTIVE_BUILD=COLLECT in order to enable codestyle check for ITT collector
+      # Run cmake with extra options to cover as much source code as possible:
+      # - -DENABLE_PROFILING_ITT=ON -DSELECTIVE_BUILD=COLLECT to enable codestyle check for ITT collector
+      # - -DENABLE_SNIPPETS_LIBXSMM_TPP to cover snippets TPP adaptation
       - name: CMake configure
-        run: cmake -DENABLE_CLANG_FORMAT=ON -DENABLE_TESTS=ON -DENABLE_PROFILING_ITT=ON -DSELECTIVE_BUILD=COLLECT -DCMAKE_TOOLCHAIN_FILE=cmake/arm64.toolchain.cmake -B build_arm64
+        run: cmake -DENABLE_CLANG_FORMAT=ON -DENABLE_TESTS=ON -DENABLE_PROFILING_ITT=ON -DSELECTIVE_BUILD=COLLECT -DENABLE_SNIPPETS_LIBXSMM_TPP=ON -DCMAKE_TOOLCHAIN_FILE=cmake/arm64.toolchain.cmake -B build_arm64

       - name: Create code style diff
         run: cmake --build build_arm64 --target clang_format_fix_all -j8
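
For reference, the same check can be reproduced locally by mirroring the workflow steps above. This is a minimal sketch, assuming an Ubuntu host with clang-format-15 and the usual OpenVINO build prerequisites installed; the cmake flags are copied from the workflow, and the final git diff check is only an illustration of how a formatting regression would surface.

# Sketch: reproduce the CI clang-format check locally.
cmake -DENABLE_PYTHON=ON -DENABLE_TESTS=ON \
      -DENABLE_PROFILING_ITT=ON -DSELECTIVE_BUILD=COLLECT \
      -DENABLE_SNIPPETS_LIBXSMM_TPP=ON -B build
# The target rewrites files in place; any resulting diff would fail the CI check.
cmake --build build --target clang_format_fix_all -j8
git diff --exit-code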

src/plugins/intel_cpu/src/emitters/tpp/x64/jit_brgemm_emitter.cpp  +23 -23

@@ -3,6 +3,7 @@
 //

 #include "jit_brgemm_emitter.hpp"
+
 #include "emitters/snippets/x64/jit_snippets_emitters.hpp"
 #include "transformations/tpp/x64/op/brgemm.hpp"

@@ -28,18 +29,15 @@ BrgemmTppEmitter::BrgemmTppEmitter(jit_generator* h, cpu_isa_t isa, const Expres
     const auto& input_1_desc = expr->get_input_port_descriptor(1);
     const auto& output_desc = expr->get_output_port_descriptor(0);

-    std::vector<size_t> leading_dimensions {brgemm_node->get_input_stride(0),
-                                            brgemm_node->get_input_stride(1),
-                                            brgemm_node->get_output_stride(0)};
+    std::vector<size_t> leading_dimensions{brgemm_node->get_input_stride(0),
+                                           brgemm_node->get_input_stride(1),
+                                           brgemm_node->get_output_stride(0)};

     auto in_0_prec = ov_to_xsmm_dtype(brgemm_node->get_input_element_type(0));
     auto in_1_prec = ov_to_xsmm_dtype(brgemm_node->get_input_element_type(1));
-    exec_dtype = in_0_prec == LIBXSMM_DATATYPE_I8 || in_0_prec == LIBXSMM_DATATYPE_U8 ?
-                 LIBXSMM_DATATYPE_I32 :
-                 LIBXSMM_DATATYPE_F32;
-    auto out_0_prec = exec_dtype == LIBXSMM_DATATYPE_I32 ?
-                      LIBXSMM_DATATYPE_I32 :
-                      LIBXSMM_DATATYPE_F32;
+    exec_dtype = in_0_prec == LIBXSMM_DATATYPE_I8 || in_0_prec == LIBXSMM_DATATYPE_U8 ? LIBXSMM_DATATYPE_I32
+                                                                                       : LIBXSMM_DATATYPE_F32;
+    auto out_0_prec = exec_dtype == LIBXSMM_DATATYPE_I32 ? LIBXSMM_DATATYPE_I32 : LIBXSMM_DATATYPE_F32;

     const auto beta = brgemm_node->get_beta();
     OV_CPU_JIT_EMITTER_ASSERT(beta == 0 || beta == 1, "Detected unsupported beta value: " + std::to_string(beta));
@@ -54,18 +52,14 @@ BrgemmTppEmitter::BrgemmTppEmitter(jit_generator* h, cpu_isa_t isa, const Expres
     const auto N = static_cast<libxsmm_blasint>(*subtensor_in1.rbegin());

     const bool is_f32_gemm = in_0_prec == in_1_prec && in_0_prec == LIBXSMM_DATATYPE_F32;
-    const bool is_bf16_gemm = in_0_prec == in_1_prec && in_0_prec == LIBXSMM_DATATYPE_BF16;
+    const bool is_bf16_gemm = in_0_prec == in_1_prec && in_0_prec == LIBXSMM_DATATYPE_BF16;
     const bool is_i8_gemm = in_0_prec == LIBXSMM_DATATYPE_U8 || in_0_prec == LIBXSMM_DATATYPE_I8;
-    OV_CPU_JIT_EMITTER_ASSERT(is_f32_gemm ||
-                              (is_bf16_gemm && K % 2 == 0) ||
-                              (is_i8_gemm && K % 4 == 0),
+    OV_CPU_JIT_EMITTER_ASSERT(is_f32_gemm || (is_bf16_gemm && K % 2 == 0) || (is_i8_gemm && K % 4 == 0),
                               "Unsupported parameter combination for kernel configuration");

-    m_compile_flags = is_f32_gemm ?
-                      LIBXSMM_GEMM_FLAGS('N', 'N') :
-                      LIBXSMM_GEMM_VNNI_FLAGS('N', 'N', 'V', 'N') |
-                      LIBXSMM_GEMM_FLAG_NO_SETUP_TILECONFIG |
-                      LIBXSMM_GEMM_FLAG_NO_RESET_TILECONFIG;
+    m_compile_flags = is_f32_gemm ? LIBXSMM_GEMM_FLAGS('N', 'N')
+                                  : LIBXSMM_GEMM_VNNI_FLAGS('N', 'N', 'V', 'N') |
+                                        LIBXSMM_GEMM_FLAG_NO_SETUP_TILECONFIG | LIBXSMM_GEMM_FLAG_NO_RESET_TILECONFIG;

     if (beta == 0)
         m_compile_flags |= LIBXSMM_GEMM_FLAG_BETA_0;
@@ -79,9 +73,15 @@ BrgemmTppEmitter::BrgemmTppEmitter(jit_generator* h, cpu_isa_t isa, const Expres
         m_compile_flags |= LIBXSMM_GEMM_FLAG_B_UNSIGNED;
     }

-    m_shape = libxsmm_create_gemm_shape(N, M, K,
-                                        io_strides[1], io_strides[0], io_strides[2],
-                                        in_1_prec, in_0_prec, out_0_prec,
+    m_shape = libxsmm_create_gemm_shape(N,
+                                        M,
+                                        K,
+                                        io_strides[1],
+                                        io_strides[0],
+                                        io_strides[2],
+                                        in_1_prec,
+                                        in_0_prec,
+                                        out_0_prec,
                                         exec_dtype);
     m_prefetching_flags = LIBXSMM_GEMM_PREFETCH_NONE;
 }
@@ -91,7 +91,7 @@ std::set<std::vector<element::Type>> BrgemmTppEmitter::get_supported_precisions(
     return {{element::f32, element::f32}};
 }

-void BrgemmTppEmitter::validate_arguments(const std::vector<size_t> &in, const std::vector<size_t> &out) const {
+void BrgemmTppEmitter::validate_arguments(const std::vector<size_t>& in, const std::vector<size_t>& out) const {
     OV_CPU_JIT_EMITTER_ASSERT(in.size() == 2, "Expects 2 input regs, got" + std::to_string(in.size()));
     OV_CPU_JIT_EMITTER_ASSERT(out.size() == 1, "Expects 1 output reg, got" + std::to_string(out.size()));
 }
@@ -100,7 +100,7 @@ const uintptr_t BrgemmTppEmitter::get_compiled_kernel_ptr() const {
     return COMPILE_TPP_KERNEL(libxsmm_dispatch_gemm(m_shape, m_compile_flags, m_prefetching_flags));
 }

-void BrgemmTppEmitter::execute_brgemm_kernel(libxsmm_gemmfunction brg_kernel, void *in0, void *in1, void *out0) {
+void BrgemmTppEmitter::execute_brgemm_kernel(libxsmm_gemmfunction brg_kernel, void* in0, void* in1, void* out0) {
     libxsmm_gemm_param gemm_p;
     gemm_p.a.primary = in1;
     gemm_p.b.primary = in0;

src/plugins/intel_cpu/src/emitters/tpp/x64/jit_scalar_emitter.cpp  +1 -0

@@ -3,6 +3,7 @@
 //

 #include "jit_scalar_emitter.hpp"
+
 #include "emitters/snippets/x64/jit_snippets_emitters.hpp"

 using namespace Xbyak;

src/plugins/intel_cpu/src/emitters/tpp/x64/jit_scalar_emitter.hpp  +10 -5

@@ -3,8 +3,8 @@
 //

 #pragma once
-#include "snippets/lowered/expression.hpp"
 #include "emitters/plugin/x64/jit_emitter.hpp"
+#include "snippets/lowered/expression.hpp"

 namespace ov {
 namespace intel_cpu {
@@ -13,11 +13,16 @@ class ScalarTppEmitter : public jit_emitter {
     ScalarTppEmitter(dnnl::impl::cpu::x64::jit_generator* h,
                      dnnl::impl::cpu::x64::cpu_isa_t isa,
                      const ov::snippets::lowered::ExpressionPtr& expr);
-    size_t get_inputs_num() const override {return 0;}
-    size_t aux_gprs_count() const override {return 1;}
+    size_t get_inputs_num() const override {
+        return 0;
+    }
+    size_t aux_gprs_count() const override {
+        return 1;
+    }
+
 private:
     void emit_impl(const std::vector<size_t>& in, const std::vector<size_t>& out) const override;
 };

-} // namespace intel_cpu
-} // namespace ov
+}  // namespace intel_cpu
+}  // namespace ov

src/plugins/intel_cpu/src/transformations/tpp/x64/op/brgemm.cpp  +34 -24

@@ -3,50 +3,60 @@
 //

 #include "brgemm.hpp"
+
 #include "snippets/itt.hpp"
-#include "snippets/utils/utils.hpp"
 #include "snippets/lowered/port_descriptor.hpp"
+#include "snippets/utils/utils.hpp"
 #include "utils/general_utils.h"

-
 namespace ov {
 namespace intel_cpu {
 namespace tpp {
 namespace op {

-BrgemmTPP::BrgemmTPP(const Output<Node>& A, const Output<Node>& B,
-                     const size_t offset_a, const size_t offset_b, const size_t offset_c,
-                     std::vector<size_t> layout_a, std::vector<size_t> layout_b, std::vector<size_t> layout_c,
+BrgemmTPP::BrgemmTPP(const Output<Node>& A,
+                     const Output<Node>& B,
+                     const size_t offset_a,
+                     const size_t offset_b,
+                     const size_t offset_c,
+                     std::vector<size_t> layout_a,
+                     std::vector<size_t> layout_b,
+                     std::vector<size_t> layout_c,
                      const float beta)
     : MemoryAccess(std::set<size_t>{0, 1}, std::set<size_t>{0}),
       modifier::TensorProcessingPrimitive(),
-      Brgemm(A, B,
-             offset_a, offset_b, offset_c,
-             std::move(layout_a), std::move(layout_b), std::move(layout_c)) {
+      Brgemm(A, B, offset_a, offset_b, offset_c, std::move(layout_a), std::move(layout_b), std::move(layout_c)) {
     set_beta(beta);
 }

-BrgemmTPP::BrgemmTPP(const Output<Node>& A, const Output<Node>& B,
-                     const PortDescriptor& desc_a, const PortDescriptor& desc_b, const PortDescriptor& desc_c,
-                     std::vector<size_t> layout_a, std::vector<size_t> layout_b, std::vector<size_t> layout_c,
+BrgemmTPP::BrgemmTPP(const Output<Node>& A,
+                     const Output<Node>& B,
+                     const PortDescriptor& desc_a,
+                     const PortDescriptor& desc_b,
+                     const PortDescriptor& desc_c,
+                     std::vector<size_t> layout_a,
+                     std::vector<size_t> layout_b,
+                     std::vector<size_t> layout_c,
                      const float beta)
     : MemoryAccess(PortMap{{0, desc_a}, {1, desc_b}}, PortMap{{0, desc_c}}),
       modifier::TensorProcessingPrimitive(),
-      Brgemm(A, B,
-             desc_a, desc_b, desc_c,
-             std::move(layout_a), std::move(layout_b), std::move(layout_c)) {
+      Brgemm(A, B, desc_a, desc_b, desc_c, std::move(layout_a), std::move(layout_b), std::move(layout_c)) {
     set_beta(beta);
 }

 std::shared_ptr<Node> BrgemmTPP::clone_with_new_inputs(const OutputVector& new_args) const {
     INTERNAL_OP_SCOPE(BrgemmTPP_clone_with_new_inputs);
     check_new_args_count(this, new_args);
-    return std::make_shared<BrgemmTPP>(new_args.at(0), new_args.at(1),
-                                       get_input_port_descriptor(0), get_input_port_descriptor(1), get_output_port_descriptor(0),
-                                       snippets::lowered::PortDescriptorUtils::get_port_descriptor_ptr(input(0))->get_layout(),
-                                       snippets::lowered::PortDescriptorUtils::get_port_descriptor_ptr(input(1))->get_layout(),
-                                       snippets::lowered::PortDescriptorUtils::get_port_descriptor_ptr(output(0))->get_layout(),
-                                       m_beta);
+    return std::make_shared<BrgemmTPP>(
+        new_args.at(0),
+        new_args.at(1),
+        get_input_port_descriptor(0),
+        get_input_port_descriptor(1),
+        get_output_port_descriptor(0),
+        snippets::lowered::PortDescriptorUtils::get_port_descriptor_ptr(input(0))->get_layout(),
+        snippets::lowered::PortDescriptorUtils::get_port_descriptor_ptr(input(1))->get_layout(),
+        snippets::lowered::PortDescriptorUtils::get_port_descriptor_ptr(output(0))->get_layout(),
+        m_beta);
 }

 bool BrgemmTPP::visit_attributes(AttributeVisitor& visitor) {
@@ -55,7 +65,7 @@ bool BrgemmTPP::visit_attributes(AttributeVisitor& visitor) {
     return Brgemm::visit_attributes(visitor);
 }

-} // namespace op
-} // namespace tpp
-} // namespace intel_cpu
-} // namespace ov
+}  // namespace op
+}  // namespace tpp
+}  // namespace intel_cpu
+}  // namespace ov

src/plugins/intel_cpu/src/transformations/tpp/x64/op/brgemm.hpp  +27 -13

@@ -4,8 +4,8 @@

 #pragma once

-#include "transformations/snippets/x64/op/brgemm_cpu.hpp"
 #include "modifiers.hpp"
+#include "transformations/snippets/x64/op/brgemm_cpu.hpp"

 namespace ov {
 namespace intel_cpu {
@@ -22,28 +22,42 @@ class BrgemmTPP : virtual public modifier::TensorProcessingPrimitive, public sni
 public:
     OPENVINO_OP("Brgemm", "TppOpset", snippets::op::Brgemm);

-    BrgemmTPP(const Output<Node>& A, const Output<Node>& B,
-              size_t offset_a = 0, size_t offset_b = 0, size_t offset_c = 0,
-              std::vector<size_t> layout_a = {}, std::vector<size_t> layout_b = {}, std::vector<size_t> layout_c = {},
+    BrgemmTPP(const Output<Node>& A,
+              const Output<Node>& B,
+              size_t offset_a = 0,
+              size_t offset_b = 0,
+              size_t offset_c = 0,
+              std::vector<size_t> layout_a = {},
+              std::vector<size_t> layout_b = {},
+              std::vector<size_t> layout_c = {},
              float beta = 1);
-    BrgemmTPP(const Output<Node>& A, const Output<Node>& B,
-              const PortDescriptor& desc_a, const PortDescriptor& desc_b, const PortDescriptor& desc_c,
-              std::vector<size_t> layout_a = {}, std::vector<size_t> layout_b = {}, std::vector<size_t> layout_c = {},
+    BrgemmTPP(const Output<Node>& A,
+              const Output<Node>& B,
+              const PortDescriptor& desc_a,
+              const PortDescriptor& desc_b,
+              const PortDescriptor& desc_c,
+              std::vector<size_t> layout_a = {},
+              std::vector<size_t> layout_b = {},
+              std::vector<size_t> layout_c = {},
              float beta = 1);
     BrgemmTPP() = default;

     std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;

     bool visit_attributes(AttributeVisitor& visitor) override;

-    float get_beta() const { return m_beta; }
-    void set_beta(float beta) { m_beta = beta; }
+    float get_beta() const {
+        return m_beta;
+    }
+    void set_beta(float beta) {
+        m_beta = beta;
+    }

 private:
     float m_beta = 0.f;
 };

-} // namespace op
-} // namespace tpp
-} // namespace intel_cpu
-} // namespace ov
+}  // namespace op
+}  // namespace tpp
+}  // namespace intel_cpu
+}  // namespace ov
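
All of the C++ changes in this commit are mechanical clang-format output: long argument lists are broken one parameter per line, ternaries are wrapped before the colon, one-line accessors are expanded, includes are reordered, and closing-namespace comments are normalized. To check a single TPP file outside the CMake targets, the clang-format binary installed by the workflow can be invoked directly; this is a minimal sketch, assuming the repository's existing .clang-format is picked up via --style=file.

# Sketch: dry-run clang-format on one of the files touched by this commit;
# a non-zero exit code means the file does not match the repository style.
# (Use -i instead of --dry-run to apply the fixes in place.)
clang-format-15 --style=file --dry-run --Werror \
    src/plugins/intel_cpu/src/emitters/tpp/x64/jit_brgemm_emitter.cpp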
