1
+ // Copyright (C) 2018-2023 Intel Corporation
2
+ // SPDX-License-Identifier: Apache-2.0
3
+ //
4
+
5
+ #pragma once
6
+
7
+ #include < cstddef>
8
+ #include < cstdint>
9
+
10
+ namespace ov {
11
+ namespace intel_cpu {
12
+ /**
13
+ * @brief Computes the length in bytes for the packed matrix B buffer (SGEMM).
14
+ *
15
+ * @param N Supplies the number of columns of matrix B.
16
+ * @param K Supplies the number of rows of matrix B.
17
+ * @return Size in bytes of the packing buffer.
18
+ */
19
+ size_t mlas_sgemm_pack_get_size (const int64_t N, const int64_t K);
20
+
21
+ /**
22
+ * @brief Packs the contents of matrix B into the prepacked buffer dst.
23
+ *
24
+ * @param transb T for transposed B, N for non-transposed B.
25
+ * @param N Supplies the number of columns of matrix B and matrix C.
26
+ * @param K Supplies the number of columns of matrix A and the number
27
+ of rows of matrix B.
28
+ * @param ldb Supplies the first dimension of matrix B.
29
+ * @param src Supplies the address of matrix B.
30
+ * @param dst Supplies the address of the prepacked B buffer
+ (presumably sized via mlas_sgemm_pack_get_size(N, K); confirm against the implementation).
31
+ */
32
+ void mlas_sgemm_pack (const char * transb,
33
+ const int64_t N,
34
+ const int64_t K,
35
+ const int64_t ldb,
36
+ const float * src,
37
+ float * dst);
38
+
39
+ /**
40
+ * @brief SGEMM with planar B matrix.
41
+ *
42
+ * @param transa T for transposed A, N for non-transposed A.
43
+ * @param transb T for transposed B, N for non-transposed B.
44
+ * @param M Supplies the number of rows of matrix A and matrix C.
45
+ * @param N Supplies the number of columns of matrix B and matrix C.
46
+ * @param K Supplies the number of columns of matrix A and the number
47
+ of rows of matrix B.
48
+ * @param alpha Supplies the scalar alpha multiplier (see SGEMM definition).
49
+ * @param A Supplies the address of matrix A.
50
+ * @param lda Supplies the first dimension of matrix A.
51
+ * @param B Supplies the address of matrix B.
52
+ * @param ldb Supplies the first dimension of matrix B.
53
+ * @param beta Supplies the scalar beta multiplier (see SGEMM definition).
54
+ * @param C Supplies the address of matrix C.
55
+ * @param ldc Supplies the first dimension of matrix C.
56
+ * @param thread_num Number of threads to use; 0 (the default) means all threads.
57
+ */
58
+ void mlas_sgemm (const char * transa,
59
+ const char * transb,
60
+ const int64_t M,
61
+ const int64_t N,
62
+ const int64_t K,
63
+ const float alpha,
64
+ const float * A,
65
+ const int64_t lda,
66
+ const float * B,
67
+ const int64_t ldb,
68
+ const float beta,
69
+ float * C,
70
+ const int64_t ldc,
71
+ size_t thread_num = 0 );
72
+
73
+ /**
74
+ * @brief SGEMM with B matrix prepacked.
75
+ *
76
+ * @param transa T for transposed A, N for non-transposed A.
77
+ * @param transb T for transposed B, N for non-transposed B.
78
+ * @param M Supplies the number of rows of matrix A and matrix C.
79
+ * @param N Supplies the number of columns of matrix B and matrix C.
80
+ * @param K Supplies the number of columns of matrix A and the number
81
+ of rows of matrix B.
82
+ * @param alpha Supplies the scalar alpha multiplier (see SGEMM definition).
83
+ * @param A Supplies the address of matrix A.
84
+ * @param lda Supplies the first dimension of matrix A.
85
+ * @param B Supplies the address of matrix B
+ (presumably the buffer filled by mlas_sgemm_pack; confirm against the implementation).
86
+ * @param ldb Supplies the first dimension of matrix B.
87
+ * @param beta Supplies the scalar beta multiplier (see SGEMM definition).
88
+ * @param C Supplies the address of matrix C.
89
+ * @param ldc Supplies the first dimension of matrix C.
90
+ * @param bias Supplies the address of the per-channel bias; defaults to nullptr
+ (presumably meaning no bias is applied; confirm against the implementation).
91
+ * @param thread_num Number of threads to use; 0 (the default) means all threads.
92
+ */
93
+ void mlas_sgemm_compute (const char * transa,
94
+ const char * transb,
95
+ const int64_t M,
96
+ const int64_t N,
97
+ const int64_t K,
98
+ const float alpha,
99
+ const float * A,
100
+ const int64_t lda,
101
+ const float * B,
102
+ const int64_t ldb,
103
+ const float beta,
104
+ float * C,
105
+ const int64_t ldc,
106
+ const float * bias = nullptr ,
107
+ size_t thread_num = 0 );
108
+ } // namespace intel_cpu
109
+ } // namespace ov
0 commit comments