
Commit e852036

asmushetzel authored and piiswrong committed
Add BLAS3 and LAPACK routines (apache#6538)
* Added linear algebra operators
* more comments about style of wrapper interface
* more appropriate fatal exit when lapack does not exist
* more comments on row/col-major ordering
* added config switch for lapack usage
* switched lapack usage off by default
1 parent c43c901 commit e852036

19 files changed: +1602 -13 lines

CMakeLists.txt (+10)

@@ -199,6 +199,16 @@ if(USE_OPENMP)
   endif()
 endif()

+if(USE_LAPACK)
+  add_definitions(-DMXNET_USE_LAPACK=1)
+else(USE_LAPACK)
+  # Workaround for Windows until using new Jenkinsfile.
+  if(USE_BLAS STREQUAL "open")
+    add_definitions(-DMXNET_USE_LAPACK=1)
+  endif()
+endif()
+
+
 if(UNIX)
   find_library(RTLIB rt)
   if(RTLIB)

Jenkinsfile (+2, -2)

@@ -157,7 +157,7 @@ USE_CPP_PACKAGE=1 \
 init_git_win()
 bat """mkdir build_vc14_cpu
 cd build_vc14_cpu
-cmake -G \"Visual Studio 14 2015 Win64\" -DUSE_CUDA=0 -DUSE_CUDNN=0 -DUSE_NVRTC=0 -DUSE_OPENCV=1 -DUSE_OPENMP=1 -DUSE_PROFILER=1 -DUSE_BLAS=open -DUSE_DIST_KVSTORE=0 ${env.WORKSPACE}"""
+cmake -G \"Visual Studio 14 2015 Win64\" -DUSE_CUDA=0 -DUSE_CUDNN=0 -DUSE_NVRTC=0 -DUSE_OPENCV=1 -DUSE_OPENMP=1 -DUSE_PROFILER=1 -DUSE_BLAS=open -DUSE_LAPACK=1 -DUSE_DIST_KVSTORE=0 ${env.WORKSPACE}"""
 bat 'C:\\mxnet\\build_vc14_cpu.bat'

 bat '''rmdir /s/q pkg_vc14_gpu
@@ -188,7 +188,7 @@ del /Q *.7z
 bat """mkdir build_vc14_gpu
 call "C:\\Program Files (x86)\\Microsoft Visual Studio 14.0\\VC\\bin\\x86_amd64\\vcvarsx86_amd64.bat"
 cd build_vc14_gpu
-cmake -G \"NMake Makefiles JOM\" -DUSE_CUDA=1 -DUSE_CUDNN=1 -DUSE_NVRTC=1 -DUSE_OPENCV=1 -DUSE_OPENMP=1 -DUSE_PROFILER=1 -DUSE_BLAS=open -DUSE_DIST_KVSTORE=0 -DCUDA_ARCH_NAME=All -DCMAKE_CXX_FLAGS_RELEASE="/FS /MD /O2 /Ob2 /DNDEBUG" -DCMAKE_BUILD_TYPE=Release ${env.WORKSPACE}"""
+cmake -G \"NMake Makefiles JOM\" -DUSE_CUDA=1 -DUSE_CUDNN=1 -DUSE_NVRTC=1 -DUSE_OPENCV=1 -DUSE_OPENMP=1 -DUSE_PROFILER=1 -DUSE_BLAS=open -DUSE_LAPACK=1 -DUSE_DIST_KVSTORE=0 -DCUDA_ARCH_NAME=All -DCMAKE_CXX_FLAGS_RELEASE="/FS /MD /O2 /Ob2 /DNDEBUG" -DCMAKE_BUILD_TYPE=Release ${env.WORKSPACE}"""
 bat 'C:\\mxnet\\build_vc14_gpu.bat'
 bat '''rmdir /s/q pkg_vc14_gpu
 mkdir pkg_vc14_gpu\\lib

Makefile (+10)

@@ -106,6 +106,16 @@ else
 endif
 endif

+# lapack settings.
+ifeq ($(USE_LAPACK), 1)
+ifeq ($(USE_BLAS),$(filter $(USE_BLAS),openblas apple atlas mkl))
+	CFLAGS += -DMXNET_USE_LAPACK
+endif
+ifeq ($(USE_BLAS),$(filter $(USE_BLAS),openblas atlas mkl))
+	LDFLAGS += -llapack
+endif
+endif
+
 ifeq ($(USE_CUDNN), 1)
 	CFLAGS += -DMSHADOW_USE_CUDNN=1
 	LDFLAGS += -lcudnn

appveyor.yml (+1, -1)

@@ -52,7 +52,7 @@ before_build:

     set OpenCV_DIR=%APPVEYOR_BUILD_FOLDER%/%MXNET_OPENCV_DIR%/build

-    cmake .. -DOPENCV_DIR=%OpenCV_DIR% -DUSE_PROFILER=1 -DUSE_CUDA=0 -DUSE_CUDNN=0 -DUSE_NVRTC=0 -DUSE_OPENCV=1 -DUSE_OPENMP=1 -DUSE_BLAS=open -DUSE_DIST_KVSTORE=0 -G "Visual Studio 12 2013 Win64"
+    cmake .. -DOPENCV_DIR=%OpenCV_DIR% -DUSE_PROFILER=1 -DUSE_CUDA=0 -DUSE_CUDNN=0 -DUSE_NVRTC=0 -DUSE_OPENCV=1 -DUSE_OPENMP=1 -DUSE_BLAS=open -DUSE_LAPACK=1 -DUSE_DIST_KVSTORE=0 -G "Visual Studio 12 2013 Win64"

 build_script:
   - cmd: >-

docs/Dockerfile (+1, -1)

@@ -5,7 +5,7 @@ MAINTAINER Mu Li <muli@cs.cmu.edu>
 # First, build MXNet binaries (ref mxnet/docker/cpu/Dockerfile)
 #

-RUN apt-get update && apt-get install -y build-essential git libopenblas-dev libopencv-dev
+RUN apt-get update && apt-get install -y build-essential git libopenblas-dev liblapack-dev libopencv-dev
 RUN git clone --recursive https://github.com/dmlc/mxnet/ && cd mxnet && \
     cp make/config.mk . && \
     echo "USE_BLAS=openblas" >>config.mk && \

docs/api/python/symbol.md (+15)

@@ -393,6 +393,21 @@ Composite multiple symbols into a new one by an operator.
     argmin
 ```

+### Linear Algebra
+
+```eval_rst
+.. autosummary::
+    :nosignatures:
+
+    linalg_gemm
+    linalg_gemm2
+    linalg_potrf
+    linalg_potri
+    linalg_trmm
+    linalg_trsm
+    linalg_sumlogdiag
+```
+
 ### Miscellaneous

 ```eval_rst

docs/get_started/install.md (+8, -8)

@@ -209,9 +209,9 @@ $ sudo apt-get install -y build-essential git

 **Step 2** Install OpenBLAS.

-*MXNet* uses [BLAS](https://en.wikipedia.org/wiki/Basic_Linear_Algebra_Subprograms) library for accelerated numerical computations on CPU machine. There are several flavors of BLAS libraries - [OpenBLAS](http://www.openblas.net/), [ATLAS](http://math-atlas.sourceforge.net/) and [MKL](https://software.intel.com/en-us/intel-mkl). In this step we install OpenBLAS. You can choose to install ATLAS or MKL.
+*MXNet* uses [BLAS](https://en.wikipedia.org/wiki/Basic_Linear_Algebra_Subprograms) and [LAPACK](https://en.wikipedia.org/wiki/LAPACK) libraries for accelerated numerical computations on CPU machine. There are several flavors of BLAS/LAPACK libraries - [OpenBLAS](http://www.openblas.net/), [ATLAS](http://math-atlas.sourceforge.net/) and [MKL](https://software.intel.com/en-us/intel-mkl). In this step we install OpenBLAS. You can choose to install ATLAS or MKL.
 ```bash
-$ sudo apt-get install -y libopenblas-dev
+$ sudo apt-get install -y libopenblas-dev liblapack-dev
 ```

 **Step 3** Install OpenCV.
@@ -429,9 +429,9 @@ $ sudo apt-get install -y build-essential git
 ```
 **Step 2** Install OpenBLAS.

-*MXNet* uses [BLAS](https://en.wikipedia.org/wiki/Basic_Linear_Algebra_Subprograms) library for accelerated numerical computations. There are several flavors of BLAS libraries - [OpenBLAS](http://www.openblas.net/), [ATLAS](http://math-atlas.sourceforge.net/) and [MKL](https://software.intel.com/en-us/intel-mkl). In this step we install OpenBLAS. You can choose to install ATLAS or MKL.
+*MXNet* uses [BLAS](https://en.wikipedia.org/wiki/Basic_Linear_Algebra_Subprograms) and [LAPACK](https://en.wikipedia.org/wiki/LAPACK) libraries for accelerated numerical computations on CPU machine. There are several flavors of BLAS/LAPACK libraries - [OpenBLAS](http://www.openblas.net/), [ATLAS](http://math-atlas.sourceforge.net/) and [MKL](https://software.intel.com/en-us/intel-mkl). In this step we install OpenBLAS. You can choose to install ATLAS or MKL.
 ```bash
-$ sudo apt-get install -y libopenblas-dev
+$ sudo apt-get install -y libopenblas-dev liblapack-dev
 ```

 **Step 3** Install OpenCV.
@@ -751,9 +751,9 @@ $ sudo apt-get install -y build-essential git

 **Step 2** Install OpenBLAS.

-*MXNet* uses [BLAS](https://en.wikipedia.org/wiki/Basic_Linear_Algebra_Subprograms) library for accelerated numerical computations on CPU machine. There are several flavors of BLAS libraries - [OpenBLAS](http://www.openblas.net/), [ATLAS](http://math-atlas.sourceforge.net/) and [MKL](https://software.intel.com/en-us/intel-mkl). In this step we install OpenBLAS. You can choose to install ATLAS or MKL.
+*MXNet* uses [BLAS](https://en.wikipedia.org/wiki/Basic_Linear_Algebra_Subprograms) and [LAPACK](https://en.wikipedia.org/wiki/LAPACK) libraries for accelerated numerical computations on CPU machine. There are several flavors of BLAS/LAPACK libraries - [OpenBLAS](http://www.openblas.net/), [ATLAS](http://math-atlas.sourceforge.net/) and [MKL](https://software.intel.com/en-us/intel-mkl). In this step we install OpenBLAS. You can choose to install ATLAS or MKL.
 ```bash
-$ sudo apt-get install -y libopenblas-dev
+$ sudo apt-get install -y libopenblas-dev liblapack-dev
 ```

 **Step 3** Install OpenCV.
@@ -823,9 +823,9 @@ $ sudo apt-get install -y build-essential git
 ```
 **Step 2** Install OpenBLAS.

-*MXNet* uses [BLAS](https://en.wikipedia.org/wiki/Basic_Linear_Algebra_Subprograms) library for accelerated numerical computations. There are several flavors of BLAS libraries - [OpenBLAS](http://www.openblas.net/), [ATLAS](http://math-atlas.sourceforge.net/) and [MKL](https://software.intel.com/en-us/intel-mkl). In this step we install OpenBLAS. You can choose to install ATLAS or MKL.
+*MXNet* uses [BLAS](https://en.wikipedia.org/wiki/Basic_Linear_Algebra_Subprograms) and [LAPACK](https://en.wikipedia.org/wiki/LAPACK) libraries for accelerated numerical computations on CPU machine. There are several flavors of BLAS/LAPACK libraries - [OpenBLAS](http://www.openblas.net/), [ATLAS](http://math-atlas.sourceforge.net/) and [MKL](https://software.intel.com/en-us/intel-mkl). In this step we install OpenBLAS. You can choose to install ATLAS or MKL.
 ```bash
-$ sudo apt-get install -y libopenblas-dev
+$ sudo apt-get install -y libopenblas-dev liblapack-dev
 ```

 **Step 3** Install OpenCV.

include/mxnet/c_lapack_api.h (new file, +91)

@@ -0,0 +1,91 @@
+/*!
+ * Copyright (c) 2017 by Contributors
+ * \file c_lapack_api.h
+ * \brief Unified interface for LAPACK calls from within mxnet.
+ *  Purpose is to hide the platform specific differences.
+ */
+#ifndef MXNET_C_LAPACK_API_H_
+#define MXNET_C_LAPACK_API_H_
+
+// Manually maintained list of LAPACK interfaces that can be used
+// within MXNET. Conventions:
+//   - Interfaces must be compliant with lapacke.h in terms of signature and
+//     naming conventions so wrapping a function "foo" which has the
+//     signature
+//        lapack_int LAPACKE_foo(int, char, lapack_int, float* , lapack_int)
+//     within lapacke.h should result in a wrapper with the following signature
+//        int MXNET_LAPACK_foo(int, char, int, float* , int)
+//     Note that function signatures in lapacke.h will always have as first
+//     argument the storage order (row/col-major). All wrappers have to support
+//     that argument. The underlying fortran functions will always assume a
+//     column-major layout. It is the responsibility of the wrapper function
+//     to handle the (usual) case that it is called with data in row-major
+//     format, either by doing appropriate transpositions explicitly or using
+//     transposition options of the underlying fortran function.
+//   - It is ok to assume that matrices are stored in contiguous memory
+//     (which removes the need to do special handling for lda/ldb parameters
+//     and enables us to save additional matrix transpositions around
+//     the fortran calls).
+//   - It is desired to add some basic checking in the C++-wrappers in order
+//     to catch simple mistakes when calling these wrappers.
+//   - Must support compilation without lapack-package but issue runtime error in this case.
+
+#include <dmlc/logging.h>
+
+extern "C" {
+  // Fortran signatures
+  #define MXNET_LAPACK_FSIGNATURE1(func, dtype) \
+    void func##_(char* uplo, int* n, dtype* a, int* lda, int *info);
+
+  MXNET_LAPACK_FSIGNATURE1(spotrf, float)
+  MXNET_LAPACK_FSIGNATURE1(dpotrf, double)
+  MXNET_LAPACK_FSIGNATURE1(spotri, float)
+  MXNET_LAPACK_FSIGNATURE1(dpotri, double)
+}
+
+#define MXNET_LAPACK_ROW_MAJOR 101
+#define MXNET_LAPACK_COL_MAJOR 102
+
+#define CHECK_LAPACK_CONTIGUOUS(a, b) \
+  CHECK_EQ(a, b) << "non contiguous memory for array in lapack call";
+
+#define CHECK_LAPACK_UPLO(a) \
+  CHECK(a == 'U' || a == 'L') << "neither L nor U specified as triangle in lapack call";
+
+inline char loup(char uplo, bool invert) { return invert ? (uplo == 'U' ? 'L' : 'U') : uplo; }
+
+#if MXNET_USE_LAPACK
+
+  #define MXNET_LAPACK_CWRAPPER1(func, dtype) \
+  inline int MXNET_LAPACK_##func(int matrix_layout, char uplo, int n, dtype* a, int lda ) { \
+    CHECK_LAPACK_CONTIGUOUS(n, lda); \
+    CHECK_LAPACK_UPLO(uplo); \
+    char o(loup(uplo, (matrix_layout == MXNET_LAPACK_ROW_MAJOR))); \
+    int ret(0); \
+    func##_(&o, &n, a, &lda, &ret); \
+    return ret; \
+  }
+  MXNET_LAPACK_CWRAPPER1(spotrf, float)
+  MXNET_LAPACK_CWRAPPER1(dpotrf, double)
+  MXNET_LAPACK_CWRAPPER1(spotri, float)
+  MXNET_LAPACK_CWRAPPER1(dpotri, double)
+
+#else
+  // use pragma message instead of warning
+  #pragma message("Warning: lapack usage not enabled, linalg-operators will be not available." \
+                  " Build with USE_LAPACK=1 to get lapack functionalities.")
+
+  // Define compilable stubs.
+  #define MXNET_LAPACK_CWRAPPER1(func, dtype) \
+  inline int MXNET_LAPACK_##func(int matrix_layout, char uplo, int n, dtype* a, int lda ) { \
+    LOG(FATAL) << "MXNet build without lapack. Function " << #func << " is not available."; \
+    return 1; \
+  }
+  MXNET_LAPACK_CWRAPPER1(spotrf, float)
+  MXNET_LAPACK_CWRAPPER1(dpotrf, double)
+  MXNET_LAPACK_CWRAPPER1(spotri, float)
+  MXNET_LAPACK_CWRAPPER1(dpotri, double)
+
+#endif
+
+#endif  // MXNET_C_LAPACK_API_H_
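
Not part of the commit, but a minimal caller-side sketch of the wrapper convention defined above: row-major data is handled by flipping the `uplo` argument instead of transposing the matrix. It assumes an MXNet build with `USE_LAPACK=1` (so `MXNET_USE_LAPACK` is defined), dmlc-core headers on the include path, and linking against LAPACK; the 3x3 matrix and the `main` driver are illustrative only.

```cpp
// Sketch: Cholesky-factorize a small SPD matrix through MXNET_LAPACK_spotrf.
// Requires -DMXNET_USE_LAPACK=1 and -llapack; otherwise the stub logs a fatal error.
#include <cstdio>
#include <mxnet/c_lapack_api.h>

int main() {
  // 3x3 symmetric positive-definite matrix, row-major and contiguous (lda == n).
  float a[9] = {4.f, 2.f, 2.f,
                2.f, 5.f, 3.f,
                2.f, 3.f, 6.f};
  int n = 3;
  // 'L' requests the lower-triangular factor; because the data is row-major,
  // the wrapper passes the flipped 'U' to the column-major fortran spotrf.
  int info = MXNET_LAPACK_spotrf(MXNET_LAPACK_ROW_MAJOR, 'L', n, a, n);
  std::printf("spotrf returned %d\n", info);  // 0 on success
  return info;
}
```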

include/mxnet/tensor_blob.h (+28)

@@ -291,6 +291,34 @@ class TBlob {
     return this->get_with_shape<Device, 3, DType>(
         this->shape_.FlatTo3D(axis_begin, axis_end), stream);
   }
+  /*!
+   * \brief flatten the tensor to specified number of dimensions,
+   *  collapse the highest dimensions or pad with higher dimensions
+   * \param stream the possible stream target tensor should reside on
+   * \tparam Device which device the tensor is on
+   * \tparam dim desired number of dimensions of returned tensor
+   * \tparam DType the type of elements in the tensor
+   * \return tensor after flatten
+   */
+  template<typename Device, int dim, typename DType>
+  inline mshadow::Tensor<Device, dim, DType> FlatToKD(
+     mshadow::Stream<Device> *stream = NULL) const {
+    mshadow::Shape<dim> shape;
+    shape[0] = 1;
+    // Pad higher dimensions in case dim > ndim()
+    for (int i = 0; i < dim - ndim(); ++i) {
+      shape[i] = 1;
+    }
+    // Collapse higher dimensions in case dim < ndim()
+    for (int i = 0; i < ndim() - dim + 1; ++i) {
+      shape[0] *= shape_[i];
+    }
+    // Preserve lower dimensions.
+    for (int i = std::max(0, ndim() - dim + 1); i < ndim(); ++i) {
+      shape[i - ndim() + dim] = shape_[i];
+    }
+    return this->get_with_shape<Device, dim, DType>(shape, stream);
+  }

  private:
  static DLDataType DTypeTransform(int type_flag) {
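
The new FlatToKD helper generalizes FlatTo2D/FlatTo3D: leading axes are collapsed when the requested rank is smaller than ndim(), and size-1 axes are padded in front when it is larger. A short sketch of the resulting shapes (not from the commit; the blob shape (2, 3, 4, 5) and the helper function name are illustrative only):

```cpp
// Sketch: expected shapes returned by TBlob::FlatToKD for a (2, 3, 4, 5) blob.
#include <mxnet/tensor_blob.h>

void flat_to_kd_demo(const mxnet::TBlob& blob,            // assumed shape (2, 3, 4, 5), float data
                     mshadow::Stream<mshadow::cpu>* s) {
  // dim < ndim(): the leading (highest) axes are collapsed into axis 0.
  auto t3 = blob.FlatToKD<mshadow::cpu, 3, float>(s);     // shape (6, 4, 5)
  auto t2 = blob.FlatToKD<mshadow::cpu, 2, float>(s);     // shape (24, 5)
  // dim > ndim(): size-1 axes are padded in front.
  auto t5 = blob.FlatToKD<mshadow::cpu, 5, float>(s);     // shape (1, 2, 3, 4, 5)
  (void)t3; (void)t2; (void)t5;  // reshapes only; no data is copied by these calls
}
```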

make/config.mk (+3)

@@ -65,6 +65,9 @@ USE_OPENCV = 1
 # use openmp for parallelization
 USE_OPENMP = 1

+# whether use lapack during compilation
+# only effective when compiled with blas versions openblas/apple/atlas/mkl
+USE_LAPACK = 0

 # MKL ML Library for Intel CPU/Xeon Phi
 # Please refer to MKL_README.md for details

make/osx.mk (+4)

@@ -62,6 +62,10 @@ USE_OPENCV = 1
 # use openmp for parallelization
 USE_OPENMP = 0

+# whether use lapack during compilation
+# only effective when compiled with blas versions openblas/apple/atlas/mkl
+USE_LAPACK = 0
+
 # choose the version of blas you want to use
 # can be: mkl, blas, atlas, openblas
 USE_BLAS = apple

make/pip_linux_cpu.mk (+5)

@@ -29,6 +29,11 @@ ADD_CFLAGS += -Ldeps/lib -Ideps/include
 # matrix computation libraries for CPU/GPU
 #---------------------------------------------

+# whether use lapack during compilation
+# only effective when compiled with blas versions openblas/apple/atlas/mkl
+# you can disable it, however, you will not be able to use linalg-operators
+USE_LAPACK = 0
+
 # choose the version of blas you want to use
 # can be: mkl, blas, atlas, openblas
 # in default use atlas for linux while apple for osx

make/readthedocs.mk (+2)

@@ -32,6 +32,8 @@ USE_OPENMP = 0
 # can be: mkl, blas, atlas, openblas
 USE_STATIC_MKL = NONE
 USE_BLAS = NONE
+USE_LAPACK = 0
+
 #
 # add path to intel library, you may need it
 # for MKL, if you did not add the path to environment variable

src/operator/elemwise_op_common.h (+17)

@@ -96,6 +96,23 @@ struct ElemwiseGradUseOut {
   }
 };

+// Transfer gradient and input and output to FGradient function
+struct ElemwiseGradUseInOut {
+  const char *op_name;
+  std::vector<nnvm::NodeEntry> operator()(const nnvm::NodePtr& n,
+                                          const std::vector<nnvm::NodeEntry>& ograds) {
+    std::vector<nnvm::NodeEntry> heads(ograds.begin(), ograds.end());
+    for (auto& h : n->inputs) {
+      heads.push_back(h);
+    }
+    index_t n_out = n->num_outputs();
+    for (index_t i = 0; i < n_out; ++i) {
+      heads.emplace_back(nnvm::NodeEntry{n, i, 0});
+    }
+    return MakeGradNode(op_name, n, heads, n->attrs.dict);
+  }
+};
+
 // Transfer only gradient to FGradient function
 struct ElemwiseGradUseNone {
   const char *op_name;
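
ElemwiseGradUseInOut assembles the backward node's inputs as [output gradients, forward inputs, forward outputs], for operators whose gradient needs all three. A sketch of how such a functor is typically attached to an operator registration; `_some_op` and `_backward_some_op` are placeholder names, not operators from this commit.

```cpp
// Sketch: attaching ElemwiseGradUseInOut in an operator .cc under src/operator/.
// "_some_op" / "_backward_some_op" are placeholders for illustration only.
#include <nnvm/op.h>
#include <nnvm/op_attr_types.h>
#include "./elemwise_op_common.h"

namespace mxnet {
namespace op {

NNVM_REGISTER_OP(_some_op)
// The generated backward node receives [ograds, forward inputs, forward outputs].
.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseInOut{"_backward_some_op"});

}  // namespace op
}  // namespace mxnet
```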
