diff --git a/3rdparty/mshadow/mshadow/base.h b/3rdparty/mshadow/mshadow/base.h index d4ea66d7bccd..6095230e14b3 100644 --- a/3rdparty/mshadow/mshadow/base.h +++ b/3rdparty/mshadow/mshadow/base.h @@ -337,7 +337,8 @@ const float kPi = 3.1415926f; typedef index_t openmp_index_t; #endif -#if MSHADOW_USE_MKL && MXNET_USE_LAPACK + +#if (MSHADOW_USE_MKL && MXNET_USE_LAPACK) || MXNET_USE_ILP64_LAPACKE // lapack_index_t could be replaced by index_t and removed when all blas library support large tensor typedef index_t lapack_index_t; #else diff --git a/CMakeLists.txt b/CMakeLists.txt index dd1206adeab9..80e319a1b4bb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -316,6 +316,7 @@ if(USE_ASAN) endif() list(APPEND mxnet_LINKER_LIBS ${mshadow_LINKER_LIBS}) +message("After choosing blas, linking to ${mxnet_LINKER_LIBS}") foreach(var ${C_CXX_INCLUDE_DIRECTORIES}) include_directories(${var}) @@ -443,19 +444,19 @@ elseif(UNIX) list(APPEND mxnet_LINKER_LIBS Threads::Threads) endif() - # ---[ LAPack if(USE_LAPACK) message("USE_LAPACK is ON") add_definitions(-DMXNET_USE_LAPACK=1) - # In the CMAKE_BUILD_TYPE="Distribution" case, we link against Blas libraries - # that already provide lapack symbols. Thus -llapack would be wrong. 
- if (NOT MSVC AND NOT CMAKE_BUILD_TYPE STREQUAL "Distribution") - list(APPEND mxnet_LINKER_LIBS lapack) + if(NOT USE_LAPACKE_INTERFACE) + # BLAS=open case is handled in ChooseBlas.cmake + if(NOT MSVC AND NOT CMAKE_BUILD_TYPE STREQUAL "Distribution" + AND NOT BLAS STREQUAL "Open" AND NOT BLAS STREQUAL "open") + list(APPEND mxnet_LINKER_LIBS lapack) + endif() endif() endif() - # ---[ jemalloc if(USE_JEMALLOC) find_package(JeMalloc) diff --git a/ci/docker/Dockerfile.build.centos7 b/ci/docker/Dockerfile.build.centos7 index 6ae6df7f15ec..f7b90c9415ec 100644 --- a/ci/docker/Dockerfile.build.centos7 +++ b/ci/docker/Dockerfile.build.centos7 @@ -59,17 +59,29 @@ RUN yum -y check-update || true && \ # Libraries # Provide clbas headerfiles atlas-devel \ - openblas-devel \ - lapack-devel \ opencv-devel \ openssl-devel \ zeromq-devel \ # Build-dependencies for ccache 3.7.9 gperf \ libb2-devel \ - libzstd-devel && \ + libzstd-devel \ + # Required by openblas build + gcc-gfortran && \ yum clean all +# Build OpenBLAS from source +RUN mkdir ~/openblas && \ + cd ~/openblas && \ + OPENBLAS_VERSION=0.3.10 && \ + wget \ + https://github.com/xianyi/OpenBLAS/archive/v${OPENBLAS_VERSION}.zip \ + -O openblas.zip && \ + unzip -q openblas.zip -d . 
&& \ + cd OpenBLAS-${OPENBLAS_VERSION} && \ + CXX="g++ -fPIC" CC="gcc -fPIC" make -j DYNAMIC_ARCH=1 DYNAMIC_OLDER=1 && \ + make PREFIX=/usr/local install + # Make Python 3.6 and Maven 3.3 Software Collections available by default during # the following build steps in this Dockerfile SHELL [ "/usr/bin/scl", "enable", "devtoolset-7", "rh-python36", "rh-maven35" ] diff --git a/ci/docker/Dockerfile.build.ubuntu b/ci/docker/Dockerfile.build.ubuntu index 73494a6784a8..952f4c40a8bb 100644 --- a/ci/docker/Dockerfile.build.ubuntu +++ b/ci/docker/Dockerfile.build.ubuntu @@ -58,14 +58,13 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ g++ \ g++-8 \ intel-mkl-2020.0-088 \ + libomp-dev \ ## Dependencies libgomp1 \ libturbojpeg0-dev \ - libopenblas-dev \ libcurl4-openssl-dev \ libatlas-base-dev \ libzmq3-dev \ - liblapack-dev \ libopencv-dev \ libxml2-dev \ # BytePS @@ -82,9 +81,31 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ autoconf \ gperf \ libb2-dev \ - libzstd-dev && \ + libzstd-dev \ + gfortran && \ rm -rf /var/lib/apt/lists/* +# Build OpenBLAS from source +RUN export LIBRARY_PATH=$LIBRARY_PATH:/usr/lib/gcc/x86_64-linux-gnu/7/ && \ + mkdir ~/openblas && \ + cd ~/openblas && \ + OPENBLAS_VERSION=0.3.10 && \ + wget \ + https://github.com/xianyi/OpenBLAS/archive/v${OPENBLAS_VERSION}.zip \ + -O openblas.zip && \ + unzip -q openblas.zip -d . && \ + cd OpenBLAS-${OPENBLAS_VERSION} && \ + CXX="clang++-6.0 -fPIC" CC="clang-6.0 -fPIC" make -j DYNAMIC_ARCH=1 DYNAMIC_OLDER=1 \ + USE_OPENMP=0 INTERFACE64=1 BINARY=64 && \ + make PREFIX=/usr/local/openblas-clang install && \ + cd .. && \ + rm -rf OpenBLAS-${OPENBLAS_VERSION} && \ + unzip -q openblas.zip -d . 
&& \ + cd OpenBLAS-${OPENBLAS_VERSION} && \ + CXX="g++ -fPIC" CC="gcc -fPIC" make -j DYNAMIC_ARCH=1 DYNAMIC_OLDER=1 \ + USE_OPENMP=1 INTERFACE64=1 BINARY=64 && \ + make PREFIX=/usr/local install + # ccache 3.7.9 has fixes for caching nvcc outputs RUN cd /usr/local/src && \ git clone --recursive https://github.com/ccache/ccache.git && \ diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh index c8ec09a33943..be8cdfb11a2b 100755 --- a/ci/docker/runtime_functions.sh +++ b/ci/docker/runtime_functions.sh @@ -439,6 +439,7 @@ build_ubuntu_gpu_clang10_werror() { build_ubuntu_cpu_clang6() { set -ex cd /work/build + export OpenBLAS_HOME=/usr/local/openblas-clang/ CXX=clang++-6.0 CC=clang-6.0 cmake \ -DUSE_MKL_IF_AVAILABLE=OFF \ -DUSE_MKLDNN=OFF \ @@ -452,6 +453,7 @@ build_ubuntu_cpu_clang6() { build_ubuntu_cpu_clang100() { set -ex cd /work/build + export OpenBLAS_HOME=/usr/local/openblas-clang/ CXX=clang++-10 CC=clang-10 cmake \ -DUSE_MKL_IF_AVAILABLE=OFF \ -DUSE_MKLDNN=OFF \ @@ -465,6 +467,7 @@ build_ubuntu_cpu_clang100() { build_ubuntu_cpu_clang_tidy() { set -ex cd /work/build + export OpenBLAS_HOME=/usr/local/openblas-clang/ # TODO(leezu) USE_OPENMP=OFF 3rdparty/dmlc-core/CMakeLists.txt:79 broken? 
CXX=clang++-10 CC=clang-10 cmake \ -DUSE_MKL_IF_AVAILABLE=OFF \ @@ -481,6 +484,7 @@ build_ubuntu_cpu_clang_tidy() { build_ubuntu_cpu_clang6_mkldnn() { set -ex cd /work/build + export OpenBLAS_HOME=/usr/local/openblas-clang/ CXX=clang++-6.0 CC=clang-6.0 cmake \ -DUSE_MKL_IF_AVAILABLE=OFF \ -DUSE_MKLDNN=ON \ @@ -493,6 +497,7 @@ build_ubuntu_cpu_clang6_mkldnn() { build_ubuntu_cpu_clang100_mkldnn() { set -ex cd /work/build + export OpenBLAS_HOME=/usr/local/openblas-clang/ CXX=clang++-10 CC=clang-10 cmake \ -DUSE_MKL_IF_AVAILABLE=OFF \ -DUSE_MKLDNN=ON \ diff --git a/cmake/ChooseBlas.cmake b/cmake/ChooseBlas.cmake index 00bde5320f67..a29ef94206c1 100644 --- a/cmake/ChooseBlas.cmake +++ b/cmake/ChooseBlas.cmake @@ -45,6 +45,81 @@ elseif(BLAS STREQUAL "Open" OR BLAS STREQUAL "open") add_definitions(-DMSHADOW_USE_CBLAS=1) add_definitions(-DMSHADOW_USE_MKL=0) add_definitions(-DMXNET_USE_BLAS_OPEN=1) + if(NOT MSVC) + # check if we need to link to omp + execute_process(COMMAND ${CMAKE_NM} -g ${OpenBLAS_LIB} + COMMAND grep omp_get_num_threads + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + OUTPUT_VARIABLE OPENBLAS_USES_OMP_OUT + RESULT_VARIABLE OPENBLAS_USES_OMP_RET) + if(NOT OPENBLAS_USES_OMP_OUT STREQUAL "" AND NOT OPENBLAS_USES_OMP_RET AND NOT USE_OPENMP) + message("Openblas uses OMP, automatically linking to it") + find_package(OpenMP REQUIRED) + message("OpenMP_CXX_LIBRARIES is ${OpenMP_CXX_LIBRARIES}") + list(APPEND mshadow_LINKER_LIBS "${OpenMP_CXX_LIBRARIES}") + endif() + # check if we need to link to gfortran + execute_process(COMMAND ${CMAKE_NM} -g ${OpenBLAS_LIB} + COMMAND grep gfortran + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + OUTPUT_VARIABLE OPENBLAS_USES_GFORTRAN_OUT + RESULT_VARIABLE OPENBLAS_USES_GFORTRAN_RET) + if(NOT OPENBLAS_USES_GFORTRAN_OUT STREQUAL "" AND NOT OPENBLAS_USES_GFORTRAN_RET) + message("Openblas uses GFortran, automatically linking to it") + file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/temp/CMakeLists.txt" + "cmake_minimum_required(VERSION 
${CMAKE_VERSION}) +project(CheckFortran Fortran) +set(CMAKE_Fortran_COMPILER gfortran) +file(WRITE \"${CMAKE_CURRENT_BINARY_DIR}/temp/FortranDir.cmake\" +\" +set(FORTRAN_DIR \\\"\$\{CMAKE_Fortran_IMPLICIT_LINK_DIRECTORIES\}\\\") +\") +") + execute_process( + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/temp/ + COMMAND ${CMAKE_COMMAND} . + ) + set(FORTRAN_DIR "") + include(build/temp/FortranDir.cmake) + find_library(FORTRAN_LIB NAMES gfortran HINTS ${FORTRAN_DIR}) + message("FORTRAN_DIR is ${FORTRAN_DIR}") + message("FORTRAN_LIB is ${FORTRAN_LIB}") + list(APPEND mshadow_LINKER_LIBS ${FORTRAN_LIB}) + file(REMOVE_RECURSE "${CMAKE_CURRENT_BINARY_DIR}/temp/") + endif() + # check the lapack flavor of openblas + include(CheckSymbolExists) + check_symbol_exists(OPENBLAS_USE64BITINT "${OpenBLAS_INCLUDE_DIR}/openblas_config.h" OPENBLAS_ILP64) + if(OPENBLAS_ILP64) + message("Using ILP64 OpenBLAS") + if(NOT USE_INT64_TENSOR_SIZE) + message(FATAL_ERROR "Must set USE_INT64_TENSOR_SIZE=1 when using ILP64 OpenBLAS") + endif() + else() + message("Using LP64 OpenBLAS") + endif() + if(USE_LAPACK) + if(EXISTS "${OpenBLAS_INCLUDE_DIR}/lapacke.h") + message("Detected lapacke.h, automatically using the LAPACKE interface") + add_definitions(-DMXNET_USE_LAPACKE_INTERFACE=1) + set(USE_LAPACKE_INTERFACE 1) + if(OPENBLAS_ILP64) + message("Detected ILP64 LAPACKE") + add_definitions(-DMXNET_USE_ILP64_LAPACKE=1) + endif() + else() + execute_process(COMMAND ${CMAKE_NM} -g ${OpenBLAS_LIB} + COMMAND grep sgetri_ + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + OUTPUT_VARIABLE OPENBLAS_CONTAINS_C_LAPACK_OUT + RESULT_VARIABLE OPENBLAS_CONTAINS_C_LAPACK_RET) + if(OPENBLAS_CONTAINS_C_LAPACK_OUT STREQUAL "" + AND NOT OPENBLAS_CONTAINS_C_LAPACK_RET) + list(APPEND mshadow_LINKER_LIBS lapack) + endif() + endif() + endif() + endif() elseif(BLAS STREQUAL "MKL" OR BLAS STREQUAL "mkl") if (USE_INT64_TENSOR_SIZE) set(MKL_USE_ILP64 ON CACHE BOOL "enable using ILP64 in MKL" FORCE) diff --git 
a/cmake/Modules/FindOpenBLAS.cmake b/cmake/Modules/FindOpenBLAS.cmake index a3a79caae461..db1cdb0c0644 100644 --- a/cmake/Modules/FindOpenBLAS.cmake +++ b/cmake/Modules/FindOpenBLAS.cmake @@ -19,6 +19,8 @@ file(TO_CMAKE_PATH "$ENV{OpenBLAS_HOME}" OpenBLAS_HOME) file(TO_CMAKE_PATH "$ENV{OpenBLAS}" OpenBLAS_DIR) SET(Open_BLAS_INCLUDE_SEARCH_PATHS + ${OpenBLAS_HOME} + ${OpenBLAS_HOME}/include /usr/include /usr/include/openblas /usr/include/openblas-base @@ -29,11 +31,11 @@ SET(Open_BLAS_INCLUDE_SEARCH_PATHS /usr/local/opt/openblas/include ${PROJECT_SOURCE_DIR}/3rdparty/OpenBLAS/include ${PROJECT_SOURCE_DIR}/thirdparty/OpenBLAS/include - ${OpenBLAS_HOME} - ${OpenBLAS_HOME}/include ) SET(Open_BLAS_LIB_SEARCH_PATHS + ${OpenBLAS_HOME} + ${OpenBLAS_HOME}/lib /lib/ /lib/openblas-base /lib64/ @@ -48,12 +50,10 @@ SET(Open_BLAS_LIB_SEARCH_PATHS ${PROJECT_SOURCE_DIR}/thirdparty/OpenBLAS/lib ${OpenBLAS_DIR} ${OpenBLAS_DIR}/lib - ${OpenBLAS_HOME} - ${OpenBLAS_HOME}/lib ) -FIND_PATH(OpenBLAS_INCLUDE_DIR NAMES cblas.h PATHS ${Open_BLAS_INCLUDE_SEARCH_PATHS}) -FIND_LIBRARY(OpenBLAS_LIB NAMES openblas PATHS ${Open_BLAS_LIB_SEARCH_PATHS}) +FIND_PATH(OpenBLAS_INCLUDE_DIR NAMES cblas.h HINTS ${Open_BLAS_INCLUDE_SEARCH_PATHS}) +FIND_LIBRARY(OpenBLAS_LIB NAMES libopenblas.a HINTS ${Open_BLAS_LIB_SEARCH_PATHS}) IF(NOT OpenBLAS_LIB) FIND_FILE(OpenBLAS_LIB NAMES libopenblas.dll.a PATHS ${Open_BLAS_LIB_SEARCH_PATHS}) ENDIF() diff --git a/config/distribution/linux_cu100.cmake b/config/distribution/linux_cu100.cmake index 357ccd457593..284b2f628b9d 100644 --- a/config/distribution/linux_cu100.cmake +++ b/config/distribution/linux_cu100.cmake @@ -32,6 +32,5 @@ set(USE_SSE ON CACHE BOOL "Build with x86 SSE instruction support") set(USE_F16C OFF CACHE BOOL "Build with x86 F16C instruction support") set(USE_LIBJPEG_TURBO ON CACHE BOOL "Build with libjpeg-turbo") set(USE_DIST_KVSTORE ON CACHE BOOL "Build with DIST_KVSTORE support") - set(CUDACXX "/usr/local/cuda-10.0/bin/nvcc" CACHE STRING "Cuda 
compiler") set(MXNET_CUDA_ARCH "3.0;5.0;6.0;7.0" CACHE STRING "Cuda architectures") diff --git a/config/distribution/linux_cu101.cmake b/config/distribution/linux_cu101.cmake index 29fdda2d6f0a..fe969e1f9da8 100644 --- a/config/distribution/linux_cu101.cmake +++ b/config/distribution/linux_cu101.cmake @@ -34,6 +34,5 @@ set(USE_SSE ON CACHE BOOL "Build with x86 SSE instruction support") set(USE_F16C OFF CACHE BOOL "Build with x86 F16C instruction support") set(USE_LIBJPEG_TURBO ON CACHE BOOL "Build with libjpeg-turbo") set(USE_DIST_KVSTORE ON CACHE BOOL "Build with DIST_KVSTORE support") - set(CUDACXX "/usr/local/cuda-10.1/bin/nvcc" CACHE STRING "Cuda compiler") set(MXNET_CUDA_ARCH "3.0;5.0;6.0;7.0" CACHE STRING "Cuda architectures") diff --git a/config/distribution/linux_cu102.cmake b/config/distribution/linux_cu102.cmake index 4e7e0509e4f9..00c76c14a02c 100644 --- a/config/distribution/linux_cu102.cmake +++ b/config/distribution/linux_cu102.cmake @@ -32,6 +32,5 @@ set(USE_SSE ON CACHE BOOL "Build with x86 SSE instruction support") set(USE_F16C OFF CACHE BOOL "Build with x86 F16C instruction support") set(USE_LIBJPEG_TURBO ON CACHE BOOL "Build with libjpeg-turbo") set(USE_DIST_KVSTORE ON CACHE BOOL "Build with DIST_KVSTORE support") - set(CUDACXX "/usr/local/cuda-10.2/bin/nvcc" CACHE STRING "Cuda compiler") set(MXNET_CUDA_ARCH "3.0;5.0;6.0;7.0" CACHE STRING "Cuda architectures") diff --git a/config/distribution/linux_cu110.cmake b/config/distribution/linux_cu110.cmake index 9c6ab5931928..b4fdb17570a6 100644 --- a/config/distribution/linux_cu110.cmake +++ b/config/distribution/linux_cu110.cmake @@ -32,6 +32,5 @@ set(USE_SSE ON CACHE BOOL "Build with x86 SSE instruction support") set(USE_F16C OFF CACHE BOOL "Build with x86 F16C instruction support") set(USE_LIBJPEG_TURBO ON CACHE BOOL "Build with libjpeg-turbo") set(USE_DIST_KVSTORE ON CACHE BOOL "Build with DIST_KVSTORE support") - set(CUDACXX "/usr/local/cuda-11.0/bin/nvcc" CACHE STRING "Cuda compiler") 
set(MXNET_CUDA_ARCH "5.0;6.0;7.0;8.0" CACHE STRING "Cuda architectures") diff --git a/config/distribution/linux_cu92.cmake b/config/distribution/linux_cu92.cmake index 8499421f91ec..bc4f6afebdbe 100644 --- a/config/distribution/linux_cu92.cmake +++ b/config/distribution/linux_cu92.cmake @@ -31,6 +31,5 @@ set(USE_TVM_OP OFF CACHE BOOL "Enable use of TVM operator build system.") set(USE_SSE ON CACHE BOOL "Build with x86 SSE instruction support") set(USE_F16C OFF CACHE BOOL "Build with x86 F16C instruction support") set(USE_LIBJPEG_TURBO ON CACHE BOOL "Build with libjpeg-turbo") - set(CUDACXX "/usr/local/cuda-9.2/bin/nvcc" CACHE STRING "Cuda compiler") set(MXNET_CUDA_ARCH "3.0;5.0;6.0;7.0" CACHE STRING "Cuda architectures") diff --git a/src/operator/c_lapack_api.cc b/src/operator/c_lapack_api.cc index 13f24c90f6c4..2d2109d05427 100644 --- a/src/operator/c_lapack_api.cc +++ b/src/operator/c_lapack_api.cc @@ -19,7 +19,7 @@ #include "c_lapack_api.h" -#if (MSHADOW_USE_MKL && MXNET_USE_LAPACK) +#if (MXNET_USE_LAPACK && (MSHADOW_USE_MKL || MXNET_USE_LAPACKE_INTERFACE)) #elif MXNET_USE_LAPACK #else // use pragma message instead of warning diff --git a/src/operator/c_lapack_api.h b/src/operator/c_lapack_api.h index eae5c3e25f39..71161f108ce6 100644 --- a/src/operator/c_lapack_api.h +++ b/src/operator/c_lapack_api.h @@ -70,8 +70,8 @@ using namespace mshadow; -// Will cause clash with MKL fortran layer headers -#if MSHADOW_USE_MKL == 0 +// Will cause clash with MKL/OpenBLAS fortran layer headers +#if MSHADOW_USE_MKL == 0 && MXNET_USE_LAPACKE_INTERFACE == 0 extern "C" { @@ -243,11 +243,18 @@ inline void flip(int m, int n, DType *b, int ldb, DType *a, int lda) { } -#if (MSHADOW_USE_MKL && MXNET_USE_LAPACK) - - // We interface with the C-interface of MKL - // as this is the preferred way. 
- #include <mkl_lapacke.h> +#if (MXNET_USE_LAPACK && (MSHADOW_USE_MKL || MXNET_USE_LAPACKE_INTERFACE)) + #if MSHADOW_USE_MKL + #include <mkl_lapacke.h> + #else + #if MXNET_USE_ILP64_LAPACKE + #define lapack_int int64_t + #endif + // prevent multiple inclusion of complex.h in lapacke.h + #define lapack_complex_float float _Complex + #define lapack_complex_double double _Complex + #include <lapacke.h> + #endif #define MXNET_LAPACK_ROW_MAJOR LAPACK_ROW_MAJOR #define MXNET_LAPACK_COL_MAJOR LAPACK_COL_MAJOR diff --git a/tools/dependencies/openblas.sh b/tools/dependencies/openblas.sh index 12cad3ee2037..5b46c31f60ca 100755 --- a/tools/dependencies/openblas.sh +++ b/tools/dependencies/openblas.sh @@ -31,7 +31,8 @@ if [[ (! -e $DEPS_PATH/lib/libopenblas.a) ]]; then cd $DEPS_PATH/OpenBLAS-${OPENBLAS_VERSION} # Adding NO_DYNAMIC=1 flag causes make install to fail - CFLAGS="-fPIC" CXXFLAGS="-fPIC" $MAKE DYNAMIC_ARCH=1 DYNAMIC_OLDER=1 USE_OPENMP=1 + CFLAGS="-fPIC" CXXFLAGS="-fPIC" $MAKE DYNAMIC_ARCH=1 DYNAMIC_OLDER=1 USE_OPENMP=1 \ + INTERFACE64=1 BINARY=64 patchelf --set-rpath '$ORIGIN' --force-rpath libopenblas.so $MAKE PREFIX=$DEPS_PATH install diff --git a/tools/staticbuild/build_lib.sh b/tools/staticbuild/build_lib.sh index ec23ac77b5d9..429387010f5a 100755 --- a/tools/staticbuild/build_lib.sh +++ b/tools/staticbuild/build_lib.sh @@ -42,7 +42,6 @@ cd - rm -rf lib; mkdir lib; if [[ $PLATFORM == 'linux' ]]; then cp -L build/libmxnet.so lib/libmxnet.so - cp -L staticdeps/lib/libopenblas.so lib/libopenblas.so.0 cp -L $(ldd lib/libmxnet.so | grep libgfortran | awk '{print $3}') lib/ elif [[ $PLATFORM == 'darwin' ]]; then cp -L build/libmxnet.dylib lib/libmxnet.dylib