Add USE_X86_ARCH build option and report AVX-512 features in libinfo

USE_X86_ARCH (default: NONE) selects the value handed to the compiler's
-march and -mtune options, e.g. USE_X86_ARCH=native.  It is wired into both
build systems:

  * CMakeLists.txt: new cache variable.  The flag pair is probed with
    check_cxx_compiler_flag() and, when accepted, appended to CMAKE_C_FLAGS
    (and therefore to CMAKE_CXX_FLAGS, which is built from it) and forwarded
    to the bundled MKL-DNN through MKLDNN_ARCH_OPT_FLAGS.
  * Makefile, make/config.mk, mkldnn.mk: the same value is appended to
    CFLAGS and added to mkldnn_FLAGS as MKLDNN_ARCH_OPT_FLAGS.

include/mxnet/libinfo.h and src/libinfo.cc gain the CPU_AVX512F,
CPU_AVX512BW and CPU_AVX512VNNI features, set from the compiler-defined
__AVX512F__, __AVX512BW__ and __AVX512VNNI__ macros.

Review adjustments folded into this revision:

  * MKLDNN_ARCH_OPT_FLAGS is still pinned to "" when USE_X86_ARCH is NONE
    (CMakeLists.txt and mkldnn.mk).  Dropping the explicit empty value would
    leave the choice to MKL-DNN's own default and, in the CMake build, would
    keep a value forced into the cache by an earlier configure.
  * The cached SUPPORT_X86_ARCH probe result is cleared before probing so
    that changing USE_X86_ARCH in an existing build tree takes effect.

Notes for whoever applies this: the post-image blob ids on the index lines of
CMakeLists.txt and mkldnn.mk predate the adjustments above, and context-line
whitespace was reconstructed; use "git apply --ignore-whitespace" if a hunk
does not match.

diff --git a/CMakeLists.txt b/CMakeLists.txt
index f0779abd06d4..af131ca9c38a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -67,6 +67,8 @@ message(STATUS "CMAKE_SYSTEM_PROCESSOR ${CMAKE_SYSTEM_PROCESSOR}")
 
 message(STATUS "CMAKE_SYSTEM_NAME ${CMAKE_SYSTEM_NAME}")
 
+set(USE_X86_ARCH "NONE" CACHE STRING "Build with x86 options for -march")
+
 if(USE_TVM_OP)
   add_definitions(-DMXNET_USE_TVM_OP=1)
 endif()
@@ -158,6 +160,14 @@ else(MSVC)
   else()
     add_definitions(-DMSHADOW_USE_F16C=0)
   endif()
+  if(NOT USE_X86_ARCH STREQUAL "NONE")
+    # check_cxx_compiler_flag() caches its result; drop it so that changing
+    # USE_X86_ARCH in an existing build tree re-runs the probe.
+    unset(SUPPORT_X86_ARCH CACHE)
+    check_cxx_compiler_flag("-march=${USE_X86_ARCH} -mtune=${USE_X86_ARCH}" SUPPORT_X86_ARCH)
+  else()
+    set(SUPPORT_X86_ARCH FALSE)
+  endif()
   set(CMAKE_POSITION_INDEPENDENT_CODE ON)
   set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wno-unknown-pragmas -Wno-sign-compare")
   if ("${CMAKE_CXX_COMPILER_ID}" MATCHES ".*Clang$")
@@ -179,6 +189,9 @@ else(MSVC)
   elseif(SUPPORT_MSSE2)
     set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse2")
   endif()
+  if(SUPPORT_X86_ARCH)
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=${USE_X86_ARCH} -mtune=${USE_X86_ARCH}")
+  endif()
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_C_FLAGS}")
   if(SUPPORT_CXX14)
     add_definitions(-DDMLC_USE_CXX11=1)
@@ -274,9 +287,15 @@ if(USE_MKLDNN)
 
   set(MKLDNN_BUILD_TESTS OFF CACHE INTERNAL "" FORCE)
   set(MKLDNN_BUILD_EXAMPLES OFF CACHE INTERNAL "" FORCE)
-  set(MKLDNN_ARCH_OPT_FLAGS "" CACHE INTERNAL "" FORCE)
   set(MKLDNN_ENABLE_JIT_PROFILING OFF CACHE INTERNAL "" FORCE)
   set(MKLDNN_LIBRARY_TYPE STATIC CACHE INTERNAL "" FORCE)
+  if(SUPPORT_X86_ARCH)
+    set(MKLDNN_ARCH_OPT_FLAGS "-march=${USE_X86_ARCH} -mtune=${USE_X86_ARCH}" CACHE INTERNAL "" FORCE)
+  else()
+    # Keep the explicit empty value so MKL-DNN does not apply its own default
+    # architecture flags and a value cached by an earlier configure is reset.
+    set(MKLDNN_ARCH_OPT_FLAGS "" CACHE INTERNAL "" FORCE)
+  endif()
 
   add_subdirectory(3rdparty/mkldnn)
 
diff --git a/Makefile b/Makefile
index 1b858cc48671..07458ce9f027 100644
--- a/Makefile
+++ b/Makefile
@@ -97,6 +97,14 @@ CFLAGS += -DDMLC_MODERN_THREAD_LOCAL=0
 # disable stack trace in exception by default.
 CFLAGS += -DDMLC_LOG_STACK_TRACE_SIZE=0
 
+ifndef USE_X86_ARCH
+	USE_X86_ARCH=NONE
+endif
+
+ifneq ($(USE_X86_ARCH), NONE)
+	CFLAGS += -march=$(USE_X86_ARCH) -mtune=$(USE_X86_ARCH)
+endif
+
 ifeq ($(DEV), 1)
 	CFLAGS += -g -Werror
 	NVCCFLAGS += -Werror cross-execution-space-call
diff --git a/include/mxnet/libinfo.h b/include/mxnet/libinfo.h
index 1972688c7739..22d0c32618af 100644
--- a/include/mxnet/libinfo.h
+++ b/include/mxnet/libinfo.h
@@ -154,7 +154,9 @@ enum : unsigned {
   CPU_SSE4A,  // AMD extensions to SSE4
   CPU_AVX,
   CPU_AVX2,
-
+  CPU_AVX512F,
+  CPU_AVX512BW,
+  CPU_AVX512VNNI,
 
   // Multiprocessing / CPU / System
   OPENMP,
diff --git a/make/config.mk b/make/config.mk
index 982d15b19656..899de70f317c 100644
--- a/make/config.mk
+++ b/make/config.mk
@@ -163,6 +163,10 @@ endif
 # For cross compilation, please check support for F16C on target device and turn off if necessary.
 USE_F16C =
 
+# -march and -mtune option for x86, see https://gcc.gnu.org/onlinedocs/gcc/x86-Options.html
+# eg. USE_X86_ARCH=native will enable -march=native and -mtune=native
+USE_X86_ARCH = NONE
+
 #----------------------------
 # distributed computing
 #----------------------------
diff --git a/mkldnn.mk b/mkldnn.mk
index 056cb4e51051..a6644f4720b5 100644
--- a/mkldnn.mk
+++ b/mkldnn.mk
@@ -26,12 +26,23 @@ endif
 mkldnn_FLAGS = -DCMAKE_INSTALL_PREFIX=$(MKLDNNROOT)
 mkldnn_FLAGS += -DCMAKE_INSTALL_LIBDIR=lib
 mkldnn_FLAGS += -B$(MKLDNN_BUILDDIR)
-mkldnn_FLAGS += -DMKLDNN_ARCH_OPT_FLAGS=""
 mkldnn_FLAGS += -DMKLDNN_BUILD_TESTS=OFF
 mkldnn_FLAGS += -DMKLDNN_BUILD_EXAMPLES=OFF
 mkldnn_FLAGS += -DMKLDNN_ENABLE_JIT_PROFILING=OFF
 mkldnn_FLAGS += -DMKLDNN_LIBRARY_TYPE=STATIC
 
+ifndef USE_X86_ARCH
+	USE_X86_ARCH=NONE
+endif
+
+# Always pass MKLDNN_ARCH_OPT_FLAGS explicitly so that MKL-DNN's own default
+# architecture flags are not applied when USE_X86_ARCH is NONE.
+ifneq ($(USE_X86_ARCH), NONE)
+	mkldnn_FLAGS += -DMKLDNN_ARCH_OPT_FLAGS="-march=$(USE_X86_ARCH) -mtune=$(USE_X86_ARCH)"
+else
+	mkldnn_FLAGS += -DMKLDNN_ARCH_OPT_FLAGS=""
+endif
+
 ifneq ($(USE_OPENMP), 1)
 	mkldnn_FLAGS += -DMKLDNN_CPU_RUNTIME=SEQ
 endif
diff --git a/src/libinfo.cc b/src/libinfo.cc
index b31d7e4301e0..173beb303270 100644
--- a/src/libinfo.cc
+++ b/src/libinfo.cc
@@ -67,6 +67,15 @@ class FeatureSet {
 #if __AVX2__
     feature_bits.set(CPU_AVX2);
 #endif
+#if __AVX512F__
+    feature_bits.set(CPU_AVX512F);
+#endif
+#if __AVX512BW__
+    feature_bits.set(CPU_AVX512BW);
+#endif
+#if __AVX512VNNI__
+    feature_bits.set(CPU_AVX512VNNI);
+#endif
 
     // CPU
     feature_bits.set(OPENMP, MXNET_USE_OPENMP);
@@ -144,6 +153,9 @@ const std::vector<std::string> EnumNames::names = {
   "CPU_SSE4A",
   "CPU_AVX",
   "CPU_AVX2",
+  "CPU_AVX512F",
+  "CPU_AVX512BW",
+  "CPU_AVX512VNNI",
   "OPENMP",
   "SSE",
   "F16C",