Skip to content
This repository was archived by the owner on Nov 17, 2023. It is now read-only.
Closed
14 changes: 13 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ message(STATUS "CMAKE_SYSTEM_PROCESSOR ${CMAKE_SYSTEM_PROCESSOR}")

message(STATUS "CMAKE_SYSTEM_NAME ${CMAKE_SYSTEM_NAME}")

# Architecture name handed to the compiler as both -march and -mtune.
# The default "NONE" requests no architecture-specific flags.
set(USE_X86_ARCH "NONE" CACHE STRING "Build with x86 options for -march")

# When USE_TVM_OP is enabled, define MXNET_USE_TVM_OP=1 for the compiled sources.
if(USE_TVM_OP)
add_definitions(-DMXNET_USE_TVM_OP=1)
endif()
Expand Down Expand Up @@ -158,6 +160,11 @@ else(MSVC)
else()
add_definitions(-DMSHADOW_USE_F16C=0)
endif()
# Probe whether the compiler accepts the requested -march/-mtune value.
# check_cxx_compiler_flag() stores its answer in the cache entry SUPPORT_X86_ARCH
# and does not run again while that entry exists. The entry is therefore dropped
# whenever USE_X86_ARCH differs from the value that was last probed; otherwise
# the result for a previously configured architecture would be silently reused.
if(NOT "${USE_X86_ARCH}" STREQUAL "NONE")
  if(NOT "${USE_X86_ARCH}" STREQUAL "${SUPPORT_X86_ARCH_PROBED_VALUE}")
    unset(SUPPORT_X86_ARCH CACHE)
    set(SUPPORT_X86_ARCH_PROBED_VALUE "${USE_X86_ARCH}" CACHE INTERNAL
        "USE_X86_ARCH value that SUPPORT_X86_ARCH was last probed for")
  endif()
  check_cxx_compiler_flag("-march=${USE_X86_ARCH} -mtune=${USE_X86_ARCH}" SUPPORT_X86_ARCH)
else()
  # "NONE" means no architecture flags were requested, so nothing is probed.
  set(SUPPORT_X86_ARCH FALSE)
endif()
# Default POSITION_INDEPENDENT_CODE to ON for targets created after this point.
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
# Turn on -Wall for C sources while silencing the unknown-pragmas and
# sign-compare diagnostics.
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wno-unknown-pragmas -Wno-sign-compare")
if ("${CMAKE_CXX_COMPILER_ID}" MATCHES ".*Clang$")
Expand All @@ -179,6 +186,9 @@ else(MSVC)
elseif(SUPPORT_MSSE2)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse2")
endif()
# Add the -march/-mtune pair for USE_X86_ARCH to the C flags when the compiler
# accepted it, then carry the complete C flag string over to the C++ flags.
if(SUPPORT_X86_ARCH)
  set(mxnet_x86_arch_flags "-march=${USE_X86_ARCH} -mtune=${USE_X86_ARCH}")
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${mxnet_x86_arch_flags}")
endif()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_C_FLAGS}")
if(SUPPORT_CXX14)
add_definitions(-DDMLC_USE_CXX11=1)
Expand Down Expand Up @@ -274,9 +284,11 @@ if(USE_MKLDNN)

# Cache overrides for the MKL-DNN options below (presumably read by the
# 3rdparty/mkldnn subproject; confirm against its CMake files).

# Architecture flags: reuse the -march/-mtune pair only when the compiler
# accepted it; otherwise pass an empty string.
if(SUPPORT_X86_ARCH)
  set(mkldnn_arch_opt_flags "-march=${USE_X86_ARCH} -mtune=${USE_X86_ARCH}")
else()
  set(mkldnn_arch_opt_flags "")
endif()
set(MKLDNN_ARCH_OPT_FLAGS "${mkldnn_arch_opt_flags}" CACHE INTERNAL "" FORCE)

# Static library, with tests, examples and JIT profiling switched off.
set(MKLDNN_LIBRARY_TYPE STATIC CACHE INTERNAL "" FORCE)
set(MKLDNN_BUILD_TESTS OFF CACHE INTERNAL "" FORCE)
set(MKLDNN_BUILD_EXAMPLES OFF CACHE INTERNAL "" FORCE)
set(MKLDNN_ENABLE_JIT_PROFILING OFF CACHE INTERNAL "" FORCE)

add_subdirectory(3rdparty/mkldnn)

Expand Down
8 changes: 8 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,14 @@ CFLAGS += -DDMLC_MODERN_THREAD_LOCAL=0
# disable stack trace in exception by default.
CFLAGS += -DDMLC_LOG_STACK_TRACE_SIZE=0

# USE_X86_ARCH is the value handed to -march and -mtune. It falls back to NONE
# when the variable has no value, and NONE leaves CFLAGS untouched.
ifndef USE_X86_ARCH
USE_X86_ARCH=NONE
endif

# Compare and use the stripped value. An assignment followed by an inline
# comment ("USE_X86_ARCH = NONE  # ...") keeps its trailing blanks, which would
# not match NONE and would end up emitting -march=NONE.
ifneq ($(strip $(USE_X86_ARCH)),NONE)
CFLAGS += -march=$(strip $(USE_X86_ARCH)) -mtune=$(strip $(USE_X86_ARCH))
endif

ifeq ($(DEV), 1)
CFLAGS += -g -Werror
NVCCFLAGS += -Werror cross-execution-space-call
Expand Down
4 changes: 3 additions & 1 deletion include/mxnet/libinfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,9 @@ enum : unsigned {
CPU_SSE4A, // AMD extensions to SSE4
CPU_AVX,
CPU_AVX2,

CPU_AVX512F,
CPU_AVX512BW,
CPU_AVX512VNNI,

// Multiprocessing / CPU / System
OPENMP,
Expand Down
4 changes: 4 additions & 0 deletions make/config.mk
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,10 @@ endif
# For cross compilation, please check support for F16C on target device and turn off if necessary.
USE_F16C =

# Value for the x86 -march and -mtune compiler options; see
# https://gcc.gnu.org/onlinedocs/gcc/x86-Options.html for the accepted names.
# e.g. USE_X86_ARCH=native enables -march=native and -mtune=native.
# NONE adds neither option.
USE_X86_ARCH = NONE

#----------------------------
# distributed computing
#----------------------------
Expand Down
9 changes: 8 additions & 1 deletion mkldnn.mk
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,19 @@ endif
# Base option list for the MKL-DNN build (presumably passed to the cmake
# invocation that configures it; confirm against the rule that uses
# mkldnn_FLAGS).
# Install under $(MKLDNNROOT), with libraries in its lib/ directory.
mkldnn_FLAGS = -DCMAKE_INSTALL_PREFIX=$(MKLDNNROOT)
mkldnn_FLAGS += -DCMAKE_INSTALL_LIBDIR=lib
# Build tree location.
mkldnn_FLAGS += -B$(MKLDNN_BUILDDIR)
# No architecture-specific optimisation flags by default.
mkldnn_FLAGS += -DMKLDNN_ARCH_OPT_FLAGS=""
# Static library, without tests, examples or JIT profiling.
mkldnn_FLAGS += -DMKLDNN_BUILD_TESTS=OFF
mkldnn_FLAGS += -DMKLDNN_BUILD_EXAMPLES=OFF
mkldnn_FLAGS += -DMKLDNN_ENABLE_JIT_PROFILING=OFF
mkldnn_FLAGS += -DMKLDNN_LIBRARY_TYPE=STATIC

# USE_X86_ARCH is the value handed to -march and -mtune. It falls back to NONE
# when the variable has no value, and NONE adds no architecture flags to the
# MKL-DNN option list.
ifndef USE_X86_ARCH
USE_X86_ARCH=NONE
endif

# Compare and use the stripped value. An assignment followed by an inline
# comment ("USE_X86_ARCH = NONE  # ...") keeps its trailing blanks, which would
# not match NONE and would end up requesting -march=NONE.
ifneq ($(strip $(USE_X86_ARCH)),NONE)
mkldnn_FLAGS += -DMKLDNN_ARCH_OPT_FLAGS="-march=$(strip $(USE_X86_ARCH)) -mtune=$(strip $(USE_X86_ARCH))"
endif

# Unless USE_OPENMP is 1, request MKLDNN_CPU_RUNTIME=SEQ for the MKL-DNN build.
ifneq ($(USE_OPENMP), 1)
mkldnn_FLAGS += -DMKLDNN_CPU_RUNTIME=SEQ
endif
Expand Down
12 changes: 12 additions & 0 deletions src/libinfo.cc
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,15 @@ class FeatureSet {
#if __AVX2__
feature_bits.set(CPU_AVX2);
#endif
#if __AVX512F__
feature_bits.set(CPU_AVX512F);
#endif
#if __AVX512BW__
feature_bits.set(CPU_AVX512BW);
#endif
#if __AVX512VNNI__
feature_bits.set(CPU_AVX512VNNI);
#endif

// CPU
feature_bits.set(OPENMP, MXNET_USE_OPENMP);
Expand Down Expand Up @@ -144,6 +153,9 @@ const std::vector<std::string> EnumNames::names = {
"CPU_SSE4A",
"CPU_AVX",
"CPU_AVX2",
"CPU_AVX512F",
"CPU_AVX512BW",
"CPU_AVX512VNNI",
"OPENMP",
"SSE",
"F16C",
Expand Down