* Added declarations for decode/encode_bitmap ops. Signed-off-by: shugeo <sgazeos@gmail.com> * Added implementation for bitmap encoding/decoding ops. Signed-off-by: shugeo <sgazeos@gmail.com> * Added helpers for encode/decode bitmap ops. Signed-off-by: shugeo <sgazeos@gmail.com> * Refactored encodingBitmap helper. Signed-off-by: shugeo <sgazeos@gmail.com> * threshold encode/decode skeleton * helper skeleton * minor import fix * encoder shape fn & op impl * thresholdEncode cpu impl Signed-off-by: raver119@gmail.com <raver119@gmail.com> * thresholdDecode cpu impl Signed-off-by: raver119@gmail.com <raver119@gmail.com> * Only cosmetical changes. Signed-off-by: shugeo <sgazeos@gmail.com> * placeholder Signed-off-by: raver119@gmail.com <raver119@gmail.com> * Added cuda implementation for bitmap decode helper. Signed-off-by: shugeo <sgazeos@gmail.com> * cuda thresholdEstimate Signed-off-by: raver119@gmail.com <raver119@gmail.com> * cuda thresholdDecode Signed-off-by: raver119@gmail.com <raver119@gmail.com> * next step Signed-off-by: raver119@gmail.com <raver119@gmail.com> * - nano cmakelist update (get rid of Clion section) - fixed forgotten throw in AtomicTests Signed-off-by: raver119@gmail.com <raver119@gmail.com> * thesholdEncode cuda impl Signed-off-by: raver119@gmail.com <raver119@gmail.com> * Added tests for bitmap encoding/decoding ops. Signed-off-by: shugeo <sgazeos@gmail.com> * Fixed tests for encode/decode bitmaps. Signed-off-by: shugeo <sgazeos@gmail.com> * Refactored decode/encode helpers. Signed-off-by: shugeo <sgazeos@gmail.com> * Fixed crashes with bitmap decode/encode helpers. Signed-off-by: shugeo <sgazeos@gmail.com> * bitmap encode/decode CPU Signed-off-by: raver119@gmail.com <raver119@gmail.com> * bitmap encode/decode CUDA Signed-off-by: raver119@gmail.com <raver119@gmail.com> * C API removed for threshold/bitmap encode Signed-off-by: raver119@gmail.com <raver119@gmail.com> * EncodeBitmap/DecodeBitmap Java side Signed-off-by: raver119@gmail.com <raver119@gmail.com> * EncodeThreshold/DecodeThreshold Java side Signed-off-by: raver119@gmail.com <raver119@gmail.com> * EncodeThreshold/DecodeThreshold Java side Signed-off-by: raver119@gmail.com <raver119@gmail.com> * few more tests for threshold encoding Signed-off-by: raver119@gmail.com <raver119@gmail.com> * minor test tweak Signed-off-by: raver119@gmail.com <raver119@gmail.com> * two special tests Signed-off-by: raver119@gmail.com <raver119@gmail.com> * encodeBitmap CPU fix Signed-off-by: raver119@gmail.com <raver119@gmail.com> * parallel_long/parallel_double proper spans fix Signed-off-by: raver119@gmail.com <raver119@gmail.com> * encodeThreshold CUDA fix Signed-off-by: raver119@gmail.com <raver119@gmail.com> * nano fix Signed-off-by: raver119@gmail.com <raver119@gmail.com> * grid tweaks Signed-off-by: raver119@gmail.com <raver119@gmail.com> * RTX adaptation for thresholdEncode Signed-off-by: raver119 <raver119@gmail.com> * don't allow threshold encoding for length < 2 Signed-off-by: raver119@gmail.com <raver119@gmail.com> * get rid of NDArrayCompressor in EncodingHandler Signed-off-by: raver119@gmail.com <raver119@gmail.com> * one more minor update of EncodingHandler Signed-off-by: raver119@gmail.com <raver119@gmail.com> * one more minor tweak of EncodingHandler Signed-off-by: raver119@gmail.com <raver119@gmail.com> * - matmul allows integer data types use - EncodingHandler boundary default value - few tests for integer matmul Signed-off-by: raver119@gmail.com <raver119@gmail.com> * minor fix of CUDA bitmap encode Signed-off-by: raver119@gmail.com <raver119@gmail.com> * boundary changed to integer everywhere Signed-off-by: raver119@gmail.com <raver119@gmail.com> * boundary changed to integer everywhere Signed-off-by: raver119@gmail.com <raver119@gmail.com> * re-enable CUDA deallocator Signed-off-by: raver119@gmail.com <raver119@gmail.com> * threshold encoder fix for systems without omp Signed-off-by: raver119@gmail.com <raver119@gmail.com> * - encode_threshold now requires non-negative boundary - minor tweak in EncodingHandler Signed-off-by: raver119@gmail.com <raver119@gmail.com> * restore parallelism in decode_bitmap Signed-off-by: raver119@gmail.com <raver119@gmail.com> * fall back to omp for encode_bitmap cpu Signed-off-by: raver119@gmail.com <raver119@gmail.com> * single time casts Signed-off-by: raver119@gmail.com <raver119@gmail.com> * - additional test for encode_threshold - sync buffers to device before calling for shape function Signed-off-by: raver119@gmail.com <raver119@gmail.com> Co-authored-by: shugeo <sgazeos@gmail.com>
152 lines
5.1 KiB
CMake
152 lines
5.1 KiB
CMake
include_directories(${gtest_SOURCE_DIR}/include ${gtest_SOURCE_DIR})
|
|
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include)
|
|
if(LINUX)
|
|
link_directories(/usr/local/lib)
|
|
link_directories(/usr/lib)
|
|
link_directories(/lib)
|
|
endif()
|
|
|
|
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
|
|
|
if(APPLE)
|
|
message("Using apple")
|
|
link_directories(/usr/local/lib)
|
|
link_directories(/usr/lib)
|
|
link_directories(/lib)
|
|
endif()
|
|
if(WIN32)
|
|
get_property(dirs DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY INCLUDE_DIRECTORIES)
|
|
foreach(dir ${dirs})
|
|
message(STATUS "dir='${dir}'")
|
|
endforeach()
|
|
endif()
|
|
|
|
if (SD_CUDA)
|
|
find_package(CUDA)
|
|
message("Tests CUDA include directory: ${CUDA_INCLUDE_DIRS}")
|
|
include_directories(${CUDA_INCLUDE_DIRS})
|
|
add_definitions(-D__CUDABLAS__=true)
|
|
|
|
if(WIN32)
|
|
message("CUDA on Windows: enabling /EHsc")
|
|
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc /FS")
|
|
endif()
|
|
|
|
if ("${COMPUTE}" STREQUAL "all")
|
|
set(CMAKE_CUDA_FLAGS " -DCUDA_10 ${EXPM} -w --cudart=static -O3 --expt-extended-lambda -gencode arch=compute_30,code=sm_30 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70")
|
|
else()
|
|
set(CMAKE_CUDA_FLAGS " -DCUDA_10 ${EXPM} -w -G -g --expt-extended-lambda -arch=compute_${COMPUTE} -code=sm_${COMPUTE}")
|
|
endif()
|
|
endif()
|
|
|
|
# -fsanitize=address
|
|
# -fsanitize=leak
|
|
if (APPLE)
|
|
set(CMAKE_CXX_FLAGS " -fPIC -D__APPLE_OS__=true")
|
|
elseif(WIN32)
|
|
if (SD_CPU)
|
|
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -fPIC -march=native -mtune=native -O3")
|
|
endif()
|
|
|
|
if (SD_CPU AND LINUX)
|
|
set(CMAKE_CXX_FLAGS " -fPIC")
|
|
endif()
|
|
else()
|
|
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3")
|
|
set(CMAKE_CXX_FLAGS " -fPIC")
|
|
if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64*")
|
|
set(CMAKE_CXX_FLAGS " ${CMAKE_CXX_FLAGS} -mcpu=native")
|
|
else()
|
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native -mtune=native")
|
|
endif()
|
|
|
|
if (SD_CPU AND SD_SANITIZE)
|
|
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS} -fsanitize=address")
|
|
else()
|
|
# CUDA?
|
|
endif()
|
|
endif()
|
|
|
|
|
|
# tests are always compiled with all ops included
|
|
SET( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DSD_ALL_OPS=true -DBUILD_TESTS=true")
|
|
|
|
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
|
|
# using Clang
|
|
SET( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARCH_TUNE}")
|
|
elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel")
|
|
# using Intel C++
|
|
SET( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARCH_TUNE} -fp-model fast")
|
|
elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
|
|
# using Visual Studio C++
|
|
|
|
elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
|
|
# using GCC
|
|
SET( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fmax-errors=2")
|
|
|
|
if (CMAKE_BUILD_TYPE STREQUAL "Debug" AND ${CMAKE_SYSTEM_NAME} MATCHES "Linux" AND NOT(MINGW))
|
|
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -rdynamic -Wl,-export-dynamic")
|
|
SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -export-dynamic")
|
|
endif()
|
|
endif()
|
|
|
|
IF(${CMAKE_SYSTEM_NAME} MATCHES "Linux")
|
|
include_directories("/usr/include")
|
|
include_directories("/usr/local/include")
|
|
ENDIF(${CMAKE_SYSTEM_NAME} MATCHES "Linux")
|
|
|
|
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" AND "${CMAKE_CXX_COMPILER_VERSION}" VERSION_LESS 4.9)
|
|
message(FATAL_ERROR "You need at least GCC 4.9")
|
|
endif()
|
|
|
|
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
|
|
find_package(OpenMP)
|
|
endif()
|
|
if (OPENMP_FOUND)
|
|
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
|
|
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
|
|
else()
|
|
message("OPENMP NOT FOUND")
|
|
endif()
|
|
|
|
if (SD_CPU)
|
|
file(GLOB_RECURSE TEST_SOURCES false ./*.cpp ./*.h)
|
|
elseif (SD_CUDA)
|
|
file(GLOB_RECURSE TEST_SOURCES false ./*.cpp ./*.cu ./*.h)
|
|
endif()
|
|
|
|
# Filter out any source files from */CMakeFiles/* paths. these tend to cause problems such a multiple main definitions.
|
|
set (EXCLUDE_DIR "/CMakeFiles/")
|
|
foreach (TMP_PATH ${TEST_SOURCES})
|
|
string (FIND ${TMP_PATH} ${EXCLUDE_DIR} EXCLUDE_DIR_FOUND)
|
|
if (NOT ${EXCLUDE_DIR_FOUND} EQUAL -1)
|
|
list (REMOVE_ITEM TEST_SOURCES ${TMP_PATH})
|
|
endif ()
|
|
endforeach(TMP_PATH)
|
|
|
|
if (SD_CPU)
|
|
if (NOT BLAS_LIBRARIES)
|
|
set(BLAS_LIBRARIES "")
|
|
endif()
|
|
|
|
add_executable(runtests ${TEST_SOURCES})
|
|
target_link_libraries(runtests ${SD_LIBRARY_NAME}static ${MKLDNN_LIBRARIES} ${OPENBLAS_LIBRARIES} ${MKLDNN} ${BLAS_LIBRARIES} ${CPU_FEATURES} gtest gtest_main)
|
|
elseif(SD_CUDA)
|
|
|
|
add_executable(runtests ${TEST_SOURCES})
|
|
|
|
if (WIN32)
|
|
message("MSVC runtime for tests: ${MSVC_RT_LIB}")
|
|
endif()
|
|
|
|
# applies to windows only
|
|
set_property(TARGET runtests PROPERTY MSVC_RUNTIME_LIBRARY "${MSVC_RT_LIB}$<$<CONFIG:Debug>:Debug>")
|
|
set_property(TARGET gtest PROPERTY MSVC_RUNTIME_LIBRARY "${MSVC_RT_LIB}$<$<CONFIG:Debug>:Debug>")
|
|
set_property(TARGET gtest_main PROPERTY MSVC_RUNTIME_LIBRARY "${MSVC_RT_LIB}$<$<CONFIG:Debug>:Debug>")
|
|
|
|
if (HAVE_CUDNN)
|
|
message("CUDNN library: ${CUDNN}")
|
|
endif()
|
|
|
|
target_link_libraries(runtests ${SD_LIBRARY_NAME}static ${CUDA_LIBRARIES} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_cusolver_LIBRARY} ${CUDNN} ${MKLDNN} gtest gtest_main)
|
|
endif() |