Pi build and initial ArmCompute library support (#494)
* - Raspberry Pi build and ArmCompute library support - initial ArmCompute platform implementations (MaxPool2d, AvgPool2d for float32) Signed-off-by: AbdelRauf <rauf@konduit.ai> * - Build script for Pi - small changes Signed-off-by: AbdelRauf <rauf@konduit.ai> master
parent
fb578fdecd
commit
69ebc96068
|
@ -131,6 +131,23 @@ if(NOT SD_CUDA)
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
# arm-compute entry
# When the armcompute helper is requested: locate the ARM Compute Library
# (mandatory), set HAVE_ARMCOMPUTE, define ARMCOMPUTENEON_ENABLED and add the
# library's include directory.
if(${HELPERS_armcompute})
    find_package(ARMCOMPUTE REQUIRED)

    if(ARMCOMPUTE_FOUND)
        message("Found ARMCOMPUTE: ${ARMCOMPUTE_LIBRARIES}")
        set(HAVE_ARMCOMPUTE 1)
        # Add preprocessor definition for ARM Compute NEON
        add_definitions(-DARMCOMPUTENEON_ENABLED)
        # Build our library with NEON support.
        # -mfpu=neon is only accepted by 32-bit ARM toolchains; other compilers
        # (e.g. AArch64 GCC) reject it as an unknown option, so probe the
        # compiler first and append the flag only when it is understood.
        include(CheckCXXCompilerFlag)
        check_cxx_compiler_flag("-mfpu=neon" SD_CXX_SUPPORTS_MFPU_NEON)
        if(SD_CXX_SUPPORTS_MFPU_NEON)
            set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon")
        endif()
        include_directories(${ARMCOMPUTE_INCLUDE})
        message("----${ARMCOMPUTE_INCLUDE}---")
    endif()

endif()
|
||||||
|
|
||||||
|
|
||||||
# new mkl-dnn entry
|
# new mkl-dnn entry
|
||||||
if (${HELPERS_mkldnn})
|
if (${HELPERS_mkldnn})
|
||||||
|
|
|
@ -146,6 +146,10 @@ if (HAVE_MKLDNN)
|
||||||
file(GLOB_RECURSE CUSTOMOPS_MKLDNN_SOURCES false ../include/ops/declarable/platform/mkldnn/*.cpp ../include/ops/declarable/platform/mkldnn/mkldnnUtils.h)
|
file(GLOB_RECURSE CUSTOMOPS_MKLDNN_SOURCES false ../include/ops/declarable/platform/mkldnn/*.cpp ../include/ops/declarable/platform/mkldnn/mkldnnUtils.h)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
# Gather the armcompute platform sources and headers into
# CUSTOMOPS_ARMCOMPUTE_SOURCES, but only when HAVE_ARMCOMPUTE is set.
if(HAVE_ARMCOMPUTE)
    file(GLOB_RECURSE CUSTOMOPS_ARMCOMPUTE_SOURCES
        false
        ../include/ops/declarable/platform/armcompute/*.cpp
        ../include/ops/declarable/platform/armcompute/*.h
    )
endif()
|
||||||
|
|
||||||
if(SD_CUDA)
|
if(SD_CUDA)
|
||||||
message("Build cublas")
|
message("Build cublas")
|
||||||
find_package(CUDA)
|
find_package(CUDA)
|
||||||
|
@ -243,7 +247,7 @@ if(SD_CUDA)
|
||||||
${CUSTOMOPS_HELPERS_SOURCES} ${HELPERS_SOURCES} ${EXEC_SOURCES}
|
${CUSTOMOPS_HELPERS_SOURCES} ${HELPERS_SOURCES} ${EXEC_SOURCES}
|
||||||
${LOOPS_SOURCES} ${ARRAY_SOURCES} ${TYPES_SOURCES}
|
${LOOPS_SOURCES} ${ARRAY_SOURCES} ${TYPES_SOURCES}
|
||||||
${MEMORY_SOURCES} ${GRAPH_SOURCES} ${CUSTOMOPS_SOURCES} ${INDEXING_SOURCES} ${EXCEPTIONS_SOURCES} ${OPS_SOURCES} ${PERF_SOURCES} ${CUSTOMOPS_CUDNN_SOURCES} ${CUSTOMOPS_MKLDNN_SOURCES}
|
${MEMORY_SOURCES} ${GRAPH_SOURCES} ${CUSTOMOPS_SOURCES} ${INDEXING_SOURCES} ${EXCEPTIONS_SOURCES} ${OPS_SOURCES} ${PERF_SOURCES} ${CUSTOMOPS_CUDNN_SOURCES} ${CUSTOMOPS_MKLDNN_SOURCES}
|
||||||
${CUSTOMOPS_GENERIC_SOURCES}
|
${CUSTOMOPS_ARMCOMPUTE_SOURCES} ${CUSTOMOPS_GENERIC_SOURCES}
|
||||||
)
|
)
|
||||||
|
|
||||||
if (WIN32)
|
if (WIN32)
|
||||||
|
@ -351,8 +355,8 @@ elseif(SD_CPU)
|
||||||
add_definitions(-D__CPUBLAS__=true)
|
add_definitions(-D__CPUBLAS__=true)
|
||||||
add_library(samediff_obj OBJECT ${LEGACY_SOURCES}
|
add_library(samediff_obj OBJECT ${LEGACY_SOURCES}
|
||||||
${LOOPS_SOURCES} ${HELPERS_SOURCES} ${EXEC_SOURCES} ${ARRAY_SOURCES} ${TYPES_SOURCES}
|
${LOOPS_SOURCES} ${HELPERS_SOURCES} ${EXEC_SOURCES} ${ARRAY_SOURCES} ${TYPES_SOURCES}
|
||||||
${MEMORY_SOURCES} ${GRAPH_SOURCES} ${CUSTOMOPS_SOURCES} ${EXCEPTIONS_SOURCES} ${INDEXING_SOURCES} ${CUSTOMOPS_MKLDNN_SOURCES} ${CUSTOMOPS_GENERIC_SOURCES}
|
${MEMORY_SOURCES} ${GRAPH_SOURCES} ${CUSTOMOPS_SOURCES} ${EXCEPTIONS_SOURCES} ${INDEXING_SOURCES} ${CUSTOMOPS_MKLDNN_SOURCES}
|
||||||
${OPS_SOURCES} ${PERF_SOURCES})
|
${CUSTOMOPS_ARMCOMPUTE_SOURCES} ${CUSTOMOPS_GENERIC_SOURCES} ${OPS_SOURCES} ${PERF_SOURCES})
|
||||||
if(IOS)
|
if(IOS)
|
||||||
add_library(${SD_LIBRARY_NAME} STATIC $<TARGET_OBJECTS:samediff_obj>)
|
add_library(${SD_LIBRARY_NAME} STATIC $<TARGET_OBJECTS:samediff_obj>)
|
||||||
else()
|
else()
|
||||||
|
@ -378,12 +382,12 @@ elseif(SD_CPU)
|
||||||
if (NOT BLAS_LIBRARIES)
|
if (NOT BLAS_LIBRARIES)
|
||||||
set(BLAS_LIBRARIES "")
|
set(BLAS_LIBRARIES "")
|
||||||
endif()
|
endif()
|
||||||
target_link_libraries(${SD_LIBRARY_NAME} ${MKLDNN} ${MKLDNN_LIBRARIES} ${OPENBLAS_LIBRARIES} ${BLAS_LIBRARIES} ${CPU_FEATURES})
|
target_link_libraries(${SD_LIBRARY_NAME} ${MKLDNN} ${MKLDNN_LIBRARIES} ${ARMCOMPUTE_LIBRARIES} ${OPENBLAS_LIBRARIES} ${BLAS_LIBRARIES} ${CPU_FEATURES})
|
||||||
|
|
||||||
if ("${SD_ALL_OPS}" AND "${SD_BUILD_MINIFIER}")
|
if ("${SD_ALL_OPS}" AND "${SD_BUILD_MINIFIER}")
|
||||||
message(STATUS "Building minifier...")
|
message(STATUS "Building minifier...")
|
||||||
add_executable(minifier ../minifier/minifier.cpp ../minifier/graphopt.cpp)
|
add_executable(minifier ../minifier/minifier.cpp ../minifier/graphopt.cpp)
|
||||||
target_link_libraries(minifier samediff_obj ${MKLDNN_LIBRARIES} ${OPENBLAS_LIBRARIES} ${MKLDNN} ${BLAS_LIBRARIES} ${CPU_FEATURES})
|
target_link_libraries(minifier samediff_obj ${MKLDNN_LIBRARIES} ${ARMCOMPUTE_LIBRARIES} ${OPENBLAS_LIBRARIES} ${MKLDNN} ${BLAS_LIBRARIES} ${CPU_FEATURES})
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" AND "${CMAKE_CXX_COMPILER_VERSION}" VERSION_LESS 4.9)
|
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" AND "${CMAKE_CXX_COMPILER_VERSION}" VERSION_LESS 4.9)
|
||||||
|
|
|
@ -0,0 +1,74 @@
|
||||||
|
################################################################################
|
||||||
|
# Copyright (c) 2020 Konduit K.K.
|
||||||
|
#
|
||||||
|
# This program and the accompanying materials are made available under the
|
||||||
|
# terms of the Apache License, Version 2.0 which is available at
|
||||||
|
# https://www.apache.org/licenses/LICENSE-2.0.
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
# License for the specific language governing permissions and limitations
|
||||||
|
# under the License.
|
||||||
|
#
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
################################################################################
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
### Find ARM COMPUTE LIBRARY STATIC libraries
#
# Inputs (all optional):
#   ARMCOMPUTE_ROOT      - root of an ARM Compute Library checkout / install
#   ARMCOMPUTE_LIBRARIES - pre-set library list; when defined, the library
#                          search below is skipped
# Results:
#   ARMCOMPUTE_FOUND, ARMCOMPUTE_INCLUDE, ARMCOMPUTE_LIBRARIES, HALF_INCLUDE

set(COMPUTE_INCLUDE_DIRS
    /usr/include
    ${ARMCOMPUTE_ROOT}
    ${ARMCOMPUTE_ROOT}/include
    ${ARMCOMPUTE_ROOT}/applications
    ${ARMCOMPUTE_ROOT}/applications/arm_compute
)

set(COMPUTE_LIB_DIRS
    /lib
    /usr/lib
    ${ARMCOMPUTE_ROOT}
    ${ARMCOMPUTE_ROOT}/lib
    ${ARMCOMPUTE_ROOT}/build
)

# Look in the explicit locations first, then fall back to CMake's default search.
# A second find_path on the same variable is a no-op once the first succeeded.
find_path(ARMCOMPUTE_INCLUDE arm_compute/core/CL/ICLKernel.h
    PATHS ${COMPUTE_INCLUDE_DIRS}
    NO_DEFAULT_PATH NO_CMAKE_FIND_ROOT_PATH)

find_path(ARMCOMPUTE_INCLUDE arm_compute/core/CL/ICLKernel.h)

find_path(HALF_INCLUDE half/half.hpp)
find_path(HALF_INCLUDE half/half.hpp
    PATHS ${ARMCOMPUTE_ROOT}/include
    NO_DEFAULT_PATH NO_CMAKE_FIND_ROOT_PATH)
# half.hpp is not part of REQUIRED_VARS below; passing a *-NOTFOUND value to
# include_directories() would abort generation, so only add it when found.
if(HALF_INCLUDE)
    include_directories(SYSTEM ${HALF_INCLUDE})
endif()

# Find the Arm Compute libraries if not already specified
if(NOT DEFINED ARMCOMPUTE_LIBRARIES)

    find_library(ARMCOMPUTE_LIBRARY NAMES arm_compute-static
        PATHS ${COMPUTE_LIB_DIRS}
        PATH_SUFFIXES "Release"
        NO_DEFAULT_PATH NO_CMAKE_FIND_ROOT_PATH)

    find_library(ARMCOMPUTE_CORE_LIBRARY NAMES arm_compute_core-static
        PATHS ${COMPUTE_LIB_DIRS}
        PATH_SUFFIXES "Release"
        NO_DEFAULT_PATH NO_CMAKE_FIND_ROOT_PATH)
    # In case it wasn't there, try a default search (will work in cases where
    # the library has been installed into a standard location)
    find_library(ARMCOMPUTE_LIBRARY NAMES arm_compute-static)
    find_library(ARMCOMPUTE_CORE_LIBRARY NAMES arm_compute_core-static)

    # Publish the list only when BOTH libraries were located. A mixed list such
    # as "X-NOTFOUND;/path/lib.a" does not end in -NOTFOUND, so it would be
    # treated as a valid value by the REQUIRED_VARS check below.
    if(ARMCOMPUTE_LIBRARY AND ARMCOMPUTE_CORE_LIBRARY)
        set(ARMCOMPUTE_LIBRARIES ${ARMCOMPUTE_LIBRARY} ${ARMCOMPUTE_CORE_LIBRARY})
    endif()
endif()

include(FindPackageHandleStandardArgs)

find_package_handle_standard_args(ARMCOMPUTE REQUIRED_VARS ARMCOMPUTE_INCLUDE ARMCOMPUTE_LIBRARIES)
|
||||||
|
|
|
@ -3,6 +3,8 @@
|
||||||
|
|
||||||
#cmakedefine HAVE_MKLDNN
|
#cmakedefine HAVE_MKLDNN
|
||||||
|
|
||||||
|
#cmakedefine HAVE_ARMCOMPUTE
|
||||||
|
|
||||||
#cmakedefine MKLDNN_PATH "@MKLDNN_PATH@"
|
#cmakedefine MKLDNN_PATH "@MKLDNN_PATH@"
|
||||||
|
|
||||||
#cmakedefine HAVE_OPENBLAS
|
#cmakedefine HAVE_OPENBLAS
|
||||||
|
|
|
@ -215,7 +215,9 @@ namespace helpers {
|
||||||
auto maxValue = T(0); //sd::math::nd4j_abs(compoundBuffer[xInitialIndex]);
|
auto maxValue = T(0); //sd::math::nd4j_abs(compoundBuffer[xInitialIndex]);
|
||||||
auto result = -1;
|
auto result = -1;
|
||||||
//auto loop = PRAGMA_THREADS_FOR {
|
//auto loop = PRAGMA_THREADS_FOR {
|
||||||
auto start = column, stop = rowNum, increment = 1;
|
auto start = column;
|
||||||
|
auto stop = rowNum;
|
||||||
|
auto increment = 1;
|
||||||
for (auto rowCounter = start; rowCounter < stop; rowCounter++) {
|
for (auto rowCounter = start; rowCounter < stop; rowCounter++) {
|
||||||
Nd4jLong xPos[] = {rowCounter, column};
|
Nd4jLong xPos[] = {rowCounter, column};
|
||||||
auto xIndex = shape::getOffset(compoundShape, xPos, 0);
|
auto xIndex = shape::getOffset(compoundShape, xPos, 0);
|
||||||
|
|
|
@ -0,0 +1,278 @@
|
||||||
|
/*******************************************************************************
|
||||||
|
* Copyright (c) 2019 Konduit K.K.
|
||||||
|
* This program and the accompanying materials are made available under the
|
||||||
|
* terms of the Apache License, Version 2.0 which is available at
|
||||||
|
* https://www.apache.org/licenses/LICENSE-2.0.
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* License for the specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
|
******************************************************************************/
|
||||||
|
|
||||||
|
// Created by Abdelrauf 2020
|
||||||
|
|
||||||
|
|
||||||
|
#include <ops/declarable/PlatformHelper.h>
|
||||||
|
#include <ops/declarable/OpRegistrator.h>
|
||||||
|
#include <system/platform_boilerplate.h>
|
||||||
|
#include <ops/declarable/helpers/convolutions.h>
|
||||||
|
#include <cstdint>
|
||||||
|
#include <helpers/LoopsCoordsHelper.h>
|
||||||
|
|
||||||
|
#include "armcomputeUtils.h"
|
||||||
|
|
||||||
|
|
||||||
|
namespace sd {
|
||||||
|
namespace ops {
|
||||||
|
namespace platforms {
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// Maps a DataType enumerator onto the corresponding Arm_DataType value.
// Any enumerator not listed below yields Arm_DataType::UNKNOWN.
Arm_DataType getArmType ( const DataType &dType){
    switch (dType) {
        case HALF:     return Arm_DataType::F16;
        case FLOAT32:  return Arm_DataType::F32;
        case DOUBLE:   return Arm_DataType::F64;
        case INT8:     return Arm_DataType::S8;
        case INT16:    return Arm_DataType::S16;
        case INT32:    return Arm_DataType::S32;
        case INT64:    return Arm_DataType::S64;
        case UINT8:    return Arm_DataType::U8;
        case UINT16:   return Arm_DataType::U16;
        case UINT32:   return Arm_DataType::U32;
        case UINT64:   return Arm_DataType::U64;
        case BFLOAT16: return Arm_DataType::BFLOAT16;
        default:       return Arm_DataType::UNKNOWN;
    }
}
|
||||||
|
bool isArmcomputeFriendly(const NDArray& arr) {
|
||||||
|
auto dType = getArmType(arr.dataType());
|
||||||
|
int rank = (int)(arr.rankOf());
|
||||||
|
return dType != Arm_DataType::UNKNOWN &&
|
||||||
|
rank<=arm_compute::MAX_DIMS &&
|
||||||
|
arr.ordering() == 'c' &&
|
||||||
|
arr.ews()==1 &&
|
||||||
|
shape::strideDescendingCAscendingF(arr.shapeInfo()) == true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Builds an Arm_TensorInfo from a raw shape.
//   rank        - number of entries to read from `bases`
//   bases       - dimension sizes; they are copied in reverse order
//   ndArrayType - element type, converted through getArmType()
//   layout      - data layout forwarded to the Arm_TensorInfo constructor
// Dimensions from `rank` up to arm_compute::MAX_DIMS are set to 1.
Arm_TensorInfo getArmTensorInfo(int rank, Nd4jLong* bases,sd::DataType ndArrayType, arm_compute::DataLayout layout) {
    constexpr int kChannels = 1;
    const auto armType = getArmType(ndArrayType);

    Arm_TensorShape armShape;
    armShape.set_num_dimensions(rank);

    // entry `dst` of the arm shape takes entry (rank - 1 - dst) of the input shape
    for (int dst = 0; dst < rank; ++dst) {
        armShape[dst] = static_cast<uint32_t>(bases[rank - 1 - dst]);
    }

    // fill the rest unused with 1
    for (int dst = rank; dst < arm_compute::MAX_DIMS; ++dst) {
        armShape[dst] = 1;
    }

    return Arm_TensorInfo(armShape, kChannels, armType, layout);
}
|
||||||
|
|
||||||
|
// Builds an Arm_TensorInfo describing an existing NDArray, including its strides.
//   arr    - source array; shape and strides are read via shapeOf()/stridesOf()
//   layout - data layout stored on the resulting info
// Shape and strides are copied in reverse order; strides are converted from
// element counts to bytes using arm_compute::data_size_from_type().
Arm_TensorInfo getArmTensorInfo(const NDArray& arr,
                                arm_compute::DataLayout layout) {
    auto dType = getArmType(arr.dataType());

    constexpr int numChannels = 1;
    int rank = (int)(arr.rankOf());
    auto bases = arr.shapeOf();
    auto arrStrides = arr.stridesOf();

    // https://arm-software.github.io/ComputeLibrary/v20.05/_dimensions_8h_source.xhtml
    // note: underhood it is stored as std::array<T, num_max_dimensions> _id;
    // TensorShape is derived from Dimensions<uint32_t>
    // as well as Strides : public Dimensions<uint32_t>
    Arm_TensorShape shape;
    Arm_Strides strides;
    shape.set_num_dimensions(rank);
    strides.set_num_dimensions(rank);
    size_t element_size = arm_compute::data_size_from_type(dType);
    for (int i = 0, j = rank - 1; i < rank; i++, j--) {
        shape[i] = static_cast<uint32_t>(bases[j]);
        strides[i] = static_cast<uint32_t>(arrStrides[j]) * element_size;
    }
    // fill the rest unused with 1
    for (int i = rank; i < arm_compute::MAX_DIMS; i++) {
        shape[i] = 1;
    }

    // Total size in bytes = last stored extent * last stored byte stride.
    // For rank 0 there is no such entry (rank - 1 would wrap around as an
    // unsigned index), so fall back to the size of a single element.
    // The operands are widened first so the product is not computed in 32 bits.
    size_t total_size = element_size;
    if (rank > 0) {
        const size_t size_ind = static_cast<size_t>(rank - 1);
        total_size = static_cast<size_t>(shape[size_ind]) * static_cast<size_t>(strides[size_ind]);
    }

    Arm_TensorInfo info;
    info.init(shape, numChannels, dType, strides, 0, total_size);
    info.set_data_layout(layout);

    return info;
}
|
||||||
|
|
||||||
|
// Wraps the buffer of `arr` in an Arm_Tensor without copying it.
// Notes on importing memory into an arm_compute tensor:
//  - Ownership of the backing memory is not transferred to the tensor itself.
//  - The tensor mustn't be memory managed.
//  - Padding requirements should be accounted by the client code.
//    In other words, if padding is required by the tensor after the function
//    configuration step, then the imported backing memory should account for it.
//    Padding can be checked through the TensorInfo::padding() interface.
Arm_Tensor getArmTensor(const NDArray& arr, arm_compute::DataLayout layout) {
    Arm_Tensor wrapped;

    // describe the array, then attach its existing buffer as backing memory
    auto description = getArmTensorInfo(arr, layout);
    auto allocator = wrapped.allocator();
    allocator->init(description);
    allocator->import_memory((void*)arr.buffer());

    return wrapped;
}
|
||||||
|
|
||||||
|
void copyFromTensor(const Arm_Tensor& inTensor, NDArray& output) {
|
||||||
|
//only for C order
|
||||||
|
//only for C order
|
||||||
|
if (output.ordering() != 'c') return;
|
||||||
|
auto shapeInfo = output.shapeInfo();
|
||||||
|
auto bases = &(shapeInfo[1]);
|
||||||
|
Nd4jLong rank = shapeInfo[0];
|
||||||
|
auto strides = output.stridesOf();
|
||||||
|
int width = bases[rank - 1];
|
||||||
|
uint8_t* outputBuffer = (uint8_t*)output.buffer();
|
||||||
|
size_t offset = 0;
|
||||||
|
arm_compute::Window window;
|
||||||
|
arm_compute::Iterator tensor_it(&inTensor, window);
|
||||||
|
|
||||||
|
int element_size = inTensor.info()->element_size();
|
||||||
|
window.use_tensor_dimensions(inTensor.info()->tensor_shape(), /* first_dimension =*/arm_compute::Window::DimY);
|
||||||
|
|
||||||
|
// if (output.ews() == 1) {
|
||||||
|
auto copySize = width * element_size;
|
||||||
|
auto dest = outputBuffer;
|
||||||
|
arm_compute::execute_window_loop(window, [&](const arm_compute::Coordinates& id)
|
||||||
|
{
|
||||||
|
auto src = tensor_it.ptr();
|
||||||
|
memcpy(dest, src, copySize);
|
||||||
|
dest += copySize;
|
||||||
|
},
|
||||||
|
tensor_it);
|
||||||
|
// }
|
||||||
|
// else {
|
||||||
|
// Nd4jLong coords[MAX_RANK] = {};
|
||||||
|
// if(strides[rank-1]!=1){
|
||||||
|
// throw std::runtime_error( "not implemented for subarrays whose last stride is not 1");
|
||||||
|
// //TODO: implement to work with all subarrays properly
|
||||||
|
// }
|
||||||
|
// arm_compute::execute_window_loop(window, [&](const arm_compute::Coordinates& id)
|
||||||
|
// {
|
||||||
|
// auto src = tensor_it.ptr();
|
||||||
|
// auto dest = outputBuffer + offset * element_size;
|
||||||
|
// memcpy(dest, src, width * element_size);
|
||||||
|
// offset = sd::inc_coords(bases, strides, coords, offset, rank, 1);
|
||||||
|
// },
|
||||||
|
// tensor_it);
|
||||||
|
// }
|
||||||
|
}
|
||||||
|
|
||||||
|
void copyToTensor(const NDArray& input, Arm_Tensor& outTensor) {
|
||||||
|
//only for C order
|
||||||
|
if (input.ordering() != 'c') return;
|
||||||
|
auto shapeInfo = input.shapeInfo();
|
||||||
|
auto bases = &(shapeInfo[1]);
|
||||||
|
Nd4jLong rank = shapeInfo[0];
|
||||||
|
auto strides = input.stridesOf();
|
||||||
|
uint8_t *inputBuffer = (uint8_t*)input.buffer();
|
||||||
|
int width = bases[rank - 1];
|
||||||
|
size_t offset = 0;
|
||||||
|
arm_compute::Window window;
|
||||||
|
arm_compute::Iterator tensor_it(&outTensor, window);
|
||||||
|
int element_size = outTensor.info()->element_size();
|
||||||
|
|
||||||
|
window.use_tensor_dimensions(outTensor.info()->tensor_shape(), /* first_dimension =*/arm_compute::Window::DimY);
|
||||||
|
|
||||||
|
// if (input.ews() == 1) {
|
||||||
|
|
||||||
|
auto copySize = width * element_size;
|
||||||
|
auto src = inputBuffer;
|
||||||
|
arm_compute::execute_window_loop(window, [&](const arm_compute::Coordinates& id)
|
||||||
|
{
|
||||||
|
auto dest = tensor_it.ptr();
|
||||||
|
memcpy(dest,src, copySize);
|
||||||
|
src += copySize;
|
||||||
|
},
|
||||||
|
tensor_it);
|
||||||
|
// }
|
||||||
|
// else {
|
||||||
|
// Nd4jLong coords[MAX_RANK] = {};
|
||||||
|
// if(strides[rank-1]!=1){
|
||||||
|
// throw std::runtime_error( "not implemented for subarrays whose last stride is not 1");
|
||||||
|
// //TODO: implement to work with all subarrays properly
|
||||||
|
// }
|
||||||
|
// arm_compute::execute_window_loop(window, [&](const arm_compute::Coordinates& id)
|
||||||
|
// {
|
||||||
|
// auto dest = tensor_it.ptr();
|
||||||
|
// auto src = inputBuffer + offset * element_size;
|
||||||
|
// offset = sd::inc_coords(bases, strides, coords, offset, rank, 1);
|
||||||
|
// },
|
||||||
|
// tensor_it);
|
||||||
|
// }
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// armcompute should be built with debug option
|
||||||
|
void print_tensor(Arm_ITensor& tensor, const char* msg) {
|
||||||
|
auto info = tensor.info();
|
||||||
|
auto padding = info->padding();
|
||||||
|
std::cout << msg << "\ntotal: " << info->total_size() << "\n";
|
||||||
|
|
||||||
|
for (int i = 0; i < arm_compute::MAX_DIMS; i++) {
|
||||||
|
std::cout << info->dimension(i) << ",";
|
||||||
|
}
|
||||||
|
std::cout << std::endl;
|
||||||
|
for (int i = 0; i < arm_compute::MAX_DIMS; i++) {
|
||||||
|
std::cout << info->strides_in_bytes()[i] << ",";
|
||||||
|
}
|
||||||
|
std::cout << "\npadding: l " << padding.left << ", r " << padding.right
|
||||||
|
<< ", t " << padding.top << ", b " << padding.bottom << std::endl;
|
||||||
|
|
||||||
|
#ifdef ARM_COMPUTE_ASSERTS_ENABLED
|
||||||
|
//note it did not print correctly fro NHWC
|
||||||
|
std::cout << msg << ":\n";
|
||||||
|
tensor.print(std::cout);
|
||||||
|
std::cout << std::endl;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,133 @@
|
||||||
|
/*******************************************************************************
|
||||||
|
* Copyright (c) 2019 Konduit K.K.
|
||||||
|
* This program and the accompanying materials are made available under the
|
||||||
|
* terms of the Apache License, Version 2.0 which is available at
|
||||||
|
* https://www.apache.org/licenses/LICENSE-2.0.
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* License for the specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
|
******************************************************************************/
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef DEV_TESTSARMCOMPUTEUTILS_H
|
||||||
|
#define DEV_TESTSARMCOMPUTEUTILS_H
|
||||||
|
|
||||||
|
|
||||||
|
#include <legacy/NativeOps.h>
|
||||||
|
#include <array/NDArray.h>
|
||||||
|
#include <graph/Context.h>
|
||||||
|
#include <ops/declarable/PlatformHelper.h>
|
||||||
|
#include <system/platform_boilerplate.h>
|
||||||
|
#include <arm_compute/runtime/NEON/NEFunctions.h>
|
||||||
|
#include <arm_compute/core/Types.h>
|
||||||
|
#include <arm_compute/core/TensorInfo.h>
|
||||||
|
#include <arm_compute/core/TensorShape.h>
|
||||||
|
#include <arm_compute/core/Strides.h>
|
||||||
|
#include <arm_compute/core/Helpers.h>
|
||||||
|
#include <arm_compute/core/ITensor.h>
|
||||||
|
#include <arm_compute/core/Types.h>
|
||||||
|
#include <arm_compute/core/Validate.h>
|
||||||
|
#include <arm_compute/core/Window.h>
|
||||||
|
#include <arm_compute/runtime/Tensor.h>
|
||||||
|
#include <arm_compute/runtime/TensorAllocator.h>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
using namespace samediff;
|
||||||
|
|
||||||
|
|
||||||
|
namespace sd {
|
||||||
|
namespace ops {
|
||||||
|
namespace platforms {
|
||||||
|
|
||||||
|
using Arm_DataType = arm_compute::DataType;
|
||||||
|
using Arm_Tensor = arm_compute::Tensor;
|
||||||
|
using Arm_ITensor = arm_compute::ITensor;
|
||||||
|
using Arm_TensorInfo = arm_compute::TensorInfo;
|
||||||
|
using Arm_TensorShape = arm_compute::TensorShape;
|
||||||
|
using Arm_Strides = arm_compute::Strides;
|
||||||
|
/**
|
||||||
|
* Here we actually declare our platform helpers
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
DECLARE_PLATFORM(maxpool2d, ENGINE_CPU);
|
||||||
|
|
||||||
|
DECLARE_PLATFORM(avgpool2d, ENGINE_CPU);
|
||||||
|
|
||||||
|
//utils
|
||||||
|
Arm_DataType getArmType(const sd::DataType& dType);
|
||||||
|
|
||||||
|
Arm_TensorInfo getArmTensorInfo(int rank, Nd4jLong* bases, sd::DataType ndArrayType, arm_compute::DataLayout layout = arm_compute::DataLayout::UNKNOWN);
|
||||||
|
|
||||||
|
Arm_TensorInfo getArmTensorInfo(const NDArray& arr, arm_compute::DataLayout layout = arm_compute::DataLayout::UNKNOWN);
|
||||||
|
|
||||||
|
Arm_Tensor getArmTensor(const NDArray& arr, arm_compute::DataLayout layout = arm_compute::DataLayout::UNKNOWN);
|
||||||
|
|
||||||
|
void copyFromTensor(const Arm_Tensor& inTensor, NDArray& output);
|
||||||
|
void copyToTensor(const NDArray& input, Arm_Tensor& outTensor);
|
||||||
|
void print_tensor(Arm_ITensor& tensor, const char* msg);
|
||||||
|
bool isArmcomputeFriendly(const NDArray& arr);
|
||||||
|
|
||||||
|
|
||||||
|
template<typename F>
|
||||||
|
class ArmFunction {
|
||||||
|
public:
|
||||||
|
|
||||||
|
template<typename ...Args>
|
||||||
|
void configure(NDArray *input , NDArray *output, arm_compute::DataLayout layout, Args&& ...args) {
|
||||||
|
|
||||||
|
auto inInfo = getArmTensorInfo(*input, layout);
|
||||||
|
auto outInfo = getArmTensorInfo(*output, layout);
|
||||||
|
in.allocator()->init(inInfo);
|
||||||
|
out.allocator()->init(outInfo);
|
||||||
|
armFunction.configure(&in,&out,std::forward<Args>(args) ...);
|
||||||
|
if (in.info()->has_padding()) {
|
||||||
|
//allocate and copy
|
||||||
|
in.allocator()->allocate();
|
||||||
|
//copy
|
||||||
|
copyToTensor(*input, in);
|
||||||
|
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
//import buffer
|
||||||
|
void* buff = input->buffer();
|
||||||
|
in.allocator()->import_memory(buff);
|
||||||
|
}
|
||||||
|
if (out.info()->has_padding()) {
|
||||||
|
//store pointer to our array to copy after run
|
||||||
|
out.allocator()->allocate();
|
||||||
|
outNd = output;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
//import
|
||||||
|
void* buff = output->buffer();
|
||||||
|
out.allocator()->import_memory(buff);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
void run() {
|
||||||
|
armFunction.run();
|
||||||
|
if (outNd) {
|
||||||
|
copyFromTensor(out, *outNd);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
Arm_Tensor in;
|
||||||
|
Arm_Tensor out;
|
||||||
|
NDArray *outNd=nullptr;
|
||||||
|
F armFunction{};
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#endif //DEV_TESTSARMCOMPUTEUTILS_H
|
|
@ -0,0 +1,106 @@
|
||||||
|
/*******************************************************************************
|
||||||
|
* Copyright (c) 2019 Konduit K.K.
|
||||||
|
* This program and the accompanying materials are made available under the
|
||||||
|
* terms of the Apache License, Version 2.0 which is available at
|
||||||
|
* https://www.apache.org/licenses/LICENSE-2.0.
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* License for the specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
|
******************************************************************************/
|
||||||
|
|
||||||
|
// Created by Abdelrauf (rauf@konduit.ai) 2020
|
||||||
|
|
||||||
|
#include <ops/declarable/PlatformHelper.h>
|
||||||
|
#include <ops/declarable/OpRegistrator.h>
|
||||||
|
#include <system/platform_boilerplate.h>
|
||||||
|
#include <ops/declarable/helpers/convolutions.h>
|
||||||
|
|
||||||
|
|
||||||
|
#include "armcomputeUtils.h"
|
||||||
|
|
||||||
|
|
||||||
|
namespace sd {
|
||||||
|
namespace ops {
|
||||||
|
namespace platforms {
|
||||||
|
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////
|
||||||
|
// ARM Compute implementation of avgpool2d.
// Reads the pooling parameters from the integer arguments, derives the four
// individual paddings, and runs arm_compute::NEPoolingLayer (AVG) through
// ArmFunction on input 0 / output 0.
PLATFORM_IMPL(avgpool2d, ENGINE_CPU) {

    auto input = INPUT_VARIABLE(0);
    auto output = OUTPUT_VARIABLE(0);

    // 0,1 - kernel Height/Width; 2,3 - stride Height/Width; 4,5 - pad Height/Width; 6,7 - dilation Height/Width; 8 - same mode;

    const auto kH = INT_ARG(0);
    const auto kW = INT_ARG(1);
    const auto sH = INT_ARG(2);
    const auto sW = INT_ARG(3);
    auto pH = INT_ARG(4);
    auto pW = INT_ARG(5);
    const auto dH = INT_ARG(6);
    const auto dW = INT_ARG(7);
    const auto paddingMode = INT_ARG(8);
    const auto extraParam0 = INT_ARG(9);
    const int isNCHW = block.getIArguments()->size() > 10 ? !INT_ARG(10) : 1; // INT_ARG(10): 0-NCHW, 1-NHWC

    REQUIRE_TRUE(input->rankOf() == 4, 0, "AVGPOOL2D ARMCOMPUTE op: input should have rank of 4, but got %i instead", input->rankOf());
    REQUIRE_TRUE(dH != 0 && dW != 0, 0, "AVGPOOL2D ARMCOMPUTE op: dilation must not be zero, but got instead {%i, %i}", dH, dW);

    bool exclude_padding = (extraParam0 == 0);

    auto dataLayout = isNCHW ? arm_compute::DataLayout::NCHW : arm_compute::DataLayout::NHWC;

    // Calculate individual paddings
    unsigned int pad_left, pad_top, pad_right, pad_bottom;
    int bS, iC, iH, iW, oC, oH, oW; // batch size, input channels, input height/width, output channels, output height/width;
    int indIOioC, indIiH, indWoC, indWiC, indWkH, indOoH; // corresponding indexes
    ConvolutionUtils::getSizesAndIndexesConv2d(isNCHW, 0, *input, *output, bS, iC, iH, iW, oC, oH, oW, indIOioC, indIiH, indWiC, indWoC, indWkH, indOoH);

    if(paddingMode){
        ConvolutionUtils::calcPadding2D(pH, pW, oH, oW, iH, iW, kH, kW, sH, sW, dH, dW);
    }
    pad_left = pW;
    pad_top = pH;
    // The trailing paddings are computed as signed values: they come out
    // negative when the last window stops short of the input edge
    // (e.g. iW=5, kW=2, sW=2, pW=0 gives -1). Assigning that to an unsigned
    // would wrap around to a huge padding, so clamp at zero.
    const auto rawPadRight = (oW - 1) * sW - iW + kW - pW;
    const auto rawPadBottom = (oH - 1) * sH - iH + kH - pH;
    pad_right = rawPadRight > 0 ? static_cast<unsigned int>(rawPadRight) : 0u;
    pad_bottom = rawPadBottom > 0 ? static_cast<unsigned int>(rawPadBottom) : 0u;

#if 0
    nd4j_printf("avgpool kH = %d, kW = %d, sH = %d, sW = %d , pH = %d , pW = %d, dH = %d, dW = %d, paddingMode = %d , isNCHW %d exclude pad %d \n" , kH , kW , sH , sW , pH
         , pW , dH , dW , paddingMode,isNCHW?1:0 ,exclude_padding?1:0);
#endif
    auto poolPad = arm_compute::PadStrideInfo(sW, sH, pad_left,pad_right, pad_top, pad_bottom, arm_compute::DimensionRoundingType::FLOOR);
    auto poolInfo = arm_compute::PoolingLayerInfo(arm_compute::PoolingType::AVG, arm_compute::Size2D(kW, kH), dataLayout, poolPad, exclude_padding);
    ArmFunction<arm_compute::NEPoolingLayer> pool;
    pool.configure(input,output, dataLayout, poolInfo);

    pool.run(); // run function

    return Status::OK();
}
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////
|
||||||
|
// Decides whether the ARM Compute avgpool2d implementation may be used.
// Requires dilation 1x1, input and output accepted by isArmcomputeFriendly(),
// and both mapped to Arm_DataType::F32.
PLATFORM_CHECK(avgpool2d, ENGINE_CPU) {
    auto input = INPUT_VARIABLE(0);
    auto output = OUTPUT_VARIABLE(0);
    const int dH = INT_ARG(6);
    const int dW = INT_ARG(7);

    // dilated pooling is not handled by this helper
    if (dH != 1 || dW != 1)
        return false;

    if (!isArmcomputeFriendly(*input) || !isArmcomputeFriendly(*output))
        return false;

    // Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
    // (only F32 is accepted here)
    const bool inputIsF32 = getArmType(input->dataType()) == Arm_DataType::F32;
    const bool outputIsF32 = getArmType(output->dataType()) == Arm_DataType::F32;
    return inputIsF32 && outputIsF32;
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,106 @@
|
||||||
|
/*******************************************************************************
|
||||||
|
* Copyright (c) 2019 Konduit K.K.
|
||||||
|
* This program and the accompanying materials are made available under the
|
||||||
|
* terms of the Apache License, Version 2.0 which is available at
|
||||||
|
* https://www.apache.org/licenses/LICENSE-2.0.
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* License for the specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
|
******************************************************************************/
|
||||||
|
|
||||||
|
// Created by Abdelrauf 2020
|
||||||
|
|
||||||
|
|
||||||
|
#include <ops/declarable/PlatformHelper.h>
|
||||||
|
#include <ops/declarable/OpRegistrator.h>
|
||||||
|
#include <system/platform_boilerplate.h>
|
||||||
|
#include <ops/declarable/helpers/convolutions.h>
|
||||||
|
|
||||||
|
|
||||||
|
#include "armcomputeUtils.h"
|
||||||
|
|
||||||
|
|
||||||
|
namespace sd {
|
||||||
|
namespace ops {
|
||||||
|
namespace platforms {
|
||||||
|
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////
|
||||||
|
PLATFORM_IMPL(maxpool2d, ENGINE_CPU) {
    // maxpool2d executed through arm_compute::NEPoolingLayer.
    // Reads the input/output arrays and the integer op arguments, translates them into an
    // ARM Compute pooling description, runs the function and returns Status::OK().

    auto input = INPUT_VARIABLE(0);
    auto output = OUTPUT_VARIABLE(0);

    REQUIRE_TRUE(input->rankOf() == 4, 0, "MAXPOOL2D ARMCOMPUTE OP: input array should have rank of 4, but got %i instead", input->rankOf());

    // 0,1 - kernel Height/Width; 2,3 - stride Height/Width; 4,5 - pad Height/Width; 6,7 - dilation Height/Width; 8 - same mode;
    const int kH = INT_ARG(0);
    const int kW = INT_ARG(1);
    const int sH = INT_ARG(2);
    const int sW = INT_ARG(3);
    int pH = INT_ARG(4);    // may be recomputed below when paddingMode is set
    int pW = INT_ARG(5);
    const int dH = INT_ARG(6);
    const int dW = INT_ARG(7);
    const int paddingMode = INT_ARG(8);
    // integer argument 9 (extraParam0) is not read by this helper
    const int isNCHW = block.getIArguments()->size() > 10 ? !INT_ARG(10) : 1;       // INT_ARG(10): 1-NHWC, 0-NCHW

    REQUIRE_TRUE(dH != 0 && dW != 0, 0, "MAXPOOL2D ARMCOMPUTE OP: dilation must not be zero, but got instead {%i, %i}", dH, dW);

    auto dataLayout = isNCHW ? arm_compute::DataLayout::NCHW : arm_compute::DataLayout::NHWC;

    // Calculate individual paddings
    unsigned int pad_left, pad_top, pad_right, pad_bottom;
    int bS, iC, iH, iW, oC, oH, oW;                             // batch size, input channels, input height/width, output channels, output height/width;
    int indIOioC, indIiH, indWoC, indWiC, indWkH, indOoH;       // corresponding indexes
    ConvolutionUtils::getSizesAndIndexesConv2d(isNCHW, 0, *input, *output, bS, iC, iH, iW, oC, oH, oW, indIOioC, indIiH, indWiC, indWoC, indWkH, indOoH);

    if (paddingMode) {
        ConvolutionUtils::calcPadding2D(pH, pW, oH, oW, iH, iW, kH, kW, sH, sW, dH, dW);
    }

    pad_left = pW;
    pad_top  = pH;

    // The trailing padding is whatever the last window needs beyond the input edge.
    // The expression is negative when the last window ends before the edge
    // (e.g. iW=5, kW=2, sW=2, pW=0, oW=2 gives -1); stored directly in an unsigned int
    // that would wrap to a huge padding, so negative values are clamped to zero.
    const int trailingW = (oW - 1) * sW - iW + kW - pW;
    const int trailingH = (oH - 1) * sH - iH + kH - pH;
    pad_right  = trailingW > 0 ? static_cast<unsigned int>(trailingW) : 0u;
    pad_bottom = trailingH > 0 ? static_cast<unsigned int>(trailingH) : 0u;

#if 0
    nd4j_printf("maxpool kH = %d, kW = %d, sH = %d, sW = %d  , pH = %d  , pW = %d, dH = %d, dW = %d, paddingMode = %d , isNCHW %d \n" , kH , kW , sH , sW  , pH
     , pW , dH , dW , paddingMode, isNCHW ? 1 : 0);
#endif

    auto poolPad = arm_compute::PadStrideInfo(sW, sH, pad_left, pad_right, pad_top, pad_bottom, arm_compute::DimensionRoundingType::FLOOR);
    auto poolInfo = arm_compute::PoolingLayerInfo(arm_compute::PoolingType::MAX, arm_compute::Size2D(kW, kH), dataLayout, poolPad);
    ArmFunction<arm_compute::NEPoolingLayer> pool;

    pool.configure(input, output, dataLayout, poolInfo);

    pool.run(); // run function

    return Status::OK();
}
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////
|
||||||
|
PLATFORM_CHECK(maxpool2d, ENGINE_CPU) {
    // Reports whether this ARM Compute helper accepts the current maxpool2d call.
    auto inArr  = INPUT_VARIABLE(0);
    auto outArr = OUTPUT_VARIABLE(0);

    // Dilation (integer arguments 6 and 7) has to be exactly 1 in both directions
    const int dilH = INT_ARG(6);
    const int dilW = INT_ARG(7);

    // Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 -- only F32 is accepted below
    auto inType  = getArmType(inArr->dataType());
    auto outType = getArmType(outArr->dataType());

    if (dilH != 1 || dilW != 1)
        return false;

    // Both arrays have to pass the isArmcomputeFriendly test
    if (!isArmcomputeFriendly(*inArr) || !isArmcomputeFriendly(*outArr))
        return false;

    return (inType == Arm_DataType::F32) && (outType == Arm_DataType::F32);
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,185 @@
|
||||||
|
#!/bin/bash
# Cross-build driver: fetches a Raspberry Pi toolchain, OpenBLAS and the ARM Compute
# Library, then configures and builds libnd4j against them.

# --- build targets -----------------------------------------------------------
TARGET=armv7-a              # passed to CMake as SD_ARCH
BLAS_TARGET_NAME=ARMV7      # passed to the OpenBLAS make as TARGET
ARMCOMPUTE_TARGET=armv7a    # passed to the ARM Compute scons build as arch
ARMCOMPUTE_DEBUG=1          # passed to the ARM Compute scons build as debug
LIBND4J_BUILD_MODE=Release  # passed to CMake as CMAKE_BUILD_TYPE

# --- locate this script, following symlinks ----------------------------------
#https://stackoverflow.com/questions/59895/how-to-get-the-source-directory-of-a-bash-script-from-within-the-script-itself
SOURCE="${BASH_SOURCE[0]}"
while [ -h "$SOURCE" ]; do # resolve $SOURCE until the file is no longer a symlink
    DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )"
    SOURCE="$(readlink "$SOURCE")"
    [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" # if $SOURCE was a relative symlink, we need to resolve it relative to the path where the symlink file was located
done
BASE_DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )"

# Every path below hangs off BASE_DIR; an empty value would turn them into /helper_bin etc.
if [ -z "${BASE_DIR}" ]; then
    echo "BUILDER:::: cannot determine the script directory" >&2
    exit -2
fi

CMAKE=cmake #/snap/bin/cmake

# --- helper tools ------------------------------------------------------------
CROSS_COMPILER_URL=https://sourceforge.net/projects/raspberry-pi-cross-compilers/files/Raspberry%20Pi%20GCC%20Cross-Compiler%20Toolchains/Buster/GCC%208.3.0/Raspberry%20Pi%203A%2B%2C%203B%2B%2C%204/cross-gcc-8.3.0-pi_3%2B.tar.gz/download
CROSS_COMPILER_DIR=${BASE_DIR}/helper_bin/cross_compiler

SCONS_LOCAL_URL=http://prdownloads.sourceforge.net/scons/scons-local-3.1.1.tar.gz
SCONS_LOCAL_DIR=${BASE_DIR}/helper_bin/scons_local

# --- third-party sources -----------------------------------------------------
THIRD_PARTY=${BASE_DIR}/third_party_libs

ARMCOMPUTE_GIT_URL=https://github.com/ARM-software/ComputeLibrary.git
ARMCOMPUTE_TAG=v20.05
ARMCOMPUTE_DIR=${THIRD_PARTY}/arm_compute_dir

OPENBLAS_GIT_URL="https://github.com/xianyi/OpenBLAS.git"
OPENBLAS_DIR=${THIRD_PARTY}/OpenBLAS

# --- libnd4j -----------------------------------------------------------------
LIBND4J_SRC_DIR=${BASE_DIR}
LIBND4J_BUILD_DIR=${BASE_DIR}/build_pi

#for some downloads (extra tar argument handed to download_extract)
XRTACT_STRIP="--strip-components=1"

HAS_ARMCOMPUTE=1    # passed to CMake as HELPERS_armcompute

# Quoted so that a checkout path containing spaces still works
mkdir -p "${BASE_DIR}/helper_bin/"
mkdir -p "${THIRD_PARTY}"

#change directory to base
cd "${BASE_DIR}" || exit -2
|
||||||
|
|
||||||
|
function message {
    # Print one progress line carrying the builder prefix followed by all arguments.
    # "$*" joins the arguments with the first IFS character (a space unless IFS is changed).
    echo "BUILDER:::: $*"
}
|
||||||
|
|
||||||
|
|
||||||
|
function check_requirements {
    # Walk over every path given as an argument and terminate the script
    # at the first one that does not exist.
    local wanted
    for wanted; do
        [ -e "${wanted}" ] && continue
        message "missing: ${wanted}"
        exit -2
    done
}
|
||||||
|
|
||||||
|
function download_extract {
    # Download a .tar.gz archive (unless already cached) and unpack it.
    #   $1 - archive URL
    #   $2 - destination directory; the archive is cached beside it as "${2}_file"
    #   $3 - optional extra argument for tar (e.g. --strip-components=1)
    # Terminates the script when the download or the extraction fails.
    local url="${1}"
    local dest_dir="${2}"
    local archive="${2}_file"

    if [ ! -f "${archive}" ]; then
        message "download"
        if ! wget --quiet --show-progress -O "${archive}" "${url}"; then
            # wget -O leaves an output file behind even when the transfer fails;
            # remove it so the next run downloads again instead of reusing it
            rm -f "${archive}"
            message "download failed: ${url}"
            exit -2
        fi
    fi

    message "extract"
    #extract
    mkdir -p "${dest_dir}"
    local tar_args=(-xzf "${archive}" "--directory=${dest_dir}")
    if [ -n "${3}" ]; then
        tar_args+=("${3}")
    fi
    message "tar ${tar_args[*]}"
    if ! tar "${tar_args[@]}"; then
        # Callers only invoke this function while the destination is missing, so a
        # half-extracted directory (or a broken cached archive) would be trusted on the next run
        rm -rf "${dest_dir}" "${archive}"
        message "extract failed: ${archive}"
        exit -2
    fi

    check_requirements "${dest_dir}"
}
|
||||||
|
|
||||||
|
function git_check {
    # Clone a repository and optionally check out a tag or branch.
    #   $1 - repository URL
    #   $2 - directory to clone into
    #   $3 - optional tag or branch to check out
    # Terminates the script when the clone, the directory change or the checkout fails.
    message "git clone --quiet ${1} ${2}"
    if ! git clone --quiet "${1}" "${2}"; then
        message "clone failed: ${1}"
        exit -2
    fi

    if [ -n "${3}" ]; then
        # Without this guard a failed cd would let the checkout run in the current directory
        cd "${2}" || { message "cannot enter ${2}"; exit -2; }
        message "git checkout ${3}"
        if ! git checkout "${3}"; then
            message "checkout of ${3} failed; remove ${2} before retrying"
            exit -2
        fi
        cd "${BASE_DIR}"
    fi

    check_requirements "${2}"
}
|
||||||
|
|
||||||
|
|
||||||
|
# Fetch the Raspberry Pi cross toolchain once; an existing directory is reused as is.
if [ ! -d "${CROSS_COMPILER_DIR}" ]; then
    #out file
    message "download CROSS_COMPILER"
    download_extract "${CROSS_COMPILER_URL}" "${CROSS_COMPILER_DIR}" "${XRTACT_STRIP}"
fi

#useful exports
# Toolchain layout
export PI_FOLDER="${CROSS_COMPILER_DIR}"
export RPI_BIN="${PI_FOLDER}/bin/arm-linux-gnueabihf"       # common prefix of every cross tool
export PI_SYS_ROOT="${PI_FOLDER}/arm-linux-gnueabihf/libc"
export LD_LIBRARY_PATH="${PI_FOLDER}/lib:$LD_LIBRARY_PATH"

# Cross tools exported for the builds that follow
export CC="${RPI_BIN}-gcc"
export FC="${RPI_BIN}-gfortran"
export CXX="${RPI_BIN}-g++"
export CPP="${RPI_BIN}-cpp"
export RANLIB="${RPI_BIN}-gcc-ranlib"
export LD="${RPI_BIN}-ld"
export AR="${RPI_BIN}-ar"
|
||||||
|
|
||||||
|
|
||||||
|
#lets build OpenBlas
if [ ! -d "${OPENBLAS_DIR}" ]; then
    message "download OpenBLAS"
    git_check "${OPENBLAS_GIT_URL}" "${OPENBLAS_DIR}"
fi

# Build and install only while the installed shared library is missing
if [ ! -f "${THIRD_PARTY}/lib/libopenblas.so" ]; then
    message "build and install OpenBLAS"
    cd "${OPENBLAS_DIR}" || exit -2

    # Kept as an array so the LDFLAGS value with its embedded space stays one argument without eval
    openblas_make=(make "TARGET=${BLAS_TARGET_NAME}" HOSTCC=gcc "CC=${CC}" USE_THREAD=0 NOFORTRAN=1
                   "CFLAGS=--sysroot=${PI_SYS_ROOT}" "LDFLAGS=-L${PI_SYS_ROOT}/../lib/ -lm")
    openblas_log="${THIRD_PARTY}/openblas_build.log"
    message "${openblas_make[*]}"
    # The build output stays off the console, but is kept in a log and the result is checked
    # so that a failed build no longer continues into the install step
    if ! "${openblas_make[@]}" &> "${openblas_log}"; then
        message "OpenBLAS build failed, see ${openblas_log}"
        exit -2
    fi

    message "install it"
    message "make PREFIX=${THIRD_PARTY} install"
    if ! make "PREFIX=${THIRD_PARTY}" install; then
        message "OpenBLAS install failed"
        exit -2
    fi
    cd "${BASE_DIR}"
fi

check_requirements "${THIRD_PARTY}/lib/libopenblas.so"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# scons-local is the build tool used for the ARM Compute Library below
if [ ! -d "${SCONS_LOCAL_DIR}" ]; then
    #out file
    message "download Scons local"
    download_extract "${SCONS_LOCAL_URL}" "${SCONS_LOCAL_DIR}"
fi
check_requirements "${SCONS_LOCAL_DIR}/scons.py"

if [ ! -d "${ARMCOMPUTE_DIR}" ]; then
    message "download ArmCompute Source"
    git_check "${ARMCOMPUTE_GIT_URL}" "${ARMCOMPUTE_DIR}" "tags/${ARMCOMPUTE_TAG}"
fi

#build armcompute
if [ ! -f "${ARMCOMPUTE_DIR}/build/libarm_compute-static.a" ]; then
    message "build arm compute"
    cd "${ARMCOMPUTE_DIR}" || exit -2

    scons_cmd=(python3 "${SCONS_LOCAL_DIR}/scons.py" Werror=1 "-j$(nproc)" "toolchain_prefix=${RPI_BIN}-"
               "debug=${ARMCOMPUTE_DEBUG}" neon=1 opencl=0 extra_cxx_flags=-fPIC os=linux
               build=cross_compile "arch=${ARMCOMPUTE_TARGET}")
    armcompute_log="${THIRD_PARTY}/armcompute_build.log"
    message "CC=gcc CXX=g++ ${scons_cmd[*]}"
    # CC/CXX are overridden for this one command only: the exported values name the cross
    # tools, while the cross toolchain itself is handed to scons through toolchain_prefix.
    # Output goes to a log instead of /dev/null and the exit status is checked.
    if ! CC=gcc CXX=g++ "${scons_cmd[@]}" &> "${armcompute_log}"; then
        message "ARM Compute build failed, see ${armcompute_log}"
        exit -2
    fi
    cd "${BASE_DIR}"
fi

check_requirements "${ARMCOMPUTE_DIR}/build/libarm_compute-static.a" "${ARMCOMPUTE_DIR}/build/libarm_compute_core-static.a"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
message "build cmake for LIBND4J. output: ${LIBND4J_BUILD_DIR}"

TOOLCHAIN="${LIBND4J_SRC_DIR}/cmake/rpi.cmake"

# Held in an array so the generator name and any path containing spaces stay single arguments without eval
cmake_cmd=("${CMAKE}" -G "Unix Makefiles" "-B${LIBND4J_BUILD_DIR}" "-S${LIBND4J_SRC_DIR}"
           "-DCMAKE_BUILD_TYPE=${LIBND4J_BUILD_MODE}" "-DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN}"
           -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON -DSD_ALL_OPS=true -DSD_CPU=true -DSD_LIBRARY_NAME=nd4jcpu
           -DSD_BUILD_TESTS=ON -DSD_ARM_BUILD=true "-DOPENBLAS_PATH=${THIRD_PARTY}" "-DSD_ARCH=${TARGET}"
           "-DARMCOMPUTE_ROOT=${ARMCOMPUTE_DIR}" "-DHELPERS_armcompute=${HAS_ARMCOMPUTE}")
message "${cmake_cmd[*]}"
# Do not start the build when the configure step failed
if ! "${cmake_cmd[@]}"; then
    message "cmake configuration failed"
    exit -2
fi

#build
message "lets build"

cd "${LIBND4J_BUILD_DIR}" || exit -2
# make stays the last command so that its exit status is the script's exit status
make -j "$(nproc)"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -52,14 +52,19 @@ elseif(WIN32)
|
||||||
set(CMAKE_CXX_FLAGS " -fPIC")
|
set(CMAKE_CXX_FLAGS " -fPIC")
|
||||||
endif()
|
endif()
|
||||||
else()
|
else()
|
||||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3")
|
|
||||||
set(CMAKE_CXX_FLAGS " -fPIC")
|
set(CMAKE_CXX_FLAGS " -fPIC")
|
||||||
|
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3")
|
||||||
|
IF(${SD_ARCH} MATCHES "arm*")
|
||||||
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=${SD_ARCH}")
|
||||||
|
else()
|
||||||
|
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3")
|
||||||
|
|
||||||
if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64*")
|
if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64*")
|
||||||
set(CMAKE_CXX_FLAGS " ${CMAKE_CXX_FLAGS} -mcpu=native")
|
set(CMAKE_CXX_FLAGS " ${CMAKE_CXX_FLAGS} -mcpu=native")
|
||||||
else()
|
else()
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native -mtune=native")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native -mtune=native")
|
||||||
endif()
|
endif()
|
||||||
|
endif()
|
||||||
if (SD_CPU AND SD_SANITIZE)
|
if (SD_CPU AND SD_SANITIZE)
|
||||||
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fsanitize=address")
|
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fsanitize=address")
|
||||||
else()
|
else()
|
||||||
|
@ -130,7 +135,7 @@ if (SD_CPU)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
add_executable(runtests ${TEST_SOURCES})
|
add_executable(runtests ${TEST_SOURCES})
|
||||||
target_link_libraries(runtests samediff_obj ${MKLDNN_LIBRARIES} ${OPENBLAS_LIBRARIES} ${MKLDNN} ${BLAS_LIBRARIES} ${CPU_FEATURES} gtest gtest_main)
|
target_link_libraries(runtests samediff_obj ${MKLDNN_LIBRARIES} ${OPENBLAS_LIBRARIES} ${MKLDNN} ${BLAS_LIBRARIES} ${CPU_FEATURES} ${ARMCOMPUTE_LIBRARIES} gtest gtest_main)
|
||||||
elseif(SD_CUDA)
|
elseif(SD_CUDA)
|
||||||
|
|
||||||
add_executable(runtests ${TEST_SOURCES})
|
add_executable(runtests ${TEST_SOURCES})
|
||||||
|
|
|
@ -1113,7 +1113,10 @@ TYPED_TEST(TypedConvolutionTests2, maxpool2d_6) {
|
||||||
ASSERT_EQ(ND4J_STATUS_OK, result.status());
|
ASSERT_EQ(ND4J_STATUS_OK, result.status());
|
||||||
|
|
||||||
auto z = result.at(0);
|
auto z = result.at(0);
|
||||||
|
#if 0
|
||||||
|
exp.printIndexedBuffer("Expected");
|
||||||
|
z->printIndexedBuffer("Z");
|
||||||
|
#endif
|
||||||
ASSERT_TRUE(exp.isSameShape(z));
|
ASSERT_TRUE(exp.isSameShape(z));
|
||||||
ASSERT_TRUE(exp.equalsTo(z));
|
ASSERT_TRUE(exp.equalsTo(z));
|
||||||
|
|
||||||
|
@ -1132,7 +1135,10 @@ TYPED_TEST(TypedConvolutionTests2, maxpool2d_7) {
|
||||||
ASSERT_EQ(ND4J_STATUS_OK, result.status());
|
ASSERT_EQ(ND4J_STATUS_OK, result.status());
|
||||||
|
|
||||||
auto z = result.at(0);
|
auto z = result.at(0);
|
||||||
|
#if 0
|
||||||
|
exp.printIndexedBuffer("Expected");
|
||||||
|
z->printIndexedBuffer("Z");
|
||||||
|
#endif
|
||||||
ASSERT_TRUE(exp.isSameShape(z));
|
ASSERT_TRUE(exp.isSameShape(z));
|
||||||
ASSERT_TRUE(exp.equalsTo(z));
|
ASSERT_TRUE(exp.equalsTo(z));
|
||||||
|
|
||||||
|
@ -1151,7 +1157,10 @@ TYPED_TEST(TypedConvolutionTests2, maxpool2d_8) {
|
||||||
ASSERT_EQ(ND4J_STATUS_OK, result.status());
|
ASSERT_EQ(ND4J_STATUS_OK, result.status());
|
||||||
|
|
||||||
auto z = result.at(0);
|
auto z = result.at(0);
|
||||||
|
#if 0
|
||||||
|
exp.printIndexedBuffer("Expected");
|
||||||
|
z->printIndexedBuffer("Z");
|
||||||
|
#endif
|
||||||
ASSERT_TRUE(exp.isSameShape(z));
|
ASSERT_TRUE(exp.isSameShape(z));
|
||||||
ASSERT_TRUE(exp.equalsTo(z));
|
ASSERT_TRUE(exp.equalsTo(z));
|
||||||
}
|
}
|
||||||
|
@ -1204,7 +1213,10 @@ TYPED_TEST(TypedConvolutionTests2, maxpool2d_10) {
|
||||||
auto* output = results.at(0);
|
auto* output = results.at(0);
|
||||||
|
|
||||||
ASSERT_EQ(Status::OK(), results.status());
|
ASSERT_EQ(Status::OK(), results.status());
|
||||||
|
#if 0
|
||||||
|
expOutput.printIndexedBuffer("expOutput");
|
||||||
|
output->printIndexedBuffer("output");
|
||||||
|
#endif
|
||||||
ASSERT_TRUE(expOutput.isSameShape(output));
|
ASSERT_TRUE(expOutput.isSameShape(output));
|
||||||
ASSERT_TRUE(expOutput.equalsTo(output));
|
ASSERT_TRUE(expOutput.equalsTo(output));
|
||||||
}
|
}
|
||||||
|
|
|
@ -244,7 +244,8 @@ TEST_F(DeclarableOpsTests19, test_threshold_encode_decode) {
|
||||||
#ifdef _RELEASE
|
#ifdef _RELEASE
|
||||||
TEST_F(DeclarableOpsTests19, test_threshold_encode_decode_2) {
|
TEST_F(DeclarableOpsTests19, test_threshold_encode_decode_2) {
|
||||||
// [2,1,135079944,1,1,8192,1,99]
|
// [2,1,135079944,1,1,8192,1,99]
|
||||||
auto initial = NDArrayFactory::create<float>('c', {1, 135079944});
|
constexpr int sizeX= 10*1000*1000;
|
||||||
|
auto initial = NDArrayFactory::create<float>('c', {1, sizeX});
|
||||||
initial = 1.0f;
|
initial = 1.0f;
|
||||||
auto exp = initial.dup();
|
auto exp = initial.dup();
|
||||||
auto neg = initial.like();
|
auto neg = initial.like();
|
||||||
|
@ -254,7 +255,7 @@ TEST_F(DeclarableOpsTests19, test_threshold_encode_decode_2) {
|
||||||
auto enc_result = enc.evaluate({&initial}, {0.5f});
|
auto enc_result = enc.evaluate({&initial}, {0.5f});
|
||||||
auto encoded = enc_result.at(1);
|
auto encoded = enc_result.at(1);
|
||||||
|
|
||||||
ASSERT_EQ(135079944 + 4, encoded->lengthOf());
|
ASSERT_EQ(sizeX + 4, encoded->lengthOf());
|
||||||
ASSERT_NE(exp, initial);
|
ASSERT_NE(exp, initial);
|
||||||
/*
|
/*
|
||||||
for (int e = 0; e < initial.lengthOf(); e++) {
|
for (int e = 0; e < initial.lengthOf(); e++) {
|
||||||
|
|
|
@ -1,93 +0,0 @@
|
||||||
/*******************************************************************************
|
|
||||||
* Copyright (c) 2015-2018 Skymind, Inc.
|
|
||||||
*
|
|
||||||
* This program and the accompanying materials are made available under the
|
|
||||||
* terms of the Apache License, Version 2.0 which is available at
|
|
||||||
* https://www.apache.org/licenses/LICENSE-2.0.
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
|
||||||
* License for the specific language governing permissions and limitations
|
|
||||||
* under the License.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
******************************************************************************/
|
|
||||||
|
|
||||||
//
|
|
||||||
// @author raver119@gmail.com
|
|
||||||
//
|
|
||||||
|
|
||||||
#ifndef LIBND4J_SESSIONLOCALTESTS_H
|
|
||||||
#define LIBND4J_SESSIONLOCALTESTS_H
|
|
||||||
|
|
||||||
#include "testlayers.h"
|
|
||||||
#include <array/NDArrayFactory.h>
|
|
||||||
#include <graph/SessionLocalStorage.h>
|
|
||||||
|
|
||||||
using namespace sd::graph;
|
|
||||||
|
|
||||||
class SessionLocalTests : public testing::Test {
|
|
||||||
public:
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
TEST_F(SessionLocalTests, BasicTests_1) {
|
|
||||||
VariableSpace variableSpace;
|
|
||||||
SessionLocalStorage storage(&variableSpace, nullptr);
|
|
||||||
|
|
||||||
if (omp_get_max_threads() <= 1)
|
|
||||||
return;
|
|
||||||
|
|
||||||
PRAGMA_OMP_PARALLEL_FOR_THREADS(4)
|
|
||||||
for (int e = 0; e < 4; e++) {
|
|
||||||
storage.startSession();
|
|
||||||
}
|
|
||||||
|
|
||||||
ASSERT_EQ(4, storage.numberOfSessions());
|
|
||||||
|
|
||||||
PRAGMA_OMP_PARALLEL_FOR_THREADS(4)
|
|
||||||
for (int e = 0; e < 4; e++) {
|
|
||||||
storage.endSession();
|
|
||||||
}
|
|
||||||
|
|
||||||
ASSERT_EQ(0, storage.numberOfSessions());
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
TEST_F(SessionLocalTests, BasicTests_2) {
|
|
||||||
VariableSpace variableSpace;
|
|
||||||
SessionLocalStorage storage(&variableSpace, nullptr);
|
|
||||||
|
|
||||||
if (omp_get_max_threads() <= 1)
|
|
||||||
return;
|
|
||||||
|
|
||||||
auto alpha = sd::NDArrayFactory::create_<float>('c',{5,5});
|
|
||||||
alpha->assign(0.0);
|
|
||||||
|
|
||||||
variableSpace.putVariable(-1, alpha);
|
|
||||||
|
|
||||||
PRAGMA_OMP_PARALLEL_FOR_THREADS(4)
|
|
||||||
for (int e = 0; e < 4; e++) {
|
|
||||||
storage.startSession();
|
|
||||||
|
|
||||||
auto varSpace = storage.localVariableSpace();
|
|
||||||
|
|
||||||
auto arr = varSpace->getVariable(-1)->getNDArray();
|
|
||||||
arr->applyScalar(sd::scalar::Add, (float) e+1, *arr);
|
|
||||||
}
|
|
||||||
|
|
||||||
float lastValue = 0.0f;
|
|
||||||
for (int e = 1; e <= 4; e++) {
|
|
||||||
auto varSpace = storage.localVariableSpace((Nd4jLong) e);
|
|
||||||
|
|
||||||
auto arr = varSpace->getVariable(-1)->getNDArray();
|
|
||||||
|
|
||||||
//nd4j_printf("Last value: %f; Current value: %f\n", lastValue, arr->e(0));
|
|
||||||
|
|
||||||
ASSERT_NE(lastValue, arr->e<float>(0));
|
|
||||||
lastValue = arr->e<float>(0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif //LIBND4J_SESSIONLOCALTESTS_H
|
|
|
@ -45,6 +45,21 @@ if ("${BUILD_MKLDNN}")
|
||||||
set(MKLDNN dnnl)
|
set(MKLDNN dnnl)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if (${HELPERS_armcompute})
    # ARM Compute Library helpers were requested: locate the package (REQUIRED makes
    # configuration stop when it cannot be found).
    find_package(ARMCOMPUTE REQUIRED)

    if(ARMCOMPUTE_FOUND)
        message("Found ARMCOMPUTE: ${ARMCOMPUTE_LIBRARIES}")

        # Checked further down in this file to collect the armcompute platform sources
        set(HAVE_ARMCOMPUTE 1)

        # Preprocessor definition for ARM Compute NEON
        add_definitions(-DARMCOMPUTENEON_ENABLED)

        # Build our own sources with NEON support as well
        string(APPEND CMAKE_CXX_FLAGS " -mfpu=neon")

        include_directories(${ARMCOMPUTE_INCLUDE})
    endif()
endif()
|
||||||
|
|
||||||
# Download and unpack flatbuffers at configure time
|
# Download and unpack flatbuffers at configure time
|
||||||
configure_file(../../CMakeLists.txt.in flatbuffers-download/CMakeLists.txt)
|
configure_file(../../CMakeLists.txt.in flatbuffers-download/CMakeLists.txt)
|
||||||
execute_process(COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" .
|
execute_process(COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" .
|
||||||
|
@ -217,6 +232,10 @@ if ("${BUILD_MKLDNN}")
|
||||||
file(GLOB_RECURSE CUSTOMOPS_PLATFORM_SOURCES false ../../include/ops/declarable/platform/mkldnn/*.cpp)
|
file(GLOB_RECURSE CUSTOMOPS_PLATFORM_SOURCES false ../../include/ops/declarable/platform/mkldnn/*.cpp)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if(HAVE_ARMCOMPUTE)
    # Collect the ARM Compute platform helper sources.
    # The path has to start with ../../include, exactly like the mkldnn platform glob in
    # this file; ../include does not reach the same include tree from this directory.
    # NOTE(review): the bare `false` is not preceded by a keyword, so file(GLOB_RECURSE)
    # treats it as one more globbing expression; kept to mirror the mkldnn glob.
    file(GLOB_RECURSE CUSTOMOPS_ARMCOMPUTE_SOURCES false
         ../../include/ops/declarable/platform/armcompute/*.cpp
         ../../include/ops/declarable/platform/armcompute/armcomputeUtils.h)
endif()
|
||||||
|
|
||||||
message("CPU backend")
|
message("CPU backend")
|
||||||
add_definitions(-D__CPUBLAS__=true)
|
add_definitions(-D__CPUBLAS__=true)
|
||||||
|
|
||||||
|
@ -276,8 +295,9 @@ endforeach(TMP_PATH)
|
||||||
|
|
||||||
|
|
||||||
add_executable(runtests ${LOOPS_SOURCES} ${LEGACY_SOURCES} ${EXEC_SOURCES} ${HELPERS_SOURCES} ${ARRAY_SOURCES} ${TYPES_SOURCES}
|
add_executable(runtests ${LOOPS_SOURCES} ${LEGACY_SOURCES} ${EXEC_SOURCES} ${HELPERS_SOURCES} ${ARRAY_SOURCES} ${TYPES_SOURCES}
|
||||||
${MEMORY_SOURCES} ${GRAPH_SOURCES} ${CUSTOMOPS_SOURCES} ${EXCEPTIONS_SOURCES} ${INDEXING_SOURCES} ${CUSTOMOPS_PLATFORM_SOURCES} ${CUSTOMOPS_GENERIC_SOURCES}
|
${MEMORY_SOURCES} ${GRAPH_SOURCES} ${CUSTOMOPS_SOURCES} ${EXCEPTIONS_SOURCES} ${INDEXING_SOURCES} ${CUSTOMOPS_PLATFORM_SOURCES}
|
||||||
|
${CUSTOMOPS_ARMCOMPUTE_SOURCES} ${CUSTOMOPS_GENERIC_SOURCES}
|
||||||
${OPS_SOURCES} ${TEST_SOURCES} ${PERF_SOURCES})
|
${OPS_SOURCES} ${TEST_SOURCES} ${PERF_SOURCES})
|
||||||
|
|
||||||
target_link_libraries(runtests gtest ${MKLDNN} gtest_main ${BLAS_LIBRARIES})
|
target_link_libraries(runtests gtest ${MKLDNN} ${ARMCOMPUTE_LIBRARIES} gtest_main ${BLAS_LIBRARIES})
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue