parent
62c6a73f9d
commit
cb6654bebb
|
@ -247,6 +247,7 @@ if(CUDA_BLAS)
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
file(GLOB_RECURSE PERF_SOURCES false ../include/performance/*.cpp ../include/performance/*.h)
|
||||||
file(GLOB_RECURSE EXCEPTIONS_SOURCES false ../include/exceptions/*.cpp ../include/exceptions/*.h)
|
file(GLOB_RECURSE EXCEPTIONS_SOURCES false ../include/exceptions/*.cpp ../include/exceptions/*.h)
|
||||||
file(GLOB_RECURSE EXEC_SOURCES false ../include/execution/impl/*.cpp ../include/execution/*.cu ../include/execution/*.h)
|
file(GLOB_RECURSE EXEC_SOURCES false ../include/execution/impl/*.cpp ../include/execution/*.cu ../include/execution/*.h)
|
||||||
file(GLOB_RECURSE TYPES_SOURCES false ../include/types/*.cpp ../include/types/*.h)
|
file(GLOB_RECURSE TYPES_SOURCES false ../include/types/*.cpp ../include/types/*.h)
|
||||||
|
@ -267,7 +268,7 @@ if(CUDA_BLAS)
|
||||||
../include/cnpy/cnpy.cpp ../include/nd4jmemset.h ../include/nd4jmalloc.h
|
../include/cnpy/cnpy.cpp ../include/nd4jmemset.h ../include/nd4jmalloc.h
|
||||||
cpu/GraphExecutioner.cpp cuda/NDArray.cu cpu/NDArrayFactory.cpp
|
cpu/GraphExecutioner.cpp cuda/NDArray.cu cpu/NDArrayFactory.cpp
|
||||||
Environment.cpp Environment.h ${LOOPS_SOURCES} ${ARRAY_SOURCES} ${TYPES_SOURCES}
|
Environment.cpp Environment.h ${LOOPS_SOURCES} ${ARRAY_SOURCES} ${TYPES_SOURCES}
|
||||||
${MEMORY_SOURCES} ${GRAPH_SOURCES} ${CUSTOMOPS_SOURCES} ${INDEXING_SOURCES} ${EXCEPTIONS_SOURCES} ${OPS_SOURCES})
|
${MEMORY_SOURCES} ${GRAPH_SOURCES} ${CUSTOMOPS_SOURCES} ${INDEXING_SOURCES} ${EXCEPTIONS_SOURCES} ${OPS_SOURCES} ${PERF_SOURCES})
|
||||||
else()
|
else()
|
||||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DBUILD_TESTS=true")
|
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DBUILD_TESTS=true")
|
||||||
|
|
||||||
|
@ -276,7 +277,7 @@ if(CUDA_BLAS)
|
||||||
../include/cnpy/cnpy.cpp ../include/nd4jmemset.h ../include/nd4jmalloc.h
|
../include/cnpy/cnpy.cpp ../include/nd4jmemset.h ../include/nd4jmalloc.h
|
||||||
cpu/GraphExecutioner.cpp cuda/NDArray.cu cpu/NDArrayFactory.cpp
|
cpu/GraphExecutioner.cpp cuda/NDArray.cu cpu/NDArrayFactory.cpp
|
||||||
Environment.cpp Environment.h ${LOOPS_SOURCES} ${ARRAY_SOURCES} ${TYPES_SOURCES}
|
Environment.cpp Environment.h ${LOOPS_SOURCES} ${ARRAY_SOURCES} ${TYPES_SOURCES}
|
||||||
${MEMORY_SOURCES} ${GRAPH_SOURCES} ${CUSTOMOPS_SOURCES} ${INDEXING_SOURCES} ${EXCEPTIONS_SOURCES} ${OPS_SOURCES})
|
${MEMORY_SOURCES} ${GRAPH_SOURCES} ${CUSTOMOPS_SOURCES} ${INDEXING_SOURCES} ${EXCEPTIONS_SOURCES} ${OPS_SOURCES} ${PERF_SOURCES})
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
|
||||||
|
@ -300,6 +301,7 @@ elseif(CPU_BLAS)
|
||||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__ND4J_EXPERIMENTAL__=true")
|
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__ND4J_EXPERIMENTAL__=true")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
file(GLOB_RECURSE PERF_SOURCES false ../include/performance/*.cpp ../include/performance/*.h)
|
||||||
file(GLOB_RECURSE EXCEPTIONS_SOURCES false ../include/exceptions/*.cpp ../include/exceptions/*.h)
|
file(GLOB_RECURSE EXCEPTIONS_SOURCES false ../include/exceptions/*.cpp ../include/exceptions/*.h)
|
||||||
file(GLOB_RECURSE EXEC_SOURCES false ../include/execution/*.cpp ../include/execution/*.h)
|
file(GLOB_RECURSE EXEC_SOURCES false ../include/execution/*.cpp ../include/execution/*.h)
|
||||||
file(GLOB_RECURSE TYPES_SOURCES false ../include/types/*.cpp ../include/types/*.h)
|
file(GLOB_RECURSE TYPES_SOURCES false ../include/types/*.cpp ../include/types/*.h)
|
||||||
|
@ -320,7 +322,7 @@ elseif(CPU_BLAS)
|
||||||
../include/cnpy/cnpy.cpp ../include/nd4jmemset.h ../include/nd4jmalloc.h
|
../include/cnpy/cnpy.cpp ../include/nd4jmemset.h ../include/nd4jmalloc.h
|
||||||
Environment.cpp Environment.h ${LOOPS_SOURCES} ${HELPERS_SOURCES} ${EXEC_SOURCES} ${ARRAY_SOURCES} ${TYPES_SOURCES}
|
Environment.cpp Environment.h ${LOOPS_SOURCES} ${HELPERS_SOURCES} ${EXEC_SOURCES} ${ARRAY_SOURCES} ${TYPES_SOURCES}
|
||||||
${MEMORY_SOURCES} ${GRAPH_SOURCES} ${CUSTOMOPS_SOURCES} ${EXCEPTIONS_SOURCES} ${INDEXING_SOURCES} ${CUSTOMOPS_HELPERS_SOURCES}
|
${MEMORY_SOURCES} ${GRAPH_SOURCES} ${CUSTOMOPS_SOURCES} ${EXCEPTIONS_SOURCES} ${INDEXING_SOURCES} ${CUSTOMOPS_HELPERS_SOURCES}
|
||||||
${OPS_SOURCES})
|
${OPS_SOURCES} ${PERF_SOURCES})
|
||||||
if(IOS)
|
if(IOS)
|
||||||
add_library(${LIBND4J_NAME} STATIC $<TARGET_OBJECTS:nd4jobj>)
|
add_library(${LIBND4J_NAME} STATIC $<TARGET_OBJECTS:nd4jobj>)
|
||||||
else()
|
else()
|
||||||
|
|
|
@ -759,6 +759,13 @@ public:
|
||||||
*/
|
*/
|
||||||
int getDeviceMajor(int deviceId);
|
int getDeviceMajor(int deviceId);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This method returns amount of cached memory
|
||||||
|
* @param deviceId
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
Nd4jLong getCachedMemory(int deviceId);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
* @param ptrToDeviceId
|
* @param ptrToDeviceId
|
||||||
|
@ -1653,6 +1660,7 @@ public:
|
||||||
|
|
||||||
int unregisterGraph(Nd4jPointer *extraPointers, Nd4jLong graphId);
|
int unregisterGraph(Nd4jPointer *extraPointers, Nd4jLong graphId);
|
||||||
|
|
||||||
|
void deleteCharArray(Nd4jPointer pointer);
|
||||||
void deleteIntArray(Nd4jPointer pointer);
|
void deleteIntArray(Nd4jPointer pointer);
|
||||||
void deleteLongArray(Nd4jPointer pointer);
|
void deleteLongArray(Nd4jPointer pointer);
|
||||||
void deletePointerArray(Nd4jPointer pointer);
|
void deletePointerArray(Nd4jPointer pointer);
|
||||||
|
@ -1690,6 +1698,10 @@ public:
|
||||||
nd4j::ConstantDataBuffer* constantBuffer(nd4j::DataType dtype, Nd4jLong *data, int length);
|
nd4j::ConstantDataBuffer* constantBuffer(nd4j::DataType dtype, Nd4jLong *data, int length);
|
||||||
nd4j::ConstantDataBuffer* constantBuffer(nd4j::DataType dtype, double *data, int length);
|
nd4j::ConstantDataBuffer* constantBuffer(nd4j::DataType dtype, double *data, int length);
|
||||||
nd4j::ConstantDataBuffer* constantBuffer(nd4j::DataType dtype, nd4j::ConstantDescriptor *descriptor);
|
nd4j::ConstantDataBuffer* constantBuffer(nd4j::DataType dtype, nd4j::ConstantDescriptor *descriptor);
|
||||||
|
|
||||||
|
|
||||||
|
const char* runLightBenchmarkSuit(bool printOut);
|
||||||
|
const char* runFullBenchmarkSuit(bool printOut);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -204,6 +204,9 @@ template void NDArrayFactory::memcpyFromVector(void *ptr, const std::vector<int8
|
||||||
template NDArray* NDArrayFactory::create_(const bool scalar, nd4j::LaunchContext * context);
|
template NDArray* NDArrayFactory::create_(const bool scalar, nd4j::LaunchContext * context);
|
||||||
template NDArray* NDArrayFactory::create_(const int8_t scalar, nd4j::LaunchContext * context);
|
template NDArray* NDArrayFactory::create_(const int8_t scalar, nd4j::LaunchContext * context);
|
||||||
template NDArray* NDArrayFactory::create_(const uint8_t scalar, nd4j::LaunchContext * context);
|
template NDArray* NDArrayFactory::create_(const uint8_t scalar, nd4j::LaunchContext * context);
|
||||||
|
template NDArray* NDArrayFactory::create_(const uint16_t scalar, nd4j::LaunchContext * context);
|
||||||
|
template NDArray* NDArrayFactory::create_(const uint32_t scalar, nd4j::LaunchContext * context);
|
||||||
|
template NDArray* NDArrayFactory::create_(const uint64_t scalar, nd4j::LaunchContext * context);
|
||||||
template NDArray* NDArrayFactory::create_(const int16_t scalar, nd4j::LaunchContext * context);
|
template NDArray* NDArrayFactory::create_(const int16_t scalar, nd4j::LaunchContext * context);
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
|
|
|
@ -72,6 +72,9 @@ bool experimentalSupport = false;
|
||||||
#include <graph/ResultWrapper.h>
|
#include <graph/ResultWrapper.h>
|
||||||
#include <helpers/DebugHelper.h>
|
#include <helpers/DebugHelper.h>
|
||||||
#include <helpers/ConstantTadHelper.h>
|
#include <helpers/ConstantTadHelper.h>
|
||||||
|
#include <performance/benchmarking/BenchmarkSuit.h>
|
||||||
|
#include <performance/benchmarking/FullBenchmarkSuit.h>
|
||||||
|
#include <performance/benchmarking/LightBenchmarkSuit.h>
|
||||||
|
|
||||||
using namespace nd4j;
|
using namespace nd4j;
|
||||||
|
|
||||||
|
@ -2304,6 +2307,11 @@ void NativeOps::deletePointerArray(Nd4jPointer pointer) {
|
||||||
delete[] ptr;
|
delete[] ptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void NativeOps::deleteCharArray(Nd4jPointer pointer) {
|
||||||
|
auto ptr = reinterpret_cast<char *>(pointer);
|
||||||
|
delete[] ptr;
|
||||||
|
}
|
||||||
|
|
||||||
void NativeOps::deleteIntArray(Nd4jPointer pointer) {
|
void NativeOps::deleteIntArray(Nd4jPointer pointer) {
|
||||||
auto ptr = reinterpret_cast<int *>(pointer);
|
auto ptr = reinterpret_cast<int *>(pointer);
|
||||||
delete[] ptr;
|
delete[] ptr;
|
||||||
|
@ -2792,6 +2800,38 @@ void NativeOps::sortTadByValue(Nd4jPointer *extraPointers,
|
||||||
BUILD_DOUBLE_SELECTOR(xType, yType, nd4j::DoubleMethods, ::sortTadByValue(x, xShapeInfo, y, yShapeInfo, dimension, dimensionLength, descending), LIBND4J_TYPES, LIBND4J_TYPES);
|
BUILD_DOUBLE_SELECTOR(xType, yType, nd4j::DoubleMethods, ::sortTadByValue(x, xShapeInfo, y, yShapeInfo, dimension, dimensionLength, descending), LIBND4J_TYPES, LIBND4J_TYPES);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const char* NativeOps::runLightBenchmarkSuit(bool printOut) {
|
||||||
|
nd4j::LightBenchmarkSuit suit;
|
||||||
|
auto result = suit.runSuit();
|
||||||
|
|
||||||
|
if (printOut)
|
||||||
|
nd4j_printf("%s\n", result.data());
|
||||||
|
|
||||||
|
auto chars = new char[result.length()+1];
|
||||||
|
std::memcpy(chars, result.data(), result.length());
|
||||||
|
chars[result.length()] = (char) 0x0;
|
||||||
|
|
||||||
|
return chars;
|
||||||
|
}
|
||||||
|
|
||||||
|
Nd4jLong NativeOps::getCachedMemory(int deviceId) {
|
||||||
|
return nd4j::ConstantHelper::getInstance()->getCachedAmount(deviceId);
|
||||||
|
}
|
||||||
|
|
||||||
|
const char* NativeOps::runFullBenchmarkSuit(bool printOut) {
|
||||||
|
nd4j::FullBenchmarkSuit suit;
|
||||||
|
auto result = suit.runSuit();
|
||||||
|
|
||||||
|
if (printOut)
|
||||||
|
nd4j_printf("%s\n", result.data());
|
||||||
|
|
||||||
|
auto chars = new char[result.length()+1];
|
||||||
|
std::memcpy(chars, result.data(), result.length());
|
||||||
|
chars[result.length()] = (char) 0x0;
|
||||||
|
|
||||||
|
return chars;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
BUILD_SINGLE_TEMPLATE(template void flattenGeneric,(Nd4jPointer*, int, char, void*, Nd4jLong*, void*, Nd4jLong*), LIBND4J_TYPES);
|
BUILD_SINGLE_TEMPLATE(template void flattenGeneric,(Nd4jPointer*, int, char, void*, Nd4jLong*, void*, Nd4jLong*), LIBND4J_TYPES);
|
||||||
BUILD_SINGLE_TEMPLATE(template void pullRowsGeneric, (void *, Nd4jLong*, void*, Nd4jLong*, const int, Nd4jLong*, Nd4jLong*, Nd4jLong*, Nd4jLong*, Nd4jLong*), LIBND4J_TYPES);
|
BUILD_SINGLE_TEMPLATE(template void pullRowsGeneric, (void *, Nd4jLong*, void*, Nd4jLong*, const int, Nd4jLong*, Nd4jLong*, Nd4jLong*, Nd4jLong*, Nd4jLong*), LIBND4J_TYPES);
|
||||||
|
|
|
@ -47,6 +47,8 @@
|
||||||
using namespace nd4j;
|
using namespace nd4j;
|
||||||
|
|
||||||
#include <loops/special_kernels.h>
|
#include <loops/special_kernels.h>
|
||||||
|
#include <performance/benchmarking/FullBenchmarkSuit.h>
|
||||||
|
#include <performance/benchmarking/LightBenchmarkSuit.h>
|
||||||
|
|
||||||
cudaDeviceProp *deviceProperties;
|
cudaDeviceProp *deviceProperties;
|
||||||
cudaFuncAttributes *funcAttributes = new cudaFuncAttributes[64];
|
cudaFuncAttributes *funcAttributes = new cudaFuncAttributes[64];
|
||||||
|
@ -2804,6 +2806,11 @@ void NativeOps::deletePointerArray(Nd4jPointer pointer) {
|
||||||
delete[] ptr;
|
delete[] ptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void NativeOps::deleteCharArray(Nd4jPointer pointer) {
|
||||||
|
auto ptr = reinterpret_cast<char *>(pointer);
|
||||||
|
delete[] ptr;
|
||||||
|
}
|
||||||
|
|
||||||
void NativeOps::deleteIntArray(Nd4jPointer pointer) {
|
void NativeOps::deleteIntArray(Nd4jPointer pointer) {
|
||||||
auto ptr = reinterpret_cast<int *>(pointer);
|
auto ptr = reinterpret_cast<int *>(pointer);
|
||||||
delete[] ptr;
|
delete[] ptr;
|
||||||
|
@ -3289,3 +3296,35 @@ Nd4jPointer NativeOps::shapeBufferForNumpy(Nd4jPointer npyArray) {
|
||||||
}
|
}
|
||||||
return reinterpret_cast<Nd4jPointer>(nd4j::ConstantShapeHelper::getInstance()->createFromExisting(shapeBuffer, true));
|
return reinterpret_cast<Nd4jPointer>(nd4j::ConstantShapeHelper::getInstance()->createFromExisting(shapeBuffer, true));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const char* NativeOps::runLightBenchmarkSuit(bool printOut) {
|
||||||
|
nd4j::LightBenchmarkSuit suit;
|
||||||
|
auto result = suit.runSuit();
|
||||||
|
|
||||||
|
if (printOut)
|
||||||
|
nd4j_printf("%s\n", result.data());
|
||||||
|
|
||||||
|
auto chars = new char[result.length()+1];
|
||||||
|
std::memcpy(chars, result.data(), result.length());
|
||||||
|
chars[result.length()] = (char) 0x0;
|
||||||
|
|
||||||
|
return chars;
|
||||||
|
}
|
||||||
|
|
||||||
|
const char* NativeOps::runFullBenchmarkSuit(bool printOut) {
|
||||||
|
nd4j::FullBenchmarkSuit suit;
|
||||||
|
auto result = suit.runSuit();
|
||||||
|
|
||||||
|
if (printOut)
|
||||||
|
nd4j_printf("%s\n", result.data());
|
||||||
|
|
||||||
|
auto chars = new char[result.length()+1];
|
||||||
|
std::memcpy(chars, result.data(), result.length());
|
||||||
|
chars[result.length()] = (char) 0x0;
|
||||||
|
|
||||||
|
return chars;
|
||||||
|
}
|
||||||
|
|
||||||
|
Nd4jLong NativeOps::getCachedMemory(int deviceId) {
|
||||||
|
return nd4j::ConstantHelper::getInstance()->getCachedAmount(deviceId);
|
||||||
|
}
|
|
@ -50,7 +50,7 @@ namespace nd4j {
|
||||||
unsigned int _rIterations;
|
unsigned int _rIterations;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
void benchmarkOperation(OpBenchmark &benchmark);
|
std::string benchmarkOperation(OpBenchmark &benchmark);
|
||||||
|
|
||||||
void benchmarkScalarOperation(scalar::Ops op, std::string testName, double value, NDArray &x, NDArray &z);
|
void benchmarkScalarOperation(scalar::Ops op, std::string testName, double value, NDArray &x, NDArray &z);
|
||||||
|
|
||||||
|
@ -58,34 +58,30 @@ namespace nd4j {
|
||||||
|
|
||||||
void benchmarkGEMM(char orderA, std::initializer_list<Nd4jLong> shapeA, char orderB, std::initializer_list<Nd4jLong> shapeB, char orderC, std::initializer_list<Nd4jLong> shapeC);
|
void benchmarkGEMM(char orderA, std::initializer_list<Nd4jLong> shapeA, char orderB, std::initializer_list<Nd4jLong> shapeB, char orderC, std::initializer_list<Nd4jLong> shapeC);
|
||||||
|
|
||||||
void printHeader();
|
std::string printHeader();
|
||||||
public:
|
public:
|
||||||
BenchmarkHelper(unsigned int warmUpIterations = 10, unsigned int runIterations = 100);
|
BenchmarkHelper(unsigned int warmUpIterations = 10, unsigned int runIterations = 100);
|
||||||
|
|
||||||
void runOperationSuit(std::initializer_list<OpBenchmark*> benchmarks, const char *msg = nullptr);
|
std::string runOperationSuit(std::initializer_list<OpBenchmark*> benchmarks, const char *msg = nullptr);
|
||||||
void runOperationSuit(std::vector<OpBenchmark*> &benchmarks, bool postHeaders, const char *msg = nullptr);
|
std::string runOperationSuit(std::vector<OpBenchmark*> &benchmarks, bool postHeaders, const char *msg = nullptr);
|
||||||
|
std::string runOperationSuit(OpBenchmark* benchmark);
|
||||||
|
|
||||||
void runOperationSuit(ScalarBenchmark *op, const std::function<void (ResultSet &, ResultSet &)>& func, const char *message = nullptr);
|
std::string runOperationSuit(ScalarBenchmark *op, const std::function<void (ResultSet &, ResultSet &)>& func, const char *message = nullptr);
|
||||||
void runOperationSuit(TransformBenchmark *op, const std::function<void (ResultSet &, ResultSet &)>& func, const char *message = nullptr);
|
std::string runOperationSuit(TransformBenchmark *op, const std::function<void (ResultSet &, ResultSet &)>& func, const char *message = nullptr);
|
||||||
void runOperationSuit(ReductionBenchmark *op, const std::function<void (ResultSet &, ResultSet &)>& func, const char *message = nullptr);
|
std::string runOperationSuit(ReductionBenchmark *op, const std::function<void (ResultSet &, ResultSet &)>& func, const char *message = nullptr);
|
||||||
void runOperationSuit(ReductionBenchmark *op, const std::function<void (ResultSet &, ResultSet &, ResultSet &)>& func, const char *message = nullptr);
|
std::string runOperationSuit(ReductionBenchmark *op, const std::function<void (ResultSet &, ResultSet &, ResultSet &)>& func, const char *message = nullptr);
|
||||||
void runOperationSuit(PairwiseBenchmark *op, const std::function<void (ResultSet &, ResultSet &, ResultSet &)>& func, const char *message = nullptr);
|
std::string runOperationSuit(PairwiseBenchmark *op, const std::function<void (ResultSet &, ResultSet &, ResultSet &)>& func, const char *message = nullptr);
|
||||||
|
|
||||||
|
|
||||||
void runOperationSuit(TransformBenchmark *op, const std::function<void (Parameters &, ResultSet &, ResultSet &)>& func, ParametersBatch ¶metersBatch, const char *message = nullptr);
|
std::string runOperationSuit(TransformBenchmark *op, const std::function<void (Parameters &, ResultSet &, ResultSet &)>& func, ParametersBatch ¶metersBatch, const char *message = nullptr);
|
||||||
void runOperationSuit(ScalarBenchmark *op, const std::function<void (Parameters &, ResultSet &, ResultSet &)>& func, ParametersBatch ¶metersBatch, const char *message = nullptr);
|
std::string runOperationSuit(ScalarBenchmark *op, const std::function<void (Parameters &, ResultSet &, ResultSet &)>& func, ParametersBatch ¶metersBatch, const char *message = nullptr);
|
||||||
void runOperationSuit(ReductionBenchmark *op, const std::function<void (Parameters &, ResultSet &, ResultSet &)>& func, ParametersBatch ¶metersBatch, const char *message = nullptr);
|
std::string runOperationSuit(ReductionBenchmark *op, const std::function<void (Parameters &, ResultSet &, ResultSet &)>& func, ParametersBatch ¶metersBatch, const char *message = nullptr);
|
||||||
void runOperationSuit(ReductionBenchmark *op, const std::function<void (Parameters &, ResultSet &, ResultSet &, ResultSet &)>& func, ParametersBatch ¶metersBatch, const char *message = nullptr);
|
std::string runOperationSuit(ReductionBenchmark *op, const std::function<void (Parameters &, ResultSet &, ResultSet &, ResultSet &)>& func, ParametersBatch ¶metersBatch, const char *message = nullptr);
|
||||||
void runOperationSuit(BroadcastBenchmark *op, const std::function<void (Parameters &, ResultSet &, ResultSet &, ResultSet &)>& func, ParametersBatch ¶metersBatch, const char *message = nullptr);
|
std::string runOperationSuit(BroadcastBenchmark *op, const std::function<void (Parameters &, ResultSet &, ResultSet &, ResultSet &)>& func, ParametersBatch ¶metersBatch, const char *message = nullptr);
|
||||||
void runOperationSuit(PairwiseBenchmark *op, const std::function<void (Parameters &, ResultSet &, ResultSet &, ResultSet &)>& func, ParametersBatch ¶metersBatch, const char *message = nullptr);
|
std::string runOperationSuit(PairwiseBenchmark *op, const std::function<void (Parameters &, ResultSet &, ResultSet &, ResultSet &)>& func, ParametersBatch ¶metersBatch, const char *message = nullptr);
|
||||||
void runOperationSuit(MatrixBenchmark *op, const std::function<void (Parameters &, ResultSet &, ResultSet &, ResultSet &)>& func, ParametersBatch ¶metersBatch, const char *message = nullptr);
|
std::string runOperationSuit(MatrixBenchmark *op, const std::function<void (Parameters &, ResultSet &, ResultSet &, ResultSet &)>& func, ParametersBatch ¶metersBatch, const char *message = nullptr);
|
||||||
|
|
||||||
void runOperationSuit(DeclarableBenchmark *op, const std::function<Context* (Parameters &)>& func, ParametersBatch ¶metersBatch, const char *message = nullptr);
|
std::string runOperationSuit(DeclarableBenchmark *op, const std::function<Context* (Parameters &)>& func, ParametersBatch ¶metersBatch, const char *message = nullptr);
|
||||||
|
|
||||||
|
|
||||||
void runScalarSuit();
|
|
||||||
|
|
||||||
void runAllSuits();
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -44,6 +44,8 @@ namespace nd4j {
|
||||||
std::vector<Nd4jPointer> _devicePointers;
|
std::vector<Nd4jPointer> _devicePointers;
|
||||||
std::vector<Nd4jLong> _deviceOffsets;
|
std::vector<Nd4jLong> _deviceOffsets;
|
||||||
std::mutex _mutex;
|
std::mutex _mutex;
|
||||||
|
|
||||||
|
std::vector<Nd4jLong> _counters;
|
||||||
public:
|
public:
|
||||||
~ConstantHelper() = default;
|
~ConstantHelper() = default;
|
||||||
|
|
||||||
|
@ -53,6 +55,8 @@ namespace nd4j {
|
||||||
void* replicatePointer(void *src, size_t numBytes, memory::Workspace *workspace = nullptr);
|
void* replicatePointer(void *src, size_t numBytes, memory::Workspace *workspace = nullptr);
|
||||||
|
|
||||||
ConstantDataBuffer* constantBuffer(const ConstantDescriptor &descriptor, nd4j::DataType dataType);
|
ConstantDataBuffer* constantBuffer(const ConstantDescriptor &descriptor, nd4j::DataType dataType);
|
||||||
|
|
||||||
|
Nd4jLong getCachedAmount(int deviceId);
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -36,7 +36,7 @@ namespace nd4j {
|
||||||
nd4j::graph::Context *_context = nullptr;
|
nd4j::graph::Context *_context = nullptr;
|
||||||
public:
|
public:
|
||||||
DeclarableBenchmark(nd4j::ops::DeclarableOp &op, std::string name = 0) : OpBenchmark() {
|
DeclarableBenchmark(nd4j::ops::DeclarableOp &op, std::string name = 0) : OpBenchmark() {
|
||||||
_op = ops::OpRegistrator::getInstance()->getOperation(op.getOpHash());
|
_op = &op; //ops::OpRegistrator::getInstance()->getOperation(op.getOpHash());
|
||||||
_testName = name;
|
_testName = name;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -30,9 +30,11 @@ namespace nd4j {
|
||||||
ConstantHelper::ConstantHelper() {
|
ConstantHelper::ConstantHelper() {
|
||||||
int numDevices = getNumberOfDevices();
|
int numDevices = getNumberOfDevices();
|
||||||
_cache.resize(numDevices);
|
_cache.resize(numDevices);
|
||||||
|
_counters.resize(numDevices);
|
||||||
for (int e = 0; e < numDevices; e++) {
|
for (int e = 0; e < numDevices; e++) {
|
||||||
std::map<ConstantDescriptor, ConstantHolder> map;
|
std::map<ConstantDescriptor, ConstantHolder> map;
|
||||||
_cache[e] = map;
|
_cache[e] = map;
|
||||||
|
_counters[e] = 0L;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -44,8 +46,14 @@ namespace nd4j {
|
||||||
}
|
}
|
||||||
|
|
||||||
void* ConstantHelper::replicatePointer(void *src, size_t numBytes, memory::Workspace *workspace) {
|
void* ConstantHelper::replicatePointer(void *src, size_t numBytes, memory::Workspace *workspace) {
|
||||||
|
if (workspace == nullptr) {
|
||||||
|
auto deviceId = getCurrentDevice();
|
||||||
|
_counters[deviceId] += numBytes;
|
||||||
|
}
|
||||||
|
|
||||||
int8_t *ptr = nullptr;
|
int8_t *ptr = nullptr;
|
||||||
ALLOCATE(ptr, workspace, numBytes, int8_t);
|
ALLOCATE(ptr, workspace, numBytes, int8_t);
|
||||||
|
|
||||||
std::memcpy(ptr, src, numBytes);
|
std::memcpy(ptr, src, numBytes);
|
||||||
return ptr;
|
return ptr;
|
||||||
}
|
}
|
||||||
|
@ -71,7 +79,9 @@ namespace nd4j {
|
||||||
if (holder->hasBuffer(dataType))
|
if (holder->hasBuffer(dataType))
|
||||||
return holder->getConstantDataBuffer(dataType);
|
return holder->getConstantDataBuffer(dataType);
|
||||||
else {
|
else {
|
||||||
int8_t *cbuff = new int8_t[descriptor.length() * DataTypeUtils::sizeOf(dataType)];
|
auto size = descriptor.length() * DataTypeUtils::sizeOf(dataType);
|
||||||
|
auto cbuff = new int8_t[size];
|
||||||
|
_counters[deviceId] += size;
|
||||||
|
|
||||||
// create buffer with this dtype
|
// create buffer with this dtype
|
||||||
if (descriptor.isFloat()) {
|
if (descriptor.isFloat()) {
|
||||||
|
@ -87,6 +97,14 @@ namespace nd4j {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Nd4jLong ConstantHelper::getCachedAmount(int deviceId) {
|
||||||
|
int numDevices = getNumberOfDevices();
|
||||||
|
if (deviceId > numDevices || deviceId < 0)
|
||||||
|
return 0L;
|
||||||
|
else
|
||||||
|
return _counters[deviceId];
|
||||||
|
}
|
||||||
|
|
||||||
nd4j::ConstantHelper* nd4j::ConstantHelper::_INSTANCE = 0;
|
nd4j::ConstantHelper* nd4j::ConstantHelper::_INSTANCE = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -70,6 +70,7 @@ namespace nd4j {
|
||||||
_devicePointers.resize(numDevices);
|
_devicePointers.resize(numDevices);
|
||||||
_deviceOffsets.resize(numDevices);
|
_deviceOffsets.resize(numDevices);
|
||||||
_cache.resize(numDevices);
|
_cache.resize(numDevices);
|
||||||
|
_counters.resize(numDevices);
|
||||||
|
|
||||||
// filling all pointers
|
// filling all pointers
|
||||||
for (int e = 0; e < numDevices; e++) {
|
for (int e = 0; e < numDevices; e++) {
|
||||||
|
@ -83,6 +84,7 @@ namespace nd4j {
|
||||||
_devicePointers[e] = constant;
|
_devicePointers[e] = constant;
|
||||||
_deviceOffsets[e] = 0;
|
_deviceOffsets[e] = 0;
|
||||||
_cache[e] = devCache;
|
_cache[e] = devCache;
|
||||||
|
_counters[e] = 0L;
|
||||||
}
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
|
@ -115,6 +117,7 @@ namespace nd4j {
|
||||||
constantPtr = _devicePointers[deviceId];
|
constantPtr = _devicePointers[deviceId];
|
||||||
constantOffset = _deviceOffsets[deviceId];
|
constantOffset = _deviceOffsets[deviceId];
|
||||||
}
|
}
|
||||||
|
|
||||||
if (constantOffset + numBytes >= CONSTANT_LIMIT) {
|
if (constantOffset + numBytes >= CONSTANT_LIMIT) {
|
||||||
int8_t *ptr = nullptr;
|
int8_t *ptr = nullptr;
|
||||||
ALLOCATE_SPECIAL(ptr, workspace, numBytes, int8_t);
|
ALLOCATE_SPECIAL(ptr, workspace, numBytes, int8_t);
|
||||||
|
@ -154,7 +157,9 @@ namespace nd4j {
|
||||||
if (holder->hasBuffer(dataType)) {
|
if (holder->hasBuffer(dataType)) {
|
||||||
return holder->getConstantDataBuffer(dataType);
|
return holder->getConstantDataBuffer(dataType);
|
||||||
} else {
|
} else {
|
||||||
auto cbuff = new int8_t[descriptor.length() * DataTypeUtils::sizeOf(dataType)];
|
auto numBytes = descriptor.length() * DataTypeUtils::sizeOf(dataType);
|
||||||
|
auto cbuff = new int8_t[numBytes];
|
||||||
|
_counters[deviceId] += numBytes;
|
||||||
|
|
||||||
// create buffer with this dtype
|
// create buffer with this dtype
|
||||||
if (descriptor.isFloat()) {
|
if (descriptor.isFloat()) {
|
||||||
|
@ -172,5 +177,13 @@ namespace nd4j {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Nd4jLong ConstantHelper::getCachedAmount(int deviceId) {
|
||||||
|
int numDevices = getNumberOfDevices();
|
||||||
|
if (deviceId > numDevices || deviceId < 0)
|
||||||
|
return 0L;
|
||||||
|
else
|
||||||
|
return _counters[deviceId];
|
||||||
|
}
|
||||||
|
|
||||||
nd4j::ConstantHelper* nd4j::ConstantHelper::_INSTANCE = 0;
|
nd4j::ConstantHelper* nd4j::ConstantHelper::_INSTANCE = 0;
|
||||||
}
|
}
|
|
@ -30,11 +30,11 @@ namespace nd4j {
|
||||||
_rIterations = runIterations;
|
_rIterations = runIterations;
|
||||||
}
|
}
|
||||||
|
|
||||||
void BenchmarkHelper::printHeader() {
|
std::string BenchmarkHelper::printHeader() {
|
||||||
nd4j_printf("TestName\tOpNum\tWarmup\tNumIter\tDataType\tInplace\tShape\tStrides\tAxis\tOrders\tavg (us)\tmedian (us)\tmin (us)\tmax (us)\tstdev (us)\n","");
|
return std::string("TestName\tOpNum\tWarmup\tNumIter\tDataType\tInplace\tShape\tStrides\tAxis\tOrders\tavg (us)\tmedian (us)\tmin (us)\tmax (us)\tstdev (us)\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
void BenchmarkHelper::benchmarkOperation(OpBenchmark &benchmark) {
|
std::string BenchmarkHelper::benchmarkOperation(OpBenchmark &benchmark) {
|
||||||
|
|
||||||
for (uint i = 0; i < _wIterations; i++)
|
for (uint i = 0; i < _wIterations; i++)
|
||||||
benchmark.executeOnce();
|
benchmark.executeOnce();
|
||||||
|
@ -57,9 +57,9 @@ namespace nd4j {
|
||||||
std::sort(timings.begin(), timings.end());
|
std::sort(timings.begin(), timings.end());
|
||||||
Nd4jLong median = timings[_rIterations / 2];
|
Nd4jLong median = timings[_rIterations / 2];
|
||||||
|
|
||||||
NDArray n = NDArrayFactory::create(timings, LaunchContext::defaultContext());
|
auto n = NDArrayFactory::create(timings, LaunchContext::defaultContext());
|
||||||
|
|
||||||
double stdev = n.varianceNumber(nd4j::variance::SummaryStatsStandardDeviation, false).e<double>(0);
|
auto stdev = n.varianceNumber(nd4j::variance::SummaryStatsStandardDeviation, false).e<double>(0);
|
||||||
auto min = n.reduceNumber(nd4j::reduce::Min).e<Nd4jLong>(0);
|
auto min = n.reduceNumber(nd4j::reduce::Min).e<Nd4jLong>(0);
|
||||||
auto max = n.reduceNumber(nd4j::reduce::Max).e<Nd4jLong>(0);
|
auto max = n.reduceNumber(nd4j::reduce::Max).e<Nd4jLong>(0);
|
||||||
|
|
||||||
|
@ -71,10 +71,16 @@ namespace nd4j {
|
||||||
auto a = benchmark.axis();
|
auto a = benchmark.axis();
|
||||||
auto inpl = benchmark.inplace();
|
auto inpl = benchmark.inplace();
|
||||||
|
|
||||||
|
std::string temp;
|
||||||
|
temp.resize(65536);
|
||||||
|
|
||||||
// printing out stuff
|
// printing out stuff
|
||||||
nd4j_printf("%s\t%i\t%i\t%i\t%s\t%s\t%s\t%s\t%s\t%s\t%lld\t%lld\t%lld\t%lld\t%.2f\n", benchmark.testName().c_str(), benchmark.opNum(),
|
snprintf(const_cast<char *>(temp.data()), temp.length(), "%s\t%i\t%i\t%i\t%s\t%s\t%s\t%s\t%s\t%s\t%lld\t%lld\t%lld\t%lld\t%.2f\n", benchmark.testName().c_str(), benchmark.opNum(),
|
||||||
_wIterations, _rIterations, t.c_str(), inpl.c_str(), s.c_str(), strides.c_str(), a.c_str(), o.c_str(),
|
_wIterations, _rIterations, t.c_str(), inpl.c_str(), s.c_str(), strides.c_str(), a.c_str(), o.c_str(),
|
||||||
nd4j::math::nd4j_floor<double, Nd4jLong>(sumT), median, min, max, stdev);
|
nd4j::math::nd4j_floor<double, Nd4jLong>(sumT), median, min, max, stdev);
|
||||||
|
|
||||||
|
auto pos = temp.find('\n');
|
||||||
|
return temp.substr(0, pos + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void BenchmarkHelper::benchmarkScalarOperation(scalar::Ops op, std::string testName, double value, NDArray &x, NDArray &z) {
|
void BenchmarkHelper::benchmarkScalarOperation(scalar::Ops op, std::string testName, double value, NDArray &x, NDArray &z) {
|
||||||
|
@ -126,47 +132,44 @@ namespace nd4j {
|
||||||
nd4j::math::nd4j_floor<double, Nd4jLong>(sumT), median, min, max, stdev);
|
nd4j::math::nd4j_floor<double, Nd4jLong>(sumT), median, min, max, stdev);
|
||||||
}
|
}
|
||||||
|
|
||||||
void BenchmarkHelper::runOperationSuit(std::initializer_list<OpBenchmark*> benchmarks, const char *msg) {
|
std::string BenchmarkHelper::runOperationSuit(std::initializer_list<OpBenchmark*> benchmarks, const char *msg) {
|
||||||
std::vector<OpBenchmark*> ops(benchmarks);
|
std::vector<OpBenchmark*> ops(benchmarks);
|
||||||
runOperationSuit(ops, msg);
|
return runOperationSuit(ops, msg);
|
||||||
}
|
}
|
||||||
|
|
||||||
void BenchmarkHelper::runOperationSuit(std::vector<OpBenchmark*> &benchmarks, bool postHeaders, const char *msg) {
|
std::string BenchmarkHelper::runOperationSuit(OpBenchmark* benchmark) {
|
||||||
|
return benchmarkOperation(*benchmark);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string BenchmarkHelper::runOperationSuit(std::vector<OpBenchmark*> &benchmarks, bool postHeaders, const char *msg) {
|
||||||
|
std::string result;
|
||||||
|
|
||||||
if (msg != nullptr && postHeaders) {
|
if (msg != nullptr && postHeaders) {
|
||||||
nd4j_printf("\n%s\n", msg);
|
result += "\n";
|
||||||
|
result += msg;
|
||||||
|
result += "\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
if (postHeaders)
|
if (postHeaders)
|
||||||
printHeader();
|
result += printHeader();
|
||||||
|
|
||||||
for (auto v:benchmarks)
|
for (auto v:benchmarks)
|
||||||
benchmarkOperation(*v);
|
result += benchmarkOperation(*v);
|
||||||
|
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
void BenchmarkHelper::runScalarSuit() {
|
std::string BenchmarkHelper::runOperationSuit(DeclarableBenchmark *op, const std::function<Context* (Parameters &)>& func, ParametersBatch ¶metersBatch, const char *message) {
|
||||||
printHeader();
|
|
||||||
|
|
||||||
std::initializer_list<std::initializer_list<Nd4jLong>> shapes = {{100}, {32, 256}, {32, 150, 200}, {32, 3, 244, 244}, {32, 64, 128, 256}};
|
|
||||||
std::initializer_list<nd4j::DataType> dataTypes = {nd4j::DataType::FLOAT32, nd4j::DataType::DOUBLE};
|
|
||||||
std::initializer_list<nd4j::scalar::Ops> ops = {scalar::Add, scalar::Divide, scalar::Pow};
|
|
||||||
|
|
||||||
for (const auto &d:dataTypes) {
|
|
||||||
for (const auto &o:ops) {
|
|
||||||
for (const auto &s:shapes) {
|
|
||||||
//benchmarkScalarOperation(o, 2.0, s, d);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void BenchmarkHelper::runOperationSuit(DeclarableBenchmark *op, const std::function<Context* (Parameters &)>& func, ParametersBatch ¶metersBatch, const char *message) {
|
|
||||||
auto parameters = parametersBatch.parameters();
|
auto parameters = parametersBatch.parameters();
|
||||||
|
std::string result;
|
||||||
|
|
||||||
if (message != nullptr) {
|
if (message != nullptr) {
|
||||||
nd4j_printf("\n%s\n", message);
|
result += "\n";
|
||||||
|
result += message;
|
||||||
|
result += "\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
printHeader();
|
result += printHeader();
|
||||||
|
|
||||||
std::vector<OpBenchmark*> list;
|
std::vector<OpBenchmark*> list;
|
||||||
|
|
||||||
|
@ -175,25 +178,26 @@ namespace nd4j {
|
||||||
|
|
||||||
auto clone = reinterpret_cast<DeclarableBenchmark*>(op->clone());
|
auto clone = reinterpret_cast<DeclarableBenchmark*>(op->clone());
|
||||||
clone->setContext(ctx);
|
clone->setContext(ctx);
|
||||||
list.emplace_back(clone);
|
|
||||||
|
result += runOperationSuit(clone);
|
||||||
|
|
||||||
|
delete clone;
|
||||||
}
|
}
|
||||||
|
|
||||||
runOperationSuit(list, false);
|
return result;
|
||||||
|
|
||||||
// removing everything
|
|
||||||
for (auto v:list) {
|
|
||||||
delete reinterpret_cast<DeclarableBenchmark*>(v);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void BenchmarkHelper::runOperationSuit(ScalarBenchmark *op, const std::function<void (Parameters &, ResultSet&, ResultSet&)>& func, ParametersBatch ¶metersBatch, const char *message) {
|
std::string BenchmarkHelper::runOperationSuit(ScalarBenchmark *op, const std::function<void (Parameters &, ResultSet&, ResultSet&)>& func, ParametersBatch ¶metersBatch, const char *message) {
|
||||||
auto parameters = parametersBatch.parameters();
|
auto parameters = parametersBatch.parameters();
|
||||||
|
std::string output;
|
||||||
|
|
||||||
if (message != nullptr) {
|
if (message != nullptr) {
|
||||||
nd4j_printf("\n%s\n", message);
|
output += "\n";
|
||||||
|
output += message;
|
||||||
|
output += "\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
printHeader();
|
output += printHeader();
|
||||||
|
|
||||||
for (auto &p: parameters) {
|
for (auto &p: parameters) {
|
||||||
ResultSet x;
|
ResultSet x;
|
||||||
|
@ -217,16 +221,20 @@ namespace nd4j {
|
||||||
result.emplace_back(clone);
|
result.emplace_back(clone);
|
||||||
}
|
}
|
||||||
|
|
||||||
runOperationSuit(result, false);
|
output += runOperationSuit(result, false);
|
||||||
|
|
||||||
// removing everything
|
// removing everything
|
||||||
for (auto v:result) {
|
for (auto v:result) {
|
||||||
delete reinterpret_cast<ScalarBenchmark*>(v);
|
delete reinterpret_cast<ScalarBenchmark*>(v);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return output;
|
||||||
}
|
}
|
||||||
|
|
||||||
void BenchmarkHelper::runOperationSuit(ScalarBenchmark *op, const std::function<void (ResultSet&, ResultSet&)>& func, const char *message) {
|
std::string BenchmarkHelper::runOperationSuit(ScalarBenchmark *op, const std::function<void (ResultSet&, ResultSet&)>& func, const char *message) {
|
||||||
|
std::string output;
|
||||||
|
|
||||||
ResultSet x;
|
ResultSet x;
|
||||||
x.setNonRemovable();
|
x.setNonRemovable();
|
||||||
ResultSet z;
|
ResultSet z;
|
||||||
|
@ -248,23 +256,27 @@ namespace nd4j {
|
||||||
result.emplace_back(clone);
|
result.emplace_back(clone);
|
||||||
}
|
}
|
||||||
|
|
||||||
runOperationSuit(result, message);
|
output += runOperationSuit(result, message);
|
||||||
|
|
||||||
// removing everything
|
// removing everything
|
||||||
for (auto v:result) {
|
for (auto v:result) {
|
||||||
delete reinterpret_cast<ScalarBenchmark*>(v);
|
delete reinterpret_cast<ScalarBenchmark*>(v);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return output;
|
||||||
}
|
}
|
||||||
|
|
||||||
void BenchmarkHelper::runOperationSuit(TransformBenchmark *op, const std::function<void (Parameters &, ResultSet &, ResultSet &)>& func, ParametersBatch ¶metersBatch, const char *message) {
|
std::string BenchmarkHelper::runOperationSuit(TransformBenchmark *op, const std::function<void (Parameters &, ResultSet &, ResultSet &)>& func, ParametersBatch ¶metersBatch, const char *message) {
|
||||||
|
|
||||||
auto parameters = parametersBatch.parameters();
|
auto parameters = parametersBatch.parameters();
|
||||||
|
std::string output;
|
||||||
|
|
||||||
if (message != nullptr) {
|
if (message != nullptr) {
|
||||||
nd4j_printf("\n%s\n", message);
|
output += "\n";
|
||||||
|
output += message;
|
||||||
|
output += "\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
printHeader();
|
output += printHeader();
|
||||||
|
|
||||||
for (auto &p: parameters) {
|
for (auto &p: parameters) {
|
||||||
ResultSet x;
|
ResultSet x;
|
||||||
|
@ -288,16 +300,20 @@ namespace nd4j {
|
||||||
result.emplace_back(clone);
|
result.emplace_back(clone);
|
||||||
}
|
}
|
||||||
|
|
||||||
runOperationSuit(result, false);
|
output += runOperationSuit(result, false);
|
||||||
|
|
||||||
// removing everything
|
// removing everything
|
||||||
for (auto v:result) {
|
for (auto v:result) {
|
||||||
delete reinterpret_cast<TransformBenchmark*>(v);
|
delete reinterpret_cast<TransformBenchmark*>(v);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return output;
|
||||||
}
|
}
|
||||||
|
|
||||||
void BenchmarkHelper::runOperationSuit(TransformBenchmark *op, const std::function<void (ResultSet&, ResultSet&)>& func, const char *message) {
|
std::string BenchmarkHelper::runOperationSuit(TransformBenchmark *op, const std::function<void (ResultSet&, ResultSet&)>& func, const char *message) {
|
||||||
|
std::string output;
|
||||||
|
|
||||||
ResultSet x;
|
ResultSet x;
|
||||||
x.setNonRemovable();
|
x.setNonRemovable();
|
||||||
ResultSet z;
|
ResultSet z;
|
||||||
|
@ -319,22 +335,27 @@ namespace nd4j {
|
||||||
result.emplace_back(clone);
|
result.emplace_back(clone);
|
||||||
}
|
}
|
||||||
|
|
||||||
runOperationSuit(result, message);
|
output += runOperationSuit(result, message);
|
||||||
|
|
||||||
// removing everything
|
// removing everything
|
||||||
for (auto v:result) {
|
for (auto v:result) {
|
||||||
delete reinterpret_cast<TransformBenchmark*>(v);
|
delete reinterpret_cast<TransformBenchmark*>(v);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return output;
|
||||||
}
|
}
|
||||||
|
|
||||||
void BenchmarkHelper::runOperationSuit(ReductionBenchmark *op, const std::function<void (Parameters &, ResultSet&, ResultSet&)>& func, ParametersBatch ¶metersBatch, const char *message) {
|
std::string BenchmarkHelper::runOperationSuit(ReductionBenchmark *op, const std::function<void (Parameters &, ResultSet&, ResultSet&)>& func, ParametersBatch ¶metersBatch, const char *message) {
|
||||||
|
std::string output;
|
||||||
auto parameters = parametersBatch.parameters();
|
auto parameters = parametersBatch.parameters();
|
||||||
|
|
||||||
if (message != nullptr) {
|
if (message != nullptr) {
|
||||||
nd4j_printf("\n%s\n", message);
|
output += "\n";
|
||||||
|
output += message;
|
||||||
|
output += "\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
printHeader();
|
output += printHeader();
|
||||||
|
|
||||||
for (auto &p: parameters) {
|
for (auto &p: parameters) {
|
||||||
ResultSet x;
|
ResultSet x;
|
||||||
|
@ -358,16 +379,19 @@ namespace nd4j {
|
||||||
result.emplace_back(clone);
|
result.emplace_back(clone);
|
||||||
}
|
}
|
||||||
|
|
||||||
runOperationSuit(result, false);
|
output += runOperationSuit(result, false);
|
||||||
|
|
||||||
// removing everything
|
// removing everything
|
||||||
for (auto v:result) {
|
for (auto v:result) {
|
||||||
delete reinterpret_cast<ReductionBenchmark*>(v);
|
delete reinterpret_cast<ReductionBenchmark*>(v);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return output;
|
||||||
}
|
}
|
||||||
|
|
||||||
void BenchmarkHelper::runOperationSuit(ReductionBenchmark *op, const std::function<void (ResultSet&, ResultSet&)>& func, const char *message) {
|
std::string BenchmarkHelper::runOperationSuit(ReductionBenchmark *op, const std::function<void (ResultSet&, ResultSet&)>& func, const char *message) {
|
||||||
|
std::string output;
|
||||||
ResultSet x;
|
ResultSet x;
|
||||||
x.setNonRemovable();
|
x.setNonRemovable();
|
||||||
ResultSet z;
|
ResultSet z;
|
||||||
|
@ -389,19 +413,24 @@ namespace nd4j {
|
||||||
result.emplace_back(clone);
|
result.emplace_back(clone);
|
||||||
}
|
}
|
||||||
|
|
||||||
runOperationSuit(result, message);
|
output += runOperationSuit(result, message);
|
||||||
|
|
||||||
// removing everything
|
// removing everything
|
||||||
for (auto v:result) {
|
for (auto v:result) {
|
||||||
delete reinterpret_cast<ReductionBenchmark*>(v);
|
delete reinterpret_cast<ReductionBenchmark*>(v);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return output;
|
||||||
}
|
}
|
||||||
|
|
||||||
void BenchmarkHelper::runOperationSuit(ReductionBenchmark *op, const std::function<void (Parameters &, ResultSet&, ResultSet&, ResultSet &)>& func, ParametersBatch ¶metersBatch, const char *message) {
|
std::string BenchmarkHelper::runOperationSuit(ReductionBenchmark *op, const std::function<void (Parameters &, ResultSet&, ResultSet&, ResultSet &)>& func, ParametersBatch ¶metersBatch, const char *message) {
|
||||||
auto parameters = parametersBatch.parameters();
|
auto parameters = parametersBatch.parameters();
|
||||||
|
std::string output;
|
||||||
|
|
||||||
if (message != nullptr) {
|
if (message != nullptr) {
|
||||||
nd4j_printf("\n%s\n", message);
|
output += "\n";
|
||||||
|
output += message;
|
||||||
|
output += "\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
printHeader();
|
printHeader();
|
||||||
|
@ -436,16 +465,20 @@ namespace nd4j {
|
||||||
result.emplace_back(clone);
|
result.emplace_back(clone);
|
||||||
}
|
}
|
||||||
|
|
||||||
runOperationSuit(result, false);
|
output += runOperationSuit(result, false);
|
||||||
|
|
||||||
// removing everything
|
// removing everything
|
||||||
for (auto v:result) {
|
for (auto v:result) {
|
||||||
delete reinterpret_cast<ReductionBenchmark*>(v);
|
delete reinterpret_cast<ReductionBenchmark*>(v);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return output;
|
||||||
}
|
}
|
||||||
|
|
||||||
void BenchmarkHelper::runOperationSuit(ReductionBenchmark *op, const std::function<void (ResultSet&, ResultSet&, ResultSet &)>& func, const char *message) {
|
std::string BenchmarkHelper::runOperationSuit(ReductionBenchmark *op, const std::function<void (ResultSet&, ResultSet&, ResultSet &)>& func, const char *message) {
|
||||||
|
std::string output;
|
||||||
|
|
||||||
ResultSet x;
|
ResultSet x;
|
||||||
x.setNonRemovable();
|
x.setNonRemovable();
|
||||||
ResultSet y;
|
ResultSet y;
|
||||||
|
@ -474,22 +507,27 @@ namespace nd4j {
|
||||||
result.emplace_back(clone);
|
result.emplace_back(clone);
|
||||||
}
|
}
|
||||||
|
|
||||||
runOperationSuit(result, message);
|
output += runOperationSuit(result, message);
|
||||||
|
|
||||||
// removing everything
|
// removing everything
|
||||||
for (auto v:result) {
|
for (auto v:result) {
|
||||||
delete reinterpret_cast<ReductionBenchmark*>(v);
|
delete reinterpret_cast<ReductionBenchmark*>(v);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return output;
|
||||||
}
|
}
|
||||||
|
|
||||||
void BenchmarkHelper::runOperationSuit(BroadcastBenchmark *op, const std::function<void (Parameters &, ResultSet&, ResultSet&, ResultSet &)>& func, ParametersBatch ¶metersBatch, const char *message) {
|
std::string BenchmarkHelper::runOperationSuit(BroadcastBenchmark *op, const std::function<void (Parameters &, ResultSet&, ResultSet&, ResultSet &)>& func, ParametersBatch ¶metersBatch, const char *message) {
|
||||||
auto parameters = parametersBatch.parameters();
|
auto parameters = parametersBatch.parameters();
|
||||||
|
std::string output;
|
||||||
|
|
||||||
if (message != nullptr) {
|
if (message != nullptr) {
|
||||||
nd4j_printf("\n%s\n", message);
|
output += "\n";
|
||||||
|
output += message;
|
||||||
|
output += "\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
printHeader();
|
output += printHeader();
|
||||||
|
|
||||||
for (auto &p: parameters) {
|
for (auto &p: parameters) {
|
||||||
ResultSet x;
|
ResultSet x;
|
||||||
|
@ -518,23 +556,28 @@ namespace nd4j {
|
||||||
result.emplace_back(clone);
|
result.emplace_back(clone);
|
||||||
}
|
}
|
||||||
|
|
||||||
runOperationSuit(result, false);
|
output += runOperationSuit(result, false);
|
||||||
|
|
||||||
// removing everything
|
// removing everything
|
||||||
for (auto v:result) {
|
for (auto v:result) {
|
||||||
delete reinterpret_cast<BroadcastBenchmark*>(v);
|
delete reinterpret_cast<BroadcastBenchmark*>(v);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return output;
|
||||||
}
|
}
|
||||||
|
|
||||||
void BenchmarkHelper::runOperationSuit(PairwiseBenchmark *op, const std::function<void (Parameters &, ResultSet&, ResultSet&, ResultSet &)>& func, ParametersBatch ¶metersBatch, const char *message) {
|
std::string BenchmarkHelper::runOperationSuit(PairwiseBenchmark *op, const std::function<void (Parameters &, ResultSet&, ResultSet&, ResultSet &)>& func, ParametersBatch ¶metersBatch, const char *message) {
|
||||||
auto parameters = parametersBatch.parameters();
|
auto parameters = parametersBatch.parameters();
|
||||||
|
std::string output;
|
||||||
|
|
||||||
if (message != nullptr) {
|
if (message != nullptr) {
|
||||||
nd4j_printf("\n%s\n", message);
|
output += "\n";
|
||||||
|
output += message;
|
||||||
|
output += "\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
printHeader();
|
output += printHeader();
|
||||||
|
|
||||||
for (auto &p: parameters) {
|
for (auto &p: parameters) {
|
||||||
ResultSet x;
|
ResultSet x;
|
||||||
|
@ -562,16 +605,20 @@ namespace nd4j {
|
||||||
result.emplace_back(clone);
|
result.emplace_back(clone);
|
||||||
}
|
}
|
||||||
|
|
||||||
runOperationSuit(result, false);
|
output += runOperationSuit(result, false);
|
||||||
|
|
||||||
// removing everything
|
// removing everything
|
||||||
for (auto v:result) {
|
for (auto v:result) {
|
||||||
delete reinterpret_cast<PairwiseBenchmark*>(v);
|
delete reinterpret_cast<PairwiseBenchmark*>(v);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return output;
|
||||||
}
|
}
|
||||||
|
|
||||||
void BenchmarkHelper::runOperationSuit(PairwiseBenchmark *op, const std::function<void (ResultSet&, ResultSet&, ResultSet &)>& func, const char *message) {
|
std::string BenchmarkHelper::runOperationSuit(PairwiseBenchmark *op, const std::function<void (ResultSet&, ResultSet&, ResultSet &)>& func, const char *message) {
|
||||||
|
std::string output;
|
||||||
|
|
||||||
ResultSet x;
|
ResultSet x;
|
||||||
x.setNonRemovable();
|
x.setNonRemovable();
|
||||||
ResultSet y;
|
ResultSet y;
|
||||||
|
@ -597,22 +644,27 @@ namespace nd4j {
|
||||||
result.emplace_back(clone);
|
result.emplace_back(clone);
|
||||||
}
|
}
|
||||||
|
|
||||||
runOperationSuit(result, message);
|
output += runOperationSuit(result, message);
|
||||||
|
|
||||||
// removing everything
|
// removing everything
|
||||||
for (auto v:result) {
|
for (auto v:result) {
|
||||||
delete reinterpret_cast<PairwiseBenchmark*>(v);
|
delete reinterpret_cast<PairwiseBenchmark*>(v);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return output;
|
||||||
}
|
}
|
||||||
|
|
||||||
void BenchmarkHelper::runOperationSuit(MatrixBenchmark *op, const std::function<void (Parameters &, ResultSet&, ResultSet&, ResultSet &)>& func, ParametersBatch ¶metersBatch, const char *message) {
|
std::string BenchmarkHelper::runOperationSuit(MatrixBenchmark *op, const std::function<void (Parameters &, ResultSet&, ResultSet&, ResultSet &)>& func, ParametersBatch ¶metersBatch, const char *message) {
|
||||||
auto parameters = parametersBatch.parameters();
|
auto parameters = parametersBatch.parameters();
|
||||||
|
std::string output;
|
||||||
|
|
||||||
if (message != nullptr) {
|
if (message != nullptr) {
|
||||||
nd4j_printf("\n%s\n", message);
|
output += "\n";
|
||||||
|
output += message;
|
||||||
|
output += "\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
printHeader();
|
output += printHeader();
|
||||||
|
|
||||||
for (auto &p: parameters) {
|
for (auto &p: parameters) {
|
||||||
ResultSet x;
|
ResultSet x;
|
||||||
|
@ -637,12 +689,14 @@ namespace nd4j {
|
||||||
result.emplace_back(clone);
|
result.emplace_back(clone);
|
||||||
}
|
}
|
||||||
|
|
||||||
runOperationSuit(result, false);
|
output += runOperationSuit(result, false);
|
||||||
|
|
||||||
// removing everything
|
// removing everything
|
||||||
for (auto v:result) {
|
for (auto v:result) {
|
||||||
delete reinterpret_cast<MatrixBenchmark*>(v);
|
delete reinterpret_cast<MatrixBenchmark*>(v);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return output;
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -56,7 +56,7 @@ void gather(nd4j::LaunchContext * context, const NDArray* input, const NDArray*
|
||||||
std::vector<int> dimsOut(indices->rankOf());
|
std::vector<int> dimsOut(indices->rankOf());
|
||||||
std::iota(dimsOut.begin(), dimsOut.end(), axis); // fill with axis, axis+1, ... axis+indices->rankOf()-1
|
std::iota(dimsOut.begin(), dimsOut.end(), axis); // fill with axis, axis+1, ... axis+indices->rankOf()-1
|
||||||
const Nd4jLong numOfSubArrs = indices->lengthOf();
|
const Nd4jLong numOfSubArrs = indices->lengthOf();
|
||||||
PRAGMA_OMP_PARALLEL_FOR_ARGS(if(numOfSubArrs > Environment::getInstance()->elementwiseThreshold()) schedule(guided))
|
PRAGMA_OMP_PARALLEL_FOR_IF(numOfSubArrs > Environment::getInstance()->tadThreshold())
|
||||||
for(int i = 0; i < numOfSubArrs; ++i) {
|
for(int i = 0; i < numOfSubArrs; ++i) {
|
||||||
NDArray subArrOut = (*output)(i, dimsOut);
|
NDArray subArrOut = (*output)(i, dimsOut);
|
||||||
NDArray subArrIn = (*input)(indices->e<Nd4jLong>(i), {axis});
|
NDArray subArrIn = (*input)(indices->e<Nd4jLong>(i), {axis});
|
||||||
|
@ -72,7 +72,7 @@ PRAGMA_OMP_PARALLEL_FOR_ARGS(if(numOfSubArrs > Environment::getInstance()->eleme
|
||||||
}
|
}
|
||||||
else { // vector case
|
else { // vector case
|
||||||
const Nd4jLong numOfSubArrs = intArgs.size() - 1;
|
const Nd4jLong numOfSubArrs = intArgs.size() - 1;
|
||||||
PRAGMA_OMP_PARALLEL_FOR_ARGS(if(numOfSubArrs > Environment::getInstance()->elementwiseThreshold()) schedule(guided))
|
PRAGMA_OMP_PARALLEL_FOR_IF(numOfSubArrs > Environment::getInstance()->tadThreshold())
|
||||||
for(int i = 0; i < numOfSubArrs; ++i) {
|
for(int i = 0; i < numOfSubArrs; ++i) {
|
||||||
NDArray subArrOut = (*output)(i, {axis});
|
NDArray subArrOut = (*output)(i, {axis});
|
||||||
NDArray subArrIn = (*input)(intArgs[i+1], {axis});
|
NDArray subArrIn = (*input)(intArgs[i+1], {axis});
|
||||||
|
|
|
@ -171,15 +171,8 @@ void lstmBlockCell(const NDArray* xt, const NDArray* cLast, const NDArray* yLast
|
||||||
const int numUnits = cLast->sizeAt(1);
|
const int numUnits = cLast->sizeAt(1);
|
||||||
|
|
||||||
//Concat inputs: [xt, yt-1]: concat([bs,nIn],[bs,nOut]) -> [bs, (nIn+nOut)]
|
//Concat inputs: [xt, yt-1]: concat([bs,nIn],[bs,nOut]) -> [bs, (nIn+nOut)]
|
||||||
nd4j::ops::concat concat;
|
|
||||||
Context cContext(119);
|
|
||||||
auto concatOut = NDArrayFactory::create(xt->ordering(), {xt->sizeAt(0), xt->sizeAt(1) + yLast->sizeAt(1)}, xt->dataType(), xt->getContext());
|
auto concatOut = NDArrayFactory::create(xt->ordering(), {xt->sizeAt(0), xt->sizeAt(1) + yLast->sizeAt(1)}, xt->dataType(), xt->getContext());
|
||||||
cContext.setInputArray(0, const_cast<NDArray*>(xt), false);
|
helpers::concat(xt->getContext(), {const_cast<NDArray*>(xt), const_cast<NDArray*>(yLast)}, concatOut, {1});
|
||||||
cContext.setInputArray(1, const_cast<NDArray*>(yLast), false);
|
|
||||||
cContext.setOutputArray(0, &concatOut, false);
|
|
||||||
cContext.getIArguments()->emplace_back(1);
|
|
||||||
|
|
||||||
concat.execute(&cContext);
|
|
||||||
|
|
||||||
//NDArray* NDArrayFactory::create_( const char order, const std::vector<Nd4jLong> &shape, nd4j::DataType dataType, nd4j::memory::Workspace* workspace) {
|
//NDArray* NDArrayFactory::create_( const char order, const std::vector<Nd4jLong> &shape, nd4j::DataType dataType, nd4j::memory::Workspace* workspace) {
|
||||||
std::vector<Nd4jLong> shape = {bS, 4*numUnits};
|
std::vector<Nd4jLong> shape = {bS, 4*numUnits};
|
||||||
|
|
|
@ -45,10 +45,26 @@ namespace nd4j {
|
||||||
const NDArray* iSeq, const NDArray* cSeq, const NDArray* fSeq, const NDArray* oSeq, const NDArray* zSeq,
|
const NDArray* iSeq, const NDArray* cSeq, const NDArray* fSeq, const NDArray* oSeq, const NDArray* zSeq,
|
||||||
const NDArray* hSeq, const NDArray* ySeq, const std::vector<double>& params, const int dataFormat){
|
const NDArray* hSeq, const NDArray* ySeq, const std::vector<double>& params, const int dataFormat){
|
||||||
|
|
||||||
const int seqLen = xSeq->sizeAt(0);
|
int seqLen, mb, inSize, outSize;
|
||||||
const int mb = xSeq->sizeAt(1);
|
|
||||||
const int inSize = xSeq->sizeAt(2);
|
if(dataFormat == 0) {
|
||||||
const int outSize = iSeq->sizeAt(2);
|
seqLen = xSeq->sizeAt(0);
|
||||||
|
mb = xSeq->sizeAt(1);
|
||||||
|
inSize = xSeq->sizeAt(2);
|
||||||
|
outSize = iSeq->sizeAt(2);
|
||||||
|
}
|
||||||
|
else if(dataFormat == 1) {
|
||||||
|
seqLen = xSeq->sizeAt(2);
|
||||||
|
mb = xSeq->sizeAt(0);
|
||||||
|
inSize = xSeq->sizeAt(1);
|
||||||
|
outSize = iSeq->sizeAt(1);
|
||||||
|
}
|
||||||
|
else if(dataFormat == 2) {
|
||||||
|
seqLen = xSeq->sizeAt(1);
|
||||||
|
mb = xSeq->sizeAt(0);
|
||||||
|
inSize = xSeq->sizeAt(2);
|
||||||
|
outSize = iSeq->sizeAt(2);
|
||||||
|
}
|
||||||
|
|
||||||
const std::vector<Nd4jLong> inSliceShape({mb,inSize});
|
const std::vector<Nd4jLong> inSliceShape({mb,inSize});
|
||||||
const std::vector<Nd4jLong> outSliceShape({mb,outSize});
|
const std::vector<Nd4jLong> outSliceShape({mb,outSize});
|
||||||
|
|
|
@ -0,0 +1,41 @@
|
||||||
|
/*******************************************************************************
|
||||||
|
* Copyright (c) 2015-2018 Skymind, Inc.
|
||||||
|
*
|
||||||
|
* This program and the accompanying materials are made available under the
|
||||||
|
* terms of the Apache License, Version 2.0 which is available at
|
||||||
|
* https://www.apache.org/licenses/LICENSE-2.0.
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* License for the specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
|
******************************************************************************/
|
||||||
|
|
||||||
|
//
|
||||||
|
// @author raver119@gmail.com
|
||||||
|
//
|
||||||
|
|
||||||
|
#ifndef LIBND4J_BENCHMARKSUIT_H
|
||||||
|
#define LIBND4J_BENCHMARKSUIT_H
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <pointercast.h>
|
||||||
|
#include <dll.h>
|
||||||
|
#include <BenchmarkHelper.h>
|
||||||
|
#include <NDArrayFactory.h>
|
||||||
|
|
||||||
|
namespace nd4j {
|
||||||
|
class ND4J_EXPORT BenchmarkSuit {
|
||||||
|
public:
|
||||||
|
BenchmarkSuit() = default;
|
||||||
|
~BenchmarkSuit() = default;
|
||||||
|
|
||||||
|
virtual std::string runSuit() = 0;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#endif //DEV_TESTS_BENCHMARKSUIT_H
|
|
@ -0,0 +1,34 @@
|
||||||
|
/*******************************************************************************
|
||||||
|
* Copyright (c) 2015-2018 Skymind, Inc.
|
||||||
|
*
|
||||||
|
* This program and the accompanying materials are made available under the
|
||||||
|
* terms of the Apache License, Version 2.0 which is available at
|
||||||
|
* https://www.apache.org/licenses/LICENSE-2.0.
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* License for the specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
|
******************************************************************************/
|
||||||
|
|
||||||
|
//
|
||||||
|
// @author raver119@gmail.com
|
||||||
|
//
|
||||||
|
|
||||||
|
#ifndef LIBND4J_FULLBENCHMARKSUIT_H
|
||||||
|
#define LIBND4J_FULLBENCHMARKSUIT_H
|
||||||
|
|
||||||
|
#include <performance/benchmarking/BenchmarkSuit.h>
|
||||||
|
|
||||||
|
namespace nd4j {
|
||||||
|
class FullBenchmarkSuit : public BenchmarkSuit {
|
||||||
|
public:
|
||||||
|
std::string runSuit() override;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#endif //DEV_TESTS_FULLBENCHMARKSUIT_H
|
|
@ -0,0 +1,34 @@
|
||||||
|
/*******************************************************************************
|
||||||
|
* Copyright (c) 2015-2018 Skymind, Inc.
|
||||||
|
*
|
||||||
|
* This program and the accompanying materials are made available under the
|
||||||
|
* terms of the Apache License, Version 2.0 which is available at
|
||||||
|
* https://www.apache.org/licenses/LICENSE-2.0.
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* License for the specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
|
******************************************************************************/
|
||||||
|
|
||||||
|
//
|
||||||
|
// @author raver119@gmail.com
|
||||||
|
//
|
||||||
|
|
||||||
|
#ifndef LIBND4J_LIGHTBENCHMARKSUIT_H
|
||||||
|
#define LIBND4J_LIGHTBENCHMARKSUIT_H
|
||||||
|
|
||||||
|
#include <performance/benchmarking/BenchmarkSuit.h>
|
||||||
|
|
||||||
|
namespace nd4j {
|
||||||
|
class LightBenchmarkSuit : public BenchmarkSuit {
|
||||||
|
public:
|
||||||
|
std::string runSuit() override;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#endif //DEV_TESTS_LIGHTBENCHMARKSUIT_H
|
|
@ -0,0 +1,20 @@
|
||||||
|
/*******************************************************************************
|
||||||
|
* Copyright (c) 2015-2018 Skymind, Inc.
|
||||||
|
*
|
||||||
|
* This program and the accompanying materials are made available under the
|
||||||
|
* terms of the Apache License, Version 2.0 which is available at
|
||||||
|
* https://www.apache.org/licenses/LICENSE-2.0.
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* License for the specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
|
******************************************************************************/
|
||||||
|
|
||||||
|
//
|
||||||
|
// @author raver119@gmail.com
|
||||||
|
//
|
||||||
|
#include <performance/benchmarking/BenchmarkSuit.h>
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,639 @@
|
||||||
|
/*******************************************************************************
|
||||||
|
* Copyright (c) 2015-2018 Skymind, Inc.
|
||||||
|
*
|
||||||
|
* This program and the accompanying materials are made available under the
|
||||||
|
* terms of the Apache License, Version 2.0 which is available at
|
||||||
|
* https://www.apache.org/licenses/LICENSE-2.0.
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* License for the specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
|
******************************************************************************/
|
||||||
|
|
||||||
|
//
|
||||||
|
// @author raver119@gmail.com
|
||||||
|
//
|
||||||
|
|
||||||
|
#include <ops/declarable/CustomOperations.h>
|
||||||
|
#include "performance/benchmarking/LightBenchmarkSuit.h"
|
||||||
|
|
||||||
|
#ifdef _RELEASE
|
||||||
|
#define WARMUP 3
|
||||||
|
#define NUM_ITER 10
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
#define WARMUP 0
|
||||||
|
#define NUM_ITER 1
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
namespace nd4j {
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
static std::string transformBenchmark() {
|
||||||
|
std::string output;
|
||||||
|
output += "transformBenchmark " + DataTypeUtils::asString(DataTypeUtils::fromT<T>());
|
||||||
|
|
||||||
|
BenchmarkHelper helper(WARMUP, NUM_ITER);
|
||||||
|
IntPowerParameters length("length", 2, 8, 20, 4); //2^8, 2^12, 2^16, 2^20 - 4MB
|
||||||
|
BoolParameters inplace("inplace");
|
||||||
|
|
||||||
|
ParametersBatch batch({&length, &inplace});
|
||||||
|
|
||||||
|
auto generator = PARAMETRIC_XZ() {
|
||||||
|
auto arr = NDArrayFactory::create_<T>('c', {p.getIntParam("length")});
|
||||||
|
arr->assign(1.0);
|
||||||
|
x.push_back(arr);
|
||||||
|
if(p.getIntParam("inplace") == 1){
|
||||||
|
z.push_back(arr);
|
||||||
|
} else {
|
||||||
|
z.push_back(NDArrayFactory::create_<T>('c', {p.getIntParam("length")}));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
ScalarBenchmark sbRelu(scalar::Ops::RELU, "RELU");
|
||||||
|
sbRelu.setY(NDArrayFactory::create_<T>(0.0));
|
||||||
|
|
||||||
|
TransformBenchmark tbSigmoid(transform::StrictOps::Sigmoid, "sigmoid");
|
||||||
|
TransformBenchmark tbSoftmax(transform::StrictOps::SoftMax, "softmax");
|
||||||
|
|
||||||
|
output += helper.runOperationSuit(&sbRelu, generator, batch, "RELU");
|
||||||
|
output += helper.runOperationSuit(&tbSigmoid, generator, batch, "Sigmoid");
|
||||||
|
output += helper.runOperationSuit(&tbSigmoid, generator, batch, "Softmax");
|
||||||
|
|
||||||
|
return output;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
static std::string scalarBenchmark() {
|
||||||
|
std::string output;
|
||||||
|
output += "scalarBenchmark " + DataTypeUtils::asString(DataTypeUtils::fromT<T>());
|
||||||
|
|
||||||
|
BenchmarkHelper helper(WARMUP, NUM_ITER);
|
||||||
|
|
||||||
|
IntPowerParameters length("length", 2, 8, 20, 4); //2^8, 2^12, 2^16, 2^20
|
||||||
|
BoolParameters inplace("inplace");
|
||||||
|
|
||||||
|
ParametersBatch batch({&length, &inplace});
|
||||||
|
|
||||||
|
auto generator = PARAMETRIC_XZ() {
|
||||||
|
auto arr = NDArrayFactory::create_<T>('c', {p.getIntParam("length")});
|
||||||
|
arr->assign(1.0);
|
||||||
|
x.push_back(arr);
|
||||||
|
if(p.getIntParam("inplace") == 1){
|
||||||
|
z.push_back(arr);
|
||||||
|
} else {
|
||||||
|
z.push_back(NDArrayFactory::create_<T>('c', {p.getIntParam("length")}));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
ScalarBenchmark sbAdd(scalar::Ops::Add, "sAdd");
|
||||||
|
ScalarBenchmark sbDiv(scalar::Ops::Divide, "sDiv");
|
||||||
|
ScalarBenchmark sbPow(scalar::Ops::Pow, "sPow");
|
||||||
|
|
||||||
|
|
||||||
|
sbAdd.setY(NDArrayFactory::create_<T>(3.14159265359));
|
||||||
|
sbDiv.setY(NDArrayFactory::create_<T>(3.14159265359));
|
||||||
|
sbPow.setY(NDArrayFactory::create_<T>(3.14159265359));
|
||||||
|
|
||||||
|
|
||||||
|
output += helper.runOperationSuit(&sbAdd, generator, batch, "Scalar Addition - x.add(3.14159265359)");
|
||||||
|
output += helper.runOperationSuit(&sbDiv, generator, batch, "Scalar Division - x.div(3.14159265359)");
|
||||||
|
output += helper.runOperationSuit(&sbPow, generator, batch, "Scalar Power - x.pow(3.14159265359)");
|
||||||
|
|
||||||
|
return output;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
static std::string pairwiseBenchmark() {
|
||||||
|
std::string output;
|
||||||
|
output += "pairwiseBenchmark " + DataTypeUtils::asString(DataTypeUtils::fromT<T>());
|
||||||
|
|
||||||
|
BenchmarkHelper helper(WARMUP, NUM_ITER);
|
||||||
|
IntPowerParameters length("length", 2, 8, 20, 4); //2^4 to 2^20 in steps of 4 - 2^4, 2^8, 2^16, 2^20
|
||||||
|
BoolParameters inplace("inplace");
|
||||||
|
|
||||||
|
ParametersBatch batch({&length, &inplace});
|
||||||
|
|
||||||
|
auto generator = PARAMETRIC_XYZ() {
|
||||||
|
auto arr1 = NDArrayFactory::create_<T>('c', {p.getIntParam("length")});
|
||||||
|
auto arr2 = NDArrayFactory::create_<T>('c', {p.getIntParam("length")});
|
||||||
|
x.push_back(arr1);
|
||||||
|
y.push_back(arr2);
|
||||||
|
if(p.getIntParam("inplace") == 1){
|
||||||
|
z.push_back(arr1);
|
||||||
|
} else {
|
||||||
|
z.push_back(NDArrayFactory::create_<T>('c', {p.getIntParam("length")}));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
PairwiseBenchmark pb1(pairwise::Ops::Add, "Add");
|
||||||
|
output += helper.runOperationSuit(&pb1, generator, batch, "Pairwise Add");
|
||||||
|
|
||||||
|
PairwiseBenchmark pb2(pairwise::Ops::Divide, "Divide");
|
||||||
|
output += helper.runOperationSuit(&pb2, generator, batch, "Pairwise Divide");
|
||||||
|
|
||||||
|
return output;
|
||||||
|
}
|
||||||
|
|
||||||
|
static std::string mismatchedOrderAssign() {
|
||||||
|
std::string output;
|
||||||
|
BenchmarkHelper helper(WARMUP, NUM_ITER);
|
||||||
|
|
||||||
|
IntPowerParameters rows("rows", 2, 8, 20, 4); //2^8, 2^12, 2^16, 2^20
|
||||||
|
BoolParameters cf("cf");
|
||||||
|
|
||||||
|
ParametersBatch batch({&rows, &cf});
|
||||||
|
|
||||||
|
auto generator = PARAMETRIC_XZ() {
|
||||||
|
int numElements = 4194304; //2^24
|
||||||
|
int rows = p.getIntParam("rows");
|
||||||
|
int cols = numElements / rows;
|
||||||
|
bool c = p.getIntParam("cf");
|
||||||
|
|
||||||
|
auto arr = NDArrayFactory::create_<float>(c ? 'c' : 'f', {rows, cols});
|
||||||
|
auto arr2 = NDArrayFactory::create_<float>(c ? 'f' : 'c', {rows, cols});
|
||||||
|
x.push_back(arr);
|
||||||
|
z.push_back(arr2);
|
||||||
|
};
|
||||||
|
|
||||||
|
TransformBenchmark tb(transform::AnyOps::Assign, "assign");
|
||||||
|
output += helper.runOperationSuit(&tb, generator, batch, "C->F and F->C Assign F32");
|
||||||
|
|
||||||
|
//Also test: NCHW to NHWC and back
|
||||||
|
BoolParameters nchw("nchw");
|
||||||
|
int mb = 8;
|
||||||
|
int hw = 64;
|
||||||
|
int c = 3;
|
||||||
|
ParametersBatch batch2({&nchw});
|
||||||
|
auto generator2 = PARAMETRIC_XZ() {
|
||||||
|
bool nchw = p.getIntParam("nchw");
|
||||||
|
|
||||||
|
if(nchw) {
|
||||||
|
auto orig = NDArrayFactory::create_<float>('c', {mb, c, hw, hw});
|
||||||
|
orig->permutei({0,2,3,1});
|
||||||
|
x.push_back(orig);
|
||||||
|
z.push_back(NDArrayFactory::create_<float>('c', {mb, hw, hw, c}));
|
||||||
|
} else {
|
||||||
|
auto orig = NDArrayFactory::create_<float>('c', {mb, hw, hw, c});
|
||||||
|
orig->permutei({0,3,1,2});
|
||||||
|
x.push_back(orig);
|
||||||
|
z.push_back(NDArrayFactory::create_<float>('c', {mb, c, hw, hw}));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
TransformBenchmark tb2(transform::AnyOps::Assign, "assign_nchw");
|
||||||
|
output += helper.runOperationSuit(&tb2, generator2, batch2, "nchw->nhwc and nhwc->nchw Assign FP32");
|
||||||
|
return output;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
static std::string gemmBenchmark() {
|
||||||
|
std::string output;
|
||||||
|
output += "gemm " + DataTypeUtils::asString(DataTypeUtils::fromT<T>());
|
||||||
|
BenchmarkHelper helper(WARMUP, NUM_ITER);
|
||||||
|
|
||||||
|
for (int o = 0; o <= 1; o++) {
|
||||||
|
char resultOrder = (o == 0 ? 'f' : 'c');
|
||||||
|
IntPowerParameters sz("sz", 2, 4, 10, 2); //2^4=16, ..., 2^10=1024 -> 4 elements
|
||||||
|
|
||||||
|
ParametersBatch b({&sz});
|
||||||
|
|
||||||
|
auto generator = PARAMETRIC_XYZ() {
|
||||||
|
auto a = p.getIntParam("sz");
|
||||||
|
auto b = p.getIntParam("sz");
|
||||||
|
auto c = p.getIntParam("sz");
|
||||||
|
std::vector<Nd4jLong> shapeA;
|
||||||
|
std::vector<Nd4jLong> shapeB;
|
||||||
|
shapeA = {a, b};
|
||||||
|
shapeB = {b, c};
|
||||||
|
auto A = NDArrayFactory::create_<T>('c', shapeA);
|
||||||
|
auto B = NDArrayFactory::create_<T>('c', shapeB);
|
||||||
|
auto C = NDArrayFactory::create_<T>(resultOrder, {a, c});
|
||||||
|
|
||||||
|
x.push_back(A);
|
||||||
|
y.push_back(B);
|
||||||
|
z.push_back(C);
|
||||||
|
};
|
||||||
|
|
||||||
|
std::string n;
|
||||||
|
n += "Gemm - cOrder=";
|
||||||
|
n += resultOrder;
|
||||||
|
|
||||||
|
MatrixBenchmark mb(1.0, 0.0, false, false, n);
|
||||||
|
|
||||||
|
output += helper.runOperationSuit(&mb, generator, b, n.c_str());
|
||||||
|
}
|
||||||
|
|
||||||
|
return output;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
static std::string reduceFullBenchmark() {
|
||||||
|
std::string output;
|
||||||
|
output += "reduceFullBenchmark " + DataTypeUtils::asString(DataTypeUtils::fromT<T>());
|
||||||
|
|
||||||
|
BenchmarkHelper helper(WARMUP, NUM_ITER);
|
||||||
|
|
||||||
|
IntPowerParameters length("length", 2, 8, 20, 4); //2^8, 2^12, 2^16, 2^20
|
||||||
|
|
||||||
|
ParametersBatch batch({&length});
|
||||||
|
|
||||||
|
auto generator = PARAMETRIC_XYZ() {
|
||||||
|
auto arr = NDArrayFactory::create_<T>('c', {p.getIntParam("length")});
|
||||||
|
|
||||||
|
x.push_back(arr);
|
||||||
|
y.push_back(nullptr);
|
||||||
|
z.push_back(NDArrayFactory::create_<T>(0.0f));
|
||||||
|
};
|
||||||
|
|
||||||
|
ReductionBenchmark rbSum(reduce::SameOps::Sum, "sum");
|
||||||
|
ReductionBenchmark rbProd(reduce::SameOps::Prod, "prod");
|
||||||
|
ReductionBenchmark rbMax(reduce::SameOps::Max, "max");
|
||||||
|
|
||||||
|
output += helper.runOperationSuit(&rbSum, (const std::function<void (Parameters &, ResultSet &, ResultSet &, ResultSet &)>)(generator), batch, "Sum - Full Array Reduction");
|
||||||
|
output += helper.runOperationSuit(&rbProd, (const std::function<void (Parameters &, ResultSet &, ResultSet &, ResultSet &)>)(generator), batch, "Product - Full Array Reduction");
|
||||||
|
output += helper.runOperationSuit(&rbMax, (const std::function<void (Parameters &, ResultSet &, ResultSet &, ResultSet &)>)(generator), batch, "Maximum - Full Array Reduction");
|
||||||
|
|
||||||
|
//Index reduction
|
||||||
|
nd4j::ops::argmax opArgmax;
|
||||||
|
DeclarableBenchmark dbArgmax(opArgmax, "Argmax");
|
||||||
|
auto generator3 = PARAMETRIC_D(){
|
||||||
|
auto ctx = new Context(1);
|
||||||
|
|
||||||
|
ctx->setInputArray(0, NDArrayFactory::create_<T>('c', {p.getIntParam("length")}), true);
|
||||||
|
ctx->setInputArray(1, NDArrayFactory::create_<Nd4jLong>((Nd4jLong)0), true);
|
||||||
|
ctx->setOutputArray(0, NDArrayFactory::create_<Nd4jLong>(0), true);
|
||||||
|
|
||||||
|
return ctx;
|
||||||
|
};
|
||||||
|
output += helper.runOperationSuit(&dbArgmax, generator3, batch, "Argmax Full Array Reduction");
|
||||||
|
return output;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
static std::string reduceDimBenchmark(){
|
||||||
|
std::string output;
|
||||||
|
output += "reduceDimBenchmark " + DataTypeUtils::asString(DataTypeUtils::fromT<T>());
|
||||||
|
|
||||||
|
BenchmarkHelper helper(WARMUP, NUM_ITER);
|
||||||
|
|
||||||
|
int length[] = {1024*1024};
|
||||||
|
int pow[] = {10};
|
||||||
|
|
||||||
|
for( int i=0; i<1; i++ ){
|
||||||
|
IntPowerParameters rows("rows", 2, 0, pow[i], 2);
|
||||||
|
BoolParameters dim("dim");
|
||||||
|
|
||||||
|
|
||||||
|
ParametersBatch batch({&rows, &dim});
|
||||||
|
|
||||||
|
auto generator = PARAMETRIC_XYZ() {
|
||||||
|
int rows = p.getIntParam("rows");
|
||||||
|
int cols = length[i] / rows;
|
||||||
|
int dim = p.getIntParam("dim");
|
||||||
|
auto arr = NDArrayFactory::create_<T>('c', {rows, cols});
|
||||||
|
|
||||||
|
|
||||||
|
x.push_back(arr);
|
||||||
|
y.push_back(NDArrayFactory::create_<Nd4jLong>(dim));
|
||||||
|
|
||||||
|
NDArray* result;
|
||||||
|
if(dim == 0){
|
||||||
|
result = NDArrayFactory::create_<T>('c', {cols});
|
||||||
|
} else {
|
||||||
|
result = NDArrayFactory::create_<T>('c', {rows});
|
||||||
|
}
|
||||||
|
z.push_back(result);
|
||||||
|
};
|
||||||
|
|
||||||
|
ReductionBenchmark rbSum(reduce::SameOps::Sum, "sum");
|
||||||
|
ReductionBenchmark rbMax(reduce::SameOps::Max, "max");
|
||||||
|
|
||||||
|
std::string s1("Sum Along Dimension - ");
|
||||||
|
s1 += std::to_string(length[i]);
|
||||||
|
std::string s3("Maximum Along Dimension - ");
|
||||||
|
s3 += std::to_string(length[i]);
|
||||||
|
|
||||||
|
output += helper.runOperationSuit(&rbSum, (const std::function<void (Parameters &, ResultSet &, ResultSet &, ResultSet &)>)(generator), batch, s1.c_str());
|
||||||
|
output += helper.runOperationSuit(&rbMax, (const std::function<void (Parameters &, ResultSet &, ResultSet &, ResultSet &)>)(generator), batch, s3.c_str());
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
auto generator3 = PARAMETRIC_D(){
|
||||||
|
auto ctx = new Context(1);
|
||||||
|
int rows = p.getIntParam("rows");
|
||||||
|
int cols = length[i] / rows;
|
||||||
|
int dim = p.getIntParam("dim");
|
||||||
|
auto arr = NDArrayFactory::create_<T>('c', {rows, cols});
|
||||||
|
|
||||||
|
auto dimArg = new Nd4jLong[1];
|
||||||
|
dimArg[0] = dim;
|
||||||
|
ctx->setIArguments(dimArg, 1);
|
||||||
|
delete[] dimArg;
|
||||||
|
|
||||||
|
ctx->setInputArray(0, arr, true);
|
||||||
|
|
||||||
|
NDArray* result;
|
||||||
|
if(dim == 0){
|
||||||
|
result = NDArrayFactory::create_<Nd4jLong>('c', {cols});
|
||||||
|
} else {
|
||||||
|
result = NDArrayFactory::create_<Nd4jLong>('c', {rows});
|
||||||
|
}
|
||||||
|
ctx->setOutputArray(0, result, true);
|
||||||
|
return ctx;
|
||||||
|
};
|
||||||
|
|
||||||
|
std::string s5("Argmax Along Dimension - ");
|
||||||
|
s5 += std::to_string(length[i]);
|
||||||
|
|
||||||
|
nd4j::ops::argmax opArgmax;
|
||||||
|
DeclarableBenchmark dbArgmax(opArgmax, "Argmax");
|
||||||
|
output += helper.runOperationSuit(&dbArgmax, generator3, batch, s5.c_str());
|
||||||
|
}
|
||||||
|
return output;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
static std::string conv2d(){
|
||||||
|
std::string output;
|
||||||
|
output += "conv2d " + DataTypeUtils::asString(DataTypeUtils::fromT<T>());
|
||||||
|
BenchmarkHelper helper(WARMUP, NUM_ITER);
|
||||||
|
|
||||||
|
//Convolution2D op
|
||||||
|
BoolParameters nhwc("nhwc");
|
||||||
|
PredefinedParameters k("k", {2, 3});
|
||||||
|
|
||||||
|
ParametersBatch batch({&nhwc, &k});
|
||||||
|
nd4j::ops::conv2d conv2d;
|
||||||
|
DeclarableBenchmark benchmark(conv2d, "conv2d");
|
||||||
|
|
||||||
|
int hw = 64;
|
||||||
|
|
||||||
|
auto generator = PARAMETRIC_D() {
|
||||||
|
auto ctx = new Context(1);
|
||||||
|
int n = p.getIntParam("nhwc");
|
||||||
|
int khw = p.getIntParam("k");
|
||||||
|
|
||||||
|
if (n == 0) {
|
||||||
|
auto input = NDArrayFactory::create_<T>('c', {8, 3, hw, hw});
|
||||||
|
auto output = NDArrayFactory::create_<T>('c', {8, 3, hw, hw});
|
||||||
|
ctx->setInputArray(0, input, true);
|
||||||
|
ctx->setOutputArray(0, output, true);
|
||||||
|
} else {
|
||||||
|
auto input = NDArrayFactory::create_<T>('c', {8, hw, hw, 3});
|
||||||
|
auto output = NDArrayFactory::create_<T>('c', {8, hw, hw, 3});
|
||||||
|
ctx->setInputArray(0, input, true);
|
||||||
|
ctx->setOutputArray(0, output, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto b = NDArrayFactory::create_<T>('c', {3});
|
||||||
|
auto w = NDArrayFactory::create_<T>('c', {khw, khw, 3, 3}); // [kH, kW, iC, oC] always
|
||||||
|
|
||||||
|
ctx->setInputArray(1, w, true);
|
||||||
|
ctx->setInputArray(2, b, true);
|
||||||
|
|
||||||
|
auto args = new Nd4jLong[10];
|
||||||
|
args[0] = args[1] = khw; //Kernel
|
||||||
|
args[2] = args[3] = 1;//Stride
|
||||||
|
args[4] = args[5] = 0; //Pad
|
||||||
|
args[6] = args[7] = 1; //Dilation
|
||||||
|
args[8] = 1; //SAME
|
||||||
|
args[9] = n;//0-nchw, 1=nhwc
|
||||||
|
ctx->setIArguments(args, 10);
|
||||||
|
delete[] args;
|
||||||
|
|
||||||
|
return ctx;
|
||||||
|
};
|
||||||
|
|
||||||
|
output += helper.runOperationSuit(&benchmark, generator, batch, "Conv2d");
|
||||||
|
return output;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
static std::string pool2d() {
|
||||||
|
std::string output;
|
||||||
|
output += "pool2d " + DataTypeUtils::asString(DataTypeUtils::fromT<T>());
|
||||||
|
BenchmarkHelper helper(WARMUP, NUM_ITER);
|
||||||
|
|
||||||
|
//Convolution2D op
|
||||||
|
BoolParameters nhwc("nhwc");
|
||||||
|
PredefinedParameters k("k", {2, 3});
|
||||||
|
|
||||||
|
ParametersBatch batch({&nhwc, &k});
|
||||||
|
|
||||||
|
int c = 3;
|
||||||
|
int hw = 64;
|
||||||
|
|
||||||
|
auto generator = PARAMETRIC_D() {
|
||||||
|
auto ctx = new Context(1);
|
||||||
|
int n = p.getIntParam("nhwc");
|
||||||
|
int khw = p.getIntParam("k");
|
||||||
|
|
||||||
|
if (n == 0) {
|
||||||
|
auto input = NDArrayFactory::create_<T>('c', {8, c, hw, hw});
|
||||||
|
auto output = NDArrayFactory::create_<T>('c', {8, c, hw, hw});
|
||||||
|
ctx->setInputArray(0, input, true);
|
||||||
|
ctx->setOutputArray(0, output, true);
|
||||||
|
} else {
|
||||||
|
auto input = NDArrayFactory::create_<T>('c', {8, hw, hw, c});
|
||||||
|
auto output = NDArrayFactory::create_<T>('c', {8, hw, hw, c});
|
||||||
|
ctx->setInputArray(0, input, true);
|
||||||
|
ctx->setOutputArray(0, output, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto args = new Nd4jLong[11];
|
||||||
|
args[0] = args[1] = khw; //Kernel
|
||||||
|
args[2] = args[3] = 1;//Stride
|
||||||
|
args[4] = args[5] = 0; //Pad
|
||||||
|
args[6] = args[7] = 1; //Dilation
|
||||||
|
args[8] = 1; //SAME
|
||||||
|
args[9] = 0; //Divisor mode - 0 = exclude padding in divisor
|
||||||
|
args[10] = n;//0-nchw, 1=nhwc
|
||||||
|
ctx->setIArguments(args, 11);
|
||||||
|
delete[] args;
|
||||||
|
|
||||||
|
return ctx;
|
||||||
|
};
|
||||||
|
|
||||||
|
nd4j::ops::avgpool2d avgpool2d;
|
||||||
|
DeclarableBenchmark benchmark1(avgpool2d, "avgpool");
|
||||||
|
output += helper.runOperationSuit(&benchmark1, generator, batch, "Average Pool 2d");
|
||||||
|
|
||||||
|
nd4j::ops::maxpool2d maxpool2d;
|
||||||
|
DeclarableBenchmark benchmark2(maxpool2d, "maxpool");
|
||||||
|
output += helper.runOperationSuit(&benchmark2, generator, batch, "Max Pool 2d");
|
||||||
|
return output;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
static std::string lstmBenchmark() {
|
||||||
|
std::string output;
|
||||||
|
output += "lstm " + DataTypeUtils::asString(DataTypeUtils::fromT<T>());
|
||||||
|
BenchmarkHelper helper(WARMUP, NUM_ITER);
|
||||||
|
|
||||||
|
BoolParameters format("format"); //0=TNS=[seqLen,mb,size]; 1=NST=[mb,size,seqLen]
|
||||||
|
PredefinedParameters mb("mb", {1, 8});
|
||||||
|
int n = 128;
|
||||||
|
|
||||||
|
ParametersBatch batch({&format, &mb});
|
||||||
|
nd4j::ops::lstmBlock lstmBlock;
|
||||||
|
DeclarableBenchmark benchmark(lstmBlock, "lstm");
|
||||||
|
|
||||||
|
int seqLength = 8;
|
||||||
|
|
||||||
|
auto generator = PARAMETRIC_D() {
|
||||||
|
auto ctx = new Context(1);
|
||||||
|
int f = p.getIntParam("format");
|
||||||
|
int m = p.getIntParam("mb");
|
||||||
|
|
||||||
|
Nd4jLong l = 0;
|
||||||
|
ctx->setInputArray(0, NDArrayFactory::create_<Nd4jLong>(l), true); //Max TS length (unused)
|
||||||
|
|
||||||
|
|
||||||
|
if (f == 0) {
|
||||||
|
//TNS format
|
||||||
|
ctx->setInputArray(1, NDArrayFactory::create_<T>('c', {seqLength, m, n}), true); //x
|
||||||
|
ctx->setOutputArray(0, NDArrayFactory::create_<T>('c', {seqLength, m, n}), true); //i
|
||||||
|
ctx->setOutputArray(1, NDArrayFactory::create_<T>('c', {seqLength, m, n}), true); //c
|
||||||
|
ctx->setOutputArray(2, NDArrayFactory::create_<T>('c', {seqLength, m, n}), true); //f
|
||||||
|
ctx->setOutputArray(3, NDArrayFactory::create_<T>('c', {seqLength, m, n}), true); //o
|
||||||
|
ctx->setOutputArray(4, NDArrayFactory::create_<T>('c', {seqLength, m, n}), true); //z
|
||||||
|
ctx->setOutputArray(5, NDArrayFactory::create_<T>('c', {seqLength, m, n}), true); //h
|
||||||
|
ctx->setOutputArray(6, NDArrayFactory::create_<T>('c', {seqLength, m, n}), true); //y
|
||||||
|
} else {
|
||||||
|
//NST format
|
||||||
|
ctx->setInputArray(1, NDArrayFactory::create_<T>('f', {m, n, seqLength}), true); //x
|
||||||
|
ctx->setOutputArray(0, NDArrayFactory::create_<T>('f', {m, n, seqLength}), true); //i
|
||||||
|
ctx->setOutputArray(1, NDArrayFactory::create_<T>('f', {m, n, seqLength}), true); //c
|
||||||
|
ctx->setOutputArray(2, NDArrayFactory::create_<T>('f', {m, n, seqLength}), true); //f
|
||||||
|
ctx->setOutputArray(3, NDArrayFactory::create_<T>('f', {m, n, seqLength}), true); //o
|
||||||
|
ctx->setOutputArray(4, NDArrayFactory::create_<T>('f', {m, n, seqLength}), true); //z
|
||||||
|
ctx->setOutputArray(5, NDArrayFactory::create_<T>('f', {m, n, seqLength}), true); //h
|
||||||
|
ctx->setOutputArray(6, NDArrayFactory::create_<T>('f', {m, n, seqLength}), true); //y
|
||||||
|
}
|
||||||
|
|
||||||
|
auto cLast = NDArrayFactory::create_<T>('c', {m, n});
|
||||||
|
auto yLast = NDArrayFactory::create_<T>('c', {m, n});
|
||||||
|
auto W = NDArrayFactory::create_<T>('c', {2 * n, 4 * n});
|
||||||
|
auto Wci = NDArrayFactory::create_<T>('c', {n});
|
||||||
|
auto Wcf = NDArrayFactory::create_<T>('c', {n});
|
||||||
|
auto Wco = NDArrayFactory::create_<T>('c', {n});
|
||||||
|
auto b = NDArrayFactory::create_<T>('c', {4 * n});
|
||||||
|
|
||||||
|
ctx->setInputArray(2, cLast, true);
|
||||||
|
ctx->setInputArray(3, yLast, true);
|
||||||
|
ctx->setInputArray(4, W, true);
|
||||||
|
ctx->setInputArray(5, Wci, true);
|
||||||
|
ctx->setInputArray(6, Wcf, true);
|
||||||
|
ctx->setInputArray(7, Wco, true);
|
||||||
|
ctx->setInputArray(8, b, true);
|
||||||
|
|
||||||
|
auto iargs = new Nd4jLong[2];
|
||||||
|
iargs[0] = 0; //No peephole
|
||||||
|
iargs[1] = f;
|
||||||
|
ctx->setIArguments(iargs, 2);
|
||||||
|
delete[] iargs;
|
||||||
|
|
||||||
|
auto targs = new double[2];
|
||||||
|
targs[0] = 1.0; //forget bias
|
||||||
|
targs[1] = 0.0; //cell clipping value
|
||||||
|
ctx->setTArguments(targs, 2);
|
||||||
|
delete[] targs;
|
||||||
|
return ctx;
|
||||||
|
};
|
||||||
|
|
||||||
|
output += helper.runOperationSuit(&benchmark, generator, batch, "LSTMBlock");
|
||||||
|
return output;
|
||||||
|
}
|
||||||
|
|
||||||
|
static std::string broadcast2d() {
|
||||||
|
std::string output;
|
||||||
|
BenchmarkHelper helper(WARMUP, NUM_ITER);
|
||||||
|
|
||||||
|
int rows = 65536;
|
||||||
|
IntPowerParameters cols("cols", 2, 2, 12, 4); //2^2 to 2^12 in steps of 2 - 2^1=2, ..., 2^10=1024
|
||||||
|
BoolParameters axis("axis");
|
||||||
|
BoolParameters inplace("inplace");
|
||||||
|
|
||||||
|
ParametersBatch batch({&cols, &axis, &inplace});
|
||||||
|
|
||||||
|
auto generator = PARAMETRIC_D() {
|
||||||
|
auto a = p.getIntParam("axis");
|
||||||
|
auto arr = NDArrayFactory::create_<float>('c', {rows, p.getIntParam("cols")});
|
||||||
|
|
||||||
|
auto ctx = new Context(1);
|
||||||
|
ctx->setInputArray(0, arr, true);
|
||||||
|
if(a == 0){
|
||||||
|
ctx->setInputArray(1, NDArrayFactory::create_<float>('c', {rows, 1}), true);
|
||||||
|
} else {
|
||||||
|
ctx->setInputArray(1, NDArrayFactory::create_<float>('c', {1, p.getIntParam("cols")}), true);
|
||||||
|
}
|
||||||
|
if (p.getIntParam("inplace") == 1) {
|
||||||
|
ctx->setOutputArray(0, arr);
|
||||||
|
ctx->markInplace(true);
|
||||||
|
} else {
|
||||||
|
ctx->setOutputArray(0, NDArrayFactory::create_<float>('c', {rows, p.getIntParam("cols")}), true);
|
||||||
|
}
|
||||||
|
return ctx;
|
||||||
|
};
|
||||||
|
|
||||||
|
std::string s("add");
|
||||||
|
nd4j::ops::add op;
|
||||||
|
DeclarableBenchmark benchmark(op, "add");
|
||||||
|
output += helper.runOperationSuit(&benchmark, generator, batch, "Broadcast (Custom) Add - 2d");
|
||||||
|
return output;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string LightBenchmarkSuit::runSuit() {
|
||||||
|
#ifdef _RELEASE
|
||||||
|
std::vector<nd4j::DataType> dtypes({nd4j::DataType::FLOAT32, nd4j::DataType::HALF});
|
||||||
|
#else
|
||||||
|
std::vector<nd4j::DataType> dtypes({nd4j::DataType::FLOAT32});
|
||||||
|
#endif
|
||||||
|
|
||||||
|
std::string result;
|
||||||
|
|
||||||
|
for (auto t:dtypes) {
|
||||||
|
nd4j_printf("Running LightBenchmarkSuite.transformBenchmark [%s]\n", DataTypeUtils::asString(t).c_str());
|
||||||
|
BUILD_SINGLE_SELECTOR(t, result += transformBenchmark, (), LIBND4J_TYPES);
|
||||||
|
|
||||||
|
nd4j_printf("Running LightBenchmarkSuite.scalarBenchmark [%s]\n", DataTypeUtils::asString(t).c_str());
|
||||||
|
BUILD_SINGLE_SELECTOR(t, result += scalarBenchmark, (), LIBND4J_TYPES);
|
||||||
|
|
||||||
|
nd4j_printf("Running LightBenchmarkSuite.pairwiseBenchmark [%s]\n", DataTypeUtils::asString(t).c_str());
|
||||||
|
BUILD_SINGLE_SELECTOR(t, result += pairwiseBenchmark, (), LIBND4J_TYPES);
|
||||||
|
|
||||||
|
nd4j_printf("Running LightBenchmarkSuite.reduceFullBenchmark [%s]\n", DataTypeUtils::asString(t).c_str());
|
||||||
|
BUILD_SINGLE_SELECTOR(t, result += reduceFullBenchmark, (), LIBND4J_TYPES);
|
||||||
|
|
||||||
|
nd4j_printf("Running LightBenchmarkSuite.reduceDimBenchmark [%s]\n", DataTypeUtils::asString(t).c_str());
|
||||||
|
BUILD_SINGLE_SELECTOR(t, result += reduceDimBenchmark, (), LIBND4J_TYPES);
|
||||||
|
|
||||||
|
nd4j_printf("Running LightBenchmarkSuite.gemmBenchmark [%s]\n", DataTypeUtils::asString(t).c_str());
|
||||||
|
BUILD_SINGLE_SELECTOR(t, result += gemmBenchmark, (), LIBND4J_TYPES);
|
||||||
|
|
||||||
|
nd4j_printf("Running LightBenchmarkSuite.conv2d [%s]\n", DataTypeUtils::asString(t).c_str());
|
||||||
|
BUILD_SINGLE_SELECTOR(t, result += conv2d, (), LIBND4J_TYPES);
|
||||||
|
|
||||||
|
nd4j_printf("Running LightBenchmarkSuite.pool2d [%s]\n", DataTypeUtils::asString(t).c_str());
|
||||||
|
BUILD_SINGLE_SELECTOR(t, result += pool2d, (), LIBND4J_TYPES);
|
||||||
|
|
||||||
|
nd4j_printf("Running LightBenchmarkSuite.lstmBenchmark [%s]\n", DataTypeUtils::asString(t).c_str());
|
||||||
|
BUILD_SINGLE_SELECTOR(t, result += lstmBenchmark, (), LIBND4J_TYPES);
|
||||||
|
}
|
||||||
|
|
||||||
|
nd4j_printf("Running LightBenchmarkSuite.broadcast2d\n", "");
|
||||||
|
result += broadcast2d();
|
||||||
|
nd4j_printf("Running LightBenchmarkSuite.mismatchedOrderAssign\n", "");
|
||||||
|
result += mismatchedOrderAssign();
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
}
|
|
@ -311,3 +311,27 @@ TEST_F(DeclarableOpsTests15, test_lstmBlock_1) {
|
||||||
|
|
||||||
delete result;
|
delete result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_F(DeclarableOpsTests15, test_lstmBlock_2) {
    // Smoke test: lstmBlock in NST ([mb, size, seqLen]) layout with
    // f-ordered inputs must execute successfully.
    int seqLength = 32;
    int m = 64;   // minibatch
    int n = 32;   // layer size

    auto x0 = NDArrayFactory::create<Nd4jLong>(5);               // max TS length
    auto x1 = NDArrayFactory::create<float>('f', {m, n, seqLength});  // input
    auto x2 = NDArrayFactory::create<float>('f', {m, n});        // cLast
    auto x3 = NDArrayFactory::create<float>('f', {m, n});        // yLast
    auto x4 = NDArrayFactory::create<float>('f', {2 * n, 4 * n}); // W
    auto x5 = NDArrayFactory::create<float>('f', {n});           // Wci
    auto x6 = NDArrayFactory::create<float>('f', {n});           // Wcf
    auto x7 = NDArrayFactory::create<float>('f', {n});           // Wco
    auto x8 = NDArrayFactory::create<float>('f', {4 * n});       // bias

    nd4j::ops::lstmBlock op;
    // TArgs: {forget bias, cell clip}; IArgs: {peephole=0, format=1 (NST)}
    auto result = op.execute({&x0, &x1, &x2, &x3, &x4, &x5, &x6, &x7, &x8}, {1.0, 0.0}, {0, 1});
    ASSERT_EQ(Status::OK(), result->status());

    // FIX: removed unused local `auto z = result->at(0);` — it was never
    // asserted on and served no purpose.

    delete result;
}
|
||||||
|
|
|
@ -38,6 +38,8 @@
|
||||||
#include <helpers/ConstantShapeHelper.h>
|
#include <helpers/ConstantShapeHelper.h>
|
||||||
#include <helpers/ConstantTadHelper.h>
|
#include <helpers/ConstantTadHelper.h>
|
||||||
#include <array>
|
#include <array>
|
||||||
|
#include <performance/benchmarking/FullBenchmarkSuit.h>
|
||||||
|
#include <performance/benchmarking/LightBenchmarkSuit.h>
|
||||||
|
|
||||||
using namespace nd4j;
|
using namespace nd4j;
|
||||||
using namespace nd4j::graph;
|
using namespace nd4j::graph;
|
||||||
|
@ -164,6 +166,12 @@ TEST_F(PlaygroundTests, BroadcastOps2d) {
|
||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
TEST_F(PlaygroundTests, test_benchmark_suit_1) {
|
||||||
|
//LightBenchmarkSuit suit;
|
||||||
|
//auto output = suit.runSuit();
|
||||||
|
//nd4j_printf("SUIT OUTPUT\n%s\n", output.data());
|
||||||
|
}
|
||||||
|
|
||||||
TEST_F(PlaygroundTests, test_small_reductions) {
|
TEST_F(PlaygroundTests, test_small_reductions) {
|
||||||
auto f = NDArrayFactory::create<float>('c', {1024 ,1024});
|
auto f = NDArrayFactory::create<float>('c', {1024 ,1024});
|
||||||
f.assign(1.0f);
|
f.assign(1.0f);
|
||||||
|
|
|
@ -193,6 +193,7 @@ if ("${OPENBLAS}" OR CMAKE_BUILD_TYPE STREQUAL "Release")
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
file(GLOB_RECURSE PERF_SOURCES false ../../include/performance/*.cpp ../../include/performance/*.h)
|
||||||
file(GLOB_RECURSE EXCEPTIONS_SOURCES false ../../include/exceptions/*.cpp ../../include/exceptions/*.h)
|
file(GLOB_RECURSE EXCEPTIONS_SOURCES false ../../include/exceptions/*.cpp ../../include/exceptions/*.h)
|
||||||
file(GLOB_RECURSE EXEC_SOURCES false ../../include/execution/*.cpp ../../include/execution/*.h)
|
file(GLOB_RECURSE EXEC_SOURCES false ../../include/execution/*.cpp ../../include/execution/*.h)
|
||||||
file(GLOB_RECURSE TYPES_SOURCES false ../../include/types/*.cpp ../../include/types/*.h)
|
file(GLOB_RECURSE TYPES_SOURCES false ../../include/types/*.cpp ../../include/types/*.h)
|
||||||
|
@ -234,7 +235,7 @@ add_executable(runtests ${LOOPS_SOURCES} ../../blas/cpu/NativeOps.cpp ../../blas
|
||||||
../../include/cnpy/cnpy.cpp ../../include/nd4jmemset.h ../../include/nd4jmalloc.h
|
../../include/cnpy/cnpy.cpp ../../include/nd4jmemset.h ../../include/nd4jmalloc.h
|
||||||
../../blas/Environment.cpp ../../blas/Environment.h ${EXEC_SOURCES} ${HELPERS_SOURCES} ${ARRAY_SOURCES} ${TYPES_SOURCES}
|
../../blas/Environment.cpp ../../blas/Environment.h ${EXEC_SOURCES} ${HELPERS_SOURCES} ${ARRAY_SOURCES} ${TYPES_SOURCES}
|
||||||
${MEMORY_SOURCES} ${GRAPH_SOURCES} ${CUSTOMOPS_SOURCES} ${EXCEPTIONS_SOURCES} ${INDEXING_SOURCES} ${CUSTOMOPS_HELPERS_SOURCES}
|
${MEMORY_SOURCES} ${GRAPH_SOURCES} ${CUSTOMOPS_SOURCES} ${EXCEPTIONS_SOURCES} ${INDEXING_SOURCES} ${CUSTOMOPS_HELPERS_SOURCES}
|
||||||
${OPS_SOURCES} ${TEST_SOURCES})
|
${OPS_SOURCES} ${TEST_SOURCES} ${PERF_SOURCES})
|
||||||
|
|
||||||
target_link_libraries(runtests gtest ${MKLDNN} gtest_main ${BLAS_LIBRARIES})
|
target_link_libraries(runtests gtest ${MKLDNN} gtest_main ${BLAS_LIBRARIES})
|
||||||
|
|
||||||
|
|
|
@ -917,4 +917,14 @@ public class DefaultOpExecutioner implements OpExecutioner {
|
||||||
public DataBuffer createConstantBuffer(double[] values, DataType desiredType) {
|
public DataBuffer createConstantBuffer(double[] values, DataType desiredType) {
|
||||||
throw new UnsupportedOperationException();
|
throw new UnsupportedOperationException();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String runLightBenchmarkSuit(boolean printOut) {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String runFullBenchmarkSuit(boolean printOut) {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -463,4 +463,8 @@ public interface OpExecutioner {
|
||||||
DataBuffer createConstantBuffer(int[] values, DataType desiredType);
|
DataBuffer createConstantBuffer(int[] values, DataType desiredType);
|
||||||
DataBuffer createConstantBuffer(float[] values, DataType desiredType);
|
DataBuffer createConstantBuffer(float[] values, DataType desiredType);
|
||||||
DataBuffer createConstantBuffer(double[] values, DataType desiredType);
|
DataBuffer createConstantBuffer(double[] values, DataType desiredType);
|
||||||
|
|
||||||
|
|
||||||
|
String runLightBenchmarkSuit(boolean printOut);
|
||||||
|
String runFullBenchmarkSuit(boolean printOut);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1386,15 +1386,39 @@ public class Nd4j {
|
||||||
*/
|
*/
|
||||||
public static DataBuffer createBufferDetached(int[] shape, DataType type) {
|
public static DataBuffer createBufferDetached(int[] shape, DataType type) {
|
||||||
long length = ArrayUtil.prodLong(shape);
|
long length = ArrayUtil.prodLong(shape);
|
||||||
if (type == DataType.INT)
|
switch (type){
|
||||||
return DATA_BUFFER_FACTORY_INSTANCE.createInt(length);
|
case DOUBLE:
|
||||||
if (type == DataType.LONG)
|
return DATA_BUFFER_FACTORY_INSTANCE.createDouble(length);
|
||||||
return DATA_BUFFER_FACTORY_INSTANCE.createLong(new long[]{length});
|
case FLOAT:
|
||||||
else if (type == DataType.HALF)
|
return DATA_BUFFER_FACTORY_INSTANCE.createFloat(length);
|
||||||
|
case HALF:
|
||||||
return DATA_BUFFER_FACTORY_INSTANCE.createHalf(length);
|
return DATA_BUFFER_FACTORY_INSTANCE.createHalf(length);
|
||||||
|
case BFLOAT16:
|
||||||
return type == DataType.DOUBLE ? DATA_BUFFER_FACTORY_INSTANCE.createDouble(length) : DATA_BUFFER_FACTORY_INSTANCE.createFloat(length);
|
return DATA_BUFFER_FACTORY_INSTANCE.createBFloat16(length);
|
||||||
|
case UINT64:
|
||||||
|
return DATA_BUFFER_FACTORY_INSTANCE.createULong(length);
|
||||||
|
case LONG:
|
||||||
|
return DATA_BUFFER_FACTORY_INSTANCE.createLong(length);
|
||||||
|
case UINT32:
|
||||||
|
return DATA_BUFFER_FACTORY_INSTANCE.createUInt(length);
|
||||||
|
case INT:
|
||||||
|
return DATA_BUFFER_FACTORY_INSTANCE.createInt(length);
|
||||||
|
case UINT16:
|
||||||
|
return DATA_BUFFER_FACTORY_INSTANCE.createUShort(length);
|
||||||
|
case SHORT:
|
||||||
|
return DATA_BUFFER_FACTORY_INSTANCE.createShort(length);
|
||||||
|
case UBYTE:
|
||||||
|
return DATA_BUFFER_FACTORY_INSTANCE.createUByte(length);
|
||||||
|
case BYTE:
|
||||||
|
return DATA_BUFFER_FACTORY_INSTANCE.createByte(length);
|
||||||
|
case BOOL:
|
||||||
|
return DATA_BUFFER_FACTORY_INSTANCE.createBool(length);
|
||||||
|
case UTF8:
|
||||||
|
case COMPRESSED:
|
||||||
|
case UNKNOWN:
|
||||||
|
default:
|
||||||
|
throw new UnsupportedOperationException("Cannot create type: " + type);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -1403,16 +1427,39 @@ public class Nd4j {
|
||||||
public static DataBuffer createBuffer(long[] shape, DataType type) {
|
public static DataBuffer createBuffer(long[] shape, DataType type) {
|
||||||
long length = ArrayUtil.prodLong(shape);
|
long length = ArrayUtil.prodLong(shape);
|
||||||
|
|
||||||
if (type == DataType.INT)
|
switch (type) {
|
||||||
|
case BOOL:
|
||||||
|
return Nd4j.getMemoryManager().getCurrentWorkspace() == null ? DATA_BUFFER_FACTORY_INSTANCE.createBool(length, true) : DATA_BUFFER_FACTORY_INSTANCE.createBool(length, true, Nd4j.getMemoryManager().getCurrentWorkspace());
|
||||||
|
case UBYTE:
|
||||||
|
return Nd4j.getMemoryManager().getCurrentWorkspace() == null ? DATA_BUFFER_FACTORY_INSTANCE.createUByte(length, true) : DATA_BUFFER_FACTORY_INSTANCE.createUByte(length, true, Nd4j.getMemoryManager().getCurrentWorkspace());
|
||||||
|
case UINT16:
|
||||||
|
return Nd4j.getMemoryManager().getCurrentWorkspace() == null ? DATA_BUFFER_FACTORY_INSTANCE.createUShort(length, true) : DATA_BUFFER_FACTORY_INSTANCE.createUShort(length, true, Nd4j.getMemoryManager().getCurrentWorkspace());
|
||||||
|
case UINT32:
|
||||||
|
return Nd4j.getMemoryManager().getCurrentWorkspace() == null ? DATA_BUFFER_FACTORY_INSTANCE.createUInt(length, true) : DATA_BUFFER_FACTORY_INSTANCE.createUInt(length, true, Nd4j.getMemoryManager().getCurrentWorkspace());
|
||||||
|
case UINT64:
|
||||||
|
return Nd4j.getMemoryManager().getCurrentWorkspace() == null ? DATA_BUFFER_FACTORY_INSTANCE.createULong(length, true) : DATA_BUFFER_FACTORY_INSTANCE.createULong(length, true, Nd4j.getMemoryManager().getCurrentWorkspace());
|
||||||
|
case BYTE:
|
||||||
|
return Nd4j.getMemoryManager().getCurrentWorkspace() == null ? DATA_BUFFER_FACTORY_INSTANCE.createByte(length, true) : DATA_BUFFER_FACTORY_INSTANCE.createByte(length, true, Nd4j.getMemoryManager().getCurrentWorkspace());
|
||||||
|
case SHORT:
|
||||||
|
return Nd4j.getMemoryManager().getCurrentWorkspace() == null ? DATA_BUFFER_FACTORY_INSTANCE.createShort(length, true) : DATA_BUFFER_FACTORY_INSTANCE.createShort(length, true, Nd4j.getMemoryManager().getCurrentWorkspace());
|
||||||
|
case INT:
|
||||||
return Nd4j.getMemoryManager().getCurrentWorkspace() == null ? DATA_BUFFER_FACTORY_INSTANCE.createInt(length, true) : DATA_BUFFER_FACTORY_INSTANCE.createInt(length, true, Nd4j.getMemoryManager().getCurrentWorkspace());
|
return Nd4j.getMemoryManager().getCurrentWorkspace() == null ? DATA_BUFFER_FACTORY_INSTANCE.createInt(length, true) : DATA_BUFFER_FACTORY_INSTANCE.createInt(length, true, Nd4j.getMemoryManager().getCurrentWorkspace());
|
||||||
else if (type == DataType.LONG)
|
case LONG:
|
||||||
return Nd4j.getMemoryManager().getCurrentWorkspace() == null ? DATA_BUFFER_FACTORY_INSTANCE.createLong(length, true) : DATA_BUFFER_FACTORY_INSTANCE.createLong(length, true, Nd4j.getMemoryManager().getCurrentWorkspace());
|
return Nd4j.getMemoryManager().getCurrentWorkspace() == null ? DATA_BUFFER_FACTORY_INSTANCE.createLong(length, true) : DATA_BUFFER_FACTORY_INSTANCE.createLong(length, true, Nd4j.getMemoryManager().getCurrentWorkspace());
|
||||||
else if (type == DataType.HALF)
|
case HALF:
|
||||||
return Nd4j.getMemoryManager().getCurrentWorkspace() == null ? DATA_BUFFER_FACTORY_INSTANCE.createHalf(length, true) : DATA_BUFFER_FACTORY_INSTANCE.createHalf(length, true, Nd4j.getMemoryManager().getCurrentWorkspace());
|
return Nd4j.getMemoryManager().getCurrentWorkspace() == null ? DATA_BUFFER_FACTORY_INSTANCE.createHalf(length, true) : DATA_BUFFER_FACTORY_INSTANCE.createHalf(length, true, Nd4j.getMemoryManager().getCurrentWorkspace());
|
||||||
else if (type == DataType.DOUBLE)
|
case BFLOAT16:
|
||||||
return Nd4j.getMemoryManager().getCurrentWorkspace() == null ? DATA_BUFFER_FACTORY_INSTANCE.createDouble(length, true) : DATA_BUFFER_FACTORY_INSTANCE.createDouble(length, true, Nd4j.getMemoryManager().getCurrentWorkspace());
|
return Nd4j.getMemoryManager().getCurrentWorkspace() == null ? DATA_BUFFER_FACTORY_INSTANCE.createBFloat16(length, true) : DATA_BUFFER_FACTORY_INSTANCE.createBFloat16(length, true, Nd4j.getMemoryManager().getCurrentWorkspace());
|
||||||
else
|
case FLOAT:
|
||||||
return Nd4j.getMemoryManager().getCurrentWorkspace() == null ? DATA_BUFFER_FACTORY_INSTANCE.createFloat(length, true) : DATA_BUFFER_FACTORY_INSTANCE.createFloat(length, true, Nd4j.getMemoryManager().getCurrentWorkspace());
|
return Nd4j.getMemoryManager().getCurrentWorkspace() == null ? DATA_BUFFER_FACTORY_INSTANCE.createFloat(length, true) : DATA_BUFFER_FACTORY_INSTANCE.createFloat(length, true, Nd4j.getMemoryManager().getCurrentWorkspace());
|
||||||
|
case DOUBLE:
|
||||||
|
return Nd4j.getMemoryManager().getCurrentWorkspace() == null ? DATA_BUFFER_FACTORY_INSTANCE.createDouble(length, true) : DATA_BUFFER_FACTORY_INSTANCE.createDouble(length, true, Nd4j.getMemoryManager().getCurrentWorkspace());
|
||||||
|
case UTF8:
|
||||||
|
case COMPRESSED:
|
||||||
|
case UNKNOWN:
|
||||||
|
default:
|
||||||
|
throw new UnsupportedOperationException("Cannot create type: " + type);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -1424,19 +1471,31 @@ public class Nd4j {
|
||||||
switch (type){
|
switch (type){
|
||||||
|
|
||||||
case DOUBLE:
|
case DOUBLE:
|
||||||
DATA_BUFFER_FACTORY_INSTANCE.createDouble(length);
|
return DATA_BUFFER_FACTORY_INSTANCE.createDouble(length);
|
||||||
case FLOAT:
|
case FLOAT:
|
||||||
DATA_BUFFER_FACTORY_INSTANCE.createFloat(length);
|
return DATA_BUFFER_FACTORY_INSTANCE.createFloat(length);
|
||||||
case HALF:
|
case HALF:
|
||||||
return DATA_BUFFER_FACTORY_INSTANCE.createHalf(length);
|
return DATA_BUFFER_FACTORY_INSTANCE.createHalf(length);
|
||||||
|
case BFLOAT16:
|
||||||
|
return DATA_BUFFER_FACTORY_INSTANCE.createBFloat16(length);
|
||||||
|
case UINT64:
|
||||||
|
return DATA_BUFFER_FACTORY_INSTANCE.createULong(length);
|
||||||
case LONG:
|
case LONG:
|
||||||
return DATA_BUFFER_FACTORY_INSTANCE.createLong(length);
|
return DATA_BUFFER_FACTORY_INSTANCE.createLong(length);
|
||||||
|
case UINT32:
|
||||||
|
return DATA_BUFFER_FACTORY_INSTANCE.createUInt(length);
|
||||||
case INT:
|
case INT:
|
||||||
return DATA_BUFFER_FACTORY_INSTANCE.createInt(length);
|
return DATA_BUFFER_FACTORY_INSTANCE.createInt(length);
|
||||||
|
case UINT16:
|
||||||
|
return DATA_BUFFER_FACTORY_INSTANCE.createUShort(length);
|
||||||
case SHORT:
|
case SHORT:
|
||||||
|
return DATA_BUFFER_FACTORY_INSTANCE.createShort(length);
|
||||||
case UBYTE:
|
case UBYTE:
|
||||||
|
return DATA_BUFFER_FACTORY_INSTANCE.createUByte(length);
|
||||||
case BYTE:
|
case BYTE:
|
||||||
|
return DATA_BUFFER_FACTORY_INSTANCE.createByte(length);
|
||||||
case BOOL:
|
case BOOL:
|
||||||
|
return DATA_BUFFER_FACTORY_INSTANCE.createBool(length);
|
||||||
case UTF8:
|
case UTF8:
|
||||||
case COMPRESSED:
|
case COMPRESSED:
|
||||||
case UNKNOWN:
|
case UNKNOWN:
|
||||||
|
|
|
@ -1161,4 +1161,10 @@ public abstract class NativeOps extends Pointer {
|
||||||
public abstract Pointer constantBuffer(int dtype, DoublePointer data, int length);
|
public abstract Pointer constantBuffer(int dtype, DoublePointer data, int length);
|
||||||
|
|
||||||
public abstract Pointer constantBuffer(int dtype, @Cast("Nd4jLong *") LongPointer data, int length);
|
public abstract Pointer constantBuffer(int dtype, @Cast("Nd4jLong *") LongPointer data, int length);
|
||||||
|
|
||||||
|
public abstract String runLightBenchmarkSuit(boolean printOut);
|
||||||
|
|
||||||
|
public abstract String runFullBenchmarkSuit(boolean printOut);
|
||||||
|
|
||||||
|
public abstract long getCachedMemory(int deviceId);
|
||||||
}
|
}
|
||||||
|
|
|
@ -418,6 +418,126 @@ public class CudaDataBufferFactory implements DataBufferFactory {
|
||||||
return new CudaIntDataBuffer(length);
|
return new CudaIntDataBuffer(length);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DataBuffer createBFloat16(long length) {
|
||||||
|
return new CudaBfloat16DataBuffer(length);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DataBuffer createUInt(long length) {
|
||||||
|
return new CudaUInt32DataBuffer(length);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DataBuffer createUShort(long length) {
|
||||||
|
return new CudaUInt16DataBuffer(length);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DataBuffer createUByte(long length) {
|
||||||
|
return new CudaUByteDataBuffer(length);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DataBuffer createULong(long length) {
|
||||||
|
return new CudaUInt64DataBuffer(length);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DataBuffer createBool(long length) {
|
||||||
|
return new CudaBoolDataBuffer(length);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DataBuffer createShort(long length) {
|
||||||
|
return new CudaShortDataBuffer(length);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DataBuffer createByte(long length) {
|
||||||
|
return new CudaByteDataBuffer(length);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DataBuffer createBFloat16(long length, boolean initialize) {
|
||||||
|
return new CudaBfloat16DataBuffer(length, initialize);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DataBuffer createUInt(long length, boolean initialize) {
|
||||||
|
return new CudaUInt32DataBuffer(length, initialize);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DataBuffer createUShort(long length, boolean initialize) {
|
||||||
|
return new CudaUInt16DataBuffer(length, initialize);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DataBuffer createUByte(long length, boolean initialize) {
|
||||||
|
return new CudaUByteDataBuffer(length, initialize);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DataBuffer createULong(long length, boolean initialize) {
|
||||||
|
return new CudaUInt64DataBuffer(length, initialize);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DataBuffer createBool(long length, boolean initialize) {
|
||||||
|
return new CudaBoolDataBuffer(length, initialize);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DataBuffer createShort(long length, boolean initialize) {
|
||||||
|
return new CudaShortDataBuffer(length, initialize);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DataBuffer createByte(long length, boolean initialize) {
|
||||||
|
return new CudaByteDataBuffer(length, initialize);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DataBuffer createBFloat16(long length, boolean initialize, MemoryWorkspace workspace) {
|
||||||
|
return new CudaBfloat16DataBuffer(length, initialize, workspace);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DataBuffer createUInt(long length, boolean initialize, MemoryWorkspace workspace) {
|
||||||
|
return new CudaUInt32DataBuffer(length, initialize, workspace);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DataBuffer createUShort(long length, boolean initialize, MemoryWorkspace workspace) {
|
||||||
|
return new CudaUInt16DataBuffer(length, initialize, workspace);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DataBuffer createUByte(long length, boolean initialize, MemoryWorkspace workspace) {
|
||||||
|
return new CudaUByteDataBuffer(length, initialize, workspace);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DataBuffer createULong(long length, boolean initialize, MemoryWorkspace workspace) {
|
||||||
|
return new CudaUInt64DataBuffer(length, initialize, workspace);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DataBuffer createBool(long length, boolean initialize, MemoryWorkspace workspace) {
|
||||||
|
return new CudaBoolDataBuffer(length, initialize, workspace);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DataBuffer createShort(long length, boolean initialize, MemoryWorkspace workspace) {
|
||||||
|
return new CudaShortDataBuffer(length, initialize, workspace);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DataBuffer createByte(long length, boolean initialize, MemoryWorkspace workspace) {
|
||||||
|
return new CudaByteDataBuffer(length, initialize, workspace);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public DataBuffer createInt(long length, boolean initialize) {
|
public DataBuffer createInt(long length, boolean initialize) {
|
||||||
return new CudaIntDataBuffer(length, initialize);
|
return new CudaIntDataBuffer(length, initialize);
|
||||||
|
|
|
@ -2757,6 +2757,16 @@ public class CudaExecutioner extends DefaultOpExecutioner {
|
||||||
|
|
||||||
return buffer;
|
return buffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String runLightBenchmarkSuit(boolean printOut) {
|
||||||
|
return nativeOps.runLightBenchmarkSuit(printOut);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String runFullBenchmarkSuit(boolean printOut) {
|
||||||
|
return nativeOps.runFullBenchmarkSuit(printOut);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1977,6 +1977,13 @@ public static class NativeOps extends org.nd4j.nativeblas.NativeOps {
|
||||||
*/
|
*/
|
||||||
public native int getDeviceMajor(int deviceId);
|
public native int getDeviceMajor(int deviceId);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This method returns amount of cached memory
|
||||||
|
* @param deviceId
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public native @Cast("Nd4jLong") long getCachedMemory(int deviceId);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
* @param ptrToDeviceId
|
* @param ptrToDeviceId
|
||||||
|
@ -2976,6 +2983,7 @@ public static class NativeOps extends org.nd4j.nativeblas.NativeOps {
|
||||||
|
|
||||||
public native int unregisterGraph(@Cast("Nd4jPointer*") PointerPointer extraPointers, @Cast("Nd4jLong") long graphId);
|
public native int unregisterGraph(@Cast("Nd4jPointer*") PointerPointer extraPointers, @Cast("Nd4jLong") long graphId);
|
||||||
|
|
||||||
|
public native void deleteCharArray(@Cast("Nd4jPointer") Pointer pointer);
|
||||||
public native void deleteIntArray(@Cast("Nd4jPointer") Pointer pointer);
|
public native void deleteIntArray(@Cast("Nd4jPointer") Pointer pointer);
|
||||||
public native void deleteLongArray(@Cast("Nd4jPointer") Pointer pointer);
|
public native void deleteLongArray(@Cast("Nd4jPointer") Pointer pointer);
|
||||||
public native void deletePointerArray(@Cast("Nd4jPointer") Pointer pointer);
|
public native void deletePointerArray(@Cast("Nd4jPointer") Pointer pointer);
|
||||||
|
@ -3038,6 +3046,10 @@ public static class NativeOps extends org.nd4j.nativeblas.NativeOps {
|
||||||
public native ConstantDataBuffer constantBuffer(@Cast("nd4j::DataType") int dtype, DoubleBuffer data, int length);
|
public native ConstantDataBuffer constantBuffer(@Cast("nd4j::DataType") int dtype, DoubleBuffer data, int length);
|
||||||
public native ConstantDataBuffer constantBuffer(@Cast("nd4j::DataType") int dtype, double[] data, int length);
|
public native ConstantDataBuffer constantBuffer(@Cast("nd4j::DataType") int dtype, double[] data, int length);
|
||||||
public native ConstantDataBuffer constantBuffer(@Cast("nd4j::DataType") int dtype, ConstantDescriptor descriptor);
|
public native ConstantDataBuffer constantBuffer(@Cast("nd4j::DataType") int dtype, ConstantDescriptor descriptor);
|
||||||
|
|
||||||
|
|
||||||
|
public native @Cast("char*") String runLightBenchmarkSuit(@Cast("bool") boolean printOut);
|
||||||
|
public native @Cast("char*") String runFullBenchmarkSuit(@Cast("bool") boolean printOut);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -2185,4 +2185,15 @@ public class NativeOpExecutioner extends DefaultOpExecutioner {
|
||||||
sb.append(". Output var names: ").append(Arrays.toString(outNames));
|
sb.append(". Output var names: ").append(Arrays.toString(outNames));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String runLightBenchmarkSuit(boolean printOut) {
|
||||||
|
return loop.runLightBenchmarkSuit(printOut);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String runFullBenchmarkSuit(boolean printOut) {
|
||||||
|
return loop.runFullBenchmarkSuit(printOut);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1977,6 +1977,13 @@ public static class NativeOps extends org.nd4j.nativeblas.NativeOps {
|
||||||
*/
|
*/
|
||||||
public native int getDeviceMajor(int deviceId);
|
public native int getDeviceMajor(int deviceId);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This method returns amount of cached memory
|
||||||
|
* @param deviceId
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public native @Cast("Nd4jLong") long getCachedMemory(int deviceId);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
* @param ptrToDeviceId
|
* @param ptrToDeviceId
|
||||||
|
@ -2976,6 +2983,7 @@ public static class NativeOps extends org.nd4j.nativeblas.NativeOps {
|
||||||
|
|
||||||
public native int unregisterGraph(@Cast("Nd4jPointer*") PointerPointer extraPointers, @Cast("Nd4jLong") long graphId);
|
public native int unregisterGraph(@Cast("Nd4jPointer*") PointerPointer extraPointers, @Cast("Nd4jLong") long graphId);
|
||||||
|
|
||||||
|
public native void deleteCharArray(@Cast("Nd4jPointer") Pointer pointer);
|
||||||
public native void deleteIntArray(@Cast("Nd4jPointer") Pointer pointer);
|
public native void deleteIntArray(@Cast("Nd4jPointer") Pointer pointer);
|
||||||
public native void deleteLongArray(@Cast("Nd4jPointer") Pointer pointer);
|
public native void deleteLongArray(@Cast("Nd4jPointer") Pointer pointer);
|
||||||
public native void deletePointerArray(@Cast("Nd4jPointer") Pointer pointer);
|
public native void deletePointerArray(@Cast("Nd4jPointer") Pointer pointer);
|
||||||
|
@ -3038,6 +3046,10 @@ public static class NativeOps extends org.nd4j.nativeblas.NativeOps {
|
||||||
public native ConstantDataBuffer constantBuffer(@Cast("nd4j::DataType") int dtype, DoubleBuffer data, int length);
|
public native ConstantDataBuffer constantBuffer(@Cast("nd4j::DataType") int dtype, DoubleBuffer data, int length);
|
||||||
public native ConstantDataBuffer constantBuffer(@Cast("nd4j::DataType") int dtype, double[] data, int length);
|
public native ConstantDataBuffer constantBuffer(@Cast("nd4j::DataType") int dtype, double[] data, int length);
|
||||||
public native ConstantDataBuffer constantBuffer(@Cast("nd4j::DataType") int dtype, ConstantDescriptor descriptor);
|
public native ConstantDataBuffer constantBuffer(@Cast("nd4j::DataType") int dtype, ConstantDescriptor descriptor);
|
||||||
|
|
||||||
|
|
||||||
|
public native @Cast("char*") String runLightBenchmarkSuit(@Cast("bool") boolean printOut);
|
||||||
|
public native @Cast("char*") String runFullBenchmarkSuit(@Cast("bool") boolean printOut);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -3306,6 +3306,28 @@ public class Nd4jTestsC extends BaseNd4jTest {
|
||||||
log.info("arrayf data: {}", Arrays.toString(arrayf.data().asFloat()));
|
log.info("arrayf data: {}", Arrays.toString(arrayf.data().asFloat()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testCreateDetached_1() {
|
||||||
|
val shape = new int[]{10};
|
||||||
|
val dataTypes = new DataType[] {DataType.DOUBLE, DataType.BOOL, DataType.BYTE, DataType.UBYTE, DataType.SHORT, DataType.UINT16, DataType.INT, DataType.UINT32, DataType.LONG, DataType.UINT64, DataType.FLOAT, DataType.BFLOAT16, DataType.HALF};
|
||||||
|
|
||||||
|
for(DataType dt : dataTypes){
|
||||||
|
val dataBuffer = Nd4j.createBufferDetached(shape, dt);
|
||||||
|
assertEquals(dt, dataBuffer.dataType());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testCreateDetached_2() {
|
||||||
|
val shape = new long[]{10};
|
||||||
|
val dataTypes = new DataType[] {DataType.DOUBLE, DataType.BOOL, DataType.BYTE, DataType.UBYTE, DataType.SHORT, DataType.UINT16, DataType.INT, DataType.UINT32, DataType.LONG, DataType.UINT64, DataType.FLOAT, DataType.BFLOAT16, DataType.HALF};
|
||||||
|
|
||||||
|
for(DataType dt : dataTypes){
|
||||||
|
val dataBuffer = Nd4j.createBufferDetached(shape, dt);
|
||||||
|
assertEquals(dt, dataBuffer.dataType());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testPairwiseMixedC() {
|
public void testPairwiseMixedC() {
|
||||||
int[] shape2 = {12, 8};
|
int[] shape2 = {12, 8};
|
||||||
|
@ -7889,6 +7911,7 @@ public class Nd4jTestsC extends BaseNd4jTest {
|
||||||
assertEquals(Nd4j.createFromArray(1f, 3f, 4f), out);
|
assertEquals(Nd4j.createFromArray(1f, 3f, 4f), out);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private static INDArray fwd(INDArray input, INDArray W, INDArray b){
|
private static INDArray fwd(INDArray input, INDArray W, INDArray b){
|
||||||
INDArray ret = Nd4j.createUninitialized(input.size(0), W.size(1));
|
INDArray ret = Nd4j.createUninitialized(input.size(0), W.size(1));
|
||||||
input.mmuli(W, ret);
|
input.mmuli(W, ret);
|
||||||
|
|
|
@ -355,6 +355,7 @@ public interface DataBufferFactory {
|
||||||
|
|
||||||
DataBuffer create(DataType dataType, long length, boolean initialize, MemoryWorkspace workspace);
|
DataBuffer create(DataType dataType, long length, boolean initialize, MemoryWorkspace workspace);
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create an int data buffer
|
* Create an int data buffer
|
||||||
*
|
*
|
||||||
|
@ -363,6 +364,33 @@ public interface DataBufferFactory {
|
||||||
*/
|
*/
|
||||||
DataBuffer createInt(long length);
|
DataBuffer createInt(long length);
|
||||||
|
|
||||||
|
DataBuffer createBFloat16(long length);
|
||||||
|
DataBuffer createByte(long length);
|
||||||
|
DataBuffer createShort(long length);
|
||||||
|
DataBuffer createBool(long length);
|
||||||
|
DataBuffer createUShort(long length);
|
||||||
|
DataBuffer createUInt(long length);
|
||||||
|
DataBuffer createUByte(long length);
|
||||||
|
DataBuffer createULong(long length);
|
||||||
|
|
||||||
|
DataBuffer createBFloat16(long length, boolean initialize);
|
||||||
|
DataBuffer createByte(long length, boolean initialize);
|
||||||
|
DataBuffer createShort(long length, boolean initialize);
|
||||||
|
DataBuffer createBool(long length, boolean initialize);
|
||||||
|
DataBuffer createUShort(long length, boolean initialize);
|
||||||
|
DataBuffer createUInt(long length, boolean initialize);
|
||||||
|
DataBuffer createUByte(long length, boolean initialize);
|
||||||
|
DataBuffer createULong(long length, boolean initialize);
|
||||||
|
|
||||||
|
DataBuffer createBFloat16(long length, boolean initialize, MemoryWorkspace workspace);
|
||||||
|
DataBuffer createByte(long length, boolean initialize, MemoryWorkspace workspace);
|
||||||
|
DataBuffer createShort(long length, boolean initialize, MemoryWorkspace workspace);
|
||||||
|
DataBuffer createBool(long length, boolean initialize, MemoryWorkspace workspace);
|
||||||
|
DataBuffer createUShort(long length, boolean initialize, MemoryWorkspace workspace);
|
||||||
|
DataBuffer createUInt(long length, boolean initialize, MemoryWorkspace workspace);
|
||||||
|
DataBuffer createUByte(long length, boolean initialize, MemoryWorkspace workspace);
|
||||||
|
DataBuffer createULong(long length, boolean initialize, MemoryWorkspace workspace);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create an int data buffer, with optional initialization
|
* Create an int data buffer, with optional initialization
|
||||||
*
|
*
|
||||||
|
|
|
@ -354,11 +354,132 @@ public class DefaultDataBufferFactory implements DataBufferFactory {
|
||||||
return new IntBuffer(length);
|
return new IntBuffer(length);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DataBuffer createBFloat16(long length) {
|
||||||
|
return new BFloat16Buffer(length);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DataBuffer createUInt(long length) {
|
||||||
|
return new UInt32Buffer(length);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DataBuffer createUShort(long length) {
|
||||||
|
return new UInt16Buffer(length);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DataBuffer createUByte(long length) {
|
||||||
|
return new UInt8Buffer(length);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DataBuffer createULong(long length) {
|
||||||
|
return new UInt64Buffer(length);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DataBuffer createBool(long length) {
|
||||||
|
return new BoolBuffer(length);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DataBuffer createShort(long length) {
|
||||||
|
return new Int16Buffer(length);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DataBuffer createByte(long length) {
|
||||||
|
return new Int8Buffer(length);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DataBuffer createBFloat16(long length, boolean initialize) {
|
||||||
|
return new BFloat16Buffer(length, initialize);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DataBuffer createUInt(long length, boolean initialize) {
|
||||||
|
return new UInt32Buffer(length, initialize);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DataBuffer createUShort(long length, boolean initialize) {
|
||||||
|
return new UInt16Buffer(length, initialize);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DataBuffer createUByte(long length, boolean initialize) {
|
||||||
|
return new UInt8Buffer(length, initialize);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DataBuffer createULong(long length, boolean initialize) {
|
||||||
|
return new UInt64Buffer(length, initialize);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DataBuffer createBool(long length, boolean initialize) {
|
||||||
|
return new BoolBuffer(length, initialize);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DataBuffer createShort(long length, boolean initialize) {
|
||||||
|
return new Int16Buffer(length, initialize);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DataBuffer createByte(long length, boolean initialize) {
|
||||||
|
return new Int8Buffer(length, initialize);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public DataBuffer createInt(long length, boolean initialize) {
|
public DataBuffer createInt(long length, boolean initialize) {
|
||||||
return new IntBuffer(length, initialize);
|
return new IntBuffer(length, initialize);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DataBuffer createBFloat16(long length, boolean initialize, MemoryWorkspace workspace) {
|
||||||
|
return new BFloat16Buffer(length, initialize, workspace);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DataBuffer createUInt(long length, boolean initialize, MemoryWorkspace workspace) {
|
||||||
|
return new UInt32Buffer(length, initialize, workspace);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DataBuffer createUShort(long length, boolean initialize, MemoryWorkspace workspace) {
|
||||||
|
return new UInt16Buffer(length, initialize, workspace);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DataBuffer createUByte(long length, boolean initialize, MemoryWorkspace workspace) {
|
||||||
|
return new UInt8Buffer(length, initialize, workspace);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DataBuffer createULong(long length, boolean initialize, MemoryWorkspace workspace) {
|
||||||
|
return new UInt64Buffer(length, initialize, workspace);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DataBuffer createBool(long length, boolean initialize, MemoryWorkspace workspace) {
|
||||||
|
return new BoolBuffer(length, initialize, workspace);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DataBuffer createShort(long length, boolean initialize, MemoryWorkspace workspace) {
|
||||||
|
return new Int16Buffer(length, initialize, workspace);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DataBuffer createByte(long length, boolean initialize, MemoryWorkspace workspace) {
|
||||||
|
return new Int8Buffer(length, initialize, workspace);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public DataBuffer createInt(long length, boolean initialize, MemoryWorkspace workspace) {
|
public DataBuffer createInt(long length, boolean initialize, MemoryWorkspace workspace) {
|
||||||
return new IntBuffer(length, initialize, workspace);
|
return new IntBuffer(length, initialize, workspace);
|
||||||
|
@ -665,12 +786,12 @@ public class DefaultDataBufferFactory implements DataBufferFactory {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public DataBuffer createHalf(long length) {
|
public DataBuffer createHalf(long length) {
|
||||||
throw new UnsupportedOperationException("FP16 isn't supported for CPU yet");
|
return new HalfBuffer(length);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public DataBuffer createHalf(long length, boolean initialize) {
|
public DataBuffer createHalf(long length, boolean initialize) {
|
||||||
throw new UnsupportedOperationException("FP16 isn't supported for CPU yet");
|
return new HalfBuffer(length, initialize);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
Loading…
Reference in New Issue