From dd2043ef485a96de3d64563f1eed4c50a8cd72f7 Mon Sep 17 00:00:00 2001 From: raver119 Date: Mon, 9 Mar 2020 08:21:44 +0300 Subject: [PATCH] OpenMP Threads execution (#297) * omp threads backported Signed-off-by: raver119 * omp scalar reduce Signed-off-by: raver119 * timing Signed-off-by: raver119 * timing Signed-off-by: raver119 * minor tweaks Signed-off-by: raver119 * minor tweaks Signed-off-by: raver119 * namespace change Signed-off-by: raver119 * num_threads Signed-off-by: raver119 * one minor fix Signed-off-by: raver119 --- libnd4j/CMakeLists.txt | 4 +- libnd4j/include/array/DataTypeConversions.h | 8 +- libnd4j/include/array/NDArray.hXX | 20 +- libnd4j/include/array/cpu/NDArray.cpp | 10 +- libnd4j/include/array/cpu/NDArrayLambda.hpp | 30 +- libnd4j/include/execution/BlockingQueue.h | 2 +- libnd4j/include/execution/CallableInterface.h | 2 +- .../include/execution/CallableWithArguments.h | 2 +- libnd4j/include/execution/Engine.h | 2 +- libnd4j/include/execution/ExecutionMode.h | 2 +- libnd4j/include/execution/ThreadPool.h | 2 +- libnd4j/include/execution/Threads.h | 16 +- libnd4j/include/execution/Ticket.h | 2 +- .../include/execution/impl/BlockingQueue.cpp | 2 +- .../execution/impl/CallableInterface.cpp | 2 +- .../execution/impl/CallableWithArguments.cpp | 2 +- libnd4j/include/execution/impl/ThreadPool.cpp | 4 +- libnd4j/include/execution/impl/Threads.cpp | 554 ++++++++++++------ libnd4j/include/execution/impl/Ticket.cpp | 6 +- libnd4j/include/graph/Context.h | 6 +- libnd4j/include/graph/ContextPrototype.h | 6 +- libnd4j/include/graph/impl/Context.cpp | 10 +- .../include/graph/impl/ContextPrototype.cpp | 2 +- libnd4j/include/helpers/Loops.h | 26 +- libnd4j/include/helpers/cpu/MmulHelper.cpp | 10 +- .../helpers/cpu/loops/IndexReductionLoops.hpp | 20 +- libnd4j/include/helpers/impl/DebugHelper.cpp | 2 +- .../legacy/cpu/NativeOpExecutioner.cpp | 54 +- libnd4j/include/legacy/cpu/NativeOps.cpp | 12 +- libnd4j/include/legacy/cuda/NativeOps.cu | 2 +- .../include/loops/cpu/TrueBroadcastHelper.hpp | 10 +- libnd4j/include/loops/cpu/indexreduce.hpp | 4 +- libnd4j/include/loops/cpu/random.hpp | 22 +- .../include/loops/cpu/reduce/reduce_bool.cpp | 19 +- .../include/loops/cpu/reduce/reduce_float.hpp | 24 +- .../include/loops/cpu/reduce/reduce_long.cpp | 23 +- .../include/loops/cpu/reduce/reduce_same.cpp | 23 +- libnd4j/include/loops/cpu/reduce3.hpp | 6 +- .../include/loops/cpu/summarystatsreduce.cpp | 2 +- .../include/loops/impl/type_conversions.cpp | 6 +- libnd4j/include/ops/declarable/DeclarableOp.h | 2 +- .../include/ops/declarable/EmptyHandling.h | 2 +- .../include/ops/declarable/OpRegistrator.h | 16 +- .../include/ops/declarable/PlatformHelper.h | 6 +- .../declarable/helpers/cpu/BarnesHutTsne.cpp | 2 +- .../declarable/helpers/cpu/activations.cpp | 2 +- .../ops/declarable/helpers/cpu/addBias.cpp | 4 +- .../ops/declarable/helpers/cpu/adjust_hue.cpp | 4 +- .../helpers/cpu/adjust_saturation.cpp | 4 +- .../declarable/helpers/cpu/batched_gemm.cpp | 2 +- .../ops/declarable/helpers/cpu/batchnorm.cpp | 4 +- .../ops/declarable/helpers/cpu/betaInc.cpp | 2 +- .../ops/declarable/helpers/cpu/col2im.cpp | 4 +- .../declarable/helpers/cpu/compare_elem.cpp | 4 +- .../ops/declarable/helpers/cpu/confusion.cpp | 2 +- .../declarable/helpers/cpu/convolutions.cpp | 40 +- .../helpers/cpu/crop_and_resize.hpp | 2 +- .../ops/declarable/helpers/cpu/cross.cpp | 2 +- .../ops/declarable/helpers/cpu/d_t_s.cpp | 4 +- .../ops/declarable/helpers/cpu/diGamma.cpp | 2 +- .../ops/declarable/helpers/cpu/dilation2d.cpp | 2 +- .../ops/declarable/helpers/cpu/dropout.cpp | 4 +- .../ops/declarable/helpers/cpu/dynamic.cpp | 4 +- .../helpers/cpu/extract_patches.cpp | 2 +- .../ops/declarable/helpers/cpu/gather.cpp | 10 +- .../ops/declarable/helpers/cpu/hamming.cpp | 6 +- .../ops/declarable/helpers/cpu/hashcode.cpp | 4 +- .../ops/declarable/helpers/cpu/im2col.cpp | 4 +- .../helpers/cpu/image_draw_bounding_boxes.cpp | 2 +- .../declarable/helpers/cpu/image_resize.cpp | 22 +- .../declarable/helpers/cpu/imagesHelpers.cpp | 16 +- .../ops/declarable/helpers/cpu/ismax.cpp | 2 +- .../ops/declarable/helpers/cpu/lrn.cpp | 8 +- .../ops/declarable/helpers/cpu/lstm.cpp | 2 +- .../ops/declarable/helpers/cpu/lup.cpp | 16 +- .../declarable/helpers/cpu/matrixSetDiag.cpp | 2 +- .../helpers/cpu/matrix_diag_part.cpp | 2 +- .../declarable/helpers/cpu/nth_element.cpp | 2 +- .../ops/declarable/helpers/cpu/one_hot.cpp | 4 +- .../ops/declarable/helpers/cpu/polyGamma.cpp | 2 +- .../include/ops/declarable/helpers/cpu/qr.cpp | 4 +- .../ops/declarable/helpers/cpu/random.cpp | 2 +- .../ops/declarable/helpers/cpu/range.cpp | 2 +- .../ops/declarable/helpers/cpu/reverse.cpp | 18 +- .../ops/declarable/helpers/cpu/s_t_b.cpp | 8 +- .../ops/declarable/helpers/cpu/s_t_d.cpp | 4 +- .../ops/declarable/helpers/cpu/scatter.cpp | 14 +- .../ops/declarable/helpers/cpu/segment.cpp | 34 +- .../declarable/helpers/cpu/sequence_mask.cpp | 2 +- .../ops/declarable/helpers/cpu/sg_cb.cpp | 4 +- .../ops/declarable/helpers/cpu/softmax.cpp | 6 +- .../ops/declarable/helpers/cpu/solve.cpp | 2 +- .../ops/declarable/helpers/cpu/split.cpp | 2 +- .../ops/declarable/helpers/cpu/sru.cpp | 4 +- .../ops/declarable/helpers/cpu/stack.cpp | 8 +- .../ops/declarable/helpers/cpu/top_k.cpp | 2 +- .../ops/declarable/helpers/cpu/transforms.cpp | 38 +- .../helpers/cpu/triangular_solve.cpp | 4 +- .../ops/declarable/helpers/cpu/zeta.cpp | 2 +- .../include/ops/declarable/helpers/cross.h | 2 +- .../ops/declarable/helpers/impl/unique.cpp | 2 +- .../ops/declarable/impl/DeclarableOp.cpp | 4 +- .../ops/declarable/impl/OpRegistrator.cpp | 18 +- .../ops/declarable/impl/PlatformHelper.cpp | 4 +- .../platform/mkldnn/avgpooling2d.cpp | 2 +- .../declarable/platform/mkldnn/mkldnnUtils.h | 2 +- libnd4j/include/ops/impl/gemm.cpp | 8 +- libnd4j/include/ops/impl/specials_double.hpp | 6 +- libnd4j/include/ops/impl/specials_single.hpp | 26 +- libnd4j/include/ops/special_random_ops.h | 14 +- libnd4j/include/system/platform_boilerplate.h | 2 +- libnd4j/tests_cpu/layers_tests/CuDnnTests.cu | 4 +- .../layers_tests/PerformanceTests.cpp | 2 +- .../layers_tests/PlaygroundTests.cpp | 22 +- .../tests_cpu/layers_tests/ThreadsTests.cpp | 10 +- .../tests_cpu/libnd4j_tests/CMakeLists.txt | 2 +- .../TFGraphs/TFGraphTestAllSameDiff.java | 2 +- 117 files changed, 888 insertions(+), 591 deletions(-) diff --git a/libnd4j/CMakeLists.txt b/libnd4j/CMakeLists.txt index 712d123be..18337d864 100755 --- a/libnd4j/CMakeLists.txt +++ b/libnd4j/CMakeLists.txt @@ -21,9 +21,9 @@ if (SD_CUDA) enable_language(CUDA) set(CMAKE_CUDA_STANDARD 11) - set(DEFAULT_ENGINE "samediff::ENGINE_CUDA") + set(DEFAULT_ENGINE "sd::ENGINE_CUDA") else() - set(DEFAULT_ENGINE "samediff::ENGINE_CPU") + set(DEFAULT_ENGINE "sd::ENGINE_CPU") endif() # MSVC runtime lib can be either "MultiThreaded" or "MultiThreadedDLL", /MT and /MD respectively diff --git a/libnd4j/include/array/DataTypeConversions.h b/libnd4j/include/array/DataTypeConversions.h index 44f555533..c203e75ca 100644 --- a/libnd4j/include/array/DataTypeConversions.h +++ b/libnd4j/include/array/DataTypeConversions.h @@ -56,7 +56,7 @@ namespace sd { buffer[e] = canKeep ? static_cast(tmp[e]) : BitwiseUtils::swap_bytes(static_cast(tmp[e])); }; - samediff::Threads::parallel_for(func, 0, length); + sd::Threads::parallel_for(func, 0, length); #endif delete[] tmp; @@ -114,7 +114,7 @@ namespace sd { buffer[e] = canKeep ? static_cast(tmp[e]) : BitwiseUtils::swap_bytes(static_cast(tmp[e])); }; - samediff::Threads::parallel_for(func, 0, length); + sd::Threads::parallel_for(func, 0, length); #endif delete[] tmp; @@ -142,7 +142,7 @@ namespace sd { buffer[e] = canKeep ? static_cast(tmp[e]) : BitwiseUtils::swap_bytes(static_cast(tmp[e])); }; - samediff::Threads::parallel_for(func, 0, length); + sd::Threads::parallel_for(func, 0, length); #endif delete[] tmp; } @@ -168,7 +168,7 @@ namespace sd { buffer[e] = canKeep ? static_cast(tmp[e]) : BitwiseUtils::swap_bytes(static_cast(tmp[e])); }; - samediff::Threads::parallel_for(func, 0, length); + sd::Threads::parallel_for(func, 0, length); #endif delete[] tmp; } diff --git a/libnd4j/include/array/NDArray.hXX b/libnd4j/include/array/NDArray.hXX index 38ea087ba..aa3f2b9b0 100644 --- a/libnd4j/include/array/NDArray.hXX +++ b/libnd4j/include/array/NDArray.hXX @@ -515,7 +515,7 @@ NDArray::NDArray(const std::vector& shape, const std::vector& shape, const std::vector& shape, const std::vector& shape, const std::vector& shape, const std::vector& shape, const std::vector& reps) const { } }; - samediff::Threads::parallel_for(func, 0, resultLen); + sd::Threads::parallel_for(func, 0, resultLen); } else { @@ -284,7 +284,7 @@ NDArray NDArray::tile(const std::vector& reps) const { } }; - samediff::Threads::parallel_for(func, 0, resultLen); + sd::Threads::parallel_for(func, 0, resultLen); } result.tickWriteHost(); return result; @@ -397,7 +397,7 @@ static void repeat_(const NDArray& input, NDArray& output, const std::vector& func, NDArray& target) { z[e] = func(f[e]); }; - samediff::Threads::parallel_for(loop, 0, _length); + sd::Threads::parallel_for(loop, 0, _length); } else { if (f == z) { @@ -172,7 +172,7 @@ void NDArray::applyLambda(const std::function& func, NDArray& target) { } }; - samediff::Threads::parallel_for(loop, 0, _length); + sd::Threads::parallel_for(loop, 0, _length); } else { auto loop = PRAGMA_THREADS_FOR { @@ -184,7 +184,7 @@ void NDArray::applyLambda(const std::function& func, NDArray& target) { } }; - samediff::Threads::parallel_for(loop, 0, _length); + sd::Threads::parallel_for(loop, 0, _length); } } } @@ -221,7 +221,7 @@ void NDArray::applyIndexedLambda(const std::function& func, NDAr z[e] = func(e, f[e]); }; - samediff::Threads::parallel_for(loop, 0, _length); + sd::Threads::parallel_for(loop, 0, _length); } else { if (f == z) { @@ -233,7 +233,7 @@ void NDArray::applyIndexedLambda(const std::function& func, NDAr } }; - samediff::Threads::parallel_for(loop, 0, _length); + sd::Threads::parallel_for(loop, 0, _length); } else { auto loop = PRAGMA_THREADS_FOR { @@ -245,7 +245,7 @@ void NDArray::applyIndexedLambda(const std::function& func, NDAr } }; - samediff::Threads::parallel_for(loop, 0, _length); + sd::Threads::parallel_for(loop, 0, _length); } } } @@ -287,7 +287,7 @@ void NDArray::applyIndexedPairwiseLambda(NDArray& other, const std::function #include -namespace samediff { +namespace sd { template class BlockingQueue { private: diff --git a/libnd4j/include/execution/CallableInterface.h b/libnd4j/include/execution/CallableInterface.h index aad83b379..9dc11c97f 100644 --- a/libnd4j/include/execution/CallableInterface.h +++ b/libnd4j/include/execution/CallableInterface.h @@ -29,7 +29,7 @@ #include #include -namespace samediff { +namespace sd { /** * This class is suited for passing functions to execution threads without queues */ diff --git a/libnd4j/include/execution/CallableWithArguments.h b/libnd4j/include/execution/CallableWithArguments.h index 28ef8433e..ee11bb5aa 100644 --- a/libnd4j/include/execution/CallableWithArguments.h +++ b/libnd4j/include/execution/CallableWithArguments.h @@ -27,7 +27,7 @@ #include #include -namespace samediff { +namespace sd { class CallableWithArguments { FUNC_DO _function_do; FUNC_1D _function_1d; diff --git a/libnd4j/include/execution/Engine.h b/libnd4j/include/execution/Engine.h index cd30867a9..0f317be78 100644 --- a/libnd4j/include/execution/Engine.h +++ b/libnd4j/include/execution/Engine.h @@ -21,7 +21,7 @@ #ifndef SD_ENGINE_H #define SD_ENGINE_H -namespace samediff { +namespace sd { enum Engine { ENGINE_CPU = 0, ENGINE_CUDA = 1, diff --git a/libnd4j/include/execution/ExecutionMode.h b/libnd4j/include/execution/ExecutionMode.h index ea97e3fc9..969568df2 100644 --- a/libnd4j/include/execution/ExecutionMode.h +++ b/libnd4j/include/execution/ExecutionMode.h @@ -21,7 +21,7 @@ #ifndef SD_EXECUTIONMODE_H #define SD_EXECUTIONMODE_H -namespace samediff { +namespace sd { enum ExecutionMode { MODE_UNDEFINED = 0, MODE_TRAINING = 1, diff --git a/libnd4j/include/execution/ThreadPool.h b/libnd4j/include/execution/ThreadPool.h index 6811f1b1c..84dc48728 100644 --- a/libnd4j/include/execution/ThreadPool.h +++ b/libnd4j/include/execution/ThreadPool.h @@ -32,7 +32,7 @@ #include #include -namespace samediff { +namespace sd { class ND4J_EXPORT ThreadPool { private: static ThreadPool* _INSTANCE; diff --git a/libnd4j/include/execution/Threads.h b/libnd4j/include/execution/Threads.h index 2ea8295a8..4e3bf94e1 100644 --- a/libnd4j/include/execution/Threads.h +++ b/libnd4j/include/execution/Threads.h @@ -14,9 +14,9 @@ * SPDX-License-Identifier: Apache-2.0 ******************************************************************************/ - // - // @author raver119@gmail.com - // +// +// @author raver119@gmail.com +// #ifndef SAMEDIFF_THREADS_H #define SAMEDIFF_THREADS_H @@ -26,7 +26,7 @@ #include #include -namespace samediff { +namespace sd { class ND4J_EXPORT ThreadsHelper { public: static int numberOfThreads(int maxThreads, uint64_t numberOfElements); @@ -95,6 +95,14 @@ namespace samediff { }; class ND4J_EXPORT Threads { +#ifdef _OPENMP + public: + static std::mutex gThreadmutex; + static uint64_t _nFreeThreads; + static bool tryAcquire(int numThreads); + static bool freeThreads(int numThreads); +#endif + public: /** * This function executes 1 dimensional loop for a given number of threads diff --git a/libnd4j/include/execution/Ticket.h b/libnd4j/include/execution/Ticket.h index 80bf54145..b460b8be5 100644 --- a/libnd4j/include/execution/Ticket.h +++ b/libnd4j/include/execution/Ticket.h @@ -28,7 +28,7 @@ #include #include -namespace samediff { +namespace sd { class ND4J_EXPORT Ticket { private: bool _acquired = false; diff --git a/libnd4j/include/execution/impl/BlockingQueue.cpp b/libnd4j/include/execution/impl/BlockingQueue.cpp index 21c3b4c6a..6aa0f79cc 100644 --- a/libnd4j/include/execution/impl/BlockingQueue.cpp +++ b/libnd4j/include/execution/impl/BlockingQueue.cpp @@ -22,7 +22,7 @@ #include #include -namespace samediff { +namespace sd { template BlockingQueue::BlockingQueue(int queueSize) { _size = 0; diff --git a/libnd4j/include/execution/impl/CallableInterface.cpp b/libnd4j/include/execution/impl/CallableInterface.cpp index a719af848..73bdf3f3e 100644 --- a/libnd4j/include/execution/impl/CallableInterface.cpp +++ b/libnd4j/include/execution/impl/CallableInterface.cpp @@ -21,7 +21,7 @@ #include #include -namespace samediff { +namespace sd { CallableInterface::CallableInterface() { // initial state is available _available = true; diff --git a/libnd4j/include/execution/impl/CallableWithArguments.cpp b/libnd4j/include/execution/impl/CallableWithArguments.cpp index 8f17622b7..30497cc3a 100644 --- a/libnd4j/include/execution/impl/CallableWithArguments.cpp +++ b/libnd4j/include/execution/impl/CallableWithArguments.cpp @@ -20,7 +20,7 @@ #include -namespace samediff { +namespace sd { CallableWithArguments::CallableWithArguments(FUNC_DO func, uint64_t thread_id, uint64_t numThreads) { _function_do = func; _finished = false; diff --git a/libnd4j/include/execution/impl/ThreadPool.cpp b/libnd4j/include/execution/impl/ThreadPool.cpp index b02c4c4d5..5a1d8e85f 100644 --- a/libnd4j/include/execution/impl/ThreadPool.cpp +++ b/libnd4j/include/execution/impl/ThreadPool.cpp @@ -26,7 +26,7 @@ //#include #endif -namespace samediff { +namespace sd { // this function executed once per thread, it polls functions from queue, and executes them via wrapper static void executionLoop_(int thread_id, BlockingQueue *queue) { @@ -183,7 +183,7 @@ namespace samediff { } } - void ThreadPool::release(samediff::Ticket *ticket) { + void ThreadPool::release(sd::Ticket *ticket) { // returning ticket back to the queue std::unique_lock lock(_lock); _tickets.push(ticket); diff --git a/libnd4j/include/execution/impl/Threads.cpp b/libnd4j/include/execution/impl/Threads.cpp index 2d0ae1144..bf18e456a 100644 --- a/libnd4j/include/execution/impl/Threads.cpp +++ b/libnd4j/include/execution/impl/Threads.cpp @@ -25,8 +25,14 @@ #include #include +#ifdef _OPENMP -namespace samediff { +#include + +#endif + + +namespace sd { int ThreadsHelper::numberOfThreads(int maxThreads, uint64_t numberOfElements) { // let's see how many threads we actually need first @@ -51,34 +57,34 @@ namespace samediff { Span3 Span3::build(int loop, uint64_t threadID, uint64_t numThreads, int64_t startX, int64_t stopX, int64_t incX, int64_t startY, int64_t stopY, int64_t incY, int64_t startZ, int64_t stopZ, int64_t incZ) { switch (loop) { case 1: { - auto span = (stopX - startX) / numThreads; - auto s = span * threadID; - auto e = s + span; - if (threadID == numThreads - 1) - e = stopX; + auto span = (stopX - startX) / numThreads; + auto s = span * threadID; + auto e = s + span; + if (threadID == numThreads - 1) + e = stopX; - return Span3(s, e, incX, startY, stopY, incY, startZ, stopZ, incZ); - } + return Span3(s, e, incX, startY, stopY, incY, startZ, stopZ, incZ); + } break; case 2: { - auto span = (stopY - startY) / numThreads; - auto s = span * threadID; - auto e = s + span; - if (threadID == numThreads - 1) - e = stopY; + auto span = (stopY - startY) / numThreads; + auto s = span * threadID; + auto e = s + span; + if (threadID == numThreads - 1) + e = stopY; - return Span3(startX, stopX, incX, s, e, incY, startZ, stopZ, incZ); - } + return Span3(startX, stopX, incX, s, e, incY, startZ, stopZ, incZ); + } break; case 3: { - auto span = (stopZ - startZ) / numThreads; - auto s = span * threadID; - auto e = s + span; - if (threadID == numThreads - 1) - e = stopZ; + auto span = (stopZ - startZ) / numThreads; + auto s = span * threadID; + auto e = s + span; + if (threadID == numThreads - 1) + e = stopZ; - return Span3(startX, stopX, incX, startY, stopY, incY, s, e, incZ); - } + return Span3(startX, stopX, incX, startY, stopY, incY, s, e, incZ); + } break; default: throw std::runtime_error(""); @@ -116,24 +122,24 @@ namespace samediff { switch (loop) { case 1: { - auto span = (stopX - startX) / numThreads; - auto s = span * threadID; - auto e = s + span; - if (threadID == numThreads - 1) - e = stopX; + auto span = (stopX - startX) / numThreads; + auto s = span * threadID; + auto e = s + span; + if (threadID == numThreads - 1) + e = stopX; - return Span2(s, e, incX, startY, stopY, incY); - } + return Span2(s, e, incX, startY, stopY, incY); + } break; case 2: { - auto span = (stopY - startY) / numThreads; - auto s = span * threadID; - auto e = s + span; - if (threadID == numThreads - 1) - e = stopY; + auto span = (stopY - startY) / numThreads; + auto s = span * threadID; + auto e = s + span; + if (threadID == numThreads - 1) + e = stopY; - return Span2(startX, stopX, incX, s, e, incY); - } + return Span2(startX, stopX, incX, s, e, incY); + } break; default: throw std::runtime_error(""); @@ -270,7 +276,7 @@ namespace samediff { auto remY = iters_y % maxThreads; // in some cases there's nothing to think about, part 2 - if ((iters_x >= maxThreads && remX == 0 )|| (iters_y >= maxThreads && remY == 0)) + if ((iters_x >= maxThreads && remX == 0) || (iters_y >= maxThreads && remY == 0)) return maxThreads; // at this point we suppose that there's no loop perfectly matches number of our threads @@ -339,11 +345,35 @@ namespace samediff { return 1; } +#ifdef _OPENMP + + std::mutex Threads::gThreadmutex; + uint64_t Threads::_nFreeThreads = sd::Environment::getInstance()->maxThreads(); + + bool Threads::tryAcquire(int numThreads){ + std::lock_guard lock( gThreadmutex ); + auto nThreads = _nFreeThreads - numThreads; + if(nThreads >= 1){ + _nFreeThreads = nThreads; + + return true; + } + return false; + } + + bool Threads::freeThreads(int numThreads){ + std::lock_guard lock( gThreadmutex ); + _nFreeThreads += numThreads; + // check if correct number of threads + return _nFreeThreads > sd::Environment::getInstance()->maxThreads(); + } +#endif + int Threads::parallel_tad(FUNC_1D function, int64_t start, int64_t stop, int64_t increment, uint32_t numThreads) { if (start > stop) throw std::runtime_error("Threads::parallel_for got start > stop"); - auto delta = (stop - start); + auto delta = (stop - start) / increment; if (numThreads > delta) numThreads = delta; @@ -357,35 +387,57 @@ namespace samediff { return 1; } - auto ticket = ThreadPool::getInstance()->tryAcquire(numThreads); - if (ticket != nullptr) { - // if we got our threads - we'll run our jobs here - auto span = delta / numThreads; +#ifdef _OPENMP - for (uint32_t e = 0; e < numThreads; e++) { - auto start_ = span * e + start; - auto stop_ = start_ + span; - - // last thread will process tail - if (e == numThreads - 1) - stop_ = stop; - - // putting the task into the queue for a given thread - ticket->enqueue(e, numThreads, function, start_, stop_, increment); + if (tryAcquire(numThreads)) { +#pragma omp parallel for num_threads(numThreads) + for (auto e = start; e < stop; e += increment) { + function(omp_get_thread_num(), e, e + 1, 1); } - - // block and wait till all threads finished the job - ticket->waitAndRelease(); - - // we tell that parallelism request succeeded + freeThreads(numThreads); return numThreads; - } else { + } + else { // if there were no threads available - we'll execute function right within current thread function(0, start, stop, increment); // we tell that parallelism request declined return 1; } +#else + + sd::Environment::getInstance()->maxThreads(); + auto ticket = ThreadPool::getInstance()->tryAcquire(numThreads); + if (ticket != nullptr) { + // if we got our threads - we'll run our jobs here + auto span = delta / numThreads; + + for (uint32_t e = 0; e < numThreads; e++) { + auto start_ = span * e + start; + auto stop_ = start_ + span; + + // last thread will process tail + if (e == numThreads - 1) + stop_ = stop; + + // putting the task into the queue for a given thread + ticket->enqueue(e, numThreads, function, start_, stop_, increment); + } + + // block and wait till all threads finished the job + ticket->waitAndRelease(); + + // we tell that parallelism request succeeded + return numThreads; + } + else { + // if there were no threads available - we'll execute function right within current thread + function(0, start, stop, increment); + + // we tell that parallelism request declined + return 1; + } +#endif } int Threads::parallel_for(FUNC_1D function, int64_t start, int64_t stop, int64_t increment, uint32_t numThreads) { @@ -448,28 +500,53 @@ namespace samediff { // but we still mimic multithreaded execution return numThreads; - } else { - auto ticket = ThreadPool::getInstance()->tryAcquire(numThreads); - if (ticket != nullptr) { + } + else { +#ifdef _OPENMP - for (int e = 0; e < numThreads; e++) { - auto threadId = numThreads - e - 1; - auto span = Span2::build(splitLoop, threadId, numThreads, startX, stopX, incX, startY, stopY, incY); - - ticket->enqueue(e, numThreads, function, span.startX(), span.stopX(), span.incX(), span.startY(), span.stopY(), span.incY()); + if (tryAcquire(numThreads)) { +#pragma omp parallel for num_threads(numThreads) collapse(2) + for (auto x = startX; x < stopX; x += incX) { + for (auto y = startY; y < stopY; y += incY) { + function(omp_get_thread_num(), x, x+1, 1, y, y+1, 1); + } } - - // block until all threads finish their job - ticket->waitAndRelease(); - + freeThreads(numThreads); return numThreads; - } else { + } + else { // if there were no threads available - we'll execute function right within current thread function(0, startX, stopX, incX, startY, stopY, incY); // we tell that parallelism request declined return 1; } + +#else + + auto ticket = ThreadPool::getInstance()->tryAcquire(numThreads); + if (ticket != nullptr) { + + for (int e = 0; e < numThreads; e++) { + auto threadId = numThreads - e - 1; + auto span = Span2::build(splitLoop, threadId, numThreads, startX, stopX, incX, startY, stopY, incY); + + ticket->enqueue(e, numThreads, function, span.startX(), span.stopX(), span.incX(), span.startY(), span.stopY(), span.incY()); + } + + // block until all threads finish their job + ticket->waitAndRelease(); + + return numThreads; + } + else { + // if there were no threads available - we'll execute function right within current thread + function(0, startX, stopX, incX, startY, stopY, incY); + + // we tell that parallelism request declined + return 1; + } +#endif }; } @@ -484,6 +561,35 @@ namespace samediff { if (startZ > stopZ) throw std::runtime_error("Threads::parallel_for got startZ > stopZ"); + if (numThreads == 1) { + // loop is too small - executing function as is + function(0, startX, stopX, incX, startY, stopY, incY, startZ, stopZ, incZ); + return 1; + } + +#ifdef _OPENMP + + if (tryAcquire(numThreads)) { +#pragma omp parallel for num_threads(numThreads) collapse(3) + for (auto x = startX; x < stopX; x += incX) { + for (auto y = startY; y < stopY; y += incY) { + for (auto z = startZ; z < stopZ; z += incZ) { + function(omp_get_thread_num(), x, x+1, 1, y, y+1, 1, z, z+1, 1); + } + } + } + + freeThreads(numThreads); + return numThreads; + } + else { + // if there were no threads available - we'll execute function right within current thread + function(0, startX, stopX, incX, startY, stopY, incY, startZ, stopZ, incZ); + + // we tell that parallelism request declined + return 1; + } +#else auto delta_x = stopX - startX; auto delta_y = stopY - startY; auto delta_z = stopZ - startZ; @@ -500,52 +606,79 @@ namespace samediff { } auto ticket = ThreadPool::getInstance()->tryAcquire(numThreads); - if (ticket != nullptr) { - auto splitLoop = ThreadsHelper::pickLoop3d(numThreads, itersX, itersY, itersZ); + if (ticket != nullptr) { + auto splitLoop = ThreadsHelper::pickLoop3d(numThreads, itersX, itersY, itersZ); - for (int e = 0; e < numThreads; e++) { - auto thread_id = numThreads - e - 1; - auto span = Span3::build(splitLoop, thread_id, numThreads, startX, stopX, incX, startY, stopY, incY, startZ, stopZ, incZ); + for (int e = 0; e < numThreads; e++) { + auto thread_id = numThreads - e - 1; + auto span = Span3::build(splitLoop, thread_id, numThreads, startX, stopX, incX, startY, stopY, incY, startZ, stopZ, incZ); - ticket->enqueue(e, numThreads, function, span.startX(), span.stopX(), span.incX(), span.startY(), span.stopY(), span.incY(), span.startZ(), span.stopZ(), span.incZ()); - } + ticket->enqueue(e, numThreads, function, span.startX(), span.stopX(), span.incX(), span.startY(), span.stopY(), span.incY(), span.startZ(), span.stopZ(), span.incZ()); + } - // block until we're done - ticket->waitAndRelease(); + // block until we're done + ticket->waitAndRelease(); - // we tell that parallelism request succeeded - return numThreads; - } else { - // if there were no threads available - we'll execute function right within current thread - function(0, startX, stopX, incX, startY, stopY, incY, startZ, stopZ, incZ); - - // we tell that parallelism request declined - return 1; - } + // we tell that parallelism request succeeded + return numThreads; + } + else { + // if there were no threads available - we'll execute function right within current thread + function(0, startX, stopX, incX, startY, stopY, incY, startZ, stopZ, incZ); + // we tell that parallelism request declined + return 1; + } +#endif } int Threads::parallel_do(FUNC_DO function, uint64_t numThreads) { - auto ticket = ThreadPool::getInstance()->tryAcquire(numThreads - 1); - if (ticket != nullptr) { - // submit tasks one by one - for (uint64_t e = 0; e < numThreads - 1; e++) - ticket->enqueue(e, numThreads, function); + if (numThreads == 1) { + function(0, numThreads); + return 1; + } - function(numThreads - 1, numThreads); +#ifdef _OPENMP - ticket->waitAndRelease(); + if (tryAcquire(numThreads)) { +#pragma omp parallel for num_threads(numThreads) + for (int e = 0; e < numThreads; e++) { + function(e, numThreads); + } + freeThreads(numThreads); return numThreads; - } else { + } + else { // if there's no threads available - we'll execute function sequentially one by one for (uint64_t e = 0; e < numThreads; e++) function(e, numThreads); return numThreads; } +#else + auto ticket = ThreadPool::getInstance()->tryAcquire(numThreads - 1); + if (ticket != nullptr) { + // submit tasks one by one + for (uint64_t e = 0; e < numThreads - 1; e++) + ticket->enqueue(e, numThreads, function); + + function(numThreads - 1, numThreads); + + ticket->waitAndRelease(); + + return numThreads; + } + else { + // if there's no threads available - we'll execute function sequentially one by one + for (uint64_t e = 0; e < numThreads; e++) + function(e, numThreads); + + return numThreads; + } +#endif return numThreads; } @@ -565,26 +698,44 @@ namespace samediff { if (numThreads == 1) return function(0, start, stop, increment); - auto ticket = ThreadPool::getInstance()->tryAcquire(numThreads - 1); - if (ticket == nullptr) - return function(0, start, stop, increment); - // create temporary array int64_t intermediatery[256]; auto span = delta / numThreads; - // execute threads in parallel - for (uint32_t e = 0; e < numThreads; e++) { - auto start_ = span * e + start; - auto stop_ = span * (e + 1) + start; +#ifdef _OPENMP + if (tryAcquire(numThreads)) { +#pragma omp parallel for num_threads(numThreads) + for (int e = 0; e < numThreads; e++) { + auto start_ = span * e + start; + auto stop_ = span * (e + 1) + start; - if (e == numThreads - 1) - intermediatery[e] = function(e, start_, stop, increment); - else - ticket->enqueue(e, numThreads, &intermediatery[e], function, start_, stop_, increment); + intermediatery[e] = function(e, start_, e == numThreads - 1 ? stop : stop_, increment); + } + freeThreads(numThreads); } + else{ + // if there were no thre ads available - we'll execute function right within current thread + return function(0, start, stop, increment); + } +#else + auto ticket = ThreadPool::getInstance()->tryAcquire(numThreads - 1); + if (ticket == nullptr) + return function(0, start, stop, increment); - ticket->waitAndRelease(); + // execute threads in parallel + for (uint32_t e = 0; e < numThreads; e++) { + auto start_ = span * e + start; + auto stop_ = span * (e + 1) + start; + + if (e == numThreads - 1) + intermediatery[e] = function(e, start_, stop, increment); + else + ticket->enqueue(e, numThreads, &intermediatery[e], function, start_, stop_, increment); + } + + ticket->waitAndRelease(); + +#endif // aggregate results in single thread for (uint64_t e = 1; e < numThreads; e++) @@ -609,26 +760,47 @@ namespace samediff { if (numThreads == 1) return function(0, start, stop, increment); - auto ticket = ThreadPool::getInstance()->tryAcquire(numThreads - 1); - if (ticket == nullptr) - return function(0, start, stop, increment); - // create temporary array double intermediatery[256]; auto span = delta / numThreads; - // execute threads in parallel - for (uint32_t e = 0; e < numThreads; e++) { - auto start_ = span * e + start; - auto stop_ = span * (e + 1) + start; +#ifdef _OPENMP - if (e == numThreads - 1) - intermediatery[e] = function(e, start_, stop, increment); - else - ticket->enqueue(e, numThreads, &intermediatery[e], function, start_, stop_, increment); + if (tryAcquire(numThreads)) { +#pragma omp parallel for num_threads(numThreads) + for (int e = 0; e < numThreads; e++) { + auto start_ = span * e + start; + auto stop_ = span * (e + 1) + start; + + intermediatery[e] = function(e, start_, e == numThreads - 1 ? stop : stop_, increment); + } + freeThreads(numThreads); + } + else{ + // if there were no thre ads available - we'll execute function right within current thread + return function(0, start, stop, increment); } - ticket->waitAndRelease(); +#else + + auto ticket = ThreadPool::getInstance()->tryAcquire(numThreads - 1); + if (ticket == nullptr) + return function(0, start, stop, increment); + + // execute threads in parallel + for (uint32_t e = 0; e < numThreads; e++) { + auto start_ = span * e + start; + auto stop_ = span * (e + 1) + start; + + if (e == numThreads - 1) + intermediatery[e] = function(e, start_, stop, increment); + else + ticket->enqueue(e, numThreads, &intermediatery[e], function, start_, stop_, increment); + } + + ticket->waitAndRelease(); + +#endif // aggregate results in single thread for (uint64_t e = 1; e < numThreads; e++) @@ -639,7 +811,7 @@ namespace samediff { } - int Threads::parallel_aligned_increment(FUNC_1D function, int64_t start, int64_t stop, int64_t increment, size_t type_size , uint32_t req_numThreads) { + int Threads::parallel_aligned_increment(FUNC_1D function, int64_t start, int64_t stop, int64_t increment, size_t type_size, uint32_t req_numThreads) { if (start > stop) throw std::runtime_error("Threads::parallel_for got start > stop"); auto num_elements = (stop - start); @@ -647,6 +819,7 @@ namespace samediff { //so we will parition considering delta but not total elements auto delta = (stop - start) / increment; + // in some cases we just fire func as is if (delta == 0 || req_numThreads == 1) { function(0, start, stop, increment); @@ -654,7 +827,24 @@ namespace samediff { } int numThreads = 0; - int adjusted_numThreads = samediff::ThreadsHelper::numberOfThreads(req_numThreads, (num_elements * sizeof(double)) / (200 * type_size)); + struct th_span { + Nd4jLong start; + Nd4jLong end; + }; +#ifdef _OPENMP + constexpr int max_thread_count = 8; +#else + constexpr int max_thread_count = 1024; +#endif + th_span thread_spans[max_thread_count]; + + req_numThreads = req_numThreads > max_thread_count ? max_thread_count : req_numThreads; + +#ifdef _OPENMP + int adjusted_numThreads = max_thread_count; +#else + int adjusted_numThreads = sd::ThreadsHelper::numberOfThreads(req_numThreads, (num_elements * sizeof(double)) / (200 * type_size)); +#endif if (adjusted_numThreads > delta) adjusted_numThreads = delta; @@ -663,61 +853,89 @@ namespace samediff { function(0, start, stop, increment); return 1; } - //take span as ceil + + + + //take span as ceil auto spand = std::ceil((double)delta / (double)adjusted_numThreads); numThreads = static_cast(std::ceil((double)delta / spand)); - auto span = static_cast(spand); + auto span = static_cast(spand); - auto ticket = samediff::ThreadPool::getInstance()->tryAcquire(numThreads); - if (ticket != nullptr) { - //tail_add is additional value of the last part - //it could be negative or positive - //we will spread that value across - auto tail_add = delta - numThreads * span; - Nd4jLong begin = 0; - Nd4jLong end = 0; - //we will try enqueu bigger parts first - decltype(span) span1, span2; - int last = 0; - if (tail_add >= 0) { - //for span == 1 , tail_add is 0 - last = tail_add; - span1 = span + 1; - span2 = span; + //tail_add is additional value of the last part + //it could be negative or positive + //we will spread that value across + auto tail_add = delta - numThreads * span; + Nd4jLong begin = 0; + Nd4jLong end = 0; + + //we will try enqueu bigger parts first + decltype(span) span1, span2; + int last = 0; + if (tail_add >= 0) { + //for span == 1 , tail_add is 0 + last = tail_add; + span1 = span + 1; + span2 = span; + } + else { + last = numThreads + tail_add;// -std::abs(tail_add); + span1 = span; + span2 = span - 1; + } + for (int i = 0; i < last; i++) { + end = begin + span1 * increment; + // putting the task into the queue for a given thread + thread_spans[i].start = begin; + thread_spans[i].end = end; + begin = end; + } + for (int i = last; i < numThreads - 1; i++) { + end = begin + span2 * increment; + // putting the task into the queue for a given thread + thread_spans[i].start = begin; + thread_spans[i].end = end; + begin = end; + } + //for last one enqueue last offset as stop + //we need it in case our ((stop-start) % increment ) > 0 + thread_spans[numThreads - 1].start = begin; + thread_spans[numThreads - 1].end = stop; + +#ifdef _OPENMP + if (tryAcquire(numThreads)) { +#pragma omp parallel for num_threads(numThreads) + for (size_t j = 0; j < numThreads; j++) { + function(j, thread_spans[j].start, thread_spans[j].end, increment); } - else { - last = numThreads + tail_add;// -std::abs(tail_add); - span1 = span; - span2 = span - 1; - } - for (int i = 0; i < last; i++) { - end = begin + span1 * increment; - // putting the task into the queue for a given thread - ticket->enqueue(i, numThreads, function, begin, end, increment); - begin = end; - } - for (int i = last; i < numThreads - 1; i++) { - end = begin + span2 * increment; - // putting the task into the queue for a given thread - ticket->enqueue(i, numThreads, function, begin, end, increment); - begin = end; - } - //for last one enqueue last offset as stop - //we need it in case our ((stop-start) % increment ) > 0 - ticket->enqueue(numThreads - 1, numThreads, function, begin, stop, increment); - // block and wait till all threads finished the job - ticket->waitAndRelease(); - // we tell that parallelism request succeeded + freeThreads(numThreads); return numThreads; } else { - // if there were no threads available - we'll execute function right within current thread function(0, start, stop, increment); // we tell that parallelism request declined return 1; } +#else + auto ticket = sd::ThreadPool::getInstance()->tryAcquire(numThreads); + if (ticket != nullptr) { + + for (size_t j = 0; j < numThreads; j++) { + ticket->enqueue(j, numThreads, function, thread_spans[j].start, thread_spans[j].end, increment); + } + // block and wait till all threads finished the job + ticket->waitAndRelease(); + // we tell that parallelism request succeeded + return numThreads; + } + else { + // if there were no threads available - we'll execute function right within current thread + function(0, start, stop, increment); + // we tell that parallelism request declined + return 1; + } +#endif } +} -} \ No newline at end of file diff --git a/libnd4j/include/execution/impl/Ticket.cpp b/libnd4j/include/execution/impl/Ticket.cpp index 98cb05376..d58e637cc 100644 --- a/libnd4j/include/execution/impl/Ticket.cpp +++ b/libnd4j/include/execution/impl/Ticket.cpp @@ -23,7 +23,7 @@ #include #include -namespace samediff { +namespace sd { Ticket::Ticket(const std::vector*> &queues) { _acquired = true; _queues = queues; @@ -38,7 +38,7 @@ namespace samediff { return _acquired; } - void Ticket::enqueue(int thread_id, samediff::CallableWithArguments *callable) { + void Ticket::enqueue(int thread_id, sd::CallableWithArguments *callable) { _queues[thread_id]->put(callable); _callables.emplace_back(callable); } @@ -88,7 +88,7 @@ namespace samediff { } - void Ticket::attach(uint32_t thread_id, samediff::CallableInterface *interface) { + void Ticket::attach(uint32_t thread_id, sd::CallableInterface *interface) { _interfaces[thread_id] = interface; } } \ No newline at end of file diff --git a/libnd4j/include/graph/Context.h b/libnd4j/include/graph/Context.h index 96d7e8b12..2351cc320 100644 --- a/libnd4j/include/graph/Context.h +++ b/libnd4j/include/graph/Context.h @@ -112,7 +112,7 @@ namespace sd { sd::random::RandomBuffer* getRNG(); void setRNG(sd::random::RandomBuffer* rng); - void setTargetEngine(samediff::Engine engine); + void setTargetEngine(sd::Engine engine); VariableSpace *getVariableSpace(); @@ -228,8 +228,8 @@ namespace sd { void setShapeFunctionOverride(bool reallyOverride); bool shapeFunctionOverride(); - samediff::ExecutionMode executionMode(); - void setExecutionMode(samediff::ExecutionMode executionMode); + sd::ExecutionMode executionMode(); + void setExecutionMode(sd::ExecutionMode executionMode); bool isTraining(); bool isInference(); diff --git a/libnd4j/include/graph/ContextPrototype.h b/libnd4j/include/graph/ContextPrototype.h index 57d773dbb..8e0ad7609 100644 --- a/libnd4j/include/graph/ContextPrototype.h +++ b/libnd4j/include/graph/ContextPrototype.h @@ -64,9 +64,9 @@ namespace sd { bool _useMKLDNN = sd::Environment::getInstance()->isUseMKLDNN(); // target engine for execution - samediff::Engine _engine = DEFAULT_ENGINE; + sd::Engine _engine = DEFAULT_ENGINE; - samediff::ExecutionMode _execMode = samediff::ExecutionMode::MODE_UNDEFINED; + sd::ExecutionMode _execMode = sd::ExecutionMode::MODE_UNDEFINED; public: explicit ContextPrototype(sd::ops::OpDescriptor* opDescriptor = nullptr, int nodeId = 1, bool inPlace = false); ~ContextPrototype() = default; @@ -99,7 +99,7 @@ namespace sd { std::vector* getDArguments(); std::vector* getAxis(); - samediff::Engine engine(); + sd::Engine engine(); size_t numT(); size_t numI(); diff --git a/libnd4j/include/graph/impl/Context.cpp b/libnd4j/include/graph/impl/Context.cpp index 954329f42..550e20f74 100644 --- a/libnd4j/include/graph/impl/Context.cpp +++ b/libnd4j/include/graph/impl/Context.cpp @@ -107,7 +107,7 @@ namespace sd { delete _context; } - void Context::setTargetEngine(samediff::Engine engine) { + void Context::setTargetEngine(sd::Engine engine) { _engine = engine; } @@ -548,20 +548,20 @@ namespace sd { return _shapeFunctionOverride; } - samediff::ExecutionMode Context::executionMode() { + sd::ExecutionMode Context::executionMode() { return _execMode; } - void Context::setExecutionMode(samediff::ExecutionMode executionMode) { + void Context::setExecutionMode(sd::ExecutionMode executionMode) { _execMode = executionMode; } bool Context::isTraining() { - return _execMode == samediff::ExecutionMode::MODE_TRAINING; + return _execMode == sd::ExecutionMode::MODE_TRAINING; } bool Context::isInference() { - return _execMode == samediff::ExecutionMode::MODE_INFERENCE; + return _execMode == sd::ExecutionMode::MODE_INFERENCE; } void Context::setDArguments(sd::DataType *arguments, int numberOfArguments) { diff --git a/libnd4j/include/graph/impl/ContextPrototype.cpp b/libnd4j/include/graph/impl/ContextPrototype.cpp index 417c46b3a..e40a9d078 100644 --- a/libnd4j/include/graph/impl/ContextPrototype.cpp +++ b/libnd4j/include/graph/impl/ContextPrototype.cpp @@ -59,7 +59,7 @@ namespace sd { } } - samediff::Engine ContextPrototype::engine() { + sd::Engine ContextPrototype::engine() { return _engine; } diff --git a/libnd4j/include/helpers/Loops.h b/libnd4j/include/helpers/Loops.h index 508b84f20..0fca710e6 100644 --- a/libnd4j/include/helpers/Loops.h +++ b/libnd4j/include/helpers/Loops.h @@ -511,7 +511,7 @@ namespace sd { //*********************************************// case LoopKind::EWS1: { - auto span = samediff::Span::build(threadId, numThreads, 0, len, 1); + auto span = sd::Span::build(threadId, numThreads, 0, len, 1); int64_t start = span.startX(), stop = span.stopX(); for (auto i = start; i < stop; i++) @@ -524,7 +524,7 @@ namespace sd { const uint xEws = shape::elementWiseStride(xShapeInfo); const uint zEws = shape::elementWiseStride(zShapeInfo); - auto span = samediff::Span::build(threadId, numThreads, 0, len, 1); + auto span = sd::Span::build(threadId, numThreads, 0, len, 1); int64_t start = span.startX(), stop = span.stopX(); for (auto i = start; i < stop; i++) @@ -538,7 +538,7 @@ namespace sd { uint castXShapeInfo[MAX_RANK]; const bool canCastX = sd::DataTypeUtils::castShapeInfo(xShapeInfo, castXShapeInfo); - auto span = samediff::Span::build(threadId, numThreads, 0, len, 1); + auto span = sd::Span::build(threadId, numThreads, 0, len, 1); int64_t start = span.startX(), stop = span.stopX(); if (zEws > 1) { @@ -558,7 +558,7 @@ namespace sd { //*********************************************// case LoopKind::RANK1: { - auto span = samediff::Span::build(threadId, numThreads, 0, len, 1); + auto span = sd::Span::build(threadId, numThreads, 0, len, 1); for (auto i0 = span.startX(); i0 < span.stopX(); i0++) z[i0 * zStride[0]] = OpType::op(x[i0 * xStride[0]], extraParams); @@ -570,8 +570,8 @@ namespace sd { auto uXShape0 = static_cast(xShape[0]); auto uXShape1 = static_cast(xShape[1]); - auto loop = samediff::ThreadsHelper::pickLoop2d(numThreads, uXShape0, uXShape1); - auto span = samediff::Span2::build(loop, threadId, numThreads, 0, uXShape0, 1, 0, uXShape1, 1); + auto loop = sd::ThreadsHelper::pickLoop2d(numThreads, uXShape0, uXShape1); + auto span = sd::Span2::build(loop, threadId, numThreads, 0, uXShape0, 1, 0, uXShape1, 1); for (auto i0 = span.startX(); i0 < span.stopX(); i0++) { auto z0 = i0 * zStride[0]; @@ -589,8 +589,8 @@ namespace sd { auto uXShape1 = xShape[1]; auto uXShape2 = xShape[2]; - auto loop = samediff::ThreadsHelper::pickLoop2d(numThreads, uXShape0, uXShape1); - auto span = samediff::Span2::build(loop, threadId, numThreads, 0, uXShape0, 1, 0, uXShape1, 1); + auto loop = sd::ThreadsHelper::pickLoop2d(numThreads, uXShape0, uXShape1); + auto span = sd::Span2::build(loop, threadId, numThreads, 0, uXShape0, 1, 0, uXShape1, 1); for (auto i0 = span.startX(); i0 < span.stopX(); i0++) @@ -611,8 +611,8 @@ namespace sd { auto uXShape2 = xShape[2]; auto uXShape3 = xShape[3]; - auto loop = samediff::ThreadsHelper::pickLoop3d(numThreads, uXShape0, uXShape1, uXShape2); - auto span = samediff::Span3::build(loop, threadId, numThreads, 0, uXShape0, 1, 0, uXShape1, 1, 0, uXShape2, 1); + auto loop = sd::ThreadsHelper::pickLoop3d(numThreads, uXShape0, uXShape1, uXShape2); + auto span = sd::Span3::build(loop, threadId, numThreads, 0, uXShape0, 1, 0, uXShape1, 1, 0, uXShape2, 1); for (auto i0 = span.startX(); i0 < span.stopX(); i0++) for (auto i1 = span.startY(); i1 < span.stopY(); i1++) @@ -634,8 +634,8 @@ namespace sd { auto uXShape3 = xShape[3]; auto uXShape4 = xShape[4]; - auto loop = samediff::ThreadsHelper::pickLoop3d(numThreads, uXShape0, uXShape1, uXShape2); - auto span = samediff::Span3::build(loop, threadId, numThreads, 0, uXShape0, 1, 0, uXShape1, 1, 0, uXShape2, 1); + auto loop = sd::ThreadsHelper::pickLoop3d(numThreads, uXShape0, uXShape1, uXShape2); + auto span = sd::Span3::build(loop, threadId, numThreads, 0, uXShape0, 1, 0, uXShape1, 1, 0, uXShape2, 1); for (auto i0 = span.startX(); i0 < span.stopX(); i0++) @@ -666,7 +666,7 @@ namespace sd { bool canCastX = DataTypeUtils::castShapeInfo(xShapeInfo, xShapeInfoCast); bool canCastZ = DataTypeUtils::castShapeInfo(zShapeInfo, zShapeInfoCast); - auto span = samediff::Span::build(threadId, numThreads, 0, len, 1); + auto span = sd::Span::build(threadId, numThreads, 0, len, 1); for (auto i = span.startX(); i < span.stopX(); i++) { auto xOffset = shape::indexOffset(i, xShapeInfo, xShapeInfoCast, canCastX); diff --git a/libnd4j/include/helpers/cpu/MmulHelper.cpp b/libnd4j/include/helpers/cpu/MmulHelper.cpp index edbc45fd4..3f420dcc1 100644 --- a/libnd4j/include/helpers/cpu/MmulHelper.cpp +++ b/libnd4j/include/helpers/cpu/MmulHelper.cpp @@ -93,7 +93,7 @@ static void usualGemm(const NDArray* vA, const NDArray* vB, NDArray* vC, } }; - samediff::Threads::parallel_tad(func, 0, cLen); + sd::Threads::parallel_tad(func, 0, cLen); } @@ -146,7 +146,7 @@ static void usualGemv(const NDArray* vA, const NDArray* vX, NDArray* vY, const } }; - samediff::Threads::parallel_tad(func, 0, M); + sd::Threads::parallel_tad(func, 0, M); } ////////////////////////////////////////////////////////////////////////////// @@ -477,7 +477,7 @@ static void batchedGemm(const NDArray* vA, const NDArray* vB, NDArray* vC, } }; - samediff::Threads::parallel_tad(func, 0, cLen); + sd::Threads::parallel_tad(func, 0, cLen); } ////////////////////////////////////////////////////////////////////////// @@ -669,7 +669,7 @@ static void usualGemm(const char cOrder, const bool transA, const bool transB, c } }; - samediff::Threads::parallel_tad(func, 0, M, 1, 0, N, 1); + sd::Threads::parallel_tad(func, 0, M, 1, 0, N, 1); } ////////////////////////////////////////////////////////////////////////////// @@ -703,7 +703,7 @@ static void usualGemv(const char aOrder, const int M, const int N, const double } }; - samediff::Threads::parallel_tad(func, 0, M); + sd::Threads::parallel_tad(func, 0, M); } */ diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops.hpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops.hpp index fe6019b5a..d82335725 100644 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops.hpp +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops.hpp @@ -62,7 +62,7 @@ void sd::IndexReductionLoops::loopIndexReduce(X* x, Nd4jLong* xShapeInfo, } }; - samediff::Threads::parallel_tad(func, 0, zLen); + sd::Threads::parallel_tad(func, 0, zLen); } break; @@ -83,7 +83,7 @@ void sd::IndexReductionLoops::loopIndexReduce(X* x, Nd4jLong* xShapeInfo, } }; - samediff::Threads::parallel_tad(func, 0, zLen); + sd::Threads::parallel_tad(func, 0, zLen); } break; @@ -104,7 +104,7 @@ void sd::IndexReductionLoops::loopIndexReduce(X* x, Nd4jLong* xShapeInfo, } }; - samediff::Threads::parallel_tad(func, 0, zLen); + sd::Threads::parallel_tad(func, 0, zLen); } break; @@ -131,7 +131,7 @@ void sd::IndexReductionLoops::loopIndexReduce(X* x, Nd4jLong* xShapeInfo, } }; - samediff::Threads::parallel_tad(func, 0, zLen); + sd::Threads::parallel_tad(func, 0, zLen); } break; @@ -160,7 +160,7 @@ void sd::IndexReductionLoops::loopIndexReduce(X* x, Nd4jLong* xShapeInfo, } }; - samediff::Threads::parallel_tad(func, 0, zLen); + sd::Threads::parallel_tad(func, 0, zLen); } break; @@ -191,7 +191,7 @@ void sd::IndexReductionLoops::loopIndexReduce(X* x, Nd4jLong* xShapeInfo, } }; - samediff::Threads::parallel_tad(func, 0, zLen); + sd::Threads::parallel_tad(func, 0, zLen); } break; @@ -224,7 +224,7 @@ void sd::IndexReductionLoops::loopIndexReduce(X* x, Nd4jLong* xShapeInfo, } }; - samediff::Threads::parallel_tad(func, 0, zLen); + sd::Threads::parallel_tad(func, 0, zLen); } break; @@ -248,7 +248,7 @@ void sd::IndexReductionLoops::loopIndexReduce(X* x, Nd4jLong* xShapeInfo, } }; - samediff::Threads::parallel_tad(func, 0, zLen); + sd::Threads::parallel_tad(func, 0, zLen); } break; @@ -272,7 +272,7 @@ void sd::IndexReductionLoops::loopIndexReduce(X* x, Nd4jLong* xShapeInfo, } }; - samediff::Threads::parallel_tad(func, 0, zLen); + sd::Threads::parallel_tad(func, 0, zLen); } break; @@ -299,7 +299,7 @@ void sd::IndexReductionLoops::loopIndexReduce(X* x, Nd4jLong* xShapeInfo, } }; - samediff::Threads::parallel_tad(func, 0, zLen); + sd::Threads::parallel_tad(func, 0, zLen); } } } diff --git a/libnd4j/include/helpers/impl/DebugHelper.cpp b/libnd4j/include/helpers/impl/DebugHelper.cpp index d24068a65..4299674e6 100644 --- a/libnd4j/include/helpers/impl/DebugHelper.cpp +++ b/libnd4j/include/helpers/impl/DebugHelper.cpp @@ -99,7 +99,7 @@ PRAGMA_OMP_PARALLEL_FOR_ARGS(schedule(guided) reduction(+:_nanCount,_infCount,_m return _stdDevValue; }; - _stdDevValue = samediff::Threads::parallel_double(func, LAMBDA_AD { return _old + _new; }, 0, input->lengthOf()); + _stdDevValue = sd::Threads::parallel_double(func, LAMBDA_AD { return _old + _new; }, 0, input->lengthOf()); info->_stdDevValue = math::nd4j_sqrt(_stdDevValue / input->lengthOf()); diff --git a/libnd4j/include/legacy/cpu/NativeOpExecutioner.cpp b/libnd4j/include/legacy/cpu/NativeOpExecutioner.cpp index 574978a7d..b22b6c65a 100644 --- a/libnd4j/include/legacy/cpu/NativeOpExecutioner.cpp +++ b/libnd4j/include/legacy/cpu/NativeOpExecutioner.cpp @@ -199,7 +199,7 @@ void NativeOpExecutioner::execBroadcast(sd::LaunchContext *lc, } } - samediff::Threads::parallel_tad(func, 0, numTads); + sd::Threads::parallel_tad(func, 0, numTads); #endif } @@ -237,7 +237,7 @@ void NativeOpExecutioner::execInverseBroadcast(sd::LaunchContext *lc, auto yLen = shape::length(hYShapeInfo); auto numTads = yLen / xLen; - samediff::Threads::parallel_tad(func, 0, numTads); + sd::Threads::parallel_tad(func, 0, numTads); #endif } @@ -273,7 +273,7 @@ void NativeOpExecutioner::execBroadcastBool(sd::LaunchContext *lc, auto yLen = shape::length(hYShapeInfo); auto numTads = xLen / yLen; - samediff::Threads::parallel_tad(func, 0, numTads); + sd::Threads::parallel_tad(func, 0, numTads); } void NativeOpExecutioner::execInverseBroadcastBool(sd::LaunchContext *lc, @@ -308,7 +308,7 @@ void NativeOpExecutioner::execInverseBroadcastBool(sd::LaunchContext *lc, auto yLen = shape::length(hYShapeInfo); auto numTads = yLen / xLen; - samediff::Threads::parallel_tad(func, 0, numTads); + sd::Threads::parallel_tad(func, 0, numTads); } @@ -348,7 +348,7 @@ void NativeOpExecutioner::execBroadcastInt(sd::LaunchContext *lc, auto yLen = shape::length(hYShapeInfo); auto numTads = xLen / yLen; - samediff::Threads::parallel_tad(func, 0, numTads); + sd::Threads::parallel_tad(func, 0, numTads); } void NativeOpExecutioner::execInverseBroadcastInt(sd::LaunchContext *lc, @@ -384,7 +384,7 @@ void NativeOpExecutioner::execInverseBroadcastInt(sd::LaunchContext *lc, auto yLen = shape::length(hYShapeInfo); auto numTads = yLen / xLen; - samediff::Threads::parallel_tad(func, 0, numTads); + sd::Threads::parallel_tad(func, 0, numTads); } //////////////////////////////////////////////////////////////////////// @@ -427,7 +427,7 @@ void NativeOpExecutioner::execPairwiseTransform(sd::LaunchContext *lc, }; auto zLen = shape::length(hZShapeInfo); - samediff::Threads::parallel_for(func, 0, zLen, 1, sd::math::nd4j_max(1, sd::math::nd4j_min(zLen / 1024, sd::Environment::getInstance()->maxMasterThreads()))); + sd::Threads::parallel_for(func, 0, zLen, 1, sd::math::nd4j_max(1, sd::math::nd4j_min(zLen / 1024, sd::Environment::getInstance()->maxMasterThreads()))); #endif } @@ -462,7 +462,7 @@ void NativeOpExecutioner::execPairwiseBoolTransform(sd::LaunchContext *lc, }; auto zLen = shape::length(hZShapeInfo); - samediff::Threads::parallel_for(func, 0, zLen, 1, sd::math::nd4j_max(1, sd::math::nd4j_min(zLen / 1024, sd::Environment::getInstance()->maxMasterThreads()))); + sd::Threads::parallel_for(func, 0, zLen, 1, sd::math::nd4j_max(1, sd::math::nd4j_min(zLen / 1024, sd::Environment::getInstance()->maxMasterThreads()))); } @@ -495,7 +495,7 @@ void NativeOpExecutioner::execPairwiseIntTransform(sd::LaunchContext *lc, }; auto zLen = shape::length(hZShapeInfo); - samediff::Threads::parallel_for(func, 0, zLen, 1, sd::math::nd4j_max(1, sd::math::nd4j_min(zLen / 1024, sd::Environment::getInstance()->maxMasterThreads()))); + sd::Threads::parallel_for(func, 0, zLen, 1, sd::math::nd4j_max(1, sd::math::nd4j_min(zLen / 1024, sd::Environment::getInstance()->maxMasterThreads()))); } @@ -534,7 +534,7 @@ void NativeOpExecutioner::execReduceFloat(sd::LaunchContext *lc, const sd::LoopKind::Kind kindOfLoop = sd::LoopKind::deduceKindOfLoopTadXZ(hXShapeInfo, hZShapeInfo, tadShapeInfo); - samediff::Threads::parallel_tad(func, 0, shape::length(hZShapeInfo), 1, kindOfLoop == sd::LoopKind::Kind::SMALLARR2DX ? 1 : sd::Environment::getInstance()->maxMasterThreads()); + sd::Threads::parallel_tad(func, 0, shape::length(hZShapeInfo), 1, kindOfLoop == sd::LoopKind::Kind::SMALLARR2DX ? 1 : sd::Environment::getInstance()->maxMasterThreads()); } //////////////////////////////////////////////////////////////////////// @@ -562,7 +562,7 @@ void NativeOpExecutioner::execReduceSame(sd::LaunchContext *lc, const sd::LoopKind::Kind kindOfLoop = sd::LoopKind::deduceKindOfLoopTadXZ(hXShapeInfo, hZShapeInfo, tadShapeInfo); - samediff::Threads::parallel_tad(func, 0, shape::length(hZShapeInfo), 1, kindOfLoop == sd::LoopKind::Kind::SMALLARR2DX ? 1 : sd::Environment::getInstance()->maxMasterThreads()); + sd::Threads::parallel_tad(func, 0, shape::length(hZShapeInfo), 1, kindOfLoop == sd::LoopKind::Kind::SMALLARR2DX ? 1 : sd::Environment::getInstance()->maxMasterThreads()); } //////////////////////////////////////////////////////////////////////// @@ -590,7 +590,7 @@ void NativeOpExecutioner::execReduceBool(sd::LaunchContext *lc, const sd::LoopKind::Kind kindOfLoop = sd::LoopKind::deduceKindOfLoopTadXZ(hXShapeInfo, hZShapeInfo, tadShapeInfo); - samediff::Threads::parallel_tad(func, 0, shape::length(hZShapeInfo), 1, kindOfLoop == sd::LoopKind::Kind::SMALLARR2DX ? 1 : sd::Environment::getInstance()->maxMasterThreads()); + sd::Threads::parallel_tad(func, 0, shape::length(hZShapeInfo), 1, kindOfLoop == sd::LoopKind::Kind::SMALLARR2DX ? 1 : sd::Environment::getInstance()->maxMasterThreads()); } //////////////////////////////////////////////////////////////////////// @@ -618,7 +618,7 @@ void NativeOpExecutioner::execReduceLong(sd::LaunchContext *lc, const sd::LoopKind::Kind kindOfLoop = sd::LoopKind::deduceKindOfLoopTadXZ(hXShapeInfo, hZShapeInfo, tadShapeInfo); - samediff::Threads::parallel_tad(func, 0, shape::length(hZShapeInfo), 1, kindOfLoop == sd::LoopKind::Kind::SMALLARR2DX ? 1 : sd::Environment::getInstance()->maxMasterThreads()); + sd::Threads::parallel_tad(func, 0, shape::length(hZShapeInfo), 1, kindOfLoop == sd::LoopKind::Kind::SMALLARR2DX ? 1 : sd::Environment::getInstance()->maxMasterThreads()); } //////////////////////////////////////////////////////////////////////// @@ -791,7 +791,7 @@ void NativeOpExecutioner::execReduce3(sd::LaunchContext *lc, BUILD_DOUBLE_SELECTOR(xType, zType, functions::reduce3::Reduce3, ::exec(opNum, hX, hXShapeInfo, extraParamsVals, hY, hYShapeInfo, hZ, hZShapeInfo, dimension, dimensionLength, start, stop), LIBND4J_TYPES, FLOAT_TYPES); }; - samediff::Threads::parallel_tad(func, 0, tadPack.numberOfTads()); + sd::Threads::parallel_tad(func, 0, tadPack.numberOfTads()); } @@ -820,7 +820,7 @@ void NativeOpExecutioner::execReduce3All(sd::LaunchContext *lc, BUILD_DOUBLE_SELECTOR(xType, zType, functions::reduce3::Reduce3, ::execAll(opNum, hX, hXShapeInfo, extraParamsVals, hY, hYShapeInfo, hZ, hZShapeInfo, dimension, dimensionLength, xTadShapeInfo, xOffsets, yTadShapeInfo, yOffsets, start, stop), LIBND4J_TYPES, FLOAT_TYPES); }; - samediff::Threads::parallel_tad(func, 0, tadPack.numberOfTads()); + sd::Threads::parallel_tad(func, 0, tadPack.numberOfTads()); } //////////////////////////////////////////////////////////////////////// @@ -861,7 +861,7 @@ void NativeOpExecutioner::execReduce3TAD(sd::LaunchContext *lc, BUILD_DOUBLE_SELECTOR(xType, zType, functions::reduce3::Reduce3, ::exec(opNum, hX, hXShapeInfo, extraParamsVals, hY, hYShapeInfo, hZ, hZShapeInfo, dimension, dimensionLength, tadShapeInfo, tadOffsets, start, stop), LIBND4J_TYPES, FLOAT_TYPES); }; - samediff::Threads::parallel_tad(func, 0, tadPack.numberOfTads()); + sd::Threads::parallel_tad(func, 0, tadPack.numberOfTads()); } @@ -905,7 +905,7 @@ void NativeOpExecutioner::execScalar(sd::LaunchContext *lc, }; auto zLen = shape::length(hZShapeInfo); - samediff::Threads::parallel_for(func, 0, zLen, 1, !allowParallelism ? 1 : sd::math::nd4j_max(1, sd::math::nd4j_min(zLen / 1024, sd::Environment::getInstance()->maxMasterThreads()))); + sd::Threads::parallel_for(func, 0, zLen, 1, !allowParallelism ? 1 : sd::math::nd4j_max(1, sd::math::nd4j_min(zLen / 1024, sd::Environment::getInstance()->maxMasterThreads()))); #endif } @@ -942,7 +942,7 @@ void NativeOpExecutioner::execScalar(sd::LaunchContext *lc, }; auto yLen = shape::length(hScalarShapeInfo); - samediff::Threads::parallel_tad(func, 0, yLen, 1, sd::math::nd4j_min(yLen, sd::Environment::getInstance()->maxMasterThreads())); + sd::Threads::parallel_tad(func, 0, yLen, 1, sd::math::nd4j_min(yLen, sd::Environment::getInstance()->maxMasterThreads())); #endif } @@ -976,7 +976,7 @@ void NativeOpExecutioner::execScalarBool(sd::LaunchContext *lc, }; auto zLen = shape::length(hZShapeInfo); - samediff::Threads::parallel_for(func, 0, zLen, 1, !allowParallelism ? 1 : sd::math::nd4j_max(1, sd::math::nd4j_min(zLen / 1024, sd::Environment::getInstance()->maxMasterThreads()))); + sd::Threads::parallel_for(func, 0, zLen, 1, !allowParallelism ? 1 : sd::math::nd4j_max(1, sd::math::nd4j_min(zLen / 1024, sd::Environment::getInstance()->maxMasterThreads()))); } @@ -1012,7 +1012,7 @@ void NativeOpExecutioner::execScalarBool(sd::LaunchContext *lc, }; auto yLen = shape::length(hScalarShapeInfo); - samediff::Threads::parallel_tad(func, 0, yLen, 1, sd::math::nd4j_min(yLen, sd::Environment::getInstance()->maxMasterThreads())); + sd::Threads::parallel_tad(func, 0, yLen, 1, sd::math::nd4j_min(yLen, sd::Environment::getInstance()->maxMasterThreads())); } //////////////////////////////////////////////////////////////////////// @@ -1044,7 +1044,7 @@ void NativeOpExecutioner::execScalarInt(sd::LaunchContext *lc, }; auto zLen = shape::length(hZShapeInfo); - samediff::Threads::parallel_for(func, 0, zLen, 1, !allowParallelism ? 1 : sd::math::nd4j_max(1, sd::math::nd4j_min(zLen / 1024, sd::Environment::getInstance()->maxMasterThreads()))); + sd::Threads::parallel_for(func, 0, zLen, 1, !allowParallelism ? 1 : sd::math::nd4j_max(1, sd::math::nd4j_min(zLen / 1024, sd::Environment::getInstance()->maxMasterThreads()))); } @@ -1080,7 +1080,7 @@ void NativeOpExecutioner::execScalarInt(sd::LaunchContext *lc, }; auto yLen = shape::length(hScalarShapeInfo); - samediff::Threads::parallel_tad(func, 0, yLen, 1, sd::math::nd4j_min(yLen, sd::Environment::getInstance()->maxMasterThreads())); + sd::Threads::parallel_tad(func, 0, yLen, 1, sd::math::nd4j_min(yLen, sd::Environment::getInstance()->maxMasterThreads())); } //////////////////////////////////////////////////////////////////////// @@ -1193,7 +1193,7 @@ void NativeOpExecutioner::execTransformFloat(sd::LaunchContext *lc, BUILD_DOUBLE_SELECTOR(xType, zType, functions::transform::TransformFloat, ::exec(opNum, hX, hXShapeInfo, hZ, hZShapeInfo, extraParams, thread_id, numThreads), LIBND4J_TYPES, FLOAT_TYPES); }; - samediff::Threads::parallel_do(func, sd::math::nd4j_max(1, sd::math::nd4j_min(shape::length(hZShapeInfo) / 1024, sd::Environment::getInstance()->maxMasterThreads()))); + sd::Threads::parallel_do(func, sd::math::nd4j_max(1, sd::math::nd4j_min(shape::length(hZShapeInfo) / 1024, sd::Environment::getInstance()->maxMasterThreads()))); } //////////////////////////////////////////////////////////////////////// @@ -1215,7 +1215,7 @@ void NativeOpExecutioner::execTransformBool(sd::LaunchContext *lc, BUILD_DOUBLE_SELECTOR(xType, zType, functions::transform::TransformBool, ::exec(opNum, hX, hXShapeInfo, hZ, hZShapeInfo, extraParams, thread_id, numThreads), LIBND4J_TYPES, BOOL_TYPES); }; - samediff::Threads::parallel_do(func, sd::math::nd4j_max(1, sd::math::nd4j_min(shape::length(hZShapeInfo) / 1024, sd::Environment::getInstance()->maxMasterThreads()))); + sd::Threads::parallel_do(func, sd::math::nd4j_max(1, sd::math::nd4j_min(shape::length(hZShapeInfo) / 1024, sd::Environment::getInstance()->maxMasterThreads()))); } //////////////////////////////////////////////////////////////////////// @@ -1243,7 +1243,7 @@ void NativeOpExecutioner::execTransformAny(sd::LaunchContext *lc, BUILD_DOUBLE_SELECTOR(xType, zType, functions::transform::TransformAny, ::exec(opNum, hX, hXShapeInfo, hZ, hZShapeInfo, extraParams, thread_id, numThreads), LIBND4J_TYPES, LIBND4J_TYPES); }; - samediff::Threads::parallel_do(func, sd::math::nd4j_max(1, sd::math::nd4j_min(shape::length(hZShapeInfo) / 1024, sd::Environment::getInstance()->maxMasterThreads()))); + sd::Threads::parallel_do(func, sd::math::nd4j_max(1, sd::math::nd4j_min(shape::length(hZShapeInfo) / 1024, sd::Environment::getInstance()->maxMasterThreads()))); } } @@ -1266,7 +1266,7 @@ void NativeOpExecutioner::execTransformSame(sd::LaunchContext *lc, BUILD_SINGLE_SELECTOR(xType, functions::transform::TransformSame, ::exec(opNum, hX, hXShapeInfo, hZ, hZShapeInfo, extraParams, thread_id, numThreads), LIBND4J_TYPES); }; - samediff::Threads::parallel_do(func, sd::math::nd4j_max(1, sd::math::nd4j_min(shape::length(hZShapeInfo) / 1024, sd::Environment::getInstance()->maxMasterThreads()))); + sd::Threads::parallel_do(func, sd::math::nd4j_max(1, sd::math::nd4j_min(shape::length(hZShapeInfo) / 1024, sd::Environment::getInstance()->maxMasterThreads()))); } //////////////////////////////////////////////////////////////////////// @@ -1288,7 +1288,7 @@ void NativeOpExecutioner::execTransformStrict(sd::LaunchContext *lc, BUILD_SINGLE_SELECTOR(xType, functions::transform::TransformStrict, ::exec(opNum, hX, hXShapeInfo, hZ, hZShapeInfo, extraParams, thread_id, numThreads), FLOAT_TYPES); }; - samediff::Threads::parallel_do(func, sd::math::nd4j_max(1, sd::math::nd4j_min(shape::length(hZShapeInfo) / 1024, sd::Environment::getInstance()->maxMasterThreads()))); + sd::Threads::parallel_do(func, sd::math::nd4j_max(1, sd::math::nd4j_min(shape::length(hZShapeInfo) / 1024, sd::Environment::getInstance()->maxMasterThreads()))); } //////////////////////////////////////////////////////////////////////// diff --git a/libnd4j/include/legacy/cpu/NativeOps.cpp b/libnd4j/include/legacy/cpu/NativeOps.cpp index cf04acbe7..976484782 100644 --- a/libnd4j/include/legacy/cpu/NativeOps.cpp +++ b/libnd4j/include/legacy/cpu/NativeOps.cpp @@ -1318,7 +1318,7 @@ void pullRowsGeneric(void *vx, } }; - samediff::Threads::parallel_tad(func, 0, n, 1, _threads); + sd::Threads::parallel_tad(func, 0, n, 1, _threads); } void pullRows(Nd4jPointer *extraPointers, @@ -1377,7 +1377,7 @@ void tearGeneric(void *vx, } }; - samediff::Threads::parallel_tad(func,0, numTads); + sd::Threads::parallel_tad(func,0, numTads); } void tear(Nd4jPointer *extraPointers, @@ -1530,7 +1530,7 @@ void shuffleGeneric(void **hX, Nd4jLong **hXShapeInfo, void **dz, Nd4jLong **hZS } }; - samediff::Threads::parallel_tad(func, 0, N); + sd::Threads::parallel_tad(func, 0, N); } void shuffle(Nd4jPointer *extras, @@ -1944,7 +1944,7 @@ FORCEINLINE int estimateThresholdGeneric(Nd4jPointer *extraPointers, Nd4jPointer return cnt; }; - return samediff::Threads::parallel_long(func, LAMBDA_AL { return _old + _new; }, 0, N); + return sd::Threads::parallel_long(func, LAMBDA_AL { return _old + _new; }, 0, N); } @@ -2653,7 +2653,7 @@ static void _scatterUpdate(Nd4jPointer *extraPointers, int opCode, int numOfSub } }; - samediff::Threads::parallel_do(func); + sd::Threads::parallel_do(func); } //////////////////////////////////////////////////////////////////////// @@ -2812,7 +2812,7 @@ void ctxSetExecutionMode(OpaqueContext* ptr, int execMode) { if (execMode < 0 || execMode > 2) execMode = 0; - ptr->setExecutionMode((samediff::ExecutionMode) execMode); + ptr->setExecutionMode((sd::ExecutionMode) execMode); } void ctxPurge(OpaqueContext* ptr) { diff --git a/libnd4j/include/legacy/cuda/NativeOps.cu b/libnd4j/include/legacy/cuda/NativeOps.cu index 1a4de3de5..78bd0149a 100755 --- a/libnd4j/include/legacy/cuda/NativeOps.cu +++ b/libnd4j/include/legacy/cuda/NativeOps.cu @@ -3799,7 +3799,7 @@ void ctxSetExecutionMode(OpaqueContext* ptr, int execMode) { if (execMode < 0 || execMode > 2) execMode = 0; - ptr->setExecutionMode((samediff::ExecutionMode) execMode); + ptr->setExecutionMode((sd::ExecutionMode) execMode); } OpaqueDataBuffer* allocateDataBuffer(Nd4jLong elements, int dataType, bool allocateBoth) { diff --git a/libnd4j/include/loops/cpu/TrueBroadcastHelper.hpp b/libnd4j/include/loops/cpu/TrueBroadcastHelper.hpp index 7edb9d90d..826c61330 100644 --- a/libnd4j/include/loops/cpu/TrueBroadcastHelper.hpp +++ b/libnd4j/include/loops/cpu/TrueBroadcastHelper.hpp @@ -60,7 +60,7 @@ namespace sd { } } }; - samediff::Threads::parallel_tad(func, 0, xArr.lengthOf()); + sd::Threads::parallel_tad(func, 0, xArr.lengthOf()); return; } @@ -95,7 +95,7 @@ namespace sd { } } }; - samediff::Threads::parallel_tad(func, 0, nLen, 1); + sd::Threads::parallel_tad(func, 0, nLen, 1); return; } @@ -137,7 +137,7 @@ namespace sd { } }; - samediff::Threads::parallel_for(func, 0, zLen); + sd::Threads::parallel_for(func, 0, zLen); } template @@ -200,7 +200,7 @@ namespace sd { } }; - samediff::Threads::parallel_for(func, 0, zLen); + sd::Threads::parallel_for(func, 0, zLen); } template @@ -263,7 +263,7 @@ namespace sd { } }; - samediff::Threads::parallel_for(func, 0, zLen); + sd::Threads::parallel_for(func, 0, zLen); } template diff --git a/libnd4j/include/loops/cpu/indexreduce.hpp b/libnd4j/include/loops/cpu/indexreduce.hpp index d4abd8c82..f81043cdd 100644 --- a/libnd4j/include/loops/cpu/indexreduce.hpp +++ b/libnd4j/include/loops/cpu/indexreduce.hpp @@ -79,7 +79,7 @@ Nd4jLong IndexReduce::execScalar(void *vx, Nd4jLong *xShapeInfo, void *vex } }; - maxThreads = samediff::Threads::parallel_for(func, 0, len, 1, maxThreads); + maxThreads = sd::Threads::parallel_for(func, 0, len, 1, maxThreads); for (int e = 0; e < maxThreads; e++) startingIndex = OpType::update(startingIndex, intermediatery[e], extraParams); @@ -95,7 +95,7 @@ Nd4jLong IndexReduce::execScalar(void *vx, Nd4jLong *xShapeInfo, void *vex } }; - maxThreads = samediff::Threads::parallel_for(func, 0, len, 1, maxThreads); + maxThreads = sd::Threads::parallel_for(func, 0, len, 1, maxThreads); for (int e = 0; e < maxThreads; e++) startingIndex = OpType::update(startingIndex, intermediatery[e], extraParams); diff --git a/libnd4j/include/loops/cpu/random.hpp b/libnd4j/include/loops/cpu/random.hpp index 034179f07..caf7b247b 100644 --- a/libnd4j/include/loops/cpu/random.hpp +++ b/libnd4j/include/loops/cpu/random.hpp @@ -67,7 +67,7 @@ namespace functions { z[i] = OpClass::op(x[i], y[i], i, length, rng, extraArguments); } }; - samediff::Threads::parallel_for(func, 0, length, 1); + sd::Threads::parallel_for(func, 0, length, 1); } else{ uint xShapeInfoCast[MAX_RANK]; @@ -81,7 +81,7 @@ namespace functions { } }; - samediff::Threads::parallel_for(func, 0, length, 1); + sd::Threads::parallel_for(func, 0, length, 1); } } else if (shape::haveSameShapeAndStrides(xShapeInfo, yShapeInfo)) { @@ -100,7 +100,7 @@ namespace functions { } }; - samediff::Threads::parallel_for(func, 0, length, 1); + sd::Threads::parallel_for(func, 0, length, 1); } else if (shape::haveSameShapeAndStrides(xShapeInfo, zShapeInfo)) { @@ -118,7 +118,7 @@ namespace functions { } }; - samediff::Threads::parallel_for(func, 0, length, 1); + sd::Threads::parallel_for(func, 0, length, 1); } else if (shape::haveSameShapeAndStrides(yShapeInfo, zShapeInfo)) { @@ -136,7 +136,7 @@ namespace functions { } }; - samediff::Threads::parallel_for(func, 0, length, 1); + sd::Threads::parallel_for(func, 0, length, 1); } else { @@ -157,7 +157,7 @@ namespace functions { } }; - samediff::Threads::parallel_for(func, 0, length, 1); + sd::Threads::parallel_for(func, 0, length, 1); } }; @@ -192,7 +192,7 @@ namespace functions { z[i] = OpClass::op(x[i], i, length, rng, extraArguments); } }; - samediff::Threads::parallel_for(func, 0, length, 1); + sd::Threads::parallel_for(func, 0, length, 1); } else{ auto func = PRAGMA_THREADS_FOR { @@ -203,7 +203,7 @@ namespace functions { } }; - samediff::Threads::parallel_for(func, 0, length, 1); + sd::Threads::parallel_for(func, 0, length, 1); } } else { @@ -220,7 +220,7 @@ namespace functions { } }; - samediff::Threads::parallel_for(func, 0, length, 1); + sd::Threads::parallel_for(func, 0, length, 1); } } @@ -245,7 +245,7 @@ namespace functions { } }; - samediff::Threads::parallel_for(func, 0, length, 1); + sd::Threads::parallel_for(func, 0, length, 1); } else{ sd::OmpLaunchHelper info(length); @@ -261,7 +261,7 @@ namespace functions { } }; - samediff::Threads::parallel_for(func, 0, length, 1); + sd::Threads::parallel_for(func, 0, length, 1); } } diff --git a/libnd4j/include/loops/cpu/reduce/reduce_bool.cpp b/libnd4j/include/loops/cpu/reduce/reduce_bool.cpp index afb441a45..95dae9465 100644 --- a/libnd4j/include/loops/cpu/reduce/reduce_bool.cpp +++ b/libnd4j/include/loops/cpu/reduce/reduce_bool.cpp @@ -208,13 +208,26 @@ namespace functions { Z _CUDA_H ReduceBoolFunction::execScalar(void *vx, Nd4jLong xEws, Nd4jLong length, void *vextraParams) { auto x = reinterpret_cast(vx); auto extraParams = reinterpret_cast(vextraParams); - int maxThreads = sd::math::nd4j_min(64, sd::Environment::getInstance()->maxThreads()); + int maxThreads = sd::math::nd4j_min(64, sd::Environment::getInstance()->maxMasterThreads()); Z intermediate[64]; PRAGMA_OMP_SIMD for (auto e = 0; e < maxThreads; e++) intermediate[e] = OpType::startingValue(x); +#ifdef _OPENMP + + if (xEws == 1) { + PRAGMA_OMP_PARALLEL_FOR_THREADS(maxThreads) + for (Nd4jLong i = 0; i < length; i++) + intermediate[omp_get_thread_num()] = OpType::update(intermediate[omp_get_thread_num()], OpType::op(x[i], extraParams), extraParams); + } else { + PRAGMA_OMP_PARALLEL_FOR_THREADS(maxThreads) + for (Nd4jLong i = 0; i < length; i++) + intermediate[omp_get_thread_num()] = OpType::update(intermediate[omp_get_thread_num()], OpType::op(x[i * xEws], extraParams), extraParams); + } + +#else auto func = PRAGMA_THREADS_FOR { if (xEws == 1) { for (auto i = start; i < stop; i++) @@ -225,7 +238,9 @@ namespace functions { } }; - maxThreads = samediff::Threads::parallel_for(func, 0, length, 1, maxThreads); + maxThreads = sd::Threads::parallel_for(func, 0, length, 1, maxThreads); + +#endif // merge results for (int e = 1; e < maxThreads; e++) diff --git a/libnd4j/include/loops/cpu/reduce/reduce_float.hpp b/libnd4j/include/loops/cpu/reduce/reduce_float.hpp index 40c24f4fa..b44cdca03 100644 --- a/libnd4j/include/loops/cpu/reduce/reduce_float.hpp +++ b/libnd4j/include/loops/cpu/reduce/reduce_float.hpp @@ -72,7 +72,7 @@ namespace functions { auto startingValue = OpType::startingValue(x); uint xShapeInfoCast[MAX_RANK]; const bool canCastX = sd::DataTypeUtils::castShapeInfo(xShapeInfo, xShapeInfoCast); - int maxThreads = sd::math::nd4j_min(64, sd::Environment::getInstance()->maxThreads()); + int maxThreads = sd::math::nd4j_min(64, sd::Environment::getInstance()->maxMasterThreads()); Z intermediate[64]; PRAGMA_OMP_SIMD @@ -84,7 +84,7 @@ namespace functions { intermediate[thread_id] = OpType::update(intermediate[thread_id], OpType::op(x[shape::indexOffset(i, xShapeInfo, xShapeInfoCast, canCastX)], extraParams), extraParams); }; - maxThreads = samediff::Threads::parallel_for(func, 0, length, 1, maxThreads); + maxThreads = sd::Threads::parallel_for(func, 0, length, 1, maxThreads); // merge results for (int e = 1; e < maxThreads; e++) @@ -242,13 +242,27 @@ namespace functions { auto x = reinterpret_cast(vx); auto extraParams = reinterpret_cast(vextraParams); - int maxThreads = sd::math::nd4j_min(64, sd::Environment::getInstance()->maxThreads()); + int maxThreads = sd::math::nd4j_min(64, sd::Environment::getInstance()->maxMasterThreads()); Z intermediate[64]; PRAGMA_OMP_SIMD for (auto e = 0; e < maxThreads; e++) intermediate[e] = OpType::startingValue(x); +#ifdef _OPENMP + + if (xEws == 1) { + PRAGMA_OMP_PARALLEL_FOR_THREADS(maxThreads) + for (Nd4jLong i = 0; i < length; i++) + intermediate[omp_get_thread_num()] = OpType::update(intermediate[omp_get_thread_num()], OpType::op(x[i], extraParams), extraParams); + } else { + PRAGMA_OMP_PARALLEL_FOR_THREADS(maxThreads) + for (Nd4jLong i = 0; i < length; i++) + intermediate[omp_get_thread_num()] = OpType::update(intermediate[omp_get_thread_num()], OpType::op(x[i * xEws], extraParams), extraParams); + } + +#else + auto func = PRAGMA_THREADS_FOR { if (xEws == 1) { for (auto i = start; i < stop; i++) @@ -259,7 +273,9 @@ namespace functions { } }; - maxThreads = samediff::Threads::parallel_for(func, 0, length, 1, maxThreads); + maxThreads = sd::Threads::parallel_for(func, 0, length, 1, maxThreads); + +#endif // merge results for (int e = 1; e < maxThreads; e++) diff --git a/libnd4j/include/loops/cpu/reduce/reduce_long.cpp b/libnd4j/include/loops/cpu/reduce/reduce_long.cpp index 98b462ebd..387646d70 100644 --- a/libnd4j/include/loops/cpu/reduce/reduce_long.cpp +++ b/libnd4j/include/loops/cpu/reduce/reduce_long.cpp @@ -67,7 +67,7 @@ namespace functions { auto startingValue = OpType::startingValue(x); uint xShapeInfoCast[MAX_RANK]; const bool canCastX = sd::DataTypeUtils::castShapeInfo(xShapeInfo, xShapeInfoCast); - int maxThreads = sd::math::nd4j_min(64, sd::Environment::getInstance()->maxThreads()); + int maxThreads = sd::math::nd4j_min(64, sd::Environment::getInstance()->maxMasterThreads()); Z intermediate[64]; PRAGMA_OMP_SIMD @@ -79,7 +79,7 @@ namespace functions { intermediate[thread_id] = OpType::update(intermediate[thread_id], OpType::op(x[shape::indexOffset(i, xShapeInfo, xShapeInfoCast, canCastX)], extraParams), extraParams); }; - maxThreads = samediff::Threads::parallel_for(func, 0, length, 1, maxThreads); + maxThreads = sd::Threads::parallel_for(func, 0, length, 1, maxThreads); // merge results for (int e = 1; e < maxThreads; e++) @@ -231,13 +231,26 @@ namespace functions { auto x = reinterpret_cast(vx); auto extraParams = reinterpret_cast(vextraParams); - int maxThreads = sd::math::nd4j_min(64, sd::Environment::getInstance()->maxThreads()); + int maxThreads = sd::math::nd4j_min(64, sd::Environment::getInstance()->maxMasterThreads()); Z intermediate[64]; PRAGMA_OMP_SIMD for (auto e = 0; e < maxThreads; e++) intermediate[e] = OpType::startingValue(x); +#ifdef _OPENMP + + if (xEws == 1) { + PRAGMA_OMP_PARALLEL_FOR_THREADS(maxThreads) + for (Nd4jLong i = 0; i < length; i++) + intermediate[omp_get_thread_num()] = OpType::update(intermediate[omp_get_thread_num()], OpType::op(x[i], extraParams), extraParams); + } else { + PRAGMA_OMP_PARALLEL_FOR_THREADS(maxThreads) + for (Nd4jLong i = 0; i < length; i++) + intermediate[omp_get_thread_num()] = OpType::update(intermediate[omp_get_thread_num()], OpType::op(x[i * xEws], extraParams), extraParams); + } + +#else auto func = PRAGMA_THREADS_FOR { if (xEws == 1) { for (auto i = start; i < stop; i++) @@ -248,7 +261,9 @@ namespace functions { } }; - maxThreads = samediff::Threads::parallel_for(func, 0, length, 1, maxThreads); + maxThreads = sd::Threads::parallel_for(func, 0, length, 1, maxThreads); + +#endif // merge results for (int e = 1; e < maxThreads; e++) diff --git a/libnd4j/include/loops/cpu/reduce/reduce_same.cpp b/libnd4j/include/loops/cpu/reduce/reduce_same.cpp index f357b7e64..7b88cd4cb 100644 --- a/libnd4j/include/loops/cpu/reduce/reduce_same.cpp +++ b/libnd4j/include/loops/cpu/reduce/reduce_same.cpp @@ -69,7 +69,7 @@ namespace functions { auto startingValue = OpType::startingValue(x); uint xShapeInfoCast[MAX_RANK]; const bool canCastX = sd::DataTypeUtils::castShapeInfo(xShapeInfo, xShapeInfoCast); - int maxThreads = sd::math::nd4j_min(64, sd::Environment::getInstance()->maxThreads()); + int maxThreads = sd::math::nd4j_min(64, sd::Environment::getInstance()->maxMasterThreads()); X intermediate[64]; PRAGMA_OMP_SIMD @@ -81,7 +81,7 @@ namespace functions { intermediate[thread_id] = OpType::update(intermediate[thread_id], OpType::op(x[shape::indexOffset(i, xShapeInfo, xShapeInfoCast, canCastX)], extraParams), extraParams); }; - maxThreads = samediff::Threads::parallel_for(func, 0, length, 1, maxThreads); + maxThreads = sd::Threads::parallel_for(func, 0, length, 1, maxThreads); // merge results for (int e = 1; e < maxThreads; e++) @@ -240,13 +240,26 @@ namespace functions { auto x = reinterpret_cast(vx); auto extraParams = reinterpret_cast(vextraParams); - int maxThreads = sd::math::nd4j_min(64, sd::Environment::getInstance()->maxThreads()); + int maxThreads = sd::math::nd4j_min(64, sd::Environment::getInstance()->maxMasterThreads()); X intermediate[64]; PRAGMA_OMP_SIMD for (auto e = 0; e < maxThreads; e++) intermediate[e] = OpType::startingValue(x); +#ifdef _OPENMP + + if (xEws == 1) { + PRAGMA_OMP_PARALLEL_FOR_THREADS(maxThreads) + for (Nd4jLong i = 0; i < length; i++) + intermediate[omp_get_thread_num()] = OpType::update(intermediate[omp_get_thread_num()], OpType::op(x[i], extraParams), extraParams); + } else { + PRAGMA_OMP_PARALLEL_FOR_THREADS(maxThreads) + for (Nd4jLong i = 0; i < length; i++) + intermediate[omp_get_thread_num()] = OpType::update(intermediate[omp_get_thread_num()], OpType::op(x[i * xEws], extraParams), extraParams); + } + +#else auto func = PRAGMA_THREADS_FOR { if (xEws == 1) { for (auto i = start; i < stop; i++) @@ -257,7 +270,9 @@ namespace functions { } }; - maxThreads = samediff::Threads::parallel_for(func, 0, length, 1, maxThreads); + maxThreads = sd::Threads::parallel_for(func, 0, length, 1, maxThreads); + +#endif // merge results for (int e = 1; e < maxThreads; e++) diff --git a/libnd4j/include/loops/cpu/reduce3.hpp b/libnd4j/include/loops/cpu/reduce3.hpp index 961c6b1c8..8540342c0 100644 --- a/libnd4j/include/loops/cpu/reduce3.hpp +++ b/libnd4j/include/loops/cpu/reduce3.hpp @@ -93,7 +93,7 @@ void Reduce3::execScalar(void *vx, Nd4jLong *xShapeInfo, } }; - maxThreads = samediff::Threads::parallel_for(func, 0, length, 1, maxThreads); + maxThreads = sd::Threads::parallel_for(func, 0, length, 1, maxThreads); } else if(shape::haveSameShapeAndStrides(xShapeInfo, yShapeInfo)) { @@ -104,7 +104,7 @@ void Reduce3::execScalar(void *vx, Nd4jLong *xShapeInfo, } }; - maxThreads = samediff::Threads::parallel_for(func, 0, length, 1, maxThreads); + maxThreads = sd::Threads::parallel_for(func, 0, length, 1, maxThreads); } else { uint yShapeInfoCast[MAX_RANK]; const bool canCastY = sd::DataTypeUtils::castShapeInfo(yShapeInfo, yShapeInfoCast); @@ -117,7 +117,7 @@ void Reduce3::execScalar(void *vx, Nd4jLong *xShapeInfo, } }; - maxThreads = samediff::Threads::parallel_for(func, 0, length, 1, maxThreads); + maxThreads = sd::Threads::parallel_for(func, 0, length, 1, maxThreads); } // merge step diff --git a/libnd4j/include/loops/cpu/summarystatsreduce.cpp b/libnd4j/include/loops/cpu/summarystatsreduce.cpp index f6b44b75c..dd7756b04 100644 --- a/libnd4j/include/loops/cpu/summarystatsreduce.cpp +++ b/libnd4j/include/loops/cpu/summarystatsreduce.cpp @@ -187,7 +187,7 @@ namespace functions { } }; - samediff::Threads::parallel_tad(func, 0, resultLength, 1); + sd::Threads::parallel_tad(func, 0, resultLength, 1); } diff --git a/libnd4j/include/loops/impl/type_conversions.cpp b/libnd4j/include/loops/impl/type_conversions.cpp index 16914bd86..133cc1c02 100644 --- a/libnd4j/include/loops/impl/type_conversions.cpp +++ b/libnd4j/include/loops/impl/type_conversions.cpp @@ -86,7 +86,7 @@ namespace sd { } }; - samediff::Threads::parallel_for(func, 0, N); + sd::Threads::parallel_for(func, 0, N); } template @@ -184,7 +184,7 @@ PRAGMA_OMP_ATOMIC_ARGS(write) } }; - samediff::Threads::parallel_for(func, 4, flimit); + sd::Threads::parallel_for(func, 4, flimit); } /** @@ -206,7 +206,7 @@ PRAGMA_OMP_ATOMIC_ARGS(write) z[i] = static_cast(static_cast(x[i])); } }; - samediff::Threads::parallel_for(func, 0, N); + sd::Threads::parallel_for(func, 0, N); }; template void TypeCast::convertFromThreshold(Nd4jPointer * extras, void *dx, Nd4jLong N, void *dz); diff --git a/libnd4j/include/ops/declarable/DeclarableOp.h b/libnd4j/include/ops/declarable/DeclarableOp.h index fd95f382d..0acd82a0e 100644 --- a/libnd4j/include/ops/declarable/DeclarableOp.h +++ b/libnd4j/include/ops/declarable/DeclarableOp.h @@ -112,7 +112,7 @@ namespace sd { */ int prepareOutputs(Context& block); - virtual samediff::EmptyHandling emptyHandling(); + virtual sd::EmptyHandling emptyHandling(); public: // for special cases, like BooleanOps DeclarableOp(); diff --git a/libnd4j/include/ops/declarable/EmptyHandling.h b/libnd4j/include/ops/declarable/EmptyHandling.h index c25fea498..cc570f508 100644 --- a/libnd4j/include/ops/declarable/EmptyHandling.h +++ b/libnd4j/include/ops/declarable/EmptyHandling.h @@ -21,7 +21,7 @@ #ifndef SAMEDIFF_EMPTYHANDLING_H #define SAMEDIFF_EMPTYHANDLING_H -namespace samediff { +namespace sd { enum EmptyHandling { EMPTY_SKIP = 1, EMPTY_EXCEPTION = 2, diff --git a/libnd4j/include/ops/declarable/OpRegistrator.h b/libnd4j/include/ops/declarable/OpRegistrator.h index 3a9fb3df6..ee91ab389 100644 --- a/libnd4j/include/ops/declarable/OpRegistrator.h +++ b/libnd4j/include/ops/declarable/OpRegistrator.h @@ -38,15 +38,15 @@ namespace std { template <> - class hash> { + class hash> { public: - size_t operator()(const std::pair& k) const; + size_t operator()(const std::pair& k) const; }; template <> - class hash> { + class hash> { public: - size_t operator()(const std::pair& k) const; + size_t operator()(const std::pair& k) const; }; }; @@ -87,8 +87,8 @@ namespace sd { std::vector _uniqueD; // pointers to platform-specific helpers - MAP_IMPL, sd::ops::platforms::PlatformHelper*> _helpersLH; - MAP_IMPL, sd::ops::platforms::PlatformHelper*> _helpersH; + MAP_IMPL, sd::ops::platforms::PlatformHelper*> _helpersLH; + MAP_IMPL, sd::ops::platforms::PlatformHelper*> _helpersH; std::vector _uniqueH; std::mutex _locker; @@ -119,13 +119,13 @@ namespace sd { void registerHelper(sd::ops::platforms::PlatformHelper* op); - bool hasHelper(Nd4jLong hash, samediff::Engine engine); + bool hasHelper(Nd4jLong hash, sd::Engine engine); sd::ops::DeclarableOp* getOperation(const char *name); sd::ops::DeclarableOp* getOperation(Nd4jLong hash); sd::ops::DeclarableOp* getOperation(std::string &name); - sd::ops::platforms::PlatformHelper* getPlatformHelper(Nd4jLong hash, samediff::Engine engine); + sd::ops::platforms::PlatformHelper* getPlatformHelper(Nd4jLong hash, sd::Engine engine); std::vector getAllHashes(); diff --git a/libnd4j/include/ops/declarable/PlatformHelper.h b/libnd4j/include/ops/declarable/PlatformHelper.h index b34a936ee..6737fc672 100644 --- a/libnd4j/include/ops/declarable/PlatformHelper.h +++ b/libnd4j/include/ops/declarable/PlatformHelper.h @@ -37,7 +37,7 @@ namespace sd { class ND4J_EXPORT PlatformHelper { protected: // target engine for this impl - samediff::Engine _engine; + sd::Engine _engine; // name of the operation this helper is built for std::string _name; @@ -45,13 +45,13 @@ namespace sd { // hash of the operation this helper is built for Nd4jLong _hash; public: - PlatformHelper(const char *name, samediff::Engine engine); + PlatformHelper(const char *name, sd::Engine engine); ~PlatformHelper() = default; std::string name(); - samediff::Engine engine(); + sd::Engine engine(); Nd4jLong hash(); diff --git a/libnd4j/include/ops/declarable/helpers/cpu/BarnesHutTsne.cpp b/libnd4j/include/ops/declarable/helpers/cpu/BarnesHutTsne.cpp index 5ac61964c..77f1b9aa6 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/BarnesHutTsne.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/BarnesHutTsne.cpp @@ -174,7 +174,7 @@ namespace helpers { } }; - samediff::Threads::parallel_tad(func, 0, N); + sd::Threads::parallel_tad(func, 0, N); } void barnes_edge_forces(const NDArray* rowP, NDArray const* colP, NDArray const* valP, int N, NDArray* output, NDArray const& data) { diff --git a/libnd4j/include/ops/declarable/helpers/cpu/activations.cpp b/libnd4j/include/ops/declarable/helpers/cpu/activations.cpp index de56650c8..eccd8a7fa 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/activations.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/activations.cpp @@ -154,7 +154,7 @@ void prelu(sd::LaunchContext * context, const NDArray& input, const NDArray& alp } }; - samediff::Threads::parallel_for(func, 0, inputLen); + sd::Threads::parallel_for(func, 0, inputLen); } ////////////////////////////////////////////////////////////////////////// diff --git a/libnd4j/include/ops/declarable/helpers/cpu/addBias.cpp b/libnd4j/include/ops/declarable/helpers/cpu/addBias.cpp index 68b8c6955..a426dc0bd 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/addBias.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/addBias.cpp @@ -565,7 +565,7 @@ namespace sd { } }; // - samediff::Threads::parallel_aligned_increment(func, 0, total_num, inc); + sd::Threads::parallel_aligned_increment(func, 0, total_num, inc); } else { //NC...HW case here @@ -631,7 +631,7 @@ namespace sd { } }; // - samediff::Threads::parallel_aligned_increment(func, 0, total_num, inc); + sd::Threads::parallel_aligned_increment(func, 0, total_num, inc); } } ////////////////////////////////////////////////////////////////////////// diff --git a/libnd4j/include/ops/declarable/helpers/cpu/adjust_hue.cpp b/libnd4j/include/ops/declarable/helpers/cpu/adjust_hue.cpp index 078ebda10..327fbfc22 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/adjust_hue.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/adjust_hue.cpp @@ -55,7 +55,7 @@ static void adjustHue_(const NDArray *input, const NDArray* deltaScalarArr, NDAr } }; - samediff::Threads::parallel_for(func, 0, input->lengthOf(), 3); + sd::Threads::parallel_for(func, 0, input->lengthOf(), 3); } else { @@ -87,7 +87,7 @@ static void adjustHue_(const NDArray *input, const NDArray* deltaScalarArr, NDAr } }; - samediff::Threads::parallel_tad(func, 0, numOfTads); + sd::Threads::parallel_tad(func, 0, numOfTads); } } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/adjust_saturation.cpp b/libnd4j/include/ops/declarable/helpers/cpu/adjust_saturation.cpp index c5c5cf9c6..b6f5b732e 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/adjust_saturation.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/adjust_saturation.cpp @@ -56,7 +56,7 @@ static void adjustSaturation_(const NDArray *input, const NDArray* factorScalarA } }; - samediff::Threads::parallel_for(func, 0, input->lengthOf(), 3); + sd::Threads::parallel_for(func, 0, input->lengthOf(), 3); } else { auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimC); auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimC); @@ -84,7 +84,7 @@ static void adjustSaturation_(const NDArray *input, const NDArray* factorScalarA } }; - samediff::Threads::parallel_tad(func, 0, numOfTads); + sd::Threads::parallel_tad(func, 0, numOfTads); } } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/batched_gemm.cpp b/libnd4j/include/ops/declarable/helpers/cpu/batched_gemm.cpp index daaf4f71a..efff55e00 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/batched_gemm.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/batched_gemm.cpp @@ -114,7 +114,7 @@ void bgemm_(const std::vector& vA, const std::vector& vB, st } }; - samediff::Threads::parallel_tad(func, 0, vaSize); + sd::Threads::parallel_tad(func, 0, vaSize); } } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/batchnorm.cpp b/libnd4j/include/ops/declarable/helpers/cpu/batchnorm.cpp index a0e6cf061..0ccaa3b39 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/batchnorm.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/batchnorm.cpp @@ -106,7 +106,7 @@ static void batchnorm_(const NDArray* input, const NDArray* mean, const NDArray* delete []zOffsets; }; - samediff::Threads::parallel_do(func, info._numThreads); + sd::Threads::parallel_do(func, info._numThreads); } ////////////////////////////////////////////////////////////////////////// @@ -178,7 +178,7 @@ static void batchnorm2_(const NDArray* input, const NDArray* mean, const NDArray } }; - samediff::Threads::parallel_for(func, 0, input->lengthOf()); + sd::Threads::parallel_for(func, 0, input->lengthOf()); } ////////////////////////////////////////////////////////////////////////// diff --git a/libnd4j/include/ops/declarable/helpers/cpu/betaInc.cpp b/libnd4j/include/ops/declarable/helpers/cpu/betaInc.cpp index ec06610b8..b16d20a76 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/betaInc.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/betaInc.cpp @@ -121,7 +121,7 @@ static void betaIncForArray(sd::LaunchContext * context, const NDArray& a, const output.t(i) = betaIncCore(a.t(i), b.t(i), x.t(i)); }; - samediff::Threads::parallel_for(func, 0, xLen); + sd::Threads::parallel_for(func, 0, xLen); } /////////////////////////////////////////////////////////////////// diff --git a/libnd4j/include/ops/declarable/helpers/cpu/col2im.cpp b/libnd4j/include/ops/declarable/helpers/cpu/col2im.cpp index db6d27ffd..185a15780 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/col2im.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/col2im.cpp @@ -89,7 +89,7 @@ void col2im_(sd::LaunchContext & context, const NDArray& input, NDArray& output } }; - samediff::Threads::parallel_for(func, 0, bS, 1, 0, iC, 1); + sd::Threads::parallel_for(func, 0, bS, 1, 0, iC, 1); } else { @@ -127,7 +127,7 @@ void col2im_(sd::LaunchContext & context, const NDArray& input, NDArray& output } }; - samediff::Threads::parallel_tad(func, 0, bS); + sd::Threads::parallel_tad(func, 0, bS); } } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compare_elem.cpp b/libnd4j/include/ops/declarable/helpers/cpu/compare_elem.cpp index 12961fe92..114707687 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/compare_elem.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/compare_elem.cpp @@ -40,7 +40,7 @@ namespace sd { } return sum; }; - sumt = samediff::Threads::parallel_long(func, LAMBDA_SUML, 0, length - 1); + sumt = sd::Threads::parallel_long(func, LAMBDA_SUML, 0, length - 1); } else { //PRAGMA_OMP_PARALLEL_FOR_SIMD_REDUCTION(+:sum) auto func = PRAGMA_REDUCE_LONG { @@ -53,7 +53,7 @@ namespace sd { return sum; }; - sumt = samediff::Threads::parallel_long(func, LAMBDA_SUML, 0, length - 1); + sumt = sd::Threads::parallel_long(func, LAMBDA_SUML, 0, length - 1); } //nd4j_printf("Sum: %lld\n", sumt) diff --git a/libnd4j/include/ops/declarable/helpers/cpu/confusion.cpp b/libnd4j/include/ops/declarable/helpers/cpu/confusion.cpp index 685d80d2d..b987f797a 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/confusion.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/confusion.cpp @@ -40,7 +40,7 @@ namespace helpers { } }; - samediff::Threads::parallel_for(func, 0, lLen); + sd::Threads::parallel_for(func, 0, lLen); } void confusionFunctor(sd::LaunchContext * context, NDArray* labels, NDArray* predictions, NDArray* weights, NDArray* output) { diff --git a/libnd4j/include/ops/declarable/helpers/cpu/convolutions.cpp b/libnd4j/include/ops/declarable/helpers/cpu/convolutions.cpp index c1dd5dd56..b06fe7fe0 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/convolutions.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/convolutions.cpp @@ -101,7 +101,7 @@ namespace sd { } }; - samediff::Threads::parallel_for(func, 0, bS, 1, 0, iC, 1, 0, kD, 1); + sd::Threads::parallel_for(func, 0, bS, 1, 0, iC, 1, 0, kD, 1); } else { @@ -139,7 +139,7 @@ namespace sd { } }; - samediff::Threads::parallel_for(func, 0, bS, 1, 0, oD, 1); + sd::Threads::parallel_for(func, 0, bS, 1, 0, oD, 1); //func(0, 0, bS, 1, 0, oD, 1); } } @@ -215,7 +215,7 @@ namespace sd { } }; - samediff::Threads::parallel_tad(func, 0, bS); + sd::Threads::parallel_tad(func, 0, bS); } else { @@ -251,7 +251,7 @@ namespace sd { } }; - samediff::Threads::parallel_tad(func, 0, bS); + sd::Threads::parallel_tad(func, 0, bS); } } @@ -606,7 +606,7 @@ namespace sd { } }; - samediff::Threads::parallel_for(func, 0, bS, 1, 0, iC, 1, 0, oH, 1); + sd::Threads::parallel_for(func, 0, bS, 1, 0, iC, 1, 0, oH, 1); } ////////////////////////////////////////////////////////////////////////// @@ -663,7 +663,7 @@ namespace sd { } }; - samediff::Threads::parallel_for(func, 0, bS, 1, 0, iC, 1, 0, oD, 1); + sd::Threads::parallel_for(func, 0, bS, 1, 0, iC, 1, 0, oD, 1); } ////////////////////////////////////////////////////////////////////////// @@ -716,7 +716,7 @@ namespace sd { } }; - samediff::Threads::parallel_for(func, 0, bS, 1, 0, iC, 1, 0, iH, 1); + sd::Threads::parallel_for(func, 0, bS, 1, 0, iC, 1, 0, iH, 1); } ////////////////////////////////////////////////////////////////////////// @@ -777,7 +777,7 @@ namespace sd { } }; - samediff::Threads::parallel_for(func, 0, bS, 1, 0, iC, 1, 0, iD, 1); + sd::Threads::parallel_for(func, 0, bS, 1, 0, iC, 1, 0, iD, 1); } ////////////////////////////////////////////////////////////////////////// @@ -860,7 +860,7 @@ namespace sd { } }; - samediff::Threads::parallel_for(func, 0, bS, 1, 0, iC, 1); + sd::Threads::parallel_for(func, 0, bS, 1, 0, iC, 1); } /*************************************************************************/ else if(poolingMode == 1) { // avg @@ -914,7 +914,7 @@ namespace sd { } }; - samediff::Threads::parallel_for(func, 0, bS, 1, 0, iC, 1); + sd::Threads::parallel_for(func, 0, bS, 1, 0, iC, 1); } /*************************************************************************/ else if(poolingMode == 2) { // pnorm @@ -963,7 +963,7 @@ namespace sd { } }; - samediff::Threads::parallel_for(func, 0, bS, 1, 0, iC, 1); + sd::Threads::parallel_for(func, 0, bS, 1, 0, iC, 1); } else { nd4j_printf("ConvolutionUtils::pooling2d: pooling mode argument can take three values only: 0, 1, 2, but got %i instead !\n", poolingMode); @@ -1068,7 +1068,7 @@ namespace sd { } }; - samediff::Threads::parallel_for(func, 0, bS, 1, 0, iC, 1, 0, oD, 1); + sd::Threads::parallel_for(func, 0, bS, 1, 0, iC, 1, 0, oD, 1); } /*************************************************************************/ else if(poolingMode == 1) { // avg @@ -1131,7 +1131,7 @@ namespace sd { } }; - samediff::Threads::parallel_for(func, 0, bS, 1, 0, iC, 1, 0, oD, 1); + sd::Threads::parallel_for(func, 0, bS, 1, 0, iC, 1, 0, oD, 1); } /*************************************************************************/ else if(poolingMode == 2) { // pnorm @@ -1191,7 +1191,7 @@ namespace sd { } }; - samediff::Threads::parallel_for(func, 0, bS, 1, 0, iC, 1, 0, oD, 1); + sd::Threads::parallel_for(func, 0, bS, 1, 0, iC, 1, 0, oD, 1); } else { nd4j_printf("ConvolutionUtils::pooling3d: pooling mode argument can take three values only: 0, 1, 2, but got %i instead !\n", poolingMode); @@ -1321,7 +1321,7 @@ namespace sd { } }; - samediff::Threads::parallel_for(func, 0, bS, 1, 0, iC, 1); + sd::Threads::parallel_for(func, 0, bS, 1, 0, iC, 1); } /*************************************************************************/ else if(poolingMode == 1) { // avg @@ -1379,7 +1379,7 @@ namespace sd { } }; - samediff::Threads::parallel_for(func, 0, bS, 1, 0, iC, 1); + sd::Threads::parallel_for(func, 0, bS, 1, 0, iC, 1); } /*************************************************************************/ else if(poolingMode == 2) { // pnorm @@ -1466,7 +1466,7 @@ namespace sd { } }; - samediff::Threads::parallel_for(func, 0, bS, 1, 0, iC, 1); + sd::Threads::parallel_for(func, 0, bS, 1, 0, iC, 1); } else { nd4j_printf("ConvolutionUtils::pooling2dBP: pooling mode argument can take three values only: 0, 1, 2, but got %i instead !\n", poolingMode); @@ -1618,7 +1618,7 @@ namespace sd { } }; - samediff::Threads::parallel_for(func, 0, bS, 1, 0, iC, 1, 0, oD, 1); + sd::Threads::parallel_for(func, 0, bS, 1, 0, iC, 1, 0, oD, 1); } /*************************************************************************/ else if(poolingMode == 1) { // avg @@ -1679,7 +1679,7 @@ namespace sd { } }; - samediff::Threads::parallel_for(func, 0, bS, 1, 0, iC, 1, 0, oD, 1); + sd::Threads::parallel_for(func, 0, bS, 1, 0, iC, 1, 0, oD, 1); } /*************************************************************************/ else if(poolingMode == 2) { // pnorm @@ -1761,7 +1761,7 @@ namespace sd { } }; - samediff::Threads::parallel_for(func, 0, bS, 1, 0, iC, 1, 0, oD, 1); + sd::Threads::parallel_for(func, 0, bS, 1, 0, iC, 1, 0, oD, 1); } else { nd4j_printf("ConvolutionUtils::pooling3dBP: pooling mode argument can take three values only: 0, 1, 2, but got %i instead !\n", poolingMode); diff --git a/libnd4j/include/ops/declarable/helpers/cpu/crop_and_resize.hpp b/libnd4j/include/ops/declarable/helpers/cpu/crop_and_resize.hpp index c7d29c471..6face5f7a 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/crop_and_resize.hpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/crop_and_resize.hpp @@ -115,7 +115,7 @@ namespace sd { } }; - samediff::Threads::parallel_for(func, 0, cropHeight); + sd::Threads::parallel_for(func, 0, cropHeight); } } } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/cross.cpp b/libnd4j/include/ops/declarable/helpers/cpu/cross.cpp index 51af1840b..0d00b4134 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/cross.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/cross.cpp @@ -48,7 +48,7 @@ void crossBatched(sd::LaunchContext * context, NDArray *a, NDArray *b, NDArray * } }; - samediff::Threads::parallel_tad(func, 0, tads); + sd::Threads::parallel_tad(func, 0, tads); } } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/d_t_s.cpp b/libnd4j/include/ops/declarable/helpers/cpu/d_t_s.cpp index 700e5b8dd..b756ee80c 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/d_t_s.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/d_t_s.cpp @@ -65,7 +65,7 @@ namespace helpers { } }; - samediff::Threads::parallel_for(func, 0, total_count); + sd::Threads::parallel_for(func, 0, total_count); } else { const int total_count = batch_size * input_depth_by_input_area; @@ -89,7 +89,7 @@ namespace helpers { } }; - samediff::Threads::parallel_for(func, 0, total_count); + sd::Threads::parallel_for(func, 0, total_count); } } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/diGamma.cpp b/libnd4j/include/ops/declarable/helpers/cpu/diGamma.cpp index 37abaf559..e0bbe9572 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/diGamma.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/diGamma.cpp @@ -35,7 +35,7 @@ static void diGamma_(const NDArray& x, NDArray& z) { for (auto i = start; i < stop; i++) z.p(i, diGammaScalar(x.e(i))); }; - samediff::Threads::parallel_for(func, 0, x.lengthOf()); + sd::Threads::parallel_for(func, 0, x.lengthOf()); } void diGamma(sd::LaunchContext* context, const NDArray& x, NDArray& z) { diff --git a/libnd4j/include/ops/declarable/helpers/cpu/dilation2d.cpp b/libnd4j/include/ops/declarable/helpers/cpu/dilation2d.cpp index fbf071e28..bfe087467 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/dilation2d.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/dilation2d.cpp @@ -87,7 +87,7 @@ static void dilation2d_(NDArray *input, NDArray *weights, NDArray *output, const } }; - samediff::Threads::parallel_for(func, 0, bS, 1, 0, oH, 1); + sd::Threads::parallel_for(func, 0, bS, 1, 0, oH, 1); } void dilation2d(sd::LaunchContext* context, NDArray *input, NDArray *weights, NDArray *output, const int sH, const int sW, const int pH, const int pW, const int dH, const int dW) { diff --git a/libnd4j/include/ops/declarable/helpers/cpu/dropout.cpp b/libnd4j/include/ops/declarable/helpers/cpu/dropout.cpp index 54981dea5..e739d5f55 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/dropout.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/dropout.cpp @@ -43,7 +43,7 @@ namespace helpers { } }; - samediff::Threads::parallel_for(func, 0, inLen); + sd::Threads::parallel_for(func, 0, inLen); } BUILD_SINGLE_TEMPLATE(template void dropoutSimple, (NDArray const* input, NDArray* output, double probValue, int seed), FLOAT_TYPES); @@ -137,7 +137,7 @@ namespace helpers { } }; - samediff::Threads::parallel_for(func, 0, input->lengthOf()); + sd::Threads::parallel_for(func, 0, input->lengthOf()); return Status::OK(); } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/dynamic.cpp b/libnd4j/include/ops/declarable/helpers/cpu/dynamic.cpp index 2b6b4cd02..932818198 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/dynamic.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/dynamic.cpp @@ -71,7 +71,7 @@ namespace sd { } }; - samediff::Threads::parallel_tad(func, 0, outSize); + sd::Threads::parallel_tad(func, 0, outSize); } } template @@ -177,7 +177,7 @@ namespace sd { } }; - samediff::Threads::parallel_tad(func, 0, gradsSize); + sd::Threads::parallel_tad(func, 0, gradsSize); } outputList[1]->assign(indices); diff --git a/libnd4j/include/ops/declarable/helpers/cpu/extract_patches.cpp b/libnd4j/include/ops/declarable/helpers/cpu/extract_patches.cpp index 15ea569e8..1b4e07346 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/extract_patches.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/extract_patches.cpp @@ -82,7 +82,7 @@ namespace helpers { } }; - samediff::Threads::parallel_tad(func, 0, batchCount); + sd::Threads::parallel_tad(func, 0, batchCount); } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/gather.cpp b/libnd4j/include/ops/declarable/helpers/cpu/gather.cpp index fb715a5e5..027ba6e0f 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/gather.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/gather.cpp @@ -63,7 +63,7 @@ void gather(sd::LaunchContext * context, const NDArray* input, const NDArray* in output->p(i, input->e(indices->e(i))); }; - samediff::Threads::parallel_for(func, 0, output->lengthOf()); + sd::Threads::parallel_for(func, 0, output->lengthOf()); } else { @@ -96,7 +96,7 @@ void gather(sd::LaunchContext * context, const NDArray* input, const NDArray* in memcpy(outBuff, inBuff, shape::length(inTadShapeInfo) * input->sizeOfT()); } }; - samediff::Threads::parallel_tad(func, 0, numOfSubArrs); + sd::Threads::parallel_tad(func, 0, numOfSubArrs); } else { auto func = PRAGMA_THREADS_FOR { @@ -112,7 +112,7 @@ void gather(sd::LaunchContext * context, const NDArray* input, const NDArray* in } }; - samediff::Threads::parallel_tad(func, 0, numOfSubArrs); + sd::Threads::parallel_tad(func, 0, numOfSubArrs); } } } @@ -148,7 +148,7 @@ void gather(sd::LaunchContext * context, const NDArray* input, const NDArray* in std::memcpy(outBuff, inBuff, shape::length(inTadShapeInfo) * input->sizeOfT()); } }; - samediff::Threads::parallel_tad(func, 0, numOfSubArrs); + sd::Threads::parallel_tad(func, 0, numOfSubArrs); } else { @@ -167,7 +167,7 @@ void gather(sd::LaunchContext * context, const NDArray* input, const NDArray* in } }; - samediff::Threads::parallel_tad(func, 0, numOfSubArrs); + sd::Threads::parallel_tad(func, 0, numOfSubArrs); } } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/hamming.cpp b/libnd4j/include/ops/declarable/helpers/cpu/hamming.cpp index 10b6a27e0..01e6a76d9 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/hamming.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/hamming.cpp @@ -64,7 +64,7 @@ namespace sd { } }; - maxThreads = samediff::Threads::parallel_for(func, 0, lengthOf); + maxThreads = sd::Threads::parallel_for(func, 0, lengthOf); } else if (xEws > 1 && yEws > 1 && x.ordering() == y.ordering()) { auto func = PRAGMA_THREADS_FOR { for (auto e = start; e < stop; e++) { @@ -75,7 +75,7 @@ namespace sd { } }; - maxThreads = samediff::Threads::parallel_for(func, 0, lengthOf); + maxThreads = sd::Threads::parallel_for(func, 0, lengthOf); } else { auto func = PRAGMA_THREADS_FOR { for (auto e = start; e < stop; e++) { @@ -86,7 +86,7 @@ namespace sd { } }; - maxThreads = samediff::Threads::parallel_for(func, 0, lengthOf); + maxThreads = sd::Threads::parallel_for(func, 0, lengthOf); } // accumulate intermediate variables into output array diff --git a/libnd4j/include/ops/declarable/helpers/cpu/hashcode.cpp b/libnd4j/include/ops/declarable/helpers/cpu/hashcode.cpp index 5893b2c88..692025430 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/hashcode.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/hashcode.cpp @@ -54,7 +54,7 @@ namespace sd { tempBuffer[b] = r; } }; - samediff::Threads::parallel_tad(func, 0, numBlocks); + sd::Threads::parallel_tad(func, 0, numBlocks); // we replace pointer with intermediate one, and repeat only one chunk left int iterationCount = 0; @@ -76,7 +76,7 @@ namespace sd { tempResult[b] = r; } }; - samediff::Threads::parallel_tad(func2, 0, numBlocks); + sd::Threads::parallel_tad(func2, 0, numBlocks); iterationCount++; diff --git a/libnd4j/include/ops/declarable/helpers/cpu/im2col.cpp b/libnd4j/include/ops/declarable/helpers/cpu/im2col.cpp index 2129b4bee..f664d8308 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/im2col.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/im2col.cpp @@ -90,7 +90,7 @@ static void im2col_(sd::LaunchContext & context, const NDArray& input, NDArray& } }; - samediff::Threads::parallel_for(func, 0, bS, 1, 0, iC, 1); + sd::Threads::parallel_for(func, 0, bS, 1, 0, iC, 1); } else { @@ -124,7 +124,7 @@ static void im2col_(sd::LaunchContext & context, const NDArray& input, NDArray& } }; - samediff::Threads::parallel_for(func, 0, bS, 1, 0, oH, 1); + sd::Threads::parallel_for(func, 0, bS, 1, 0, oH, 1); } } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/image_draw_bounding_boxes.cpp b/libnd4j/include/ops/declarable/helpers/cpu/image_draw_bounding_boxes.cpp index ee4faafb0..9e1b222f5 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/image_draw_bounding_boxes.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/image_draw_bounding_boxes.cpp @@ -149,7 +149,7 @@ namespace helpers { } } }; - samediff::Threads::parallel_tad(func, 0, batchSize); + sd::Threads::parallel_tad(func, 0, batchSize); } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/image_resize.cpp b/libnd4j/include/ops/declarable/helpers/cpu/image_resize.cpp index 2f0f00779..a451fe660 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/image_resize.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/image_resize.cpp @@ -178,7 +178,7 @@ namespace helpers { interpolationData[i]._interpolarValue = in - in_f; } }; - samediff::Threads::parallel_for(func, 0, outSize); + sd::Threads::parallel_for(func, 0, outSize); } /** @@ -240,7 +240,7 @@ namespace helpers { } } }; - samediff::Threads::parallel_tad(func, 0, batchSize); + sd::Threads::parallel_tad(func, 0, batchSize); } template @@ -285,7 +285,7 @@ namespace helpers { xs[i]._topIndex *= channels; } }; - samediff::Threads::parallel_for(func, 0, xsSize); + sd::Threads::parallel_for(func, 0, xsSize); resizeImage_(images->getDataBuffer()->primaryAsT(), batchSize, inHeight, inWidth, outHeight, outWidth, channels, xs, ys, output->dataBuffer()->primaryAsT()); return Status::OK(); @@ -323,7 +323,7 @@ namespace helpers { } } }; - samediff::Threads::parallel_for(func, 0, batchSize, 1, 0, outHeight, 1); + sd::Threads::parallel_for(func, 0, batchSize, 1, 0, outHeight, 1); } template @@ -427,7 +427,7 @@ namespace helpers { coeffs_table[i * 2 + 1] = ((a * x - 5 * a) * x + 8 * a) * x - 4 * a; } }; - samediff::Threads::parallel_for(func, 0, kTableSize); + sd::Threads::parallel_for(func, 0, kTableSize); return coeffs_table; } @@ -541,7 +541,7 @@ namespace helpers { x_wai._index3); } }; - samediff::Threads::parallel_for(func, 0, resizer_state.outWidth); + sd::Threads::parallel_for(func, 0, resizer_state.outWidth); } else { auto func = PRAGMA_THREADS_FOR { for (auto x = start; x < stop; ++x) { @@ -552,7 +552,7 @@ namespace helpers { x_wai._index3); } }; - samediff::Threads::parallel_for(func, 0, resizer_state.outWidth); + sd::Threads::parallel_for(func, 0, resizer_state.outWidth); } // Scale the values so they can be used as offsets into buffers. auto func = PRAGMA_THREADS_FOR { @@ -563,7 +563,7 @@ namespace helpers { (*x_wais)[x]._index3 *= resizer_state.channels; } }; - samediff::Threads::parallel_for(func, 0, resizer_state.outWidth); + sd::Threads::parallel_for(func, 0, resizer_state.outWidth); } template @@ -774,7 +774,7 @@ namespace helpers { } } }; - samediff::Threads::parallel_tad(func, 0, batchNum); + sd::Threads::parallel_tad(func, 0, batchNum); } // simplified bicubic resize without antialiasing @@ -950,7 +950,7 @@ namespace helpers { } } }; - samediff::Threads::parallel_tad(batchProcess, 0, st.batchSize, 1); + sd::Threads::parallel_tad(batchProcess, 0, st.batchSize, 1); } template @@ -981,7 +981,7 @@ namespace helpers { } }; - samediff::Threads::parallel_for(cachingProcedure, 0, xCached.size(), 1); + sd::Threads::parallel_for(cachingProcedure, 0, xCached.size(), 1); resizeArea(st, xCached, image, output); } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/imagesHelpers.cpp b/libnd4j/include/ops/declarable/helpers/cpu/imagesHelpers.cpp index 46729fbb8..a6aa1e9ae 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/imagesHelpers.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/imagesHelpers.cpp @@ -45,7 +45,7 @@ static void rgbToGrs_(const NDArray& input, NDArray& output, const int dimC) { } }; - samediff::Threads::parallel_for(func, 0, output.lengthOf(), 1); + sd::Threads::parallel_for(func, 0, output.lengthOf(), 1); return; } @@ -62,7 +62,7 @@ static void rgbToGrs_(const NDArray& input, NDArray& output, const int dimC) { } }; - samediff::Threads::parallel_for(func, 0, output.lengthOf(), 1); + sd::Threads::parallel_for(func, 0, output.lengthOf(), 1); return; } @@ -87,7 +87,7 @@ FORCEINLINE static void rgbToFromYuv_(const NDArray& input, NDArray& output, con } }; - samediff::Threads::parallel_for(func, 0, input.lengthOf(), 3); + sd::Threads::parallel_for(func, 0, input.lengthOf(), 3); return; } @@ -106,7 +106,7 @@ FORCEINLINE static void rgbToFromYuv_(const NDArray& input, NDArray& output, con } }; - samediff::Threads::parallel_tad(func, 0, numOfTads); + sd::Threads::parallel_tad(func, 0, numOfTads); return; } @@ -146,7 +146,7 @@ FORCEINLINE static void tripleTransformer(const NDArray* input, NDArray* output, } }; - samediff::Threads::parallel_for(func, 0, input->lengthOf(), 3); + sd::Threads::parallel_for(func, 0, input->lengthOf(), 3); } else { auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimC); @@ -165,7 +165,7 @@ FORCEINLINE static void tripleTransformer(const NDArray* input, NDArray* output, } }; - samediff::Threads::parallel_tad(func, 0, numOfTads); + sd::Threads::parallel_tad(func, 0, numOfTads); } } @@ -196,7 +196,7 @@ FORCEINLINE static void tripleTransformer(const NDArray* input, NDArray* output, } }; - samediff::Threads::parallel_for(func, 0, input->lengthOf(), 3); + sd::Threads::parallel_for(func, 0, input->lengthOf(), 3); } else { auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimC); @@ -222,7 +222,7 @@ FORCEINLINE static void tripleTransformer(const NDArray* input, NDArray* output, } }; - samediff::Threads::parallel_tad(func, 0, numOfTads); + sd::Threads::parallel_tad(func, 0, numOfTads); } } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/ismax.cpp b/libnd4j/include/ops/declarable/helpers/cpu/ismax.cpp index 5a4bb28cc..81dd25ca8 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/ismax.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/ismax.cpp @@ -195,7 +195,7 @@ static void ismax_(const NDArray* input, NDArray* output, const std::vector } }; - samediff::Threads::parallel_tad(func, 0, tads); + sd::Threads::parallel_tad(func, 0, tads); } } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/lrn.cpp b/libnd4j/include/ops/declarable/helpers/cpu/lrn.cpp index 31235d737..bcf402cd2 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/lrn.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/lrn.cpp @@ -96,7 +96,7 @@ static int lrnFunctor_(sd::graph::Context& block, NDArray* input, NDArray* outpu } }; - samediff::Threads::parallel_tad(func, 0, numOfTads); + sd::Threads::parallel_tad(func, 0, numOfTads); } else { auto func = PRAGMA_THREADS_FOR { @@ -134,7 +134,7 @@ static int lrnFunctor_(sd::graph::Context& block, NDArray* input, NDArray* outpu } }; - samediff::Threads::parallel_tad(func, 0, numOfTads); + sd::Threads::parallel_tad(func, 0, numOfTads); } return Status::OK(); } @@ -242,7 +242,7 @@ static void lrnBP_(const NDArray& input, const NDArray& gradO, NDArray& gradI, c } }; - samediff::Threads::parallel_tad(func, 0, numOfTads); + sd::Threads::parallel_tad(func, 0, numOfTads); } else { @@ -317,7 +317,7 @@ static void lrnBP_(const NDArray& input, const NDArray& gradO, NDArray& gradI, c } }; - samediff::Threads::parallel_tad(func, 0, numOfTads); + sd::Threads::parallel_tad(func, 0, numOfTads); } gradI *= gradO; } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/lstm.cpp b/libnd4j/include/ops/declarable/helpers/cpu/lstm.cpp index 02d4c9855..a306f7ddc 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/lstm.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/lstm.cpp @@ -130,7 +130,7 @@ static void fusedTanh(NDArray *z, NDArray *i, NDArray *c, const NDArray *cLast, } }; - samediff::Threads::parallel_for(func, 0, uLen); + sd::Threads::parallel_for(func, 0, uLen); } ////////////////////////////////////////////////////////////////////////// diff --git a/libnd4j/include/ops/declarable/helpers/cpu/lup.cpp b/libnd4j/include/ops/declarable/helpers/cpu/lup.cpp index 8466631da..cfebca94d 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/lup.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/lup.cpp @@ -54,7 +54,7 @@ namespace helpers { } }; - samediff::Threads::parallel_tad(loop, 0, n, 1); + sd::Threads::parallel_tad(loop, 0, n, 1); } } @@ -79,8 +79,8 @@ namespace helpers { invertedMatrix->t(i, i - 1) -= (inputMatrix->t(i, i - 1) * invertedMatrix->t(i - 1, i - 1) / inputMatrix->t(i, i)); }; - samediff::Threads::parallel_for(invertDiagonals, 0, n, 1); - samediff::Threads::parallel_for(invertSubDiagonals, 1, n, 1); + sd::Threads::parallel_for(invertDiagonals, 0, n, 1); + sd::Threads::parallel_for(invertSubDiagonals, 1, n, 1); // PRAGMA_OMP_PARALLEL_FOR_SIMD for (int i = 1; i < n; i++) { @@ -118,8 +118,8 @@ namespace helpers { inputMatrix->t(i, i)); }; - samediff::Threads::parallel_for(invertDiagonals, 0, n, 1); - samediff::Threads::parallel_for(invertUpDiagonals, 0, n - 1, 1); + sd::Threads::parallel_for(invertDiagonals, 0, n, 1); + sd::Threads::parallel_for(invertUpDiagonals, 0, n - 1, 1); // PRAGMA_OMP_PARALLEL_FOR_SIMD for (auto i = n - 2; i >= 0; i--) { @@ -225,7 +225,7 @@ namespace helpers { } } //}; - //samediff::Threads::parallel_for(loop, column, rowNum, 1); + //sd::Threads::parallel_for(loop, column, rowNum, 1); return result; } @@ -247,7 +247,7 @@ namespace helpers { } } }; - samediff::Threads::parallel_tad(loop, currentRow + 1, rowNum, 1); + sd::Threads::parallel_tad(loop, currentRow + 1, rowNum, 1); } template @@ -327,7 +327,7 @@ namespace helpers { luNN_(context, outputs.at(i), permutationVectors?permutations.at(i):nullptr, n); } }; - samediff::Threads::parallel_for(loop, 0, outputs.size(), 1); + sd::Threads::parallel_for(loop, 0, outputs.size(), 1); } void lu(LaunchContext *context, NDArray* input, NDArray* output, NDArray* permutation) { diff --git a/libnd4j/include/ops/declarable/helpers/cpu/matrixSetDiag.cpp b/libnd4j/include/ops/declarable/helpers/cpu/matrixSetDiag.cpp index 3372950f2..3d4f3692c 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/matrixSetDiag.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/matrixSetDiag.cpp @@ -63,7 +63,7 @@ void matrixSetDiag_(const NDArray& input, const NDArray& diagonal, NDArray& outp z[zOffset] = zeroPad ? static_cast(0) : x[xOffset]; } }; - samediff::Threads::parallel_for(func, 0, xLen); + sd::Threads::parallel_for(func, 0, xLen); } ////////////////////////////////////////////////////////////////////////// diff --git a/libnd4j/include/ops/declarable/helpers/cpu/matrix_diag_part.cpp b/libnd4j/include/ops/declarable/helpers/cpu/matrix_diag_part.cpp index 3271dc110..8009c42c3 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/matrix_diag_part.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/matrix_diag_part.cpp @@ -51,7 +51,7 @@ int _matrixDiagPart(const NDArray* input, NDArray* output) { listOut.at(i)->p(j, listDiag.at(i)->e(j, j)); }; - samediff::Threads::parallel_tad(func, 0, lO); + sd::Threads::parallel_tad(func, 0, lO); return Status::OK(); } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/nth_element.cpp b/libnd4j/include/ops/declarable/helpers/cpu/nth_element.cpp index 2730d9e88..319ff2a48 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/nth_element.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/nth_element.cpp @@ -61,7 +61,7 @@ namespace helpers { } }; - samediff::Threads::parallel_for(func, 0, oL); + sd::Threads::parallel_for(func, 0, oL); } } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/one_hot.cpp b/libnd4j/include/ops/declarable/helpers/cpu/one_hot.cpp index d3f7add49..82698c0ce 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/one_hot.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/one_hot.cpp @@ -67,7 +67,7 @@ namespace sd { } }; - samediff::Threads::parallel_tad(func, 0, numTads); + sd::Threads::parallel_tad(func, 0, numTads); } else { auto func = PRAGMA_THREADS_FOR { for (auto e = start; e < stop; e++) { @@ -88,7 +88,7 @@ namespace sd { } }; - samediff::Threads::parallel_tad(func, 0, numTads); + sd::Threads::parallel_tad(func, 0, numTads); } } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/polyGamma.cpp b/libnd4j/include/ops/declarable/helpers/cpu/polyGamma.cpp index 2c93cee08..040d4fa47 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/polyGamma.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/polyGamma.cpp @@ -80,7 +80,7 @@ static void polyGamma_(sd::LaunchContext * context, const NDArray& n, const NDAr output.p(i, polyGammaScalar(context, order, x.e(i))); } }; - samediff::Threads::parallel_for(func, 0, x.lengthOf()); + sd::Threads::parallel_for(func, 0, x.lengthOf()); } void polyGamma(sd::LaunchContext * context, const NDArray& n, const NDArray& x, NDArray& output) { diff --git a/libnd4j/include/ops/declarable/helpers/cpu/qr.cpp b/libnd4j/include/ops/declarable/helpers/cpu/qr.cpp index 2ea18a79d..ccdb692a1 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/qr.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/qr.cpp @@ -48,7 +48,7 @@ namespace helpers { resBuf[i * n + j] = -2 * vBuf[i] * vBuf[j] + (i == j ? T(1) : T(0)); }; - samediff::Threads::parallel_for(interloop, 0, n, 1, 0, n, 1); + sd::Threads::parallel_for(interloop, 0, n, 1, 0, n, 1); return res; } @@ -119,7 +119,7 @@ namespace helpers { } }; - samediff::Threads::parallel_tad(batching, 0, listOutQ.size(), 1); + sd::Threads::parallel_tad(batching, 0, listOutQ.size(), 1); } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/random.cpp b/libnd4j/include/ops/declarable/helpers/cpu/random.cpp index b38101feb..b145e4318 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/random.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/random.cpp @@ -197,7 +197,7 @@ namespace helpers { } }; - samediff::Threads::parallel_for(func, 0, batchValue, 1, 0, numOfSamples, 1); + sd::Threads::parallel_for(func, 0, batchValue, 1, 0, numOfSamples, 1); rng.rewindH(output.lengthOf()*numOfClassX); return; diff --git a/libnd4j/include/ops/declarable/helpers/cpu/range.cpp b/libnd4j/include/ops/declarable/helpers/cpu/range.cpp index e4349ac8a..e1eff820d 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/range.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/range.cpp @@ -42,7 +42,7 @@ static void _range(const NDArray& start, const NDArray& delta, NDArray& outVecto for (auto i = start; i < stop; i++) buff[i] = s + i * d; }; - samediff::Threads::parallel_for(func, 0, len); + sd::Threads::parallel_for(func, 0, len); } void range(sd::LaunchContext * context, const NDArray& start, const NDArray& delta, NDArray& outVector) { diff --git a/libnd4j/include/ops/declarable/helpers/cpu/reverse.cpp b/libnd4j/include/ops/declarable/helpers/cpu/reverse.cpp index 3d17fb62a..9a775bc11 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/reverse.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/reverse.cpp @@ -59,7 +59,7 @@ static void reverseArray(sd::LaunchContext * context, void *vinArr, Nd4jLong *in swap(inArr, e, idx); } }; - samediff::Threads::parallel_for(func, 0, numOfElemsToReverse / 2); + sd::Threads::parallel_for(func, 0, numOfElemsToReverse / 2); } else if (inEWS > 1) { auto func = PRAGMA_THREADS_FOR { @@ -70,7 +70,7 @@ static void reverseArray(sd::LaunchContext * context, void *vinArr, Nd4jLong *in } }; - samediff::Threads::parallel_for(func, 0, numOfElemsToReverse / 2); + sd::Threads::parallel_for(func, 0, numOfElemsToReverse / 2); } else { @@ -82,7 +82,7 @@ static void reverseArray(sd::LaunchContext * context, void *vinArr, Nd4jLong *in } }; - samediff::Threads::parallel_for(func, 0, numOfElemsToReverse / 2); + sd::Threads::parallel_for(func, 0, numOfElemsToReverse / 2); } } else { @@ -96,14 +96,14 @@ static void reverseArray(sd::LaunchContext * context, void *vinArr, Nd4jLong *in for (Nd4jLong e = start; e < stop; e++) outArr[sLength - e] = inArr[e]; }; - samediff::Threads::parallel_for(func, 0, numOfElemsToReverse); + sd::Threads::parallel_for(func, 0, numOfElemsToReverse); if(inLength != numOfElemsToReverse) { auto f2 = PRAGMA_THREADS_FOR { for (auto e = start; e < stop; e++) outArr[e] = inArr[e]; }; - samediff::Threads::parallel_for(f2, numOfElemsToReverse, inLength); + sd::Threads::parallel_for(f2, numOfElemsToReverse, inLength); } } else if (inEWS >= 1 && outEWS >= 1 && inOrder == outOrder) { @@ -112,14 +112,14 @@ static void reverseArray(sd::LaunchContext * context, void *vinArr, Nd4jLong *in for (auto e = start; e < stop; e++) outArr[(sLength - e) * outEWS] = inArr[e * inEWS]; }; - samediff::Threads::parallel_for(func, 0, numOfElemsToReverse); + sd::Threads::parallel_for(func, 0, numOfElemsToReverse); if(inLength != numOfElemsToReverse) { auto f2 = PRAGMA_THREADS_FOR { for (auto e = start; e < stop; e++) outArr[e * outEWS] = inArr[e * inEWS]; }; - samediff::Threads::parallel_for(f2, numOfElemsToReverse, inLength); + sd::Threads::parallel_for(f2, numOfElemsToReverse, inLength); } } else { @@ -131,7 +131,7 @@ static void reverseArray(sd::LaunchContext * context, void *vinArr, Nd4jLong *in outArr[outOffset] = inArr[inOffset]; } }; - samediff::Threads::parallel_for(func, 0, numOfElemsToReverse); + sd::Threads::parallel_for(func, 0, numOfElemsToReverse); if(inLength != numOfElemsToReverse) { @@ -142,7 +142,7 @@ static void reverseArray(sd::LaunchContext * context, void *vinArr, Nd4jLong *in outArr[outOffset] = inArr[inOffset]; } }; - samediff::Threads::parallel_for(f2, numOfElemsToReverse, inLength); + sd::Threads::parallel_for(f2, numOfElemsToReverse, inLength); } } } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/s_t_b.cpp b/libnd4j/include/ops/declarable/helpers/cpu/s_t_b.cpp index bbbb9199e..cbc926fce 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/s_t_b.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/s_t_b.cpp @@ -69,7 +69,7 @@ static void batchToSpace_(const NDArray& input, NDArray& output, const uint crop } }; - samediff::Threads::parallel_for(func, 0, bS, 1, cropBottom, iH - cropTop, 1, cropLeft, iW - cropRight, 1); + sd::Threads::parallel_for(func, 0, bS, 1, cropBottom, iH - cropTop, 1, cropLeft, iW - cropRight, 1); } BUILD_SINGLE_TEMPLATE(template void batchToSpace_, (const NDArray& input, NDArray& output, const uint cropBottom, const uint cropTop, const uint cropLeft, const uint cropRight), LIBND4J_TYPES); @@ -128,7 +128,7 @@ static void batchToSpaceND_(const NDArray& input, const NDArray& crop, NDArray& } }; - samediff::Threads::parallel_tad(func, 0, zLen); + sd::Threads::parallel_tad(func, 0, zLen); } BUILD_SINGLE_TEMPLATE(template void batchToSpaceND_, (const NDArray& input, const NDArray& crop, NDArray& output, const uint numOfSpatialDims), LIBND4J_TYPES); @@ -234,7 +234,7 @@ static void spaceToBatch_(const NDArray& input, NDArray& output, const uint padB } }; - samediff::Threads::parallel_for(func, 0, bS, 1, 0, oH, 1); + sd::Threads::parallel_for(func, 0, bS, 1, 0, oH, 1); } BUILD_SINGLE_TEMPLATE(template void spaceToBatch_, (const NDArray& input, NDArray& output, const uint padBottom, const uint padTop, const uint padLeft, const uint padRight), LIBND4J_TYPES); @@ -327,7 +327,7 @@ static void spaceToBatchND_(const NDArray& input, const NDArray& padding, NDArra } }; - samediff::Threads::parallel_tad(func, 0, zLen); + sd::Threads::parallel_tad(func, 0, zLen); } BUILD_SINGLE_TEMPLATE(template void spaceToBatchND_, (const NDArray& input, const NDArray& padding, NDArray& output, const uint numOfSpatialDims), LIBND4J_TYPES); diff --git a/libnd4j/include/ops/declarable/helpers/cpu/s_t_d.cpp b/libnd4j/include/ops/declarable/helpers/cpu/s_t_d.cpp index 32968b486..05eef572f 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/s_t_d.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/s_t_d.cpp @@ -69,7 +69,7 @@ namespace helpers { } }; - samediff::Threads::parallel_for(func, 0, total_count); + sd::Threads::parallel_for(func, 0, total_count); } else { const int total_count = batch_size * output_depth_by_output_area; @@ -93,7 +93,7 @@ namespace helpers { } }; - samediff::Threads::parallel_for(func, 0, total_count); + sd::Threads::parallel_for(func, 0, total_count); } } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/scatter.cpp b/libnd4j/include/ops/declarable/helpers/cpu/scatter.cpp index 2d9250f9b..e147860ed 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/scatter.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/scatter.cpp @@ -58,7 +58,7 @@ Nd4jLong checkIndices_(const NDArray& indices, const NDArray& output, const int } }; - samediff::Threads::parallel_for(func, 0, indices.lengthOf()); + sd::Threads::parallel_for(func, 0, indices.lengthOf()); return numOfBadIndx; } @@ -87,7 +87,7 @@ void scatter(sd::LaunchContext *context, pairwise::Ops op, const NDArray& indic } }; - samediff::Threads::parallel_tad(func, 0, indLen, 1, lock ? 1 : sd::Environment::getInstance()->maxThreads()); + sd::Threads::parallel_tad(func, 0, indLen, 1, lock ? 1 : sd::Environment::getInstance()->maxThreads()); } else { // outRank > 1 @@ -107,7 +107,7 @@ void scatter(sd::LaunchContext *context, pairwise::Ops op, const NDArray& indic } }; - samediff::Threads::parallel_tad(func, 0, indLen, 1, lock ? 1 : sd::Environment::getInstance()->maxThreads()); + sd::Threads::parallel_tad(func, 0, indLen, 1, lock ? 1 : sd::Environment::getInstance()->maxThreads()); } } @@ -129,7 +129,7 @@ void scatterND(sd::LaunchContext *context, pairwise::Ops op, const NDArray& ind } }; - samediff::Threads::parallel_tad(func, 0, indLen, 1, lock ? 1 : sd::Environment::getInstance()->maxThreads()); + sd::Threads::parallel_tad(func, 0, indLen, 1, lock ? 1 : sd::Environment::getInstance()->maxThreads()); } else { std::vector dimsToExcludeInd = ShapeUtils::evalDimsToExclude(indRank, {indRank-1}); @@ -154,7 +154,7 @@ void scatterND(sd::LaunchContext *context, pairwise::Ops op, const NDArray& ind } }; - samediff::Threads::parallel_tad(func, 0, indLen / indLastDim, 1, lock ? 1 : sd::Environment::getInstance()->maxThreads()); + sd::Threads::parallel_tad(func, 0, indLen / indLastDim, 1, lock ? 1 : sd::Environment::getInstance()->maxThreads()); } } @@ -176,7 +176,7 @@ void scatterForLoss(sd::LaunchContext *context, const NDArray& indices, NDArray } }; - samediff::Threads::parallel_for(func, 0, indicesLen); + sd::Threads::parallel_for(func, 0, indicesLen); } else { auto func = PRAGMA_THREADS_FOR { for (auto i = start; i < stop; i++) { @@ -186,7 +186,7 @@ void scatterForLoss(sd::LaunchContext *context, const NDArray& indices, NDArray } }; - samediff::Threads::parallel_for(func, 0, indicesLen); + sd::Threads::parallel_for(func, 0, indicesLen); } } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/segment.cpp b/libnd4j/include/ops/declarable/helpers/cpu/segment.cpp index e57264e66..a432049a7 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/segment.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/segment.cpp @@ -173,7 +173,7 @@ namespace helpers { meanV.p(e, meanV.e(e) + listOfTensors.at(i)->e(e)); } }; - samediff::Threads::parallel_for(func, 0, meanT->lengthOf()); + sd::Threads::parallel_for(func, 0, meanT->lengthOf()); count++; } @@ -227,7 +227,7 @@ namespace helpers { sumT->p(e, sumT->e(e) + listOfTensors.at(i)->e(e)); } }; - samediff::Threads::parallel_for(func, 0, sumT->lengthOf()); + sd::Threads::parallel_for(func, 0, sumT->lengthOf()); } else { idx = indices->e(i); @@ -276,7 +276,7 @@ namespace helpers { sumT->p(e, sumT->e(e) * listOfTensors.at(i)->e(e)); } }; - samediff::Threads::parallel_for(func, 0, sumT->lengthOf()); + sd::Threads::parallel_for(func, 0, sumT->lengthOf()); } else { idx = indices->e(i); @@ -631,7 +631,7 @@ namespace helpers { output->p(e, gradOut->e(classNum)); } }; - samediff::Threads::parallel_for(func, 0, loop_size); + sd::Threads::parallel_for(func, 0, loop_size); } else { std::vector restDims = ShapeUtils::evalDimsToExclude(input->rankOf(), {0}); @@ -658,7 +658,7 @@ namespace helpers { } }; - samediff::Threads::parallel_tad(func, 0, indices->lengthOf()); + sd::Threads::parallel_tad(func, 0, indices->lengthOf()); } return ND4J_STATUS_OK; @@ -681,7 +681,7 @@ namespace helpers { output->p(e, gradOut->e(classNum)); } }; - samediff::Threads::parallel_for(func, 0, input->lengthOf()); + sd::Threads::parallel_for(func, 0, input->lengthOf()); } else { auto restDims = ShapeUtils::evalDimsToExclude(input->rankOf(), {0}); @@ -711,7 +711,7 @@ namespace helpers { } }; - samediff::Threads::parallel_tad(func, 0, indices->lengthOf()); + sd::Threads::parallel_tad(func, 0, indices->lengthOf()); } return ND4J_STATUS_OK; } @@ -758,7 +758,7 @@ namespace helpers { } //}; - //samediff::Threads::parallel_for(func, 0, indices->lengthOf()); + //sd::Threads::parallel_for(func, 0, indices->lengthOf()); } return ND4J_STATUS_OK; } @@ -791,7 +791,7 @@ namespace helpers { } //}; - //samediff::Threads::parallel_for(func, 0, indices->lengthOf()); + //sd::Threads::parallel_for(func, 0, indices->lengthOf()); } return Status::OK(); } @@ -828,7 +828,7 @@ namespace helpers { } //}; - //samediff::Threads::parallel_for(func, 0, indices->lengthOf()); + //sd::Threads::parallel_for(func, 0, indices->lengthOf()); } return ND4J_STATUS_OK; @@ -894,7 +894,7 @@ namespace helpers { } }; - samediff::Threads::parallel_for(func, 0, input->lengthOf()); + sd::Threads::parallel_for(func, 0, input->lengthOf()); } else { auto restDims = ShapeUtils::evalDimsToExclude(input->rankOf(), {0}); @@ -918,7 +918,7 @@ namespace helpers { } //}; - //samediff::Threads::parallel_for(func, 0, indices->lengthOf()); + //sd::Threads::parallel_for(func, 0, indices->lengthOf()); } return ND4J_STATUS_OK; @@ -993,7 +993,7 @@ namespace helpers { } //}; - //samediff::Threads::parallel_for(func, 0, indices->lengthOf()); + //sd::Threads::parallel_for(func, 0, indices->lengthOf()); } return Status::OK(); } @@ -1010,7 +1010,7 @@ namespace helpers { } }; - samediff::Threads::parallel_for(func, 0, indices->lengthOf()); + sd::Threads::parallel_for(func, 0, indices->lengthOf()); } else { auto restDims = ShapeUtils::evalDimsToExclude(input->rankOf(), {0}); @@ -1032,7 +1032,7 @@ namespace helpers { } //}; - //samediff::Threads::parallel_for(func, 0, indices->lengthOf()); + //sd::Threads::parallel_for(func, 0, indices->lengthOf()); } return Status::OK(); @@ -1059,7 +1059,7 @@ namespace helpers { } //}; - //samediff::Threads::parallel_for(func, 0, indices->lengthOf()); + //sd::Threads::parallel_for(func, 0, indices->lengthOf()); } else { auto restDims = ShapeUtils::evalDimsToExclude(input->rankOf(), {0}); @@ -1081,7 +1081,7 @@ namespace helpers { } //}; - //samediff::Threads::parallel_for(func, 0, indices->lengthOf()); + //sd::Threads::parallel_for(func, 0, indices->lengthOf()); } return Status::OK(); } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/sequence_mask.cpp b/libnd4j/include/ops/declarable/helpers/cpu/sequence_mask.cpp index 8e25c4690..64839678b 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/sequence_mask.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/sequence_mask.cpp @@ -34,7 +34,7 @@ namespace helpers { output->t(k * maxIndex + i) = B(true); //, T(1.0f)); }; - samediff::Threads::parallel_for(func, 0, maxIndex, 1, 0, input->lengthOf(), 1); + sd::Threads::parallel_for(func, 0, maxIndex, 1, 0, input->lengthOf(), 1); } void sequenceMask(sd::LaunchContext * context, NDArray* input, NDArray* output, int maxIndex) { diff --git a/libnd4j/include/ops/declarable/helpers/cpu/sg_cb.cpp b/libnd4j/include/ops/declarable/helpers/cpu/sg_cb.cpp index 07cbca04e..7543e6433 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/sg_cb.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/sg_cb.cpp @@ -425,7 +425,7 @@ namespace sd { } }; - samediff::Threads::parallel_tad(func, 0, numTargets, 1, numThreads); + sd::Threads::parallel_tad(func, 0, numTargets, 1, numThreads); } BUILD_SINGLE_TEMPLATE(template void skipgramBatchExec_, (NDArray &s0, NDArray &s1, NDArray &s1n, void *vexpTable, void *vnegTable, void *vinfVector, NDArray &targets, NDArray &negStarters, NDArray &indices, NDArray &codes, NDArray &lr, NDArray &nextRandom, const int nsRounds, const int vocabSize, const int vectorLength, const int expLength, const int negLength, const bool preciseMode, const int numThreads), FLOAT_TYPES); @@ -577,7 +577,7 @@ namespace sd { } }; - samediff::Threads::parallel_tad(func, 0, numTargets, 1, numThreads); + sd::Threads::parallel_tad(func, 0, numTargets, 1, numThreads); } BUILD_SINGLE_TEMPLATE(template void cbowBatchExec_, (NDArray &s0, NDArray &s1, NDArray &s1n, void *vexpTable, void *vnegTable, void *vinfVector, NDArray &context, NDArray &lockedWords, NDArray &targets, NDArray &negStarters, NDArray &indices, NDArray &codes, NDArray &lr, NDArray &nextRandom, NDArray &nLabels, const int nsRounds, const int vocabSize, const int vectorLength, const int expLength, const int negLength, const bool trainWords, const int numThreads), FLOAT_TYPES); diff --git a/libnd4j/include/ops/declarable/helpers/cpu/softmax.cpp b/libnd4j/include/ops/declarable/helpers/cpu/softmax.cpp index e2c0f5183..5b8621a5b 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/softmax.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/softmax.cpp @@ -136,7 +136,7 @@ namespace sd { } }; - samediff::Threads::parallel_tad(func,0, numOfSubArrs); + sd::Threads::parallel_tad(func,0, numOfSubArrs); } #endif @@ -168,7 +168,7 @@ namespace sd { } }; - samediff::Threads::parallel_tad(func,0, numOfSubArrs); + sd::Threads::parallel_tad(func,0, numOfSubArrs); } ////////////////////////////////////////////////////////////////////////// @@ -228,7 +228,7 @@ namespace sd { } }; - samediff::Threads::parallel_tad(func, 0, numOfSubArrs); + sd::Threads::parallel_tad(func, 0, numOfSubArrs); delete []offsets; } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/solve.cpp b/libnd4j/include/ops/declarable/helpers/cpu/solve.cpp index 9a06975aa..1be590288 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/solve.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/solve.cpp @@ -48,7 +48,7 @@ namespace helpers { } } }; - samediff::Threads::parallel_tad(batchLoop, 0, inputPart.size(), 1); + sd::Threads::parallel_tad(batchLoop, 0, inputPart.size(), 1); } // --------------------------------------------------------------------------------------------------------------------------------------- // diff --git a/libnd4j/include/ops/declarable/helpers/cpu/split.cpp b/libnd4j/include/ops/declarable/helpers/cpu/split.cpp index d138d9892..330099f33 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/split.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/split.cpp @@ -115,7 +115,7 @@ namespace helpers { } }; - samediff::Threads::parallel_for(func, 0, input.lengthOf()); + sd::Threads::parallel_for(func, 0, input.lengthOf()); } void split(sd::LaunchContext* context, const NDArray& input, std::vector& outArrs, const int axis) { diff --git a/libnd4j/include/ops/declarable/helpers/cpu/sru.cpp b/libnd4j/include/ops/declarable/helpers/cpu/sru.cpp index ecd5ead2b..e2608bad6 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/sru.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/sru.cpp @@ -184,7 +184,7 @@ static void sruBI_(NDArray* x, const NDArray* w, const NDArray* b, const NDArray } }; - samediff::Threads::parallel_tad(func, 0, ncols); + sd::Threads::parallel_tad(func, 0, ncols); } ////////////////////////////////////////////////////////////////////////// @@ -303,7 +303,7 @@ static void sruBIBP_(NDArray* x, const NDArray* w, const NDArray* b, const NDArr } }; - samediff::Threads::parallel_tad(func, 0, ncols); + sd::Threads::parallel_tad(func, 0, ncols); // gradB gradBias.reduceAlongDimension(reduce::Sum, *gradB, {0}); // [4*K] diff --git a/libnd4j/include/ops/declarable/helpers/cpu/stack.cpp b/libnd4j/include/ops/declarable/helpers/cpu/stack.cpp index f8fc07201..000ef36f6 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/stack.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/stack.cpp @@ -43,7 +43,7 @@ static void stack_(const std::vector& inArrs, NDArray& output, c output.p(i, inArrs[i]->t(0)); }; - samediff::Threads::parallel_for(func, 0, numOfSubArrs); + sd::Threads::parallel_for(func, 0, numOfSubArrs); } else { @@ -63,7 +63,7 @@ static void stack_(const std::vector& inArrs, NDArray& output, c } }; - samediff::Threads::parallel_tad(func, 0, numOfSubArrs); + sd::Threads::parallel_tad(func, 0, numOfSubArrs); } } @@ -88,7 +88,7 @@ static void unstack_(const NDArray& input, const std::vector& outArrs, outArrs[i]->p(0, input.t(i)); }; - samediff::Threads::parallel_for(func, 0, numOfSubArrs); + sd::Threads::parallel_for(func, 0, numOfSubArrs); } else { @@ -107,7 +107,7 @@ static void unstack_(const NDArray& input, const std::vector& outArrs, } }; - samediff::Threads::parallel_tad(func, 0, numOfSubArrs); + sd::Threads::parallel_tad(func, 0, numOfSubArrs); } } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/top_k.cpp b/libnd4j/include/ops/declarable/helpers/cpu/top_k.cpp index 7e0b07da0..51c1cafd7 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/top_k.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/top_k.cpp @@ -163,7 +163,7 @@ namespace helpers { } }; - samediff::Threads::parallel_tad(func, 0, target->lengthOf()); + sd::Threads::parallel_tad(func, 0, target->lengthOf()); } return status; diff --git a/libnd4j/include/ops/declarable/helpers/cpu/transforms.cpp b/libnd4j/include/ops/declarable/helpers/cpu/transforms.cpp index fa3570879..48727def5 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/transforms.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/transforms.cpp @@ -48,7 +48,7 @@ static void triuBP_(sd::LaunchContext * context, const NDArray& input, const NDA dOdI.t(i) = static_cast(1.f); } }; - samediff::Threads::parallel_for(func, 0, dLen); + sd::Threads::parallel_for(func, 0, dLen); // FIXME: !!! gradI.assign(dOdI * gradO); // chain rule: dLoss/dI = dO/dI * dLoss/dO @@ -68,7 +68,7 @@ static void trace_(const NDArray& input, NDArray& output) { for (auto i = start; i < stop; i++) output.p(i, setOfSubArrs.at(i)->getTrace()); }; - samediff::Threads::parallel_for(func, 0, setOfSubArrs.size()); + sd::Threads::parallel_for(func, 0, setOfSubArrs.size()); } void trace(sd::LaunchContext * context, const NDArray& input, NDArray& output) { @@ -211,7 +211,7 @@ void pad_(const int mode, const NDArray& input, const NDArray& paddings, NDArray } }; - samediff::Threads::parallel_tad(func, 0, zLen); + sd::Threads::parallel_tad(func, 0, zLen); } else { // REFLECT and SYMMETRIC cases @@ -237,7 +237,7 @@ void pad_(const int mode, const NDArray& input, const NDArray& paddings, NDArray } }; - samediff::Threads::parallel_tad(func, 0, zLen); + sd::Threads::parallel_tad(func, 0, zLen); } } @@ -606,7 +606,7 @@ static void gatherND_(NDArray& input, NDArray& indices, NDArray& output) { } }; - samediff::Threads::parallel_tad(func, 0, zLen); + sd::Threads::parallel_tad(func, 0, zLen); } //////////////////////////////////////////////////////////////////////// @@ -654,7 +654,7 @@ static void gather_(NDArray* input, const NDArray* indices, NDArray* output, con output->p(e, input->e(indices->e(e))); }; - samediff::Threads::parallel_for(func, 0, indices->lengthOf()); + sd::Threads::parallel_for(func, 0, indices->lengthOf()); } else { @@ -670,7 +670,7 @@ static void gather_(NDArray* input, const NDArray* indices, NDArray* output, con } }; - samediff::Threads::parallel_tad(func, 0, numOfSubArrs); + sd::Threads::parallel_tad(func, 0, numOfSubArrs); } } else { @@ -694,7 +694,7 @@ static void gather_(NDArray* input, const NDArray* indices, NDArray* output, con } }; - samediff::Threads::parallel_tad(func, 0, numOfSubArrs); + sd::Threads::parallel_tad(func, 0, numOfSubArrs); } } } @@ -714,7 +714,7 @@ void eye(sd::LaunchContext * context, NDArray& output) { arrs.at(i)->setIdentity(); }; - samediff::Threads::parallel_tad(func, 0, arrs.size()); + sd::Threads::parallel_tad(func, 0, arrs.size()); } ////////////////////////////////////////////////////////////////////////// @@ -772,7 +772,7 @@ void scatterUpdate(sd::LaunchContext * context, NDArray& input, NDArray& updates } }; - samediff::Threads::parallel_tad(func, 0, indices.size()); + sd::Threads::parallel_tad(func, 0, indices.size()); } @@ -792,7 +792,7 @@ void scatterSimple(sd::LaunchContext * context, const int opId, NDArray& input, } }; - samediff::Threads::parallel_for(func, 0, len); + sd::Threads::parallel_for(func, 0, len); } break; @@ -824,7 +824,7 @@ static void mergeMaxIndex_(const std::vector& inArrs, NDArray& output) } }; - samediff::Threads::parallel_for(func, 0, x->lengthOf()); + sd::Threads::parallel_for(func, 0, x->lengthOf()); } void mergeMaxIndex(sd::LaunchContext * context, const std::vector& inArrs, NDArray& output) { @@ -850,7 +850,7 @@ static void mergeMax_(const std::vector& inArrs, NDArray& output) { } }; - samediff::Threads::parallel_for(func, 0, x->lengthOf()); + sd::Threads::parallel_for(func, 0, x->lengthOf()); } void mergeMax(sd::LaunchContext * context, const std::vector& inArrs, NDArray& output) { @@ -875,7 +875,7 @@ static void mergeAvg_(const std::vector& inArrs, NDArray& output) { } }; - samediff::Threads::parallel_for(func, 0, x->lengthOf()); + sd::Threads::parallel_for(func, 0, x->lengthOf()); } void mergeAvg(sd::LaunchContext * context, const std::vector& inArrs, NDArray& output) { @@ -900,7 +900,7 @@ static void mergeAdd_(const std::vector& inArrs, NDArray& output) { } }; - samediff::Threads::parallel_for(func, 0, x->lengthOf()); + sd::Threads::parallel_for(func, 0, x->lengthOf()); } void mergeAdd(sd::LaunchContext * context, const std::vector& inArrs, NDArray& output) { BUILD_SINGLE_SELECTOR(output.dataType(), mergeAdd_, (inArrs, output), LIBND4J_TYPES); @@ -934,7 +934,7 @@ static void clipByNorm_(NDArray& input, NDArray& output, const std::vector& *listOfInSubArrs.at(i) *= normClip / iNormActual; } }; - samediff::Threads::parallel_tad(func, 0, listOfInSubArrs.size()); + sd::Threads::parallel_tad(func, 0, listOfInSubArrs.size()); } } else { @@ -963,7 +963,7 @@ static void clipByNorm_(NDArray& input, NDArray& output, const std::vector& *outputSubArr *= clipNorm / iNormActual; } }; - samediff::Threads::parallel_tad(func, 0, listOfInSubArrs.size()); + sd::Threads::parallel_tad(func, 0, listOfInSubArrs.size()); } } } @@ -1079,7 +1079,7 @@ static void clipByNormBP_(const NDArray& input, const NDArray& gradO, NDArray& g gradISubArr->assign(gradOSubArr); } }; - samediff::Threads::parallel_tad(func, 0, gradISubArrs.size()); + sd::Threads::parallel_tad(func, 0, gradISubArrs.size()); } } @@ -1215,7 +1215,7 @@ static void mirrorPad_(const NDArray& input, const NDArray& paddings, NDArray& o } }; - samediff::Threads::parallel_for(func, 0, outLen); + sd::Threads::parallel_for(func, 0, outLen); } } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/triangular_solve.cpp b/libnd4j/include/ops/declarable/helpers/cpu/triangular_solve.cpp index bcf406392..2475e5b09 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/triangular_solve.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/triangular_solve.cpp @@ -99,7 +99,7 @@ namespace helpers { } }; - samediff::Threads::parallel_tad(batchLoop, 0, leftPart.size(), 1); + sd::Threads::parallel_tad(batchLoop, 0, leftPart.size(), 1); return Status::OK(); @@ -128,7 +128,7 @@ namespace helpers { } } }; - samediff::Threads::parallel_tad(batchLoop, 0, inputPart.size(), 1); + sd::Threads::parallel_tad(batchLoop, 0, inputPart.size(), 1); } int triangularSolveFunctor(sd::LaunchContext * context, NDArray* leftInput, NDArray* rightInput, bool lower, bool adjoint, NDArray* output) { diff --git a/libnd4j/include/ops/declarable/helpers/cpu/zeta.cpp b/libnd4j/include/ops/declarable/helpers/cpu/zeta.cpp index d127fc166..3475af4ed 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/zeta.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/zeta.cpp @@ -68,7 +68,7 @@ static void zeta_(sd::LaunchContext * context, const NDArray& x, const NDArray& z.p(i, zetaScalar(x.e(i), q.e(i))); }; - samediff::Threads::parallel_for(func, 0, xLen); + sd::Threads::parallel_for(func, 0, xLen); } void zeta(sd::LaunchContext * context, const NDArray& x, const NDArray& q, NDArray& z) { diff --git a/libnd4j/include/ops/declarable/helpers/cross.h b/libnd4j/include/ops/declarable/helpers/cross.h index bd1e2a61d..9318bdcc9 100644 --- a/libnd4j/include/ops/declarable/helpers/cross.h +++ b/libnd4j/include/ops/declarable/helpers/cross.h @@ -77,7 +77,7 @@ void FORCEINLINE cross(sd::LaunchContext * context, NDArray *a, NDArray *b, NDAr } }; - samediff::Threads::parallel_tad(func, 0, tads); + sd::Threads::parallel_tad(func, 0, tads); } void weightedCrossEntropyWithLogitsFunctor(sd::LaunchContext * context, NDArray const* targets, NDArray const* input, NDArray const* weights, NDArray* output); diff --git a/libnd4j/include/ops/declarable/helpers/impl/unique.cpp b/libnd4j/include/ops/declarable/helpers/impl/unique.cpp index c67b713c2..7cbbb05bb 100644 --- a/libnd4j/include/ops/declarable/helpers/impl/unique.cpp +++ b/libnd4j/include/ops/declarable/helpers/impl/unique.cpp @@ -75,7 +75,7 @@ namespace helpers { counts->p(e, countsMap[valuesVector[e]]); } }; - samediff::Threads::parallel_for(func, 0, values->lengthOf()); + sd::Threads::parallel_for(func, 0, values->lengthOf()); for (Nd4jLong e = 0; e < indices->lengthOf(); e++) { auto posI = std::find(valuesVector.begin(), valuesVector.end(), input->e(e)); diff --git a/libnd4j/include/ops/declarable/impl/DeclarableOp.cpp b/libnd4j/include/ops/declarable/impl/DeclarableOp.cpp index 493834a4e..2facaf901 100644 --- a/libnd4j/include/ops/declarable/impl/DeclarableOp.cpp +++ b/libnd4j/include/ops/declarable/impl/DeclarableOp.cpp @@ -1107,8 +1107,8 @@ namespace sd { return ND4J_STATUS_OK; } - samediff::EmptyHandling DeclarableOp::emptyHandling() { - return samediff::EmptyHandling::EMPTY_SKIP; + sd::EmptyHandling DeclarableOp::emptyHandling() { + return sd::EmptyHandling::EMPTY_SKIP; } void DeclarableOp::registerTypes() { diff --git a/libnd4j/include/ops/declarable/impl/OpRegistrator.cpp b/libnd4j/include/ops/declarable/impl/OpRegistrator.cpp index 65d694dea..d29f5791a 100644 --- a/libnd4j/include/ops/declarable/impl/OpRegistrator.cpp +++ b/libnd4j/include/ops/declarable/impl/OpRegistrator.cpp @@ -173,7 +173,7 @@ namespace sd { } void OpRegistrator::registerHelper(sd::ops::platforms::PlatformHelper* op) { - std::pair p = {op->hash(), op->engine()}; + std::pair p = {op->hash(), op->engine()}; if (_helpersLH.count(p) > 0) throw std::runtime_error("Tried to double register PlatformHelper"); @@ -181,10 +181,10 @@ namespace sd { nd4j_debug("Adding helper for op \"%s\": [%lld - %i]\n", op->name().c_str(), op->hash(), (int) op->engine()); - std::pair, sd::ops::platforms::PlatformHelper*> pair({op->name(), op->engine()}, op); + std::pair, sd::ops::platforms::PlatformHelper*> pair({op->name(), op->engine()}, op); _helpersH.insert(pair); - std::pair, sd::ops::platforms::PlatformHelper*> pair2(p, op); + std::pair, sd::ops::platforms::PlatformHelper*> pair2(p, op); _helpersLH.insert(pair2); } @@ -230,16 +230,16 @@ namespace sd { return _declarablesD.at(name); } - sd::ops::platforms::PlatformHelper* OpRegistrator::getPlatformHelper(Nd4jLong hash, samediff::Engine engine) { - std::pair p = {hash, engine}; + sd::ops::platforms::PlatformHelper* OpRegistrator::getPlatformHelper(Nd4jLong hash, sd::Engine engine) { + std::pair p = {hash, engine}; if (_helpersLH.count(p) == 0) throw std::runtime_error("Requested helper can't be found"); return _helpersLH[p]; } - bool OpRegistrator::hasHelper(Nd4jLong hash, samediff::Engine engine) { - std::pair p = {hash, engine}; + bool OpRegistrator::hasHelper(Nd4jLong hash, sd::Engine engine) { + std::pair p = {hash, engine}; return _helpersLH.count(p) > 0; } @@ -262,14 +262,14 @@ namespace sd { } namespace std { - size_t hash>::operator()(const std::pair& k) const { + size_t hash>::operator()(const std::pair& k) const { using std::hash; auto res = std::hash()(k.first); res ^= std::hash()((int) k.second) + 0x9e3779b9 + (res << 6) + (res >> 2); return res; } - size_t hash>::operator()(const std::pair& k) const { + size_t hash>::operator()(const std::pair& k) const { using std::hash; auto res = std::hash()(k.first); res ^= std::hash()((int) k.second) + 0x9e3779b9 + (res << 6) + (res >> 2); diff --git a/libnd4j/include/ops/declarable/impl/PlatformHelper.cpp b/libnd4j/include/ops/declarable/impl/PlatformHelper.cpp index fe0928ce6..6b79fd145 100644 --- a/libnd4j/include/ops/declarable/impl/PlatformHelper.cpp +++ b/libnd4j/include/ops/declarable/impl/PlatformHelper.cpp @@ -24,7 +24,7 @@ namespace sd { namespace ops { namespace platforms { - PlatformHelper::PlatformHelper(const char *name, samediff::Engine engine) { + PlatformHelper::PlatformHelper(const char *name, sd::Engine engine) { // we just store name/hash of target operation _name = std::string(name); _hash = HashHelper::getInstance()->getLongHash(_name); @@ -75,7 +75,7 @@ namespace sd { return z; } - samediff::Engine PlatformHelper::engine() { + sd::Engine PlatformHelper::engine() { return _engine; } diff --git a/libnd4j/include/ops/declarable/platform/mkldnn/avgpooling2d.cpp b/libnd4j/include/ops/declarable/platform/mkldnn/avgpooling2d.cpp index 9df7bedf3..4ed355694 100644 --- a/libnd4j/include/ops/declarable/platform/mkldnn/avgpooling2d.cpp +++ b/libnd4j/include/ops/declarable/platform/mkldnn/avgpooling2d.cpp @@ -29,7 +29,7 @@ #include using namespace dnnl; -using namespace samediff; +using namespace sd; namespace sd { namespace ops { diff --git a/libnd4j/include/ops/declarable/platform/mkldnn/mkldnnUtils.h b/libnd4j/include/ops/declarable/platform/mkldnn/mkldnnUtils.h index 693e515b1..af0e8b6f0 100644 --- a/libnd4j/include/ops/declarable/platform/mkldnn/mkldnnUtils.h +++ b/libnd4j/include/ops/declarable/platform/mkldnn/mkldnnUtils.h @@ -31,7 +31,7 @@ #include #include -using namespace samediff; +using namespace sd; namespace sd { diff --git a/libnd4j/include/ops/impl/gemm.cpp b/libnd4j/include/ops/impl/gemm.cpp index 0c4ab167c..d879ca7ae 100644 --- a/libnd4j/include/ops/impl/gemm.cpp +++ b/libnd4j/include/ops/impl/gemm.cpp @@ -44,7 +44,7 @@ namespace sd { } }; - samediff::Threads::parallel_for(func, 0, rows); + sd::Threads::parallel_for(func, 0, rows); return ret; } @@ -76,7 +76,7 @@ namespace sd { for (auto r = start; r < stop; r++) C[r] = z; }; - samediff::Threads::parallel_for(func, 0, length); + sd::Threads::parallel_for(func, 0, length); } } @@ -108,7 +108,7 @@ namespace sd { } }; - samediff::Threads::parallel_for(func, 0, M, 1, 0, N, 1); + sd::Threads::parallel_for(func, 0, M, 1, 0, N, 1); } @@ -138,7 +138,7 @@ namespace sd { z[r] = beta == 0.0f ? dot : dot + static_cast(beta) * z[r]; } }; - samediff::Threads::parallel_for(func, 0, M); + sd::Threads::parallel_for(func, 0, M); if (TRANS == CblasTrans) delete[] aT; diff --git a/libnd4j/include/ops/impl/specials_double.hpp b/libnd4j/include/ops/impl/specials_double.hpp index 96f7d2db2..9a1ec4b1e 100644 --- a/libnd4j/include/ops/impl/specials_double.hpp +++ b/libnd4j/include/ops/impl/specials_double.hpp @@ -45,7 +45,7 @@ namespace sd { } }; - samediff::Threads::parallel_for(func, 0, N); + sd::Threads::parallel_for(func, 0, N); }; @@ -240,7 +240,7 @@ PRAGMA_OMP_SINGLE_ARGS(nowait) } }; - samediff::Threads::parallel_tad(func, 0, numTads); + sd::Threads::parallel_tad(func, 0, numTads); } template @@ -264,7 +264,7 @@ PRAGMA_OMP_SINGLE_ARGS(nowait) } }; - samediff::Threads::parallel_tad(func, 0, numTads); + sd::Threads::parallel_tad(func, 0, numTads); } } diff --git a/libnd4j/include/ops/impl/specials_single.hpp b/libnd4j/include/ops/impl/specials_single.hpp index 3cf3d113e..9f3cf7308 100644 --- a/libnd4j/include/ops/impl/specials_single.hpp +++ b/libnd4j/include/ops/impl/specials_single.hpp @@ -79,7 +79,7 @@ namespace sd { // } // }; -// samediff::Threads::parallel_tad(func, 0, numOfArrs); +// sd::Threads::parallel_tad(func, 0, numOfArrs); // return; // } // } @@ -104,7 +104,7 @@ namespace sd { // } // }; -// samediff::Threads::parallel_tad(func, 0, numOfArrs); +// sd::Threads::parallel_tad(func, 0, numOfArrs); // } template @@ -175,6 +175,8 @@ void SpecialMethods::concatCpuGeneric(const std::vector& inAr // return; // } + auto oShapeInfo = output.getShapeInfo(); + // general case auto func = PRAGMA_THREADS_FOR { @@ -182,8 +184,8 @@ void SpecialMethods::concatCpuGeneric(const std::vector& inAr for (auto i = start; i < stop; i += increment) { - shape::index2coords(i, output.getShapeInfo(), coords); - const auto zOffset = shape::getOffset(output.getShapeInfo(), coords); + shape::index2coords(i, oShapeInfo, coords); + const auto zOffset = shape::getOffset(oShapeInfo, coords); uint inArrIdx = 0; uint xDim = inArrs[inArrIdx]->sizeAt(axis); @@ -200,7 +202,7 @@ void SpecialMethods::concatCpuGeneric(const std::vector& inAr } }; - samediff::Threads::parallel_for(func, 0, output.lengthOf()); + sd::Threads::parallel_for(func, 0, output.lengthOf()); } /** @@ -317,7 +319,7 @@ void SpecialMethods::splitCpuGeneric(const NDArray& input, const std::vector< } }; - samediff::Threads::parallel_for(func, 0, input.lengthOf()); + sd::Threads::parallel_for(func, 0, input.lengthOf()); } @@ -343,7 +345,7 @@ void SpecialMethods::splitCpuGeneric(const NDArray& input, const std::vector< } }; - samediff::Threads::parallel_for(func, 0, length); + sd::Threads::parallel_for(func, 0, length); } @@ -378,7 +380,7 @@ void SpecialMethods::splitCpuGeneric(const NDArray& input, const std::vector< } } }; - samediff::Threads::parallel_for(func, 0, length); + sd::Threads::parallel_for(func, 0, length); // instead of doing element-wise propagation, we just issue memcpy to propagate data for (Nd4jLong ar = 1; ar < n; ar++) { @@ -398,7 +400,7 @@ void SpecialMethods::splitCpuGeneric(const NDArray& input, const std::vector< } } }; - samediff::Threads::parallel_for(func, 0, length); + sd::Threads::parallel_for(func, 0, length); // instead of doing element-wise propagation, we just issue memcpy to propagate data for (Nd4jLong ar = 0; ar < n; ar++) { @@ -535,7 +537,7 @@ PRAGMA_OMP_SINGLE_ARGS(nowait) quickSort_parallel(dx, tadShapeInfo, xTadLength, 1, descending); } }; - samediff::Threads::parallel_tad(func, 0, numTads); + sd::Threads::parallel_tad(func, 0, numTads); } @@ -568,7 +570,7 @@ PRAGMA_OMP_SINGLE_ARGS(nowait) } }; - samediff::Threads::parallel_for(func, 4, lim); + sd::Threads::parallel_for(func, 4, lim); } template @@ -617,7 +619,7 @@ PRAGMA_OMP_SINGLE_ARGS(nowait) return retVal; }; - return samediff::Threads::parallel_long(func, LAMBDA_SUML, 0, N, 16); + return sd::Threads::parallel_long(func, LAMBDA_SUML, 0, N, 16); } } diff --git a/libnd4j/include/ops/special_random_ops.h b/libnd4j/include/ops/special_random_ops.h index 50a50752e..bb6d14b3f 100644 --- a/libnd4j/include/ops/special_random_ops.h +++ b/libnd4j/include/ops/special_random_ops.h @@ -182,7 +182,7 @@ namespace randomOps { } }; - samediff::Threads::parallel_for(func, 0, zLength, 1, _threads); + sd::Threads::parallel_for(func, 0, zLength, 1, _threads); } else { @@ -209,7 +209,7 @@ namespace randomOps { } }; - samediff::Threads::parallel_for(func, 0, zLength, 1, _threads); + sd::Threads::parallel_for(func, 0, zLength, 1, _threads); } } }; @@ -352,7 +352,7 @@ namespace randomOps { } }; - samediff::Threads::parallel_for(func, 0, middle, 1, _threads); + sd::Threads::parallel_for(func, 0, middle, 1, _threads); } }; @@ -459,7 +459,7 @@ namespace randomOps { } }; - samediff::Threads::parallel_for(func, 0, zLength, 1, _threads); + sd::Threads::parallel_for(func, 0, zLength, 1, _threads); } }; @@ -568,7 +568,7 @@ namespace randomOps { } }; - samediff::Threads::parallel_for(func, 0, zLength, 1, _threads); + sd::Threads::parallel_for(func, 0, zLength, 1, _threads); } }; @@ -700,7 +700,7 @@ namespace randomOps { } }; - samediff::Threads::parallel_for(func, 0, zLength, 1, _threads); + sd::Threads::parallel_for(func, 0, zLength, 1, _threads); } }; @@ -836,7 +836,7 @@ namespace randomOps { } }; - samediff::Threads::parallel_for(func, 0, middle, 1, _threads); + sd::Threads::parallel_for(func, 0, middle, 1, _threads); } }; diff --git a/libnd4j/include/system/platform_boilerplate.h b/libnd4j/include/system/platform_boilerplate.h index bdbb1a051..aebdcb387 100644 --- a/libnd4j/include/system/platform_boilerplate.h +++ b/libnd4j/include/system/platform_boilerplate.h @@ -30,7 +30,7 @@ #define DECLARE_PLATFORM_F(NAME, ENGINE, CNAME) class ND4J_EXPORT PLATFORM_##CNAME : public PlatformHelper {\ public: \ - PLATFORM_##CNAME() : PlatformHelper(#NAME, samediff::Engine::ENGINE) { } \ + PLATFORM_##CNAME() : PlatformHelper(#NAME, sd::Engine::ENGINE) { } \ bool isUsable(graph::Context &context) override; \ Nd4jStatus invokeHelper(graph::Context &context) override; \ }; diff --git a/libnd4j/tests_cpu/layers_tests/CuDnnTests.cu b/libnd4j/tests_cpu/layers_tests/CuDnnTests.cu index 47892c973..ba0b1cab3 100644 --- a/libnd4j/tests_cpu/layers_tests/CuDnnTests.cu +++ b/libnd4j/tests_cpu/layers_tests/CuDnnTests.cu @@ -119,7 +119,7 @@ TEST_F(CuDnnTests, mixed_helpers_test_1) { // cuDNN part Context cuda(1); - cuda.setTargetEngine(samediff::Engine::ENGINE_CUDA); + cuda.setTargetEngine(sd::Engine::ENGINE_CUDA); cuda.setInputArray(0, &input); cuda.setInputArray(1, &weights); cuda.setInputArray(2, &bias); @@ -132,7 +132,7 @@ TEST_F(CuDnnTests, mixed_helpers_test_1) { // MKL-DNN part Context mkl(1); - mkl.setTargetEngine(samediff::Engine::ENGINE_CPU); + mkl.setTargetEngine(sd::Engine::ENGINE_CPU); mkl.setInputArray(0, &input); mkl.setInputArray(1, &weights); mkl.setInputArray(2, &bias); diff --git a/libnd4j/tests_cpu/layers_tests/PerformanceTests.cpp b/libnd4j/tests_cpu/layers_tests/PerformanceTests.cpp index 52c4bd33e..40a58b36c 100644 --- a/libnd4j/tests_cpu/layers_tests/PerformanceTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/PerformanceTests.cpp @@ -53,7 +53,7 @@ public: int numIterations = 100; PerformanceTests() { - samediff::ThreadPool::getInstance(); + sd::ThreadPool::getInstance(); } }; diff --git a/libnd4j/tests_cpu/layers_tests/PlaygroundTests.cpp b/libnd4j/tests_cpu/layers_tests/PlaygroundTests.cpp index ac969ebbd..d9c7e658b 100644 --- a/libnd4j/tests_cpu/layers_tests/PlaygroundTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/PlaygroundTests.cpp @@ -112,15 +112,23 @@ TEST_F(PlaygroundTests, test_bert_full_1) { /* // validating graph now + auto timeStart = std::chrono::system_clock::now(); + auto status = GraphExecutioner::execute(graph); + + auto timeEnd = std::chrono::system_clock::now(); + auto outerTime = std::chrono::duration_cast(timeEnd - timeStart).count(); + ASSERT_EQ(Status::OK(), status); ASSERT_TRUE(graph->getVariableSpace()->hasVariable(1620)); auto array = graph->getVariableSpace()->getVariable(1620)->getNDArray(); ASSERT_EQ(z, *array); + nd4j_printf("BERT execution time: [%lld]\n", outerTime); */ + sd::Environment::getInstance()->setProfiling(true); auto profile = GraphProfilingHelper::profile(graph, 1); @@ -534,14 +542,14 @@ TEST_F(PlaygroundTests, test_s_2) { s++; }; - samediff::Threads::parallel_for(func, 0, 8192, 1, 4); + sd::Threads::parallel_for(func, 0, 8192, 1, 4); std::vector values; for (int e = 0; e < 100000; e++) { s = 0; auto timeStart = std::chrono::system_clock::now(); - //samediff::Threads::parallel_for(func, 0, 8192, 1, 4); + //sd::Threads::parallel_for(func, 0, 8192, 1, 4); PRAGMA_OMP_PARALLEL_THREADS(4) { s++; } @@ -566,7 +574,7 @@ TEST_F(PlaygroundTests, test_s_4) { s++; }; - samediff::Threads::parallel_for(func, 0, 8192, 1, 4); + sd::Threads::parallel_for(func, 0, 8192, 1, 4); //////// @@ -615,7 +623,7 @@ TEST_F(PlaygroundTests, test_s_4) { } } }; - samediff::Threads::parallel_for(f2d, 0, xs0, 1, 0, xs1, 1); + sd::Threads::parallel_for(f2d, 0, xs0, 1, 0, xs1, 1); auto timeEnd = std::chrono::system_clock::now(); auto outerTime = std::chrono::duration_cast(timeEnd - timeStart).count(); @@ -662,8 +670,8 @@ TEST_F(PlaygroundTests, test_s_5) { auto timeStart = std::chrono::system_clock::now(); // picking best fit here - auto splitLoop = samediff::ThreadsHelper::pickLoop2d(numThreads, itersX, itersY); - auto span = samediff::Span2::build(splitLoop, 0, numThreads, startX, stopX, incX, startY, stopY, incY); + auto splitLoop = sd::ThreadsHelper::pickLoop2d(numThreads, itersX, itersY); + auto span = sd::Span2::build(splitLoop, 0, numThreads, startX, stopX, incX, startY, stopY, incY); auto timeEnd = std::chrono::system_clock::now(); auto outerTime = std::chrono::duration_cast(timeEnd - timeStart).count(); @@ -711,7 +719,7 @@ TEST_F(PlaygroundTests, test_s_3) { for (int e = 0; e < 10000; e++) { - samediff::Threads::parallel_for(func, 0, 8192, 1, 4); + sd::Threads::parallel_for(func, 0, 8192, 1, 4); } } */ diff --git a/libnd4j/tests_cpu/layers_tests/ThreadsTests.cpp b/libnd4j/tests_cpu/layers_tests/ThreadsTests.cpp index a9450e9d0..8f0525546 100644 --- a/libnd4j/tests_cpu/layers_tests/ThreadsTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/ThreadsTests.cpp @@ -25,7 +25,7 @@ #include #include -using namespace samediff; +using namespace sd; using namespace sd; using namespace sd::ops; using namespace sd::graph; @@ -182,7 +182,7 @@ TEST_F(ThreadsTests, validation_test_2d_1) { } }; - samediff::Threads::parallel_for(func, 0, e, 1, 0, i, 1, t, true); + sd::Threads::parallel_for(func, 0, e, 1, 0, i, 1, t, true); ASSERT_EQ(e * i, sum.load()); } @@ -204,7 +204,7 @@ TEST_F(ThreadsTests, reduction_test_1) { return sum; }; - auto sum = samediff::Threads::parallel_long(func, LAMBDA_AL {return _old + _new;}, 0, 8192, 1, 4); + auto sum = sd::Threads::parallel_long(func, LAMBDA_AL {return _old + _new;}, 0, 8192, 1, 4); ASSERT_EQ(8192, sum); } @@ -213,7 +213,7 @@ TEST_F(ThreadsTests, basic_test_1) { if (!Environment::getInstance()->isCPU()) return; - auto instance = samediff::ThreadPool::getInstance(); + auto instance = sd::ThreadPool::getInstance(); auto array = NDArrayFactory::create('c', {512, 768}); auto like = array.like(); @@ -228,7 +228,7 @@ TEST_F(ThreadsTests, basic_test_1) { }; auto timeStartThreads = std::chrono::system_clock::now(); - samediff::Threads::parallel_for(func, 0, array.lengthOf()); + sd::Threads::parallel_for(func, 0, array.lengthOf()); auto timeEndThreads = std::chrono::system_clock::now(); auto outerTimeThreads = std::chrono::duration_cast (timeEndThreads - timeStartThreads).count(); diff --git a/libnd4j/tests_cpu/libnd4j_tests/CMakeLists.txt b/libnd4j/tests_cpu/libnd4j_tests/CMakeLists.txt index 7d3073b58..00d1f7974 100644 --- a/libnd4j/tests_cpu/libnd4j_tests/CMakeLists.txt +++ b/libnd4j/tests_cpu/libnd4j_tests/CMakeLists.txt @@ -150,7 +150,7 @@ if ("${SD_EXPERIMENTAL}" STREQUAL "yes") endif() # tests are always compiled with all ops included -SET( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DSD_ALL_OPS=true -DDEFAULT_ENGINE=samediff::ENGINE_CPU") +SET( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DSD_ALL_OPS=true -DDEFAULT_ENGINE=sd::ENGINE_CPU") if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") # using Clang diff --git a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/imports/TFGraphs/TFGraphTestAllSameDiff.java b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/imports/TFGraphs/TFGraphTestAllSameDiff.java index 210c4b703..e946fd665 100644 --- a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/imports/TFGraphs/TFGraphTestAllSameDiff.java +++ b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/imports/TFGraphs/TFGraphTestAllSameDiff.java @@ -210,7 +210,7 @@ public class TFGraphTestAllSameDiff { //Note: Can't extend BaseNd4jTest here a try { TFGraphTestAllHelper.checkOnlyOutput(inputs, predictions, modelName, BASE_DIR, MODEL_FILENAME, EXECUTE_WITH, TFGraphTestAllHelper.LOADER, maxRE, minAbs, verboseDebugMode); - //TFGraphTestAllHelper.checkIntermediate(inputs, modelName, BASE_DIR, MODEL_FILENAME, EXECUTE_WITH, localTestDir); + //TFGraphTestAllHelper.checkIntermediate(inputs, modelName, BASE_DIR, MODEL_FILENAME, EXECUTE_WITH, localTestDir, false); } catch (Throwable t){ log.error("ERROR Executing test: {} - input keys {}", modelName, (inputs == null ? null : inputs.keySet()), t); throw t;