From bead656febe60db11caab7d22281eb3ab1757adc Mon Sep 17 00:00:00 2001 From: Abdelrauf Date: Sat, 8 Feb 2020 16:31:30 +0400 Subject: [PATCH 1/7] Initial performance improvement for Bias Add and etc #8556 (#217) * Initial performance improvement for Bias Add, loop coords helpers and increment aligned parallel threading Signed-off-by: AbdelRauf * One more test for Rauf Signed-off-by: raver119 * disable couple of perf tests Signed-off-by: raver119 Co-authored-by: raver119 --- libnd4j/include/execution/Threads.h | 14 +- libnd4j/include/execution/impl/Threads.cpp | 82 ++ libnd4j/include/helpers/LoopsCoordsHelper.h | 440 +++++++++++ .../ops/declarable/helpers/cpu/addBias.cpp | 726 ++++++++++++++---- .../layers_tests/LoopCoordsHelperTests.cpp | 223 ++++++ .../layers_tests/PlaygroundTests.cpp | 82 ++ 6 files changed, 1427 insertions(+), 140 deletions(-) create mode 100644 libnd4j/include/helpers/LoopsCoordsHelper.h create mode 100644 libnd4j/tests_cpu/layers_tests/LoopCoordsHelperTests.cpp diff --git a/libnd4j/include/execution/Threads.h b/libnd4j/include/execution/Threads.h index 14467883f..3a1fd8951 100644 --- a/libnd4j/include/execution/Threads.h +++ b/libnd4j/include/execution/Threads.h @@ -14,9 +14,9 @@ * SPDX-License-Identifier: Apache-2.0 ******************************************************************************/ -// -// @author raver119@gmail.com -// + // + // @author raver119@gmail.com + // #ifndef SAMEDIFF_THREADS_H #define SAMEDIFF_THREADS_H @@ -165,6 +165,14 @@ namespace samediff { static int64_t parallel_long(FUNC_RL function, FUNC_AL aggregator, int64_t start, int64_t stop, int64_t increment = 1, uint64_t numThreads = nd4j::Environment::getInstance()->maxMasterThreads()); static double parallel_double(FUNC_RD function, FUNC_AD aggregator, int64_t start, int64_t stop, int64_t increment = 1, uint64_t numThreads = nd4j::Environment::getInstance()->maxMasterThreads()); + + /** + * This method will execute function in parallel preserving the parts to be 
aligned increment size + * PLEASE NOTE: this function can use smaller number of threads than requested. + * + */ + static int parallel_aligned_increment(FUNC_1D function, int64_t start, int64_t stop, int64_t increment, size_t type_size = sizeof(float), uint32_t req_numThreads = nd4j::Environment::getInstance()->maxMasterThreads()); + }; } diff --git a/libnd4j/include/execution/impl/Threads.cpp b/libnd4j/include/execution/impl/Threads.cpp index 982b59a4c..94710731e 100644 --- a/libnd4j/include/execution/impl/Threads.cpp +++ b/libnd4j/include/execution/impl/Threads.cpp @@ -638,4 +638,86 @@ namespace samediff { return intermediatery[0]; } + + int Threads::parallel_aligned_increment(FUNC_1D function, int64_t start, int64_t stop, int64_t increment, size_t type_size , uint32_t req_numThreads) { + if (start > stop) + throw std::runtime_error("Threads::parallel_for got start > stop"); + auto num_elements = (stop - start); + //this way we preserve increment starts offset + //so we will parition considering delta but not total elements + auto delta = (stop - start) / increment; + + // in some cases we just fire func as is + if (delta == 0 || req_numThreads == 1) { + function(0, start, stop, increment); + return 1; + } + int numThreads = 0; + + int adjusted_numThreads = samediff::ThreadsHelper::numberOfThreads(req_numThreads, (num_elements * sizeof(double)) / (200 * type_size)); + + if (adjusted_numThreads > delta) + adjusted_numThreads = delta; + // shortcut + if (adjusted_numThreads <= 1) { + function(0, start, stop, increment); + return 1; + } + //take span as ceil + auto spand = std::ceil((double)delta / (double)adjusted_numThreads); + numThreads = static_cast(std::ceil((double)delta / spand)); + auto span = static_cast(spand); + + auto ticket = samediff::ThreadPool::getInstance()->tryAcquire(numThreads); + if (ticket != nullptr) { + //tail_add is additional value of the last part + //it could be negative or positive + //we will spread that value across + auto tail_add = 
delta - numThreads * span; + Nd4jLong begin = 0; + Nd4jLong end = 0; + + //we will try enqueu bigger parts first + decltype(span) span1, span2; + int last = 0; + if (tail_add >= 0) { + //for span == 1 , tail_add is 0 + last = tail_add; + span1 = span + 1; + span2 = span; + } + else { + last = numThreads + tail_add;// -std::abs(tail_add); + span1 = span; + span2 = span - 1; + } + for (int i = 0; i < last; i++) { + end = begin + span1 * increment; + // putting the task into the queue for a given thread + ticket->enqueue(i, numThreads, function, begin, end, increment); + begin = end; + } + for (int i = last; i < numThreads - 1; i++) { + end = begin + span2 * increment; + // putting the task into the queue for a given thread + ticket->enqueue(i, numThreads, function, begin, end, increment); + begin = end; + } + //for last one enqueue last offset as stop + //we need it in case our ((stop-start) % increment ) > 0 + ticket->enqueue(numThreads - 1, numThreads, function, begin, stop, increment); + // block and wait till all threads finished the job + ticket->waitAndRelease(); + // we tell that parallelism request succeeded + return numThreads; + } + else { + // if there were no threads available - we'll execute function right within current thread + function(0, start, stop, increment); + // we tell that parallelism request declined + return 1; + } + } + + } \ No newline at end of file diff --git a/libnd4j/include/helpers/LoopsCoordsHelper.h b/libnd4j/include/helpers/LoopsCoordsHelper.h new file mode 100644 index 000000000..35f9d2063 --- /dev/null +++ b/libnd4j/include/helpers/LoopsCoordsHelper.h @@ -0,0 +1,440 @@ +/******************************************************************************* + * + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. 
+ * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + // + // @author AbdelRauf + // +#ifndef LIBND4J_LOOPCOORDSHELPER_H +#define LIBND4J_LOOPCOORDSHELPER_H + +#include +#include +#include +#include +#include +namespace nd4j { + +#if defined(__GNUC__) +#define likely(x) __builtin_expect( (x), 1) +#define unlikely(x) __builtin_expect( (x), 0) +#else +#define likely(x) (x) +#define unlikely(x) (x) +#endif + + using zip_size_t = std::pair; + + template + struct CoordsState :CoordsState { + Nd4jLong coord; + Nd4jLong last_num; + Nd4jLong stride; + Nd4jLong adjust; + CoordsState() :CoordsState() {} + }; + + template<> + struct CoordsState<0> { + Nd4jLong coord; + Nd4jLong last_num; + Nd4jLong stride; + Nd4jLong adjust; + CoordsState() {} + }; + + + template + struct ZipCoordsState :ZipCoordsState { + Nd4jLong coord; + Nd4jLong last_num; + Nd4jLong stride1; + Nd4jLong stride2; + Nd4jLong adjust1; + Nd4jLong adjust2; + ZipCoordsState() : ZipCoordsState() {} + }; + + template<> + struct ZipCoordsState<0> { + Nd4jLong coord; + Nd4jLong last_num; + Nd4jLong stride1; + Nd4jLong stride2; + Nd4jLong adjust1; + Nd4jLong adjust2; + ZipCoordsState() {} + }; + +#define COORDS(x,index) ((x).::nd4j::CoordsState<(index)>::coord) +#define STRIDE(x,index) ((x).::nd4j::CoordsState<(index)>::stride) +#define LAST_NUM(x,index) ((x).::nd4j::CoordsState<(index)>::last_num) +#define OF_ADJUST(x,index) ((x).::nd4j::CoordsState<(index)>::adjust) +#define ZIP_LAST_NUM(x,index) ((x).::nd4j::ZipCoordsState<(index)>::last_num) +#define ZIP_COORDS(x,index) ((x).::nd4j::ZipCoordsState<(index)>::coord) 
+#define ZIP_STRIDE1(x,index) ((x).::nd4j::ZipCoordsState<(index)>::stride1) +#define ZIP_STRIDE2(x,index) ((x).::nd4j::ZipCoordsState<(index)>::stride2) +#define ZIP_OF_ADJUST1(x,index) ((x).::nd4j::ZipCoordsState<(index)>::adjust1) +#define ZIP_OF_ADJUST2(x,index) ((x).::nd4j::ZipCoordsState<(index)>::adjust2) + + + FORCEINLINE void index2coords_C(Nd4jLong index, const Nd4jLong rank, const Nd4jLong* bases, Nd4jLong* coords) { + for (size_t i = rank - 1; i > 0; --i) { + coords[i] = index % bases[i]; + index /= bases[i]; + } + coords[0] = index; // last iteration + } + + FORCEINLINE void index2coords_F(Nd4jLong index, const Nd4jLong rank, const Nd4jLong* bases, Nd4jLong* coords) { + + for (size_t i = 0; i < rank - 1; i++) { + coords[i] = index % bases[i]; + index /= bases[i]; + } + coords[rank - 1] = index; // last iteration + } + + FORCEINLINE size_t offset_from_coords(const Nd4jLong* strides, const Nd4jLong* coords, const Nd4jLong& rank) { + + size_t offset = 0; + size_t rank_4 = rank & -4; + for (int i = 0; i < rank_4; i += 4) { + offset = offset + + coords[i] * strides[i] + + coords[i + 1] * strides[i + 1] + + coords[i + 2] * strides[i + 2] + + coords[i + 3] * strides[i + 3]; + } + for (int i = rank_4; i < rank; i++) { + offset += coords[i] * strides[i]; + } + return offset; + } + + + FORCEINLINE zip_size_t offset_from_coords(const Nd4jLong*& x_strides, const Nd4jLong*& z_strides, const Nd4jLong* coords, const Nd4jLong& rank) { + + zip_size_t offset = { 0,0 }; + size_t rank_4 = rank & -4; + for (int i = 0; i < rank_4; i += 4) { + offset.first = offset.first + + coords[i] * x_strides[i] + + coords[i + 1] * x_strides[i + 1] + + coords[i + 2] * x_strides[i + 2] + + coords[i + 3] * x_strides[i + 3]; + offset.second = offset.second + + coords[i] * z_strides[i] + + coords[i + 1] * z_strides[i + 1] + + coords[i + 2] * z_strides[i + 2] + + coords[i + 3] * z_strides[i + 3]; + } + for (int i = rank_4; i < rank; i++) { + offset.first += coords[i] * x_strides[i]; + 
offset.second += coords[i] * z_strides[i]; + } + return offset; + } + + template + constexpr size_t StridesOrderInd() { + return Last_Index_Faster ? Rank - Index - 1 : Index; + } + + template + FORCEINLINE + typename std::enable_if<(Rank - 1 == Index), size_t>::type + coord_inc_n(CoordsState& cbs, size_t last_offset) { + + constexpr size_t Ind = StridesOrderInd(); + + if (likely(COORDS(cbs, Ind) < LAST_NUM(cbs, Ind))) { + last_offset += cbs.CoordsState::stride; + COORDS(cbs, Ind) = COORDS(cbs, Ind) + 1; + return last_offset; + } + //overflow case should not happen + COORDS(cbs, Ind) = 0; + //last_offset = 0;// last_offset + strides[Ind] - adjust_stride; + return 0; + } + + template + FORCEINLINE + typename std::enable_if<(Rank - 1 != Index), size_t >::type + coord_inc_n(CoordsState& cbs, size_t last_offset) { + + constexpr size_t Ind = StridesOrderInd(); + + if (likely(COORDS(cbs, Ind) < LAST_NUM(cbs, Ind))) { + last_offset = last_offset + cbs.CoordsState::stride; + COORDS(cbs, Ind) = COORDS(cbs, Ind) + 1; + } + else { + //lets adjust offset + last_offset -= OF_ADJUST(cbs, Ind); + COORDS(cbs, Ind) = 0; + last_offset = coord_inc_n(cbs, last_offset); + } + + return last_offset; + + } + + template + FORCEINLINE size_t inc_coords(CoordsState& cbs, size_t last_offset) { + + return coord_inc_n(cbs,/* 1,*/ last_offset/*, 0*/); + } + + template + FORCEINLINE size_t inc_coords_ews(CoordsState& cbs, size_t last_offset, size_t ews) { + if (ews == 1) { + constexpr size_t Ind = StridesOrderInd(); + return last_offset + STRIDE(cbs, Ind); + } + return coord_inc_n(cbs,/* 1,*/ last_offset/*, 0*/); + } + + template + FORCEINLINE + typename std::enable_if<(Rank - 1 == rankIndex), zip_size_t>::type + coord_inc_n(ZipCoordsState& cbs, zip_size_t last_offset) { + + constexpr size_t Ind = StridesOrderInd(); + + if (likely(ZIP_COORDS(cbs, Ind) < ZIP_LAST_NUM(cbs, Ind))) { + last_offset.first += ZIP_STRIDE1(cbs, Ind); + last_offset.second += ZIP_STRIDE2(cbs, Ind); + ZIP_COORDS(cbs, Ind) = 
ZIP_COORDS(cbs, Ind) + 1; + return last_offset; + } + //overflow case should not happen + ZIP_COORDS(cbs, Ind) = 0; + //last_offset = 0;// last_offset + strides[Ind] - adjust_stride; + return { 0,0 }; + } + + template + FORCEINLINE + typename std::enable_if<(Rank - 1 != rankIndex), zip_size_t >::type + coord_inc_n(ZipCoordsState& cbs, zip_size_t last_offset) { + + constexpr size_t Ind = StridesOrderInd(); + + if (likely(ZIP_COORDS(cbs, Ind) < ZIP_LAST_NUM(cbs, Ind))) { + last_offset.first += ZIP_STRIDE1(cbs, Ind); + last_offset.second += ZIP_STRIDE2(cbs, Ind); + ZIP_COORDS(cbs, Ind) = ZIP_COORDS(cbs, Ind) + 1; + } + else { + + //lets adjust offset + last_offset.first -= ZIP_OF_ADJUST1(cbs, Ind); + last_offset.second -= ZIP_OF_ADJUST2(cbs, Ind); + ZIP_COORDS(cbs, Ind) = 0; + last_offset = coord_inc_n(cbs, last_offset); + } + + return last_offset; + + } + + template + FORCEINLINE zip_size_t inc_coords(ZipCoordsState& cbs, zip_size_t last_offset) { + + return coord_inc_n(cbs, last_offset); + } + + + template + FORCEINLINE + typename std::enable_if<(Rank - 1 == rankIndex), size_t>::type + init_coords(CoordsState& cbs, const Nd4jLong index, const Nd4jLong* bases, const Nd4jLong* strides, size_t offset = 0) { + constexpr size_t Ind = StridesOrderInd(); + COORDS(cbs, Ind) = index % bases[Ind]; + LAST_NUM(cbs, Ind) = bases[Ind] - 1; + STRIDE(cbs, Ind) = strides[Ind]; + OF_ADJUST(cbs, Ind) = bases[Ind] * strides[Ind] - strides[Ind]; + offset += COORDS(cbs, Ind) * strides[Ind]; + return offset; + } + + + + template + FORCEINLINE + typename std::enable_if<(Rank - 1 != rankIndex), size_t>::type + init_coords(CoordsState& cbs, const Nd4jLong index, const Nd4jLong* bases, const Nd4jLong* strides, size_t offset = 0) { + constexpr size_t Ind = StridesOrderInd(); + COORDS(cbs, Ind) = index % bases[Ind]; + LAST_NUM(cbs, Ind) = bases[Ind] - 1; + STRIDE(cbs, Ind) = strides[Ind]; + OF_ADJUST(cbs, Ind) = bases[Ind] * strides[Ind] - strides[Ind]; + offset += COORDS(cbs, Ind) * 
strides[Ind]; + return init_coords(cbs, index / bases[Ind], bases, strides, offset); + } + + + + + template + FORCEINLINE + typename std::enable_if<(Rank - 1 == rankIndex), bool>::type + eq_coords(CoordsState& cbs, const Nd4jLong* coords) { + return COORDS(cbs, rankIndex) == coords[rankIndex]; + } + + template + FORCEINLINE + typename std::enable_if<(Rank - 1 != rankIndex), bool>::type + eq_coords(CoordsState& cbs, const Nd4jLong* coords) { + return COORDS(cbs, rankIndex) == coords[rankIndex] && eq_coords(cbs, coords); + } + + + template + FORCEINLINE + typename std::enable_if<(Rank - 1 == rankIndex), bool>::type + eq_zip_coords(ZipCoordsState& cbs, const Nd4jLong* coords) { + return ZIP_COORDS(cbs, rankIndex) == coords[rankIndex]; + } + + template + FORCEINLINE + typename std::enable_if<(Rank - 1 != rankIndex), bool>::type + eq_zip_coords(ZipCoordsState& cbs, const Nd4jLong* coords) { + return ZIP_COORDS(cbs, rankIndex) == coords[rankIndex] && eq_zip_coords(cbs, coords); + } + + template + FORCEINLINE + typename std::enable_if<(Rank - 1 == rankIndex), zip_size_t>::type + init_coords(ZipCoordsState& cbs, const Nd4jLong index, const Nd4jLong* bases, const Nd4jLong* x_strides, const Nd4jLong* z_strides, zip_size_t offset = {}) { + constexpr size_t Ind = StridesOrderInd(); + ZIP_COORDS(cbs, Ind) = index % bases[Ind]; + ZIP_LAST_NUM(cbs, Ind) = bases[Ind] - 1; + ZIP_STRIDE1(cbs, Ind) = x_strides[Ind]; + ZIP_STRIDE2(cbs, Ind) = z_strides[Ind]; + ZIP_OF_ADJUST1(cbs, Ind) = ZIP_LAST_NUM(cbs, Ind) * ZIP_STRIDE1(cbs, Ind); + ZIP_OF_ADJUST2(cbs, Ind) = ZIP_LAST_NUM(cbs, Ind) * ZIP_STRIDE2(cbs, Ind); + offset.first += ZIP_COORDS(cbs, Ind) * ZIP_STRIDE1(cbs, Ind); + offset.second += ZIP_COORDS(cbs, Ind) * ZIP_STRIDE2(cbs, Ind); + return offset; + } + + template + FORCEINLINE + typename std::enable_if<(Rank - 1 != rankIndex), zip_size_t>::type + init_coords(ZipCoordsState& cbs, const Nd4jLong index, const Nd4jLong* bases, const Nd4jLong* x_strides, const Nd4jLong* z_strides, 
zip_size_t offset = {}) { + constexpr size_t Ind = StridesOrderInd(); + ZIP_COORDS(cbs, Ind) = index % bases[Ind]; + ZIP_LAST_NUM(cbs, Ind) = bases[Ind] - 1; + ZIP_STRIDE1(cbs, Ind) = x_strides[Ind]; + ZIP_STRIDE2(cbs, Ind) = z_strides[Ind]; + ZIP_OF_ADJUST1(cbs, Ind) = ZIP_LAST_NUM(cbs, Ind) * ZIP_STRIDE1(cbs, Ind); + ZIP_OF_ADJUST2(cbs, Ind) = ZIP_LAST_NUM(cbs, Ind) * ZIP_STRIDE2(cbs, Ind); + offset.first += ZIP_COORDS(cbs, Ind) * ZIP_STRIDE1(cbs, Ind); + offset.second += ZIP_COORDS(cbs, Ind) * ZIP_STRIDE2(cbs, Ind); + return init_coords(cbs, index / bases[Ind], bases, x_strides, z_strides, offset); + } + + + //inc coords for non constant Ranks + template + FORCEINLINE size_t inc_coords(const Nd4jLong* bases, const Nd4jLong* strides, Nd4jLong* coords, size_t last_offset, const size_t rank, const size_t skip = 0) { + + Nd4jLong val; + for (int i = rank - skip - 1; i >= 0; i--) { + val = coords[i] + 1; + if (likely(val < bases[i])) { + coords[i] = val; + last_offset += strides[i]; + break; + } + else { + last_offset -= coords[i] * strides[i]; + coords[i] = 0; + } + } + return last_offset; + } + + template<> + FORCEINLINE size_t inc_coords(const Nd4jLong* bases, const Nd4jLong* strides, Nd4jLong* coords, size_t last_offset, const size_t rank, const size_t skip) { + + Nd4jLong val; + for (int i = skip; i < rank; i++) { + val = coords[i] + 1; + if (likely(val < bases[i])) { + coords[i] = val; + last_offset += strides[i]; + break; + } + else { + last_offset -= coords[i] * strides[i]; + coords[i] = 0; + } + } + return last_offset; + } + + + template + FORCEINLINE zip_size_t inc_coords(const Nd4jLong* bases, const Nd4jLong* x_strides, const Nd4jLong* z_strides, Nd4jLong* coords, zip_size_t last_offset, const size_t rank, const size_t skip = 0) { + + Nd4jLong val = 0; + for (int i = rank - skip - 1; i >= 0; i--) { + val = coords[i] + 1; + if (likely(val < bases[i])) { + coords[i] = val; + last_offset.first += x_strides[i]; + last_offset.second += z_strides[i]; + break; + 
} + else { + last_offset.first -= coords[i] * x_strides[i]; + last_offset.second -= coords[i] * z_strides[i]; + coords[i] = 0; + } + } + return last_offset; + } + + template<> + FORCEINLINE zip_size_t inc_coords(const Nd4jLong* bases, const Nd4jLong* x_strides, const Nd4jLong* z_strides, Nd4jLong* coords, zip_size_t last_offset, const size_t rank, const size_t skip) { + + Nd4jLong val = 0; + for (int i = skip; i < rank; i++) { + val = coords[i] + 1; + if (likely(val < bases[i])) { + coords[i] = val; + + last_offset.first += x_strides[i]; + last_offset.second += z_strides[i]; + break; + } + else { + last_offset.first -= coords[i] * x_strides[i]; + last_offset.second -= coords[i] * z_strides[i]; + coords[i] = 0; + } + } + return last_offset; + } + +} + +#endif \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/helpers/cpu/addBias.cpp b/libnd4j/include/ops/declarable/helpers/cpu/addBias.cpp index a910a854c..e5242a5be 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/addBias.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/addBias.cpp @@ -1,5 +1,6 @@ /******************************************************************************* * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. 
* * This program and the accompanying materials are made available under the * terms of the Apache License, Version 2.0 which is available at @@ -14,161 +15,612 @@ * SPDX-License-Identifier: Apache-2.0 ******************************************************************************/ -// -// @author Yurii Shyrma, created on 26.02.2018 -// + // + // @author Yurii Shyrma, created on 26.02.2018 + // + // + // @author AbdelRauf + // - -#include +#include +#include +#include +#include #include +#include +#include +#include -namespace nd4j { -namespace ops { -namespace helpers { +#if defined(__GNUC__) +#define align32 __attribute__((aligned(32))) +#elif defined(_MSC_VER) +#define align32 __declspec(align(32)) +#else +#define align32 +#endif + +namespace nd4j { + namespace ops { + namespace helpers { + + template + static FORCEINLINE void _add(const T* __restrict xx, const T* __restrict yy, T* __restrict zz, const size_t& N) { + PRAGMA_OMP_SIMD + for (size_t c = 0; c < N; c++) + zz[c] = xx[c] + yy[c]; + } + + template + static FORCEINLINE void _add_inplace(T* __restrict xx, const T* __restrict yy, const size_t& N) { + PRAGMA_OMP_SIMD + for (size_t c = 0; c < N; c++) + xx[c] = xx[c] + yy[c]; + } + + template + static FORCEINLINE void _add_broadcast_inplace(T* __restrict xx, const T yy, const size_t& N) { + PRAGMA_OMP_SIMD + for (size_t c = 0; c < N; c++) + xx[c] = xx[c] + yy; + } + + template + static FORCEINLINE void _add_broadcast(const T* __restrict xx, const T yy, T* __restrict zz, const size_t& N) { + PRAGMA_OMP_SIMD + for (size_t c = 0; c < N; c++) + zz[c] = xx[c] + yy; + } + + static constexpr size_t MIN_NN = 32; + static constexpr size_t MIN_NN_K = 2; + + template + static typename std::enable_if::value, const X*>::type + flattened_bias(const Y* b_real, X* b_stack, const size_t b_stack_size, std::unique_ptr& b_heap, const Nd4jLong num, Nd4jLong yStrideC) + { + //best results when buffer used much , may result bad perf if buffer is used once + X* b_new = nullptr; + if 
(yStrideC != 1) { + if (num > b_stack_size) { + b_heap.reset(new X[num]); + b_new = b_heap.get(); + } + else { + b_new = b_stack; + } + for (size_t i = 0; i < num; i++) { + b_new[i] = b_real[i * yStrideC]; + } + } + else { + //no need , just pass normal bias + return static_cast(b_real); + } + return const_cast(b_new); + } + + template + static typename std::enable_if::value, const X*>::type + flattened_bias(const Y* b_real, X* b_stack, const size_t b_stack_size, std::unique_ptr& b_heap, const Nd4jLong num, Nd4jLong yStrideC) + { + //best results when buffer used much , may result bad perf if buffer is used once + X* b_new = nullptr; + if (num > b_stack_size) { + b_heap.reset(new X[num]); + b_new = b_heap.get(); + } + else { + b_new = b_stack; + } + if (yStrideC != 1) { + for (size_t i = 0; i < num; i++) { + b_new[i] = static_cast(b_real[i * yStrideC]); + } + } + else { + for (size_t i = 0; i < num; i++) { + b_new[i] = static_cast(b_real[i]); + } + } + return const_cast(b_new); + } + + template + static void channel_atTheEnd_stride1_C(const Nd4jLong*& x_strides, const Nd4jLong*& bases, T* x, const T* b, T* z, const bool& inplace, const Nd4jLong& start, const Nd4jLong& stop, const Nd4jLong& inc) + { + size_t loop_count = (stop - start) / inc; + nd4j::CoordsState cst; + size_t offset = nd4j::init_coords(cst, start, bases, x_strides); + + if (!inplace) { + for (size_t i = 0; i < loop_count; i++) { + _add(&(x[offset]), b, &(z[offset]), inc); + offset = nd4j::inc_coords(cst, offset); + } + } + else { + for (size_t i = 0; i < loop_count; i++) { + _add_inplace(&(x[offset]), b, inc); + offset = nd4j::inc_coords(cst, offset); + } + } + } -////////////////////////////////////////////////////////////////////////// -template -static void addBias_(const NDArray& input, const NDArray& bias, NDArray &output, const bool isNCHW) { + template + static void channel_atTheEnd_generic_C(const Nd4jLong* bases, const Nd4jLong* x_strides, const Nd4jLong* z_strides, const bool& inplaceOp, 
const bool same_stride, const bool same_order, T* x, const T* b, T* z, Nd4jLong start, Nd4jLong stop, Nd4jLong inc) { - // bias [oC] + //just ensure that passed sameStride is correct, because when bases are equal orders matters + bool sameOrderStride = same_order && same_stride; + if (sameOrderStride && x_strides[constRank - 1] == 1) { + channel_atTheEnd_stride1_C(x_strides, bases, x, b, z, inplaceOp, start, stop, inc); + } + else { + size_t loop_count = (stop - start) / inc; + nd4j::ZipCoordsState cst; + nd4j::zip_size_t offset = nd4j::init_coords(cst, start, bases, x_strides, z_strides); + Nd4jLong x_stride = ZIP_STRIDE1(cst, constRank - 1); + Nd4jLong z_stride = ZIP_STRIDE2(cst, constRank - 1); - // if(input_rank == 4) - // input and output have same shapes: [bS, oH, oW, oC] (NHWC) or [bS, oC, oH, oW] (NCHW) - // if(input_rank == 5) - // input and output have same shapes: [bS, oD, oH, oW, oC] (NHWC) or [bS, oD, oC, oH, oW] (NCHW) - // else - // apply applyBroadCast + if (same_order && x_stride == 1 && z_stride == 1) { + /* bases are equal with different strides , but the last one is 1. 
So we can still vectorize it */ + for (size_t i = 0; i < loop_count; i++) { + _add(&(x[offset.first]), b, &(z[offset.second]), inc); + offset = nd4j::inc_coords(cst, offset); + } + } + else { + for (size_t i = 0; i < loop_count; i++) { + T* xx = &(x[offset.first]); + T* zz = &(z[offset.second]); + for (size_t j = 0; j < inc; j++) + zz[j * z_stride] = xx[j * x_stride] + b[j]; + offset = nd4j::inc_coords(cst, offset); + } + } + } + + } + + /** + * this is our main optimization which benefits from everything for the continuous last_channel C order case + * as it is intended for full continous we do not need any rank info + */ + template + void channel_atTheEnd_continous_C(T* x, const T* b, T* z, bool inplaceOp, Nd4jLong start, Nd4jLong stop, Nd4jLong inc) { + size_t nums = (stop - start); + size_t num_inc = nums - nums % inc; + if (inplaceOp) { + + size_t offset_p = start; + for (size_t i = 0; i < num_inc; i += inc) { + _add_inplace(&(x[offset_p]), b, inc); + offset_p += inc; + } + if (nums > num_inc) + _add_inplace(&(x[offset_p]), b, nums - num_inc); + } + else { + size_t offset_p = start; + for (size_t i = 0; i < num_inc; i += inc) { + _add(&(x[offset_p]), b, &(z[offset_p]), inc); + offset_p += inc; + } + if (nums > num_inc) + _add(&(x[offset_p]), b, &(z[offset_p]), nums - num_inc); + } + } + + template + static void channel_NC_stride1_C(const Nd4jLong*& x_strides, const Nd4jLong*& bases, T* x, const T2* b, T* z, const bool& inplace, const Nd4jLong yStrideC, const Nd4jLong& start, const Nd4jLong& stop, const Nd4jLong& inc) + { + size_t loop_count = (stop - start) / inc; + nd4j::CoordsState cst; + size_t offset = nd4j::init_coords(cst, start, bases, x_strides); + + if (!inplace) { + for (size_t i = 0; i < loop_count; i++) { + T yy = static_cast(b[COORDS(cst, 1) * yStrideC]); + _add_broadcast(&(x[offset]), yy, &(z[offset]), inc); + offset = nd4j::inc_coords(cst, offset); + } + } + else { + for (size_t i = 0; i < loop_count; i++) { + T yy = static_cast(b[COORDS(cst, 1) 
* yStrideC]); + _add_broadcast_inplace(&(x[offset]), yy, inc); + offset = nd4j::inc_coords(cst, offset); + } + } + } + + template + void channel_NC_generic_C(const Nd4jLong* bases, const Nd4jLong* x_strides, const Nd4jLong* z_strides, const bool& inplaceOp, const bool same_stride, const bool same_order, const Nd4jLong yStrideC, T* x, const T2* b, T* z, Nd4jLong start, Nd4jLong stop, Nd4jLong inc) { + + //just ensure that passed sameStride is correct, because when bases are equal orders matters + + bool sameOrderStride = same_order && same_stride; + + if (sameOrderStride && x_strides[constRank - 1] == 1) { + channel_NC_stride1_C(x_strides, bases, x, b, z, inplaceOp, yStrideC, start, stop, inc); + } + else { + + // (stop-start) % inc == 0 because we handled inside partitioning using the channel size + size_t loop_count = (stop - start) / inc; + nd4j::ZipCoordsState cst; + nd4j::zip_size_t offset = nd4j::init_coords(cst, start, bases, x_strides, z_strides); + Nd4jLong x_stride = ZIP_STRIDE1(cst, constRank - 1); + Nd4jLong z_stride = ZIP_STRIDE2(cst, constRank - 1); + if (same_order && z_stride == 1 && x_stride == 1) { + /* bases are equal with different strides , but the last one is 1. 
So we can still vectorize it */ + for (size_t i = 0; i < loop_count; i++) { + T yy = static_cast(b[ZIP_COORDS(cst, 1) * yStrideC]); + _add_broadcast(&(x[offset.first]), yy, &(z[offset.second]), inc); + offset = nd4j::inc_coords(cst, offset); + } + } + else { + for (size_t i = 0; i < loop_count; i++) { + T* xx = &(x[offset.first]); + T* zz = &(z[offset.second]); + T yy = static_cast(b[ZIP_COORDS(cst, 1) * yStrideC]); + for (size_t j = 0; j < inc; j++) + zz[j * z_stride] = xx[j * x_stride] + yy; + offset = nd4j::inc_coords(cst, offset); + } + } + } + } + + /// + template + void channel_NC_continous_numHW_C(Nd4jLong rank, const Nd4jLong* bases, const Nd4jLong* x_strides, T* x, const T2* b, T* z, bool inplaceOp, const Nd4jLong yStrideC, Nd4jLong start, Nd4jLong stop, Nd4jLong inc) { + + // (stop-start) % inc == 0 because we handled inside partitioning using the channel size + size_t loop_count = (stop - start) / inc; + + nd4j::CoordsState<1> cst; + //note: we had to manually pass index + size_t offset_p = nd4j::init_coords<2>(cst, start / inc, bases, x_strides); + + //partitioning was done using numHW, so we can increment from rank 2 + if (inplaceOp) { + for (size_t i = 0; i < loop_count; i++) { + T yy = static_cast(b[COORDS(cst, 1) * yStrideC]); + _add_broadcast_inplace(&(x[offset_p]), yy, inc); + offset_p = nd4j::inc_coords<2>(cst, offset_p); + } + } + else { + if (yStrideC == 1) { + for (size_t i = 0; i < loop_count; i++) { + T yy = static_cast(b[COORDS(cst, 1)]); + _add_broadcast(&(x[offset_p]), yy, &(z[offset_p]), inc); + offset_p = nd4j::inc_coords<2>(cst, offset_p); + } + } + else { + for (size_t i = 0; i < loop_count; i++) { + T yy = static_cast(b[COORDS(cst, 1) * yStrideC]); + _add_broadcast(&(x[offset_p]), yy, &(z[offset_p]), inc); + offset_p = nd4j::inc_coords<2>(cst, offset_p); + } + } + } + } + + // + template + static void channel_generic_stride_skip_F(const Nd4jLong*& x_strides, const Nd4jLong*& bases, T* x, const T2* b, T* z, const bool& inplace, const 
Nd4jLong yStrideC, const Nd4jLong& start, const Nd4jLong& stop, const Nd4jLong& inc) + { + // (stop-start) % inc == 0 because we handled inside partitioning using the channel size + size_t loop_count = (stop - start) / inc; + nd4j::CoordsState cst; + size_t offset_p = nd4j::init_coords(cst, start, bases, x_strides); + if (!inplace) { + for (size_t i = 0; i < loop_count; i++) { + T yy = static_cast(b[COORDS(cst, b_index) * yStrideC]); + _add_broadcast(&(x[offset_p]), yy, &(z[offset_p]), inc); + offset_p = nd4j::inc_coords(cst, offset_p); + } + } + else { + for (size_t i = 0; i < loop_count; i++) { + T yy = static_cast(b[COORDS(cst, b_index) * yStrideC]); + _add_broadcast_inplace(&(x[offset_p]), yy, inc); + offset_p = nd4j::inc_coords(cst, offset_p); + } + } + } + + /// + template + void channel_generic_F(const Nd4jLong* bases, const Nd4jLong* x_strides, const Nd4jLong* z_strides, const bool& inplaceOp, const bool same_stride, const bool same_order, const Nd4jLong yStrideC, T* x, const T2* b, T* z, Nd4jLong start, Nd4jLong stop, Nd4jLong inc) { + //just ensure that passed sameStride is correct, because when bases are equal orders matters + bool sameOrderStride = same_order && same_stride; + if (sameOrderStride && x_strides[0] == 1) { + channel_generic_stride_skip_F(x_strides, bases, x, b, z, inplaceOp, yStrideC, start, stop, inc); + } + else { + // (stop-start) % inc == 0 because we handled inside partitioning using the channel size + + size_t loop_count = (stop - start) / inc; + nd4j::ZipCoordsState cst; + nd4j::zip_size_t offset = nd4j::init_coords(cst, start, bases, x_strides, z_strides); + Nd4jLong x_stride = ZIP_STRIDE1(cst, 0); + Nd4jLong z_stride = ZIP_STRIDE2(cst, 0); + if (same_order && z_stride == 1 && x_stride == 1) { + + for (size_t i = 0; i < loop_count; i++) { + T yy = static_cast(b[ZIP_COORDS(cst, b_index) * yStrideC]); + _add_broadcast(&(x[offset.first]), yy, &(z[offset.second]), inc); + offset = nd4j::inc_coords(cst, offset); + } + } + else { + for 
(size_t i = 0; i < loop_count; i++) { + T* xx = &(x[offset.first]); + T* zz = &(z[offset.second]); + T yy = static_cast(b[ZIP_COORDS(cst, b_index) * yStrideC]); + for (size_t j = 0; j < inc; j++) + zz[j * z_stride] = xx[j * x_stride] + yy; + offset = nd4j::inc_coords(cst, offset); + } + } + } + } - const X* x = input.bufferAsT(); - const Y* y = bias.bufferAsT(); - X* z = output.bufferAsT(); + template + static void addBias_(const NDArray& input, const NDArray& bias, NDArray& output, const bool isNCHW) { + Nd4jLong* x_shapeInfo = input.getShapeInfo(); + Nd4jLong* z_shapeInfo = output.getShapeInfo(); + X* x = input.bufferAsT(); + X* z = output.bufferAsT(); + const Y* b = bias.bufferAsT(); + const Nd4jLong rank = x_shapeInfo[0]; + const Nd4jLong* bases = &(x_shapeInfo[1]); + const Nd4jLong* x_strides = &(x_shapeInfo[rank + 1]); + const Nd4jLong* z_strides = &(z_shapeInfo[rank + 1]); + const bool inplaceOp = (x == z); + const bool same_order = inplaceOp || (input.ordering() == output.ordering()); + const bool channel_atTheEnd = !isNCHW; + const bool same_stride = inplaceOp || shape::strideEquals(x_shapeInfo, z_shapeInfo); + bool isContinuous = false; + int posOfNonUnityDim; + bias.isCommonVector(posOfNonUnityDim); + const Nd4jLong yStrideC = bias.strideAt(posOfNonUnityDim); + char order = input.ordering(); - const bool inOutAreSame = x == z; + //for rank>5 + if (rank > 5) { + const int channelDim = isNCHW ? 
1 : input.rankOf() - 1; // second or last + const_cast(input).applyBroadcast(nd4j::broadcast::Add, { channelDim }, bias, output); + return; + } - int posOfNonUnityDim; - bias.isCommonVector(posOfNonUnityDim); + if (same_order && same_stride) { + isContinuous = shape::elementWiseStride(x_shapeInfo) == 1 && shape::elementWiseStride(z_shapeInfo) == 1; + // check_continuity(order, bases, x_strides, rank); + }//if ( sameOrder && same_stride) - const uint bS = output.sizeAt(0); // batch size - const Nd4jLong yStrideC = bias.strideAt(posOfNonUnityDim); - const Nd4jLong zStrideB = output.strideAt(0); + bool treat_as_lastC = false; + // + if (rank == 2 && isNCHW) { + //we believe we better treat it as channel at the end case; + treat_as_lastC = true; + } + if (channel_atTheEnd || treat_as_lastC) { + //N..HWC case here + //flattened bias variables + constexpr size_t BSIZE1 = 3 * MIN_NN * MIN_NN; + constexpr size_t BSIZE2 = BSIZE1 + MIN_NN * MIN_NN; + X flatBias_stack[BSIZE2] align32; + std::unique_ptr flatBias_heap; + const X* bias_new; + X* bias_extra = nullptr; + size_t total_num = 1; + for (size_t i = 0; i < rank; i++) { + total_num *= bases[i]; + } + Nd4jLong inc; + size_t rank_skip = 1; + if (order == 'c') { + size_t b_stack_size = BSIZE2; + inc = bases[rank - 1]; + if (isContinuous) { + //for continous we need extra stack memory + // to create vectorizable bias from small size + b_stack_size = BSIZE1; + bias_extra = &(flatBias_stack[BSIZE1]); + } + bias_new = flattened_bias(b, (X*)flatBias_stack, b_stack_size, flatBias_heap, inc, yStrideC); + if (isContinuous && inc < MIN_NN_K * MIN_NN && total_num > inc * MIN_NN_K) { + //for small size where total_num is sufficient we need to recreate vectorizable buffer + size_t old_inc = inc; + //sizeof bias_extra is MIN_NN * MIN_NN + size_t new_inc = inc < MIN_NN ? inc * MIN_NN : inc * MIN_NN / MIN_NN_K; + //if there is a room then lets multiply + new_inc = (new_inc * MIN_NN_K <= total_num && new_inc < MIN_NN * MIN_NN / MIN_NN_K) ? 
MIN_NN_K * new_inc : new_inc; + for (size_t i = 0; i < new_inc; i += inc) { + //copy to our buffer + X* cp = &(bias_extra[i]); + for (size_t j = 0; j < inc; j++) { + cp[j] = bias_new[j]; + } + } + //vectorizable buffer + inc = new_inc; + bias_new = bias_extra; + } + } + else { + inc = bases[0]; + if (isContinuous) { + //we can choose other inc and index for that case + //but for now lets choose all till the last one + uint32_t req_numThreads = nd4j::Environment::getInstance()->maxMasterThreads(); + isContinuous = false; + if (rank > 2) { + if (req_numThreads < 2 || bases[rank - 1] >= req_numThreads) { + inc = total_num / bases[rank - 1]; + isContinuous = true; + rank_skip = rank - 1; + } + else if (rank > 3 && bases[rank - 1] * bases[rank - 2] >= req_numThreads) { + inc = total_num / bases[rank - 1] / bases[rank - 2]; //for continuous case it is its stride + rank_skip = rank - 2; + isContinuous = true; + } + } + } + } - if(output.rankOf() == 4) { + FUNC_1D func = [order, isContinuous, rank, x, b, bias_new, z, x_shapeInfo, z_shapeInfo, same_stride, same_order, yStrideC, rank_skip] + (uint64_t thread_id, int64_t start, int64_t stop, int64_t increment) -> void { + const Nd4jLong rank = x_shapeInfo[0]; + const Nd4jLong* bases = &(x_shapeInfo[1]); + const Nd4jLong* x_strides = &(x_shapeInfo[rank + 1]); + const Nd4jLong* z_strides = &(z_shapeInfo[rank + 1]); + const bool inplaceOp = (x == z); + if (order == 'c') { + if (isContinuous) { + channel_atTheEnd_continous_C(x, bias_new, z, inplaceOp, start, stop, increment); + } + // rank is in [2,5] + else if (rank == 4) { + channel_atTheEnd_generic_C(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, x, bias_new, z, start, stop, increment); - const uint C = isNCHW ? output.sizeAt(1) : output.sizeAt(3); // channels - const uint oH = isNCHW ? output.sizeAt(2) : output.sizeAt(1); // height - const uint oW = isNCHW ? 
output.sizeAt(3) : output.sizeAt(2); // width + } + else if (rank == 5) { + channel_atTheEnd_generic_C(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, x, bias_new, z, start, stop, increment); + } + else if (rank == 2) { + channel_atTheEnd_generic_C(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, x, bias_new, z, start, stop, increment); + } + else if (rank == 3) { + channel_atTheEnd_generic_C(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, x, bias_new, z, start, stop, increment); + } + } + else { + //generic F case + if (isContinuous) { + if (rank == 4) { + if (rank_skip == rank - 2) { + channel_generic_stride_skip_F(x_strides, bases, x, b, z, inplaceOp, yStrideC, start, stop, increment); + } + else { + channel_generic_stride_skip_F(x_strides, bases, x, b, z, inplaceOp, yStrideC, start, stop, increment); + } + } + else if (rank == 5) { + if (rank_skip == rank - 2) { + //skip==3 + channel_generic_stride_skip_F(x_strides, bases, x, b, z, inplaceOp, yStrideC, start, stop, increment); + } + else { + channel_generic_stride_skip_F(x_strides, bases, x, b, z, inplaceOp, yStrideC, start, stop, increment); + } + } + else if (rank == 3) { + channel_generic_stride_skip_F(x_strides, bases, x, b, z, inplaceOp, yStrideC, start, stop, increment); + } + } + else if (rank == 4) { + channel_generic_F(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, yStrideC, x, b, z, start, stop, increment); + } + else if (rank == 5) { + channel_generic_F(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, yStrideC, x, b, z, start, stop, increment); + } + else if (rank == 2) { + channel_generic_F(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, yStrideC, x, b, z, start, stop, increment); + } + else if (rank == 3) { + channel_generic_F(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, yStrideC, x, b, z, start, stop, increment); + } - const Nd4jLong zStrideC = isNCHW ? 
output.stridesOf()[1] : output.stridesOf()[3]; - const Nd4jLong zStrideH = isNCHW ? output.stridesOf()[2] : output.stridesOf()[1]; - const Nd4jLong zStrideW = isNCHW ? output.stridesOf()[3] : output.stridesOf()[2]; + } + }; + // + samediff::Threads::parallel_aligned_increment(func, 0, total_num, inc); + } + else { + //NC...HW case here + size_t numNC = 1; + size_t numHW = 1; + for (size_t i = 0; i < 2; i++) { + numNC *= bases[i]; + } + for (size_t i = 2; i < rank; i++) { + numHW *= bases[i]; + } + Nd4jLong total_num = numNC * numHW; + Nd4jLong inc = (order == 'c') ? bases[rank - 1] : bases[0]; + if (order == 'c' && isContinuous) { + //sometimes last dimension is too big and multithreading could suffer using unfair partitioning + //so we will do it only when inc is smaller our value or multithreading turned off + uint32_t req_numThreads = nd4j::Environment::getInstance()->maxMasterThreads(); + if (req_numThreads < 2 || numNC >= req_numThreads || inc <= 2 * 8196 || rank == 3) { + inc = numHW; + } + else { + //treat it as stride1c case + isContinuous = false; + } + } + FUNC_1D func = [order, isContinuous, rank, x, b, z, x_shapeInfo, z_shapeInfo, same_stride, same_order, yStrideC] + (uint64_t thread_id, int64_t start, int64_t stop, int64_t increment) -> void { + const Nd4jLong rank = x_shapeInfo[0]; + const Nd4jLong* bases = &(x_shapeInfo[1]); + const Nd4jLong* x_strides = &(x_shapeInfo[rank + 1]); + const Nd4jLong* z_strides = &(z_shapeInfo[rank + 1]); + const bool inplaceOp = (x == z); + if (order == 'c') { + if (isContinuous) { + channel_NC_continous_numHW_C(rank, bases, x_strides, x, b, z, inplaceOp, yStrideC, start, stop, increment); + } + // rank is in [3,5] + else if (rank == 4) { + channel_NC_generic_C(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, yStrideC, x, b, z, start, stop, increment); - if(inOutAreSame) { + } + else if (rank == 5) { + channel_NC_generic_C(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, yStrideC, x, b, 
z, start, stop, increment); + } + else if (rank == 3) { + channel_NC_generic_C(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, yStrideC, x, b, z, start, stop, increment); + } + } + else { + //the same can be applied for NCHW case + //generic F case + //continous case is missing - auto func = PRAGMA_THREADS_FOR_3D { - for (uint b = start_x; b < stop_x; b += inc_x) - for (uint c = start_y; c < stop_y; c += inc_y) - for (uint h = start_z; h < stop_z; h += inc_z) - for (uint w = 0; w < oW; ++w) - z[b * zStrideB + c * zStrideC + h * zStrideH + w * zStrideW] += static_cast(y[c * yStrideC]); - }; + if (rank == 4) { + channel_generic_F(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, yStrideC, x, b, z, start, stop, increment); + } + else if (rank == 5) { + channel_generic_F(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, yStrideC, x, b, z, start, stop, increment); + } + else if (rank == 3) { + channel_generic_F(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, yStrideC, x, b, z, start, stop, increment); + } + } + }; + // + samediff::Threads::parallel_aligned_increment(func, 0, total_num, inc); + } + } + ////////////////////////////////////////////////////////////////////////// + void addBias(nd4j::graph::Context& block, const NDArray& input, const NDArray& bias, NDArray& output, const bool isNCHW) { - samediff::Threads::parallel_for(func, 0, bS, 1, 0, C, 1, 0, oH, 1); - } - else { + // bias.rankOf() == 1 ? bias : bias.reshape(bias.ordering(), {bias.lengthOf()}) + BUILD_DOUBLE_SELECTOR(input.dataType(), bias.dataType(), addBias_, (input, bias, output, isNCHW), FLOAT_TYPES, FLOAT_TYPES); + } - const Nd4jLong xStrideB = input.stridesOf()[0]; - const Nd4jLong xStrideC = isNCHW ? input.stridesOf()[1] : input.stridesOf()[3]; - const Nd4jLong xStrideH = isNCHW ? input.stridesOf()[2] : input.stridesOf()[1]; - const Nd4jLong xStrideW = isNCHW ? 
input.stridesOf()[3] : input.stridesOf()[2]; - if (isNCHW) { - - auto func = PRAGMA_THREADS_FOR_3D { - for (uint b = start_x; b < stop_x; b += inc_x) - for (uint c = start_y; c < stop_y; c += inc_y) - for (uint h = start_z; h < stop_z; h += inc_z) - for (uint w = 0; w < oW; ++w) - z[b * zStrideB + c * zStrideC + h * zStrideH + w * zStrideW] = x[b * xStrideB + c * xStrideC + h * xStrideH + w * xStrideW] + static_cast(y[c * yStrideC]); - }; - - samediff::Threads::parallel_for(func, 0, bS, 1, 0, C, 1, 0, oH, 1); - } else { - auto func = PRAGMA_THREADS_FOR_3D { - for (uint b = start_x; b < stop_x; b++) - for (uint h = start_y; h < stop_y; h++) - for (uint w = start_z; w < stop_z; w++) - for (uint c = 0; c < C; c++) - z[b * zStrideB + c * zStrideC + h * zStrideH + w * zStrideW] = x[b * xStrideB + c * xStrideC + h * xStrideH + w * xStrideW] + y[c * yStrideC]; - }; - - samediff::Threads::parallel_for(func, 0, bS, 1, 0, oH, 1, 0, oW, 1); - } - } - } - else if(output.rankOf() == 5) { - - const uint C = isNCHW ? output.sizeAt(1) : output.sizeAt(4); // channels - const uint oD = isNCHW ? output.sizeAt(2) : output.sizeAt(1); // depth - const uint oH = isNCHW ? output.sizeAt(3) : output.sizeAt(2); // height - const uint oW = isNCHW ? output.sizeAt(4) : output.sizeAt(3); // width - - const Nd4jLong zStrideC = isNCHW ? output.stridesOf()[1] : output.stridesOf()[4]; - const Nd4jLong zStrideD = isNCHW ? output.stridesOf()[2] : output.stridesOf()[1]; - const Nd4jLong zStrideH = isNCHW ? output.stridesOf()[3] : output.stridesOf()[2]; - const Nd4jLong zStrideW = isNCHW ? 
output.stridesOf()[4] : output.stridesOf()[3]; - - if(inOutAreSame) { - - auto func = PRAGMA_THREADS_FOR_3D { - for (uint b = start_x; b < stop_x; b += inc_x) - for (uint c = start_y; c < stop_y; c += inc_y) - for (uint d = start_z; d < stop_z; d += inc_z) - for (uint h = 0; h < oH; ++h) - for (uint w = 0; w < oW; ++w) - z[b * zStrideB + c * zStrideC + d * zStrideD + h * zStrideH + w * zStrideW] += static_cast(y[c * yStrideC]); - }; - - samediff::Threads::parallel_for(func, 0, bS, 1, 0, C, 1, 0, oD, 1); - } - else { - - const Nd4jLong xStrideB = input.stridesOf()[0]; - const Nd4jLong xStrideC = isNCHW ? input.stridesOf()[1] : input.stridesOf()[4]; - const Nd4jLong xStrideD = isNCHW ? input.stridesOf()[2] : input.stridesOf()[1]; - const Nd4jLong xStrideH = isNCHW ? input.stridesOf()[3] : input.stridesOf()[2]; - const Nd4jLong xStrideW = isNCHW ? input.stridesOf()[4] : input.stridesOf()[3]; - - auto func = PRAGMA_THREADS_FOR_3D { - for (uint b = start_x; b < stop_x; b += inc_x) - for (uint c = start_y; c < stop_y; c += inc_y) - for (uint d = start_z; d < stop_z; d += inc_z) - for (uint h = 0; h < oH; ++h) - for (uint w = 0; w < oW; ++w) - z[b * zStrideB + c * zStrideC + d * zStrideD + h * zStrideH + w * zStrideW] = x[b * xStrideB + c * xStrideC + d * xStrideD + h * xStrideH + w * xStrideW] + static_cast(y[c * yStrideC]); - }; - - samediff::Threads::parallel_for(func, 0, bS, 1, 0, C, 1, 0, oD, 1); - } - } - else { - const int channelDim = isNCHW ? 1 : input.rankOf() - 1; // second or last - const_cast(input).applyBroadcast(nd4j::broadcast::Add, {channelDim}, bias, output); - } + BUILD_DOUBLE_TEMPLATE(template void addBias_, (const NDArray& input, const NDArray& bias, NDArray& output, const bool isNCHW), FLOAT_TYPES, FLOAT_TYPES); + } + } } - -////////////////////////////////////////////////////////////////////////// -void addBias(nd4j::graph::Context& block, const NDArray& input, const NDArray& bias, NDArray& output, const bool isNCHW) { - - // bias.rankOf() == 1 ? 
bias : bias.reshape(bias.ordering(), {bias.lengthOf()}) - BUILD_DOUBLE_SELECTOR(input.dataType(), bias.dataType(), addBias_, (input, bias, output, isNCHW), FLOAT_TYPES, FLOAT_TYPES); -} - - -BUILD_DOUBLE_TEMPLATE(template void addBias_, (const NDArray& input, const NDArray& bias, NDArray& output, const bool isNCHW), FLOAT_TYPES, FLOAT_TYPES); - -} -} -} - diff --git a/libnd4j/tests_cpu/layers_tests/LoopCoordsHelperTests.cpp b/libnd4j/tests_cpu/layers_tests/LoopCoordsHelperTests.cpp new file mode 100644 index 000000000..1a65c09ae --- /dev/null +++ b/libnd4j/tests_cpu/layers_tests/LoopCoordsHelperTests.cpp @@ -0,0 +1,223 @@ +/******************************************************************************* + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + + // + // @author Abdelrauf + // + +#include "testlayers.h" +#include +#include +using namespace nd4j; + +class LoopCoordsHelper : public testing::Test { +public: + +}; + + +template +FORCEINLINE +typename std::enable_if<(Rank - 1 == rankIndex), bool>::type +eq_strides(CoordsState& cbs, const Nd4jLong* strides) { + return STRIDE(cbs, rankIndex) == strides[rankIndex]; +} + +template +FORCEINLINE +typename std::enable_if<(Rank - 1 != rankIndex), bool>::type +eq_strides(CoordsState& cbs, const Nd4jLong* strides) { + return STRIDE(cbs, rankIndex) == strides[rankIndex] && eq_strides(cbs, strides); +} + +template +FORCEINLINE +typename std::enable_if<(Rank - 1 == rankIndex), bool>::type +eq_zip_strides(ZipCoordsState& cbs, const Nd4jLong* strides1, const Nd4jLong* strides2) { + return ZIP_STRIDE1(cbs, rankIndex) == strides1[rankIndex] && ZIP_STRIDE2(cbs, rankIndex) == strides2[rankIndex]; +} + +template +FORCEINLINE +typename std::enable_if<(Rank - 1 != rankIndex), bool>::type +eq_zip_strides(ZipCoordsState& cbs, const Nd4jLong* strides1, const Nd4jLong* strides2) { + return ZIP_STRIDE1(cbs, rankIndex) == strides1[rankIndex] && ZIP_STRIDE2(cbs, rankIndex) == strides2[rankIndex] + && eq_zip_strides(cbs, strides1, strides2); +} + + + + +TEST_F(LoopCoordsHelper, Init_Tests) { + + constexpr size_t test_Index = 131; + constexpr size_t Rank = 5; + + Nd4jLong shape[Rank] = { 3, 5 ,7, 8, 9}; + Nd4jLong multiply_st[] = { 2,3,3,5,6,7,9,3 }; + Nd4jLong strides_c[Rank] ; + Nd4jLong strides_f[Rank]; + + Nd4jLong coords[Rank]; + Nd4jLong coords_f[Rank]; + + strides_f[0] = multiply_st[0] * shape[0]; + strides_c[Rank-1] = multiply_st[Rank-1] * shape[Rank-1]; + + for (int i = 1; i < Rank; i++) { + strides_f[i] = strides_f[i - 1] * multiply_st[i] * shape[i]; + } + + for (int i = Rank-2; i >=0; i--) { + strides_c[i] = strides_c[i+1] * multiply_st[i] * shape[i]; + 
} + + //init our base coords + index2coords_C(test_Index, Rank, shape, coords); + index2coords_F(test_Index, Rank, shape, coords_f); + + + size_t offset_calc = offset_from_coords(strides_c, coords, Rank); + size_t offset_calc_f = offset_from_coords(strides_f, coords_f, Rank); + + CoordsState cts; + CoordsState cts_f; + + ZipCoordsState zcts; + ZipCoordsState zcts_f; + + size_t offset = init_coords(cts, test_Index, shape, strides_c); + size_t offset_f = init_coords(cts_f, test_Index, shape, strides_f); + + zip_size_t zoffset = init_coords(zcts, test_Index, shape, strides_c, strides_c); + zip_size_t zoffset_f = init_coords(zcts_f, test_Index, shape, strides_f, strides_f); + + ASSERT_TRUE(eq_coords(cts, coords)); + ASSERT_TRUE(eq_coords(cts_f, coords_f)); + + ASSERT_TRUE(eq_zip_coords(zcts, coords)); + ASSERT_TRUE(eq_zip_coords(zcts_f, coords_f)); + + ASSERT_TRUE(eq_strides(cts,strides_c)); + ASSERT_TRUE(eq_strides(cts_f,strides_f)); + + ASSERT_TRUE(eq_zip_strides(zcts, strides_c, strides_c)); + ASSERT_TRUE(eq_zip_strides(zcts_f, strides_f, strides_f)); + + + ASSERT_EQ(offset , offset_calc); + ASSERT_EQ(zoffset.first , offset_calc); + ASSERT_EQ(zoffset.second , offset_calc); + ASSERT_EQ(offset_f , offset_calc_f); + ASSERT_EQ(zoffset_f.first , offset_calc_f); + ASSERT_EQ(zoffset_f.second , offset_calc_f); +} + +TEST_F(LoopCoordsHelper, Increment_Use_Tests) { + + + constexpr size_t Rank = 4; + + Nd4jLong shape[Rank] = { 3, 5 ,7, 8 }; + Nd4jLong multiply_st[] = { 2,3,3,5,6,7,9,3 }; + Nd4jLong strides_c[Rank]; + Nd4jLong strides_f[Rank]; + + Nd4jLong coords[Rank] = {}; + Nd4jLong coords_f[Rank] = {}; + Nd4jLong coords2[Rank] = {}; + Nd4jLong coords2_f[Rank] = {}; + Nd4jLong zcoords2[Rank] = {}; + Nd4jLong zcoords2_f[Rank] = {}; + + strides_f[0] = multiply_st[0] * shape[0]; + strides_c[Rank - 1] = multiply_st[Rank - 1] * shape[Rank - 1]; + + for (int i = 1; i < Rank; i++) { + strides_f[i] = strides_f[i - 1] * multiply_st[i] * shape[i]; + } + + for (int i = Rank - 2; i >= 
0; i--) { + strides_c[i] = strides_c[i + 1] * multiply_st[i] * shape[i]; + } + + int total = 1; + for (int i = 0; i < Rank; i++) { + total *= shape[i]; + } + + CoordsState cts; + CoordsState cts_f; + + ZipCoordsState zcts; + ZipCoordsState zcts_f; + + size_t offset = init_coords(cts, 0, shape, strides_c); + size_t offset_f = init_coords(cts_f, 0, shape, strides_f); + + zip_size_t zoffset = init_coords(zcts, 0, shape, strides_c, strides_c); + zip_size_t zoffset_f = init_coords(zcts_f, 0, shape, strides_f, strides_f); + + size_t offset2 = 0; + size_t offset2_f = 0; + zip_size_t zoffset2 = {}; + zip_size_t zoffset2_f = {}; + + for (int j = 0; j < total; j++) { + + + index2coords_C(j, Rank, shape, coords); + index2coords_F(j, Rank, shape, coords_f); + + size_t offset_calc = offset_from_coords(strides_c, coords, Rank); + size_t offset_calc_f = offset_from_coords(strides_f, coords_f, Rank); + + + ASSERT_TRUE(eq_coords(cts, coords)); + ASSERT_TRUE(eq_coords(cts_f, coords_f)); + + ASSERT_TRUE(eq_zip_coords(zcts, coords)); + ASSERT_TRUE(eq_zip_coords(zcts_f, coords_f)); + + ASSERT_EQ(offset, offset_calc); + ASSERT_EQ(zoffset.first, offset_calc); + ASSERT_EQ(zoffset.second, offset_calc); + ASSERT_EQ(offset_f, offset_calc_f); + ASSERT_EQ(zoffset_f.first, offset_calc_f); + ASSERT_EQ(zoffset_f.second, offset_calc_f); + + + ASSERT_EQ(offset2, offset_calc); + ASSERT_EQ(zoffset2.first, offset_calc); + ASSERT_EQ(zoffset2.second, offset_calc); + ASSERT_EQ(offset2_f, offset_calc_f); + ASSERT_EQ(zoffset2_f.first, offset_calc_f); + ASSERT_EQ(zoffset2_f.second, offset_calc_f); + + offset = inc_coords(cts, offset); + offset_f = inc_coords(cts_f, offset_f); + zoffset = inc_coords(zcts, zoffset); + zoffset_f = inc_coords(zcts_f, zoffset_f); + + offset2 = inc_coords(shape,strides_c, coords2, offset2, Rank); + offset2_f = inc_coords(shape, strides_f, coords2_f, offset2_f, Rank); + zoffset2 = inc_coords(shape, strides_c, strides_c, zcoords2, zoffset2, Rank); + zoffset2_f = inc_coords(shape, 
strides_f, strides_f, zcoords2_f, zoffset2_f, Rank); + + } + +} + diff --git a/libnd4j/tests_cpu/layers_tests/PlaygroundTests.cpp b/libnd4j/tests_cpu/layers_tests/PlaygroundTests.cpp index 4d7a0f783..9f75beca1 100644 --- a/libnd4j/tests_cpu/layers_tests/PlaygroundTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/PlaygroundTests.cpp @@ -45,6 +45,7 @@ #include #include +#include using namespace nd4j; using namespace nd4j::graph; @@ -64,6 +65,87 @@ TEST_F(PlaygroundTests, test_avx) { nd4j_printf("Optimal level: %i; Binary level: %i;\n", ::optimalLevel(), ::binaryLevel()); } +/* + +TEST_F(PlaygroundTests, test_s_0) { + std::vector> shapes = {{32, 224, 224, 3}, {32, 56, 56, 64}, {32, 7, 7, 512}}; + std::vector threads = {1, 2, 4, 8, 16}; + + for (auto shape: shapes) { + for (auto t: threads) { + nd4j::Environment::getInstance()->setMaxMasterThreads(t); + + auto x = NDArrayFactory::create('c', shape); + auto y = NDArrayFactory::create('c', {shape[3]}); + auto z = x.ulike(); + + std::vector values; + Context ctx(1); + ctx.setInputArray(0, &x); + ctx.setInputArray(1, &y); + ctx.setOutputArray(0, &z); + + nd4j::ops::biasadd op; + + + for (int e = 0; e < 10000; e++) { + auto timeStart = std::chrono::system_clock::now(); + + op.execute(&ctx); + nd4j::ops::helpers::addBias(ctx, x, y, z, false); + + auto timeEnd = std::chrono::system_clock::now(); + auto outerTime = std::chrono::duration_cast(timeEnd - timeStart).count(); + values.emplace_back(outerTime); + } + + std::sort(values.begin(), values.end()); + + nd4j_printf("Shape: [%lld, %lld, %lld, %lld]; Threads: [%i]; Time: %lld us;\n", shape[0], shape[1], shape[2], shape[3], t, values[values.size() / 2]); + } + } +} + +TEST_F(PlaygroundTests, test_s_1) { + std::vector> shapes = {{32, 3, 224, 224}, {32, 64, 56, 56}, {32, 512, 7, 7}}; + std::vector threads = {1, 2, 4, 8, 16}; + + for (auto shape: shapes) { + for (auto t: threads) { + nd4j::Environment::getInstance()->setMaxMasterThreads(t); + + auto x = NDArrayFactory::create('c', 
shape); + auto y = NDArrayFactory::create('c', {shape[1]}); + auto z = x.ulike(); + + std::vector values; + Context ctx(1); + ctx.setInputArray(0, &x); + ctx.setInputArray(1, &y); + ctx.setOutputArray(0, &z); + + nd4j::ops::biasadd op; + + + for (int e = 0; e < 10000; e++) { + auto timeStart = std::chrono::system_clock::now(); + + //op.execute({&x, &y}, {&z}, {true}); + nd4j::ops::helpers::addBias(ctx, x, y, z, true); + + auto timeEnd = std::chrono::system_clock::now(); + auto outerTime = std::chrono::duration_cast(timeEnd - timeStart).count(); + values.emplace_back(outerTime); + } + + std::sort(values.begin(), values.end()); + + nd4j_printf("Shape: [%lld, %lld, %lld, %lld]; Threads: [%i]; Time: %lld us;\n", shape[0], shape[1], shape[2], shape[3], t, values[values.size() / 2]); + } + } +} +*/ + /* TEST_F(PlaygroundTests, test_s_0) { auto x = NDArrayFactory::create('c', {32, 112, 112, 16}); From 8a0d5e3b97a784d53006599493522564ad4050d2 Mon Sep 17 00:00:00 2001 From: raver119 Date: Sun, 9 Feb 2020 19:48:32 +0300 Subject: [PATCH 2/7] Compilation units (#224) * - TrueBroadcastHelper split into multiple compilation units - legacy gemm.cpp disabled Signed-off-by: raver119 * - IndexReduce int32/int64 split into multiple compilation units Signed-off-by: raver119 * - Reduce3 ops split into multiple compilation units Signed-off-by: raver119 --- .../{helpers => loops}/TrueBroadcastHelper.h | 0 .../cpu/TrueBroadcastHelper.hpp} | 5 ++-- .../TrueBroadcastHelper_0.cpp | 27 ++++++++++++++++++ .../TrueBroadcastHelper_1.cpp | 27 ++++++++++++++++++ .../TrueBroadcastHelper_2.cpp | 27 ++++++++++++++++++ .../TrueBroadcastHelper_3.cpp | 27 ++++++++++++++++++ .../TrueBroadcastHelper_4.cpp | 27 ++++++++++++++++++ .../TrueBroadcastHelper_5.cpp | 27 ++++++++++++++++++ .../TrueBroadcastHelper_6.cpp | 27 ++++++++++++++++++ .../TrueBroadcastHelper_7.cpp | 27 ++++++++++++++++++ .../TrueBroadcastHelper_8.cpp | 27 ++++++++++++++++++ .../TrueBroadcastHelper_9.cpp | 27 ++++++++++++++++++ 
.../TrueBroadcastHelper_bool.cpp | 27 ++++++++++++++++++ .../TrueBroadcastHelper_int.cpp | 27 ++++++++++++++++++ ...duce_int32.cpp => indexreduce_int32_0.cpp} | 2 +- ...duce_int64.cpp => indexreduce_int32_1.cpp} | 2 +- .../compilation_units/indexreduce_int32_2.cpp | 28 +++++++++++++++++++ .../compilation_units/indexreduce_int32_3.cpp | 28 +++++++++++++++++++ .../compilation_units/indexreduce_int32_4.cpp | 28 +++++++++++++++++++ .../compilation_units/indexreduce_int32_5.cpp | 28 +++++++++++++++++++ .../compilation_units/indexreduce_int32_6.cpp | 28 +++++++++++++++++++ .../compilation_units/indexreduce_int32_7.cpp | 28 +++++++++++++++++++ .../compilation_units/indexreduce_int32_8.cpp | 28 +++++++++++++++++++ .../compilation_units/indexreduce_int32_9.cpp | 28 +++++++++++++++++++ .../compilation_units/indexreduce_int64_0.cpp | 28 +++++++++++++++++++ .../compilation_units/indexreduce_int64_1.cpp | 28 +++++++++++++++++++ .../compilation_units/indexreduce_int64_2.cpp | 28 +++++++++++++++++++ .../compilation_units/indexreduce_int64_3.cpp | 28 +++++++++++++++++++ .../compilation_units/indexreduce_int64_4.cpp | 28 +++++++++++++++++++ .../compilation_units/indexreduce_int64_5.cpp | 28 +++++++++++++++++++ .../compilation_units/indexreduce_int64_6.cpp | 28 +++++++++++++++++++ .../compilation_units/indexreduce_int64_7.cpp | 28 +++++++++++++++++++ .../compilation_units/indexreduce_int64_8.cpp | 28 +++++++++++++++++++ .../compilation_units/indexreduce_int64_9.cpp | 28 +++++++++++++++++++ ...uce3_double.cpp => reduce3_bfloat16_0.cpp} | 2 +- ...ce3_float16.cpp => reduce3_bfloat16_1.cpp} | 2 +- ...duce3_float.cpp => reduce3_bfloat16_2.cpp} | 2 +- ...e3_bfloat16.cpp => reduce3_bfloat16_3.cpp} | 2 +- .../compilation_units/reduce3_bfloat16_4.cpp | 28 +++++++++++++++++++ .../compilation_units/reduce3_bfloat16_5.cpp | 28 +++++++++++++++++++ .../compilation_units/reduce3_bfloat16_6.cpp | 28 +++++++++++++++++++ .../compilation_units/reduce3_bfloat16_7.cpp | 28 +++++++++++++++++++ 
.../compilation_units/reduce3_bfloat16_8.cpp | 28 +++++++++++++++++++ .../compilation_units/reduce3_bfloat16_9.cpp | 28 +++++++++++++++++++ .../compilation_units/reduce3_double_0.cpp | 28 +++++++++++++++++++ .../compilation_units/reduce3_double_1.cpp | 28 +++++++++++++++++++ .../compilation_units/reduce3_double_2.cpp | 28 +++++++++++++++++++ .../compilation_units/reduce3_double_3.cpp | 28 +++++++++++++++++++ .../compilation_units/reduce3_double_4.cpp | 28 +++++++++++++++++++ .../compilation_units/reduce3_double_5.cpp | 28 +++++++++++++++++++ .../compilation_units/reduce3_double_6.cpp | 28 +++++++++++++++++++ .../compilation_units/reduce3_double_7.cpp | 28 +++++++++++++++++++ .../compilation_units/reduce3_double_8.cpp | 28 +++++++++++++++++++ .../compilation_units/reduce3_double_9.cpp | 28 +++++++++++++++++++ .../compilation_units/reduce3_float16_0.cpp | 28 +++++++++++++++++++ .../compilation_units/reduce3_float16_1.cpp | 28 +++++++++++++++++++ .../compilation_units/reduce3_float16_2.cpp | 28 +++++++++++++++++++ .../compilation_units/reduce3_float16_3.cpp | 28 +++++++++++++++++++ .../compilation_units/reduce3_float16_4.cpp | 28 +++++++++++++++++++ .../compilation_units/reduce3_float16_5.cpp | 28 +++++++++++++++++++ .../compilation_units/reduce3_float16_6.cpp | 28 +++++++++++++++++++ .../compilation_units/reduce3_float16_7.cpp | 28 +++++++++++++++++++ .../compilation_units/reduce3_float16_8.cpp | 28 +++++++++++++++++++ .../compilation_units/reduce3_float16_9.cpp | 28 +++++++++++++++++++ .../cpu/compilation_units/reduce3_float_0.cpp | 28 +++++++++++++++++++ .../cpu/compilation_units/reduce3_float_1.cpp | 28 +++++++++++++++++++ .../cpu/compilation_units/reduce3_float_2.cpp | 28 +++++++++++++++++++ .../cpu/compilation_units/reduce3_float_3.cpp | 28 +++++++++++++++++++ .../cpu/compilation_units/reduce3_float_4.cpp | 28 +++++++++++++++++++ .../cpu/compilation_units/reduce3_float_5.cpp | 28 +++++++++++++++++++ .../cpu/compilation_units/reduce3_float_6.cpp | 28 
+++++++++++++++++++ .../cpu/compilation_units/reduce3_float_7.cpp | 28 +++++++++++++++++++ .../cpu/compilation_units/reduce3_float_8.cpp | 28 +++++++++++++++++++ .../cpu/compilation_units/reduce3_float_9.cpp | 28 +++++++++++++++++++ .../cuda/TrueBroadcastHelper.cu | 0 libnd4j/include/ops/impl/gemm.cpp | 4 +-- .../layers_tests/DeclarableOpsTests1.cpp | 2 ++ .../tests_cpu/libnd4j_tests/CMakeLists.txt | 2 +- 78 files changed, 1850 insertions(+), 11 deletions(-) rename libnd4j/include/{helpers => loops}/TrueBroadcastHelper.h (100%) rename libnd4j/include/{helpers/cpu/TrueBroadcastHelper.cpp => loops/cpu/TrueBroadcastHelper.hpp} (99%) create mode 100644 libnd4j/include/loops/cpu/compilation_units/TrueBroadcastHelper_0.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/TrueBroadcastHelper_1.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/TrueBroadcastHelper_2.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/TrueBroadcastHelper_3.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/TrueBroadcastHelper_4.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/TrueBroadcastHelper_5.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/TrueBroadcastHelper_6.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/TrueBroadcastHelper_7.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/TrueBroadcastHelper_8.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/TrueBroadcastHelper_9.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/TrueBroadcastHelper_bool.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/TrueBroadcastHelper_int.cpp rename libnd4j/include/loops/cpu/compilation_units/{indexreduce_int32.cpp => indexreduce_int32_0.cpp} (94%) rename libnd4j/include/loops/cpu/compilation_units/{indexreduce_int64.cpp => indexreduce_int32_1.cpp} (94%) create mode 100644 
libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_2.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_3.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_4.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_5.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_6.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_7.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_8.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_9.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_0.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_1.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_2.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_3.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_4.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_5.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_6.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_7.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_8.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_9.cpp rename libnd4j/include/loops/cpu/compilation_units/{reduce3_double.cpp => reduce3_bfloat16_0.cpp} (96%) rename libnd4j/include/loops/cpu/compilation_units/{reduce3_float16.cpp => reduce3_bfloat16_1.cpp} (96%) rename libnd4j/include/loops/cpu/compilation_units/{reduce3_float.cpp => reduce3_bfloat16_2.cpp} (96%) rename libnd4j/include/loops/cpu/compilation_units/{reduce3_bfloat16.cpp => reduce3_bfloat16_3.cpp} (96%) create mode 100644 
libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_4.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_5.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_6.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_7.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_8.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_9.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_double_0.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_double_1.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_double_2.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_double_3.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_double_4.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_double_5.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_double_6.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_double_7.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_double_8.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_double_9.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float16_0.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float16_1.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float16_2.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float16_3.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float16_4.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float16_5.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float16_6.cpp create mode 100644 
libnd4j/include/loops/cpu/compilation_units/reduce3_float16_7.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float16_8.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float16_9.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float_0.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float_1.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float_2.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float_3.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float_4.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float_5.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float_6.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float_7.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float_8.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float_9.cpp rename libnd4j/include/{helpers => loops}/cuda/TrueBroadcastHelper.cu (100%) diff --git a/libnd4j/include/helpers/TrueBroadcastHelper.h b/libnd4j/include/loops/TrueBroadcastHelper.h similarity index 100% rename from libnd4j/include/helpers/TrueBroadcastHelper.h rename to libnd4j/include/loops/TrueBroadcastHelper.h diff --git a/libnd4j/include/helpers/cpu/TrueBroadcastHelper.cpp b/libnd4j/include/loops/cpu/TrueBroadcastHelper.hpp similarity index 99% rename from libnd4j/include/helpers/cpu/TrueBroadcastHelper.cpp rename to libnd4j/include/loops/cpu/TrueBroadcastHelper.hpp index 171d082a7..c79c1f242 100644 --- a/libnd4j/include/helpers/cpu/TrueBroadcastHelper.cpp +++ b/libnd4j/include/loops/cpu/TrueBroadcastHelper.hpp @@ -18,7 +18,7 @@ // @author Yurii Shyrma (iuriish@yahoo.com) // -#include +#include #include #include @@ -210,6 +210,7 @@ void TrueBroadcastIntHelper::exec(const nd4j::broadcast::IntOps opNum, const 
DISPATCH_BY_OPNUM_T(exec, PARAMS(xArr, yArr, zArr), BROADCAST_INT_OPS); } +/* BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT TrueBroadcastHelper, , PAIRWISE_TYPES_0); BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT TrueBroadcastHelper, , PAIRWISE_TYPES_1); BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT TrueBroadcastHelper, , PAIRWISE_TYPES_2); @@ -224,6 +225,6 @@ BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT TrueBroadcastHelper, , PAIRWI BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT TrueBroadcastBoolHelper, , LIBND4J_TYPES, BOOL_TYPES); BUILD_SINGLE_TEMPLATE(template class ND4J_EXPORT TrueBroadcastIntHelper, , INTEGER_TYPES); - +*/ } } \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/TrueBroadcastHelper_0.cpp b/libnd4j/include/loops/cpu/compilation_units/TrueBroadcastHelper_0.cpp new file mode 100644 index 000000000..4685e8f5b --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/TrueBroadcastHelper_0.cpp @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../TrueBroadcastHelper.hpp" + +namespace nd4j { + namespace helpers { + BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT TrueBroadcastHelper, , PAIRWISE_TYPES_9); + } +} diff --git a/libnd4j/include/loops/cpu/compilation_units/TrueBroadcastHelper_1.cpp b/libnd4j/include/loops/cpu/compilation_units/TrueBroadcastHelper_1.cpp new file mode 100644 index 000000000..6c9eb4022 --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/TrueBroadcastHelper_1.cpp @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../TrueBroadcastHelper.hpp" + +namespace nd4j { + namespace helpers { + BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT TrueBroadcastHelper, , PAIRWISE_TYPES_0); + } +} diff --git a/libnd4j/include/loops/cpu/compilation_units/TrueBroadcastHelper_2.cpp b/libnd4j/include/loops/cpu/compilation_units/TrueBroadcastHelper_2.cpp new file mode 100644 index 000000000..4d020fdfa --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/TrueBroadcastHelper_2.cpp @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../TrueBroadcastHelper.hpp" + +namespace nd4j { + namespace helpers { + BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT TrueBroadcastHelper, , PAIRWISE_TYPES_1); + } +} diff --git a/libnd4j/include/loops/cpu/compilation_units/TrueBroadcastHelper_3.cpp b/libnd4j/include/loops/cpu/compilation_units/TrueBroadcastHelper_3.cpp new file mode 100644 index 000000000..327df9a88 --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/TrueBroadcastHelper_3.cpp @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../TrueBroadcastHelper.hpp" + +namespace nd4j { + namespace helpers { + BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT TrueBroadcastHelper, , PAIRWISE_TYPES_2); + } +} diff --git a/libnd4j/include/loops/cpu/compilation_units/TrueBroadcastHelper_4.cpp b/libnd4j/include/loops/cpu/compilation_units/TrueBroadcastHelper_4.cpp new file mode 100644 index 000000000..3fb868278 --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/TrueBroadcastHelper_4.cpp @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../TrueBroadcastHelper.hpp" + +namespace nd4j { + namespace helpers { + BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT TrueBroadcastHelper, , PAIRWISE_TYPES_3); + } +} diff --git a/libnd4j/include/loops/cpu/compilation_units/TrueBroadcastHelper_5.cpp b/libnd4j/include/loops/cpu/compilation_units/TrueBroadcastHelper_5.cpp new file mode 100644 index 000000000..9b281516a --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/TrueBroadcastHelper_5.cpp @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../TrueBroadcastHelper.hpp" + +namespace nd4j { + namespace helpers { + BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT TrueBroadcastHelper, , PAIRWISE_TYPES_4); + } +} diff --git a/libnd4j/include/loops/cpu/compilation_units/TrueBroadcastHelper_6.cpp b/libnd4j/include/loops/cpu/compilation_units/TrueBroadcastHelper_6.cpp new file mode 100644 index 000000000..dbdd65907 --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/TrueBroadcastHelper_6.cpp @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../TrueBroadcastHelper.hpp" + +namespace nd4j { + namespace helpers { + BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT TrueBroadcastHelper, , PAIRWISE_TYPES_5); + } +} diff --git a/libnd4j/include/loops/cpu/compilation_units/TrueBroadcastHelper_7.cpp b/libnd4j/include/loops/cpu/compilation_units/TrueBroadcastHelper_7.cpp new file mode 100644 index 000000000..5b3beaec6 --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/TrueBroadcastHelper_7.cpp @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../TrueBroadcastHelper.hpp" + +namespace nd4j { + namespace helpers { + BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT TrueBroadcastHelper, , PAIRWISE_TYPES_6); + } +} diff --git a/libnd4j/include/loops/cpu/compilation_units/TrueBroadcastHelper_8.cpp b/libnd4j/include/loops/cpu/compilation_units/TrueBroadcastHelper_8.cpp new file mode 100644 index 000000000..51e81d32a --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/TrueBroadcastHelper_8.cpp @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../TrueBroadcastHelper.hpp" + +namespace nd4j { + namespace helpers { + BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT TrueBroadcastHelper, , PAIRWISE_TYPES_7); + } +} diff --git a/libnd4j/include/loops/cpu/compilation_units/TrueBroadcastHelper_9.cpp b/libnd4j/include/loops/cpu/compilation_units/TrueBroadcastHelper_9.cpp new file mode 100644 index 000000000..77f5e5720 --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/TrueBroadcastHelper_9.cpp @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../TrueBroadcastHelper.hpp" + +namespace nd4j { + namespace helpers { + BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT TrueBroadcastHelper, , PAIRWISE_TYPES_8); + } +} diff --git a/libnd4j/include/loops/cpu/compilation_units/TrueBroadcastHelper_bool.cpp b/libnd4j/include/loops/cpu/compilation_units/TrueBroadcastHelper_bool.cpp new file mode 100644 index 000000000..c2e116666 --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/TrueBroadcastHelper_bool.cpp @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../TrueBroadcastHelper.hpp" + +namespace nd4j { + namespace helpers { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT TrueBroadcastBoolHelper, , LIBND4J_TYPES, BOOL_TYPES); + } +} diff --git a/libnd4j/include/loops/cpu/compilation_units/TrueBroadcastHelper_int.cpp b/libnd4j/include/loops/cpu/compilation_units/TrueBroadcastHelper_int.cpp new file mode 100644 index 000000000..04b0fc3e4 --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/TrueBroadcastHelper_int.cpp @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../TrueBroadcastHelper.hpp" + +namespace nd4j { + namespace helpers { + BUILD_SINGLE_TEMPLATE(template class ND4J_EXPORT TrueBroadcastIntHelper, , INTEGER_TYPES); + } +} diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_0.cpp similarity index 94% rename from libnd4j/include/loops/cpu/compilation_units/indexreduce_int32.cpp rename to libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_0.cpp index 7b87535c2..c3f71b5c4 100644 --- a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32.cpp +++ b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_0.cpp @@ -23,6 +23,6 @@ namespace functions { namespace indexreduce { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES, (nd4j::DataType::INT32, int32_t)); + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_0, (nd4j::DataType::INT32, int32_t)); } } \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_1.cpp similarity index 94% rename from libnd4j/include/loops/cpu/compilation_units/indexreduce_int64.cpp rename to libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_1.cpp index d1005699c..2eeb1e37d 100644 --- a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64.cpp +++ b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_1.cpp @@ -23,6 +23,6 @@ namespace functions { namespace indexreduce { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES, (nd4j::DataType::INT64, Nd4jLong)); + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_1, (nd4j::DataType::INT32, int32_t)); } } \ No 
newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_2.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_2.cpp new file mode 100644 index 000000000..da0c55f51 --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_2.cpp @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../indexreduce.hpp" + +namespace functions { + namespace indexreduce { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_2, (nd4j::DataType::INT32, int32_t)); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_3.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_3.cpp new file mode 100644 index 000000000..3c255aa4f --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_3.cpp @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. 
+ * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../indexreduce.hpp" + +namespace functions { + namespace indexreduce { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_3, (nd4j::DataType::INT32, int32_t)); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_4.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_4.cpp new file mode 100644 index 000000000..e04ce70e2 --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_4.cpp @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../indexreduce.hpp" + +namespace functions { + namespace indexreduce { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_4, (nd4j::DataType::INT32, int32_t)); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_5.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_5.cpp new file mode 100644 index 000000000..17dd63e46 --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_5.cpp @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../indexreduce.hpp" + +namespace functions { + namespace indexreduce { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_5, (nd4j::DataType::INT32, int32_t)); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_6.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_6.cpp new file mode 100644 index 000000000..c84fb089a --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_6.cpp @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../indexreduce.hpp" + +namespace functions { + namespace indexreduce { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_6, (nd4j::DataType::INT32, int32_t)); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_7.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_7.cpp new file mode 100644 index 000000000..155c4e3f4 --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_7.cpp @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../indexreduce.hpp" + +namespace functions { + namespace indexreduce { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_7, (nd4j::DataType::INT32, int32_t)); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_8.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_8.cpp new file mode 100644 index 000000000..cdcfd8c51 --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_8.cpp @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../indexreduce.hpp" + +namespace functions { + namespace indexreduce { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_8, (nd4j::DataType::INT32, int32_t)); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_9.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_9.cpp new file mode 100644 index 000000000..683245ef4 --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_9.cpp @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../indexreduce.hpp" + +namespace functions { + namespace indexreduce { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_9, (nd4j::DataType::INT32, int32_t)); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_0.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_0.cpp new file mode 100644 index 000000000..6818dea5f --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_0.cpp @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../indexreduce.hpp" + +namespace functions { + namespace indexreduce { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_0, (nd4j::DataType::INT64, Nd4jLong)); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_1.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_1.cpp new file mode 100644 index 000000000..c15541da5 --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_1.cpp @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../indexreduce.hpp" + +namespace functions { + namespace indexreduce { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_1, (nd4j::DataType::INT64, Nd4jLong)); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_2.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_2.cpp new file mode 100644 index 000000000..a95682991 --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_2.cpp @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../indexreduce.hpp" + +namespace functions { + namespace indexreduce { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_2, (nd4j::DataType::INT64, Nd4jLong)); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_3.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_3.cpp new file mode 100644 index 000000000..22597879c --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_3.cpp @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../indexreduce.hpp" + +namespace functions { + namespace indexreduce { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_3, (nd4j::DataType::INT64, Nd4jLong)); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_4.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_4.cpp new file mode 100644 index 000000000..a5b2afb12 --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_4.cpp @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../indexreduce.hpp" + +namespace functions { + namespace indexreduce { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_4, (nd4j::DataType::INT64, Nd4jLong)); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_5.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_5.cpp new file mode 100644 index 000000000..08797092a --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_5.cpp @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../indexreduce.hpp" + +namespace functions { + namespace indexreduce { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_5, (nd4j::DataType::INT64, Nd4jLong)); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_6.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_6.cpp new file mode 100644 index 000000000..b7ca6d81e --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_6.cpp @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../indexreduce.hpp" + +namespace functions { + namespace indexreduce { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_6, (nd4j::DataType::INT64, Nd4jLong)); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_7.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_7.cpp new file mode 100644 index 000000000..2eb10091a --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_7.cpp @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../indexreduce.hpp" + +namespace functions { + namespace indexreduce { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_7, (nd4j::DataType::INT64, Nd4jLong)); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_8.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_8.cpp new file mode 100644 index 000000000..f2a04cc8f --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_8.cpp @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../indexreduce.hpp" + +namespace functions { + namespace indexreduce { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_8, (nd4j::DataType::INT64, Nd4jLong)); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_9.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_9.cpp new file mode 100644 index 000000000..062db6187 --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_9.cpp @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../indexreduce.hpp" + +namespace functions { + namespace indexreduce { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_9, (nd4j::DataType::INT64, Nd4jLong)); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_double.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_0.cpp similarity index 96% rename from libnd4j/include/loops/cpu/compilation_units/reduce3_double.cpp rename to libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_0.cpp index 10e78e914..19483c1df 100644 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_double.cpp +++ b/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_0.cpp @@ -23,6 +23,6 @@ namespace functions { namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES, FLOAT_TYPES_2); + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_0, FLOAT_TYPES_3); } } \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float16.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_1.cpp similarity index 96% rename from libnd4j/include/loops/cpu/compilation_units/reduce3_float16.cpp rename to libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_1.cpp index 8a738acf9..88225bd85 100644 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_float16.cpp +++ b/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_1.cpp @@ -23,6 +23,6 @@ namespace functions { namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES, FLOAT_TYPES_0); + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_1, FLOAT_TYPES_3); } } \ No newline at end of file diff --git 
a/libnd4j/include/loops/cpu/compilation_units/reduce3_float.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_2.cpp similarity index 96% rename from libnd4j/include/loops/cpu/compilation_units/reduce3_float.cpp rename to libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_2.cpp index 5362352b6..7bed85c5d 100644 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_float.cpp +++ b/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_2.cpp @@ -23,6 +23,6 @@ namespace functions { namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES, FLOAT_TYPES_1); + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_2, FLOAT_TYPES_3); } } \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_3.cpp similarity index 96% rename from libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16.cpp rename to libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_3.cpp index 8df61ad29..87042d342 100644 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16.cpp +++ b/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_3.cpp @@ -23,6 +23,6 @@ namespace functions { namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES, FLOAT_TYPES_3); + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_3, FLOAT_TYPES_3); } } \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_4.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_4.cpp new file mode 100644 index 000000000..0802e11f4 --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_4.cpp @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. 
+ * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../reduce3.hpp" + +namespace functions { + namespace reduce3 { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_4, FLOAT_TYPES_3); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_5.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_5.cpp new file mode 100644 index 000000000..87ec2d3f8 --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_5.cpp @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../reduce3.hpp" + +namespace functions { + namespace reduce3 { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_5, FLOAT_TYPES_3); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_6.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_6.cpp new file mode 100644 index 000000000..10dc7d69b --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_6.cpp @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../reduce3.hpp" + +namespace functions { + namespace reduce3 { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_6, FLOAT_TYPES_3); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_7.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_7.cpp new file mode 100644 index 000000000..28ba56376 --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_7.cpp @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../reduce3.hpp" + +namespace functions { + namespace reduce3 { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_7, FLOAT_TYPES_3); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_8.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_8.cpp new file mode 100644 index 000000000..8087f6a07 --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_8.cpp @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../reduce3.hpp" + +namespace functions { + namespace reduce3 { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_8, FLOAT_TYPES_3); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_9.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_9.cpp new file mode 100644 index 000000000..4a5186cf0 --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_9.cpp @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../reduce3.hpp" + +namespace functions { + namespace reduce3 { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_9, FLOAT_TYPES_3); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_double_0.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_double_0.cpp new file mode 100644 index 000000000..34172b4b3 --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/reduce3_double_0.cpp @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../reduce3.hpp" + +namespace functions { + namespace reduce3 { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_0, FLOAT_TYPES_2); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_double_1.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_double_1.cpp new file mode 100644 index 000000000..c2f7c7e9c --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/reduce3_double_1.cpp @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../reduce3.hpp" + +namespace functions { + namespace reduce3 { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_1, FLOAT_TYPES_2); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_double_2.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_double_2.cpp new file mode 100644 index 000000000..41c1dd679 --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/reduce3_double_2.cpp @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../reduce3.hpp" + +namespace functions { + namespace reduce3 { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_2, FLOAT_TYPES_2); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_double_3.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_double_3.cpp new file mode 100644 index 000000000..a44085232 --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/reduce3_double_3.cpp @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../reduce3.hpp" + +namespace functions { + namespace reduce3 { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_3, FLOAT_TYPES_2); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_double_4.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_double_4.cpp new file mode 100644 index 000000000..d346d175b --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/reduce3_double_4.cpp @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../reduce3.hpp" + +namespace functions { + namespace reduce3 { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_4, FLOAT_TYPES_2); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_double_5.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_double_5.cpp new file mode 100644 index 000000000..86cf48ff7 --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/reduce3_double_5.cpp @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../reduce3.hpp" + +namespace functions { + namespace reduce3 { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_5, FLOAT_TYPES_2); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_double_6.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_double_6.cpp new file mode 100644 index 000000000..92f7ac39e --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/reduce3_double_6.cpp @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../reduce3.hpp" + +namespace functions { + namespace reduce3 { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_6, FLOAT_TYPES_2); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_double_7.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_double_7.cpp new file mode 100644 index 000000000..eb216f89f --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/reduce3_double_7.cpp @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../reduce3.hpp" + +namespace functions { + namespace reduce3 { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_7, FLOAT_TYPES_2); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_double_8.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_double_8.cpp new file mode 100644 index 000000000..d1e9f8c96 --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/reduce3_double_8.cpp @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../reduce3.hpp" + +namespace functions { + namespace reduce3 { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_8, FLOAT_TYPES_2); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_double_9.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_double_9.cpp new file mode 100644 index 000000000..fa00bde19 --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/reduce3_double_9.cpp @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../reduce3.hpp" + +namespace functions { + namespace reduce3 { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_9, FLOAT_TYPES_2); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_0.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_0.cpp new file mode 100644 index 000000000..cb212b06b --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_0.cpp @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../reduce3.hpp" + +namespace functions { + namespace reduce3 { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_0, FLOAT_TYPES_0); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_1.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_1.cpp new file mode 100644 index 000000000..4a7fdee8a --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_1.cpp @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../reduce3.hpp" + +namespace functions { + namespace reduce3 { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_1, FLOAT_TYPES_0); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_2.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_2.cpp new file mode 100644 index 000000000..aaafe1bae --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_2.cpp @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../reduce3.hpp" + +namespace functions { + namespace reduce3 { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_2, FLOAT_TYPES_0); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_3.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_3.cpp new file mode 100644 index 000000000..9b8cf0c6a --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_3.cpp @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../reduce3.hpp" + +namespace functions { + namespace reduce3 { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_3, FLOAT_TYPES_0); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_4.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_4.cpp new file mode 100644 index 000000000..4d02ffe53 --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_4.cpp @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../reduce3.hpp" + +namespace functions { + namespace reduce3 { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_4, FLOAT_TYPES_0); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_5.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_5.cpp new file mode 100644 index 000000000..88ce3e5e2 --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_5.cpp @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../reduce3.hpp" + +namespace functions { + namespace reduce3 { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_5, FLOAT_TYPES_0); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_6.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_6.cpp new file mode 100644 index 000000000..26d4df1dd --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_6.cpp @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../reduce3.hpp" + +namespace functions { + namespace reduce3 { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_6, FLOAT_TYPES_0); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_7.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_7.cpp new file mode 100644 index 000000000..3b04f47aa --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_7.cpp @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../reduce3.hpp" + +namespace functions { + namespace reduce3 { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_7, FLOAT_TYPES_0); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_8.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_8.cpp new file mode 100644 index 000000000..c87090229 --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_8.cpp @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../reduce3.hpp" + +namespace functions { + namespace reduce3 { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_8, FLOAT_TYPES_0); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_9.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_9.cpp new file mode 100644 index 000000000..d5acb3935 --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_9.cpp @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../reduce3.hpp" + +namespace functions { + namespace reduce3 { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_9, FLOAT_TYPES_0); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float_0.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float_0.cpp new file mode 100644 index 000000000..e7e1fab61 --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/reduce3_float_0.cpp @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../reduce3.hpp" + +namespace functions { + namespace reduce3 { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_0, FLOAT_TYPES_1); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float_1.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float_1.cpp new file mode 100644 index 000000000..98ccf8b35 --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/reduce3_float_1.cpp @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../reduce3.hpp" + +namespace functions { + namespace reduce3 { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_1, FLOAT_TYPES_1); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float_2.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float_2.cpp new file mode 100644 index 000000000..6782d74ed --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/reduce3_float_2.cpp @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../reduce3.hpp" + +namespace functions { + namespace reduce3 { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_2, FLOAT_TYPES_1); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float_3.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float_3.cpp new file mode 100644 index 000000000..915b0ac0e --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/reduce3_float_3.cpp @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../reduce3.hpp" + +namespace functions { + namespace reduce3 { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_3, FLOAT_TYPES_1); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float_4.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float_4.cpp new file mode 100644 index 000000000..d34e61181 --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/reduce3_float_4.cpp @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../reduce3.hpp" + +namespace functions { + namespace reduce3 { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_4, FLOAT_TYPES_1); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float_5.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float_5.cpp new file mode 100644 index 000000000..89a8f164f --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/reduce3_float_5.cpp @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../reduce3.hpp" + +namespace functions { + namespace reduce3 { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_5, FLOAT_TYPES_1); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float_6.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float_6.cpp new file mode 100644 index 000000000..70e482b8b --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/reduce3_float_6.cpp @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../reduce3.hpp" + +namespace functions { + namespace reduce3 { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_6, FLOAT_TYPES_1); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float_7.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float_7.cpp new file mode 100644 index 000000000..88663cd7d --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/reduce3_float_7.cpp @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../reduce3.hpp" + +namespace functions { + namespace reduce3 { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_7, FLOAT_TYPES_1); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float_8.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float_8.cpp new file mode 100644 index 000000000..d5399a4d8 --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/reduce3_float_8.cpp @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../reduce3.hpp" + +namespace functions { + namespace reduce3 { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_8, FLOAT_TYPES_1); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float_9.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float_9.cpp new file mode 100644 index 000000000..e27e7ab12 --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/reduce3_float_9.cpp @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../reduce3.hpp" + +namespace functions { + namespace reduce3 { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_9, FLOAT_TYPES_1); + } +} \ No newline at end of file diff --git a/libnd4j/include/helpers/cuda/TrueBroadcastHelper.cu b/libnd4j/include/loops/cuda/TrueBroadcastHelper.cu similarity index 100% rename from libnd4j/include/helpers/cuda/TrueBroadcastHelper.cu rename to libnd4j/include/loops/cuda/TrueBroadcastHelper.cu diff --git a/libnd4j/include/ops/impl/gemm.cpp b/libnd4j/include/ops/impl/gemm.cpp index a81c12818..2779bdadf 100644 --- a/libnd4j/include/ops/impl/gemm.cpp +++ b/libnd4j/include/ops/impl/gemm.cpp @@ -144,7 +144,7 @@ namespace nd4j { delete[] aT; } - BUILD_TRIPLE_TEMPLATE(template class GEMV, , LIBND4J_TYPES, FLOAT_TYPES, FLOAT_TYPES); - BUILD_TRIPLE_TEMPLATE(template class GEMM, , LIBND4J_TYPES, FLOAT_TYPES, FLOAT_TYPES); + //BUILD_TRIPLE_TEMPLATE(template class GEMV, , LIBND4J_TYPES, FLOAT_TYPES, FLOAT_TYPES); + //BUILD_TRIPLE_TEMPLATE(template class GEMM, , LIBND4J_TYPES, FLOAT_TYPES, FLOAT_TYPES); } } diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests1.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests1.cpp index 795a7da4d..9df949267 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests1.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests1.cpp @@ -1581,6 +1581,7 @@ TEST_F(DeclarableOpsTests1, TestRegistrator1) { #ifndef __CUDABLAS__ ////////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests1, TestGemv1) { + /* auto xBuffer = new float[15]{1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, 13.f, 14.f, 15.f}; auto xShape = new Nd4jLong[8] {2, 5, 3, 3, 1, 0, 1, 99}; ArrayOptions::setDataType(xShape, nd4j::DataType::FLOAT32); @@ -1602,6 +1603,7 @@ 
TEST_F(DeclarableOpsTests1, TestGemv1) { ASSERT_TRUE(z->equalsTo(exp)); delete []xBuffer; delete []xShape; delete x; delete []yBuffer; delete []yShape; delete y; delete z; delete []expBuffer; delete exp; + */ } #endif diff --git a/libnd4j/tests_cpu/libnd4j_tests/CMakeLists.txt b/libnd4j/tests_cpu/libnd4j_tests/CMakeLists.txt index fbba329e3..a852a0c4c 100644 --- a/libnd4j/tests_cpu/libnd4j_tests/CMakeLists.txt +++ b/libnd4j/tests_cpu/libnd4j_tests/CMakeLists.txt @@ -273,7 +273,7 @@ add_executable(runtests ${LOOPS_SOURCES} ../../blas/cpu/NativeOps.cpp ../../blas ../../include/cnpy/cnpy.cpp ../../include/nd4jmemset.h ../../include/nd4jmalloc.h ../../blas/Environment.cpp ../../blas/Environment.h ${EXEC_SOURCES} ${HELPERS_SOURCES} ${ARRAY_SOURCES} ${TYPES_SOURCES} ${MEMORY_SOURCES} ${GRAPH_SOURCES} ${CUSTOMOPS_SOURCES} ${EXCEPTIONS_SOURCES} ${INDEXING_SOURCES} ${CUSTOMOPS_PLATFORM_SOURCES} ${CUSTOMOPS_GENERIC_SOURCES} - ${OPS_SOURCES} ${TEST_SOURCES} ${PERF_SOURCES}) + ${OPS_SOURCES} ${TEST_SOURCES} ${PERF_SOURCES} ../../include/loops/cpu/compilation_units/TrueBroadcastHelper_1.cpp) target_link_libraries(runtests gtest ${MKLDNN} gtest_main ${BLAS_LIBRARIES}) From ebeeb8bc4893bff1ac3c21065be4a8cd404c7252 Mon Sep 17 00:00:00 2001 From: Eduardo Gonzalez Date: Mon, 10 Feb 2020 12:33:04 +0900 Subject: [PATCH 3/7] Fix BERT word piece tokenizer stack overflow error (#205) * Change the regular expression for the Bert tokenizer. The previous regular expression causes StackOverflowErrors if given a document with a large amount of whitespace. I believe that the one I've provided is an equivalent. * Add test for new BertWordPieceTokenizer RegEx. This test should cause a StackOverflowError with the previous version. * Fix assert off by one. 
--- .../tokenizer/BertWordPieceTokenizer.java | 2 +- .../BertWordPieceTokenizerTests.java | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/text/tokenization/tokenizer/BertWordPieceTokenizer.java b/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/text/tokenization/tokenizer/BertWordPieceTokenizer.java index 0f9c3ec93..817f8c563 100644 --- a/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/text/tokenization/tokenizer/BertWordPieceTokenizer.java +++ b/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/text/tokenization/tokenizer/BertWordPieceTokenizer.java @@ -29,7 +29,7 @@ import java.util.regex.Pattern; */ @Slf4j public class BertWordPieceTokenizer implements Tokenizer { - public static final Pattern splitPattern = Pattern.compile("(\\p{javaWhitespace}|((?<=\\p{Punct})|(?=\\p{Punct})))+"); + public static final Pattern splitPattern = Pattern.compile("\\p{javaWhitespace}+|((?<=\\p{Punct})+|(?=\\p{Punct}+))"); private final List tokens; private final TokenPreProcess preTokenizePreProcessor; diff --git a/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/test/java/org/deeplearning4j/text/tokenization/tokenizer/BertWordPieceTokenizerTests.java b/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/test/java/org/deeplearning4j/text/tokenization/tokenizer/BertWordPieceTokenizerTests.java index 80570ae54..a225230af 100644 --- a/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/test/java/org/deeplearning4j/text/tokenization/tokenizer/BertWordPieceTokenizerTests.java +++ b/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/test/java/org/deeplearning4j/text/tokenization/tokenizer/BertWordPieceTokenizerTests.java @@ -220,4 +220,23 @@ public class 
BertWordPieceTokenizerTests extends BaseDL4JTest { String exp = s.toLowerCase(); assertEquals(exp, s2); } + + @Test + public void testTokenizerHandlesLargeContiguousWhitespace() throws Exception { + StringBuilder sb = new StringBuilder(); + sb.append("apple."); + for (int i = 0; i < 10000; i++) { + sb.append(" "); + } + sb.append(".pen. .pineapple"); + + File f = Resources.asFile("deeplearning4j-nlp/bert/uncased_L-12_H-768_A-12/vocab.txt"); + BertWordPieceTokenizerFactory t = new BertWordPieceTokenizerFactory(f, true, true, StandardCharsets.UTF_8); + + Tokenizer tokenizer = t.create(sb.toString()); + List list = tokenizer.getTokens(); + System.out.println(list); + + assertEquals(8, list.size()); + } } From 237c1371668f2fbaaf2d54002fa3e22ed5f19a02 Mon Sep 17 00:00:00 2001 From: raver119 Date: Mon, 10 Feb 2020 10:57:18 +0300 Subject: [PATCH 4/7] few more smaller compilation units (#226) Signed-off-by: raver119 --- ...32.cpp => IndexReductionLoops_int32_0.cpp} | 2 +- ...64.cpp => IndexReductionLoops_int32_1.cpp} | 2 +- .../cpu/loops/IndexReductionLoops_int32_2.cpp | 24 +++++++++++++++++ .../cpu/loops/IndexReductionLoops_int32_3.cpp | 24 +++++++++++++++++ .../cpu/loops/IndexReductionLoops_int32_4.cpp | 24 +++++++++++++++++ .../cpu/loops/IndexReductionLoops_int32_5.cpp | 24 +++++++++++++++++ .../cpu/loops/IndexReductionLoops_int32_6.cpp | 24 +++++++++++++++++ .../cpu/loops/IndexReductionLoops_int32_7.cpp | 24 +++++++++++++++++ .../cpu/loops/IndexReductionLoops_int32_8.cpp | 24 +++++++++++++++++ .../cpu/loops/IndexReductionLoops_int32_9.cpp | 24 +++++++++++++++++ .../cpu/loops/IndexReductionLoops_int64_0.cpp | 24 +++++++++++++++++ .../cpu/loops/IndexReductionLoops_int64_1.cpp | 24 +++++++++++++++++ .../cpu/loops/IndexReductionLoops_int64_2.cpp | 24 +++++++++++++++++ .../cpu/loops/IndexReductionLoops_int64_3.cpp | 24 +++++++++++++++++ .../cpu/loops/IndexReductionLoops_int64_4.cpp | 24 +++++++++++++++++ .../cpu/loops/IndexReductionLoops_int64_5.cpp | 24 +++++++++++++++++ 
.../cpu/loops/IndexReductionLoops_int64_6.cpp | 24 +++++++++++++++++ .../cpu/loops/IndexReductionLoops_int64_7.cpp | 24 +++++++++++++++++ .../cpu/loops/IndexReductionLoops_int64_8.cpp | 24 +++++++++++++++++ .../cpu/loops/IndexReductionLoops_int64_9.cpp | 24 +++++++++++++++++ .../loops/cpu/compilation_units/random_0.cpp | 27 +++++++++++++++++++ .../loops/cpu/compilation_units/random_1.cpp | 27 +++++++++++++++++++ .../loops/cpu/compilation_units/random_2.cpp | 27 +++++++++++++++++++ .../loops/cpu/compilation_units/random_3.cpp | 27 +++++++++++++++++++ .../loops/cpu/{random.cpp => random.hpp} | 2 +- .../compilation_units/specials_double_0.cpp | 26 ++++++++++++++++++ .../compilation_units/specials_double_1.cpp | 26 ++++++++++++++++++ .../compilation_units/specials_double_2.cpp | 26 ++++++++++++++++++ .../compilation_units/specials_double_3.cpp | 26 ++++++++++++++++++ .../compilation_units/specials_double_4.cpp | 26 ++++++++++++++++++ .../compilation_units/specials_double_5.cpp | 26 ++++++++++++++++++ .../compilation_units/specials_double_6.cpp | 26 ++++++++++++++++++ .../compilation_units/specials_double_7.cpp | 26 ++++++++++++++++++ .../compilation_units/specials_double_8.cpp | 26 ++++++++++++++++++ .../compilation_units/specials_double_9.cpp | 26 ++++++++++++++++++ .../compilation_units/specials_single_0.cpp | 26 ++++++++++++++++++ .../compilation_units/specials_single_1.cpp | 26 ++++++++++++++++++ .../compilation_units/specials_single_2.cpp | 26 ++++++++++++++++++ .../compilation_units/specials_single_3.cpp | 26 ++++++++++++++++++ .../compilation_units/specials_single_4.cpp | 26 ++++++++++++++++++ .../compilation_units/specials_single_5.cpp | 26 ++++++++++++++++++ .../compilation_units/specials_single_6.cpp | 26 ++++++++++++++++++ .../compilation_units/specials_single_7.cpp | 26 ++++++++++++++++++ .../compilation_units/specials_single_8.cpp | 26 ++++++++++++++++++ .../compilation_units/specials_single_9.cpp | 26 ++++++++++++++++++ .../ops/impl/{specials.cpp => 
specials.hpp} | 4 +-- 46 files changed, 1065 insertions(+), 5 deletions(-) rename libnd4j/include/helpers/cpu/loops/{IndexReductionLoops_int32.cpp => IndexReductionLoops_int32_0.cpp} (92%) rename libnd4j/include/helpers/cpu/loops/{IndexReductionLoops_int64.cpp => IndexReductionLoops_int32_1.cpp} (92%) create mode 100644 libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_2.cpp create mode 100644 libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_3.cpp create mode 100644 libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_4.cpp create mode 100644 libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_5.cpp create mode 100644 libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_6.cpp create mode 100644 libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_7.cpp create mode 100644 libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_8.cpp create mode 100644 libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_9.cpp create mode 100644 libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_0.cpp create mode 100644 libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_1.cpp create mode 100644 libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_2.cpp create mode 100644 libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_3.cpp create mode 100644 libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_4.cpp create mode 100644 libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_5.cpp create mode 100644 libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_6.cpp create mode 100644 libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_7.cpp create mode 100644 libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_8.cpp create mode 100644 libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_9.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/random_0.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/random_1.cpp create mode 
100644 libnd4j/include/loops/cpu/compilation_units/random_2.cpp create mode 100644 libnd4j/include/loops/cpu/compilation_units/random_3.cpp rename libnd4j/include/loops/cpu/{random.cpp => random.hpp} (99%) create mode 100644 libnd4j/include/ops/impl/compilation_units/specials_double_0.cpp create mode 100644 libnd4j/include/ops/impl/compilation_units/specials_double_1.cpp create mode 100644 libnd4j/include/ops/impl/compilation_units/specials_double_2.cpp create mode 100644 libnd4j/include/ops/impl/compilation_units/specials_double_3.cpp create mode 100644 libnd4j/include/ops/impl/compilation_units/specials_double_4.cpp create mode 100644 libnd4j/include/ops/impl/compilation_units/specials_double_5.cpp create mode 100644 libnd4j/include/ops/impl/compilation_units/specials_double_6.cpp create mode 100644 libnd4j/include/ops/impl/compilation_units/specials_double_7.cpp create mode 100644 libnd4j/include/ops/impl/compilation_units/specials_double_8.cpp create mode 100644 libnd4j/include/ops/impl/compilation_units/specials_double_9.cpp create mode 100644 libnd4j/include/ops/impl/compilation_units/specials_single_0.cpp create mode 100644 libnd4j/include/ops/impl/compilation_units/specials_single_1.cpp create mode 100644 libnd4j/include/ops/impl/compilation_units/specials_single_2.cpp create mode 100644 libnd4j/include/ops/impl/compilation_units/specials_single_3.cpp create mode 100644 libnd4j/include/ops/impl/compilation_units/specials_single_4.cpp create mode 100644 libnd4j/include/ops/impl/compilation_units/specials_single_5.cpp create mode 100644 libnd4j/include/ops/impl/compilation_units/specials_single_6.cpp create mode 100644 libnd4j/include/ops/impl/compilation_units/specials_single_7.cpp create mode 100644 libnd4j/include/ops/impl/compilation_units/specials_single_8.cpp create mode 100644 libnd4j/include/ops/impl/compilation_units/specials_single_9.cpp rename libnd4j/include/ops/impl/{specials.cpp => specials.hpp} (99%) diff --git 
a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_0.cpp similarity index 92% rename from libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32.cpp rename to libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_0.cpp index 8a4b3cd7d..a694e42ca 100644 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32.cpp +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_0.cpp @@ -21,4 +21,4 @@ #include "./IndexReductionLoops.hpp" -BUILD_DOUBLE_TEMPLATE(template void nd4j::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES, (nd4j::DataType::INT32, int32_t)); \ No newline at end of file +BUILD_DOUBLE_TEMPLATE(template void nd4j::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_0, (nd4j::DataType::INT32, int32_t)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_1.cpp similarity index 92% rename from libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64.cpp rename to libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_1.cpp index 4fcb63ebf..236428c4a 100644 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64.cpp +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_1.cpp @@ -21,4 +21,4 @@ #include "./IndexReductionLoops.hpp" -BUILD_DOUBLE_TEMPLATE(template void nd4j::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES, (nd4j::DataType::INT64, Nd4jLong)); \ No newline at 
end of file +BUILD_DOUBLE_TEMPLATE(template void nd4j::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_1, (nd4j::DataType::INT32, int32_t)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_2.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_2.cpp new file mode 100644 index 000000000..173744c97 --- /dev/null +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_2.cpp @@ -0,0 +1,24 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author Yurii Shyrma (iuriish@yahoo.com) +// + +#include "./IndexReductionLoops.hpp" + +BUILD_DOUBLE_TEMPLATE(template void nd4j::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_2, (nd4j::DataType::INT32, int32_t)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_3.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_3.cpp new file mode 100644 index 000000000..fbb2fde50 --- /dev/null +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_3.cpp @@ -0,0 +1,24 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author Yurii Shyrma (iuriish@yahoo.com) +// + +#include "./IndexReductionLoops.hpp" + +BUILD_DOUBLE_TEMPLATE(template void nd4j::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_3, (nd4j::DataType::INT32, int32_t)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_4.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_4.cpp new file mode 100644 index 000000000..1fd8196e7 --- /dev/null +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_4.cpp @@ -0,0 +1,24 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author Yurii Shyrma (iuriish@yahoo.com) +// + +#include "./IndexReductionLoops.hpp" + +BUILD_DOUBLE_TEMPLATE(template void nd4j::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_4, (nd4j::DataType::INT32, int32_t)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_5.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_5.cpp new file mode 100644 index 000000000..1378c661c --- /dev/null +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_5.cpp @@ -0,0 +1,24 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author Yurii Shyrma (iuriish@yahoo.com) +// + +#include "./IndexReductionLoops.hpp" + +BUILD_DOUBLE_TEMPLATE(template void nd4j::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_5, (nd4j::DataType::INT32, int32_t)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_6.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_6.cpp new file mode 100644 index 000000000..5b338bb09 --- /dev/null +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_6.cpp @@ -0,0 +1,24 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author Yurii Shyrma (iuriish@yahoo.com) +// + +#include "./IndexReductionLoops.hpp" + +BUILD_DOUBLE_TEMPLATE(template void nd4j::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_6, (nd4j::DataType::INT32, int32_t)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_7.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_7.cpp new file mode 100644 index 000000000..7dfe7d939 --- /dev/null +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_7.cpp @@ -0,0 +1,24 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author Yurii Shyrma (iuriish@yahoo.com) +// + +#include "./IndexReductionLoops.hpp" + +BUILD_DOUBLE_TEMPLATE(template void nd4j::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_7, (nd4j::DataType::INT32, int32_t)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_8.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_8.cpp new file mode 100644 index 000000000..14e91685d --- /dev/null +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_8.cpp @@ -0,0 +1,24 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author Yurii Shyrma (iuriish@yahoo.com) +// + +#include "./IndexReductionLoops.hpp" + +BUILD_DOUBLE_TEMPLATE(template void nd4j::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_8, (nd4j::DataType::INT32, int32_t)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_9.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_9.cpp new file mode 100644 index 000000000..677802476 --- /dev/null +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_9.cpp @@ -0,0 +1,24 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author Yurii Shyrma (iuriish@yahoo.com) +// + +#include "./IndexReductionLoops.hpp" + +BUILD_DOUBLE_TEMPLATE(template void nd4j::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_9, (nd4j::DataType::INT32, int32_t)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_0.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_0.cpp new file mode 100644 index 000000000..cb295f479 --- /dev/null +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_0.cpp @@ -0,0 +1,24 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author Yurii Shyrma (iuriish@yahoo.com) +// + +#include "./IndexReductionLoops.hpp" + +BUILD_DOUBLE_TEMPLATE(template void nd4j::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_0, (nd4j::DataType::INT64, Nd4jLong)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_1.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_1.cpp new file mode 100644 index 000000000..8b5914d84 --- /dev/null +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_1.cpp @@ -0,0 +1,24 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author Yurii Shyrma (iuriish@yahoo.com) +// + +#include "./IndexReductionLoops.hpp" + +BUILD_DOUBLE_TEMPLATE(template void nd4j::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_1, (nd4j::DataType::INT64, Nd4jLong)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_2.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_2.cpp new file mode 100644 index 000000000..97d8f5906 --- /dev/null +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_2.cpp @@ -0,0 +1,24 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author Yurii Shyrma (iuriish@yahoo.com) +// + +#include "./IndexReductionLoops.hpp" + +BUILD_DOUBLE_TEMPLATE(template void nd4j::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_2, (nd4j::DataType::INT64, Nd4jLong)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_3.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_3.cpp new file mode 100644 index 000000000..6463c8584 --- /dev/null +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_3.cpp @@ -0,0 +1,24 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author Yurii Shyrma (iuriish@yahoo.com) +// + +#include "./IndexReductionLoops.hpp" + +BUILD_DOUBLE_TEMPLATE(template void nd4j::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_3, (nd4j::DataType::INT64, Nd4jLong)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_4.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_4.cpp new file mode 100644 index 000000000..365e2a70f --- /dev/null +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_4.cpp @@ -0,0 +1,24 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author Yurii Shyrma (iuriish@yahoo.com) +// + +#include "./IndexReductionLoops.hpp" + +BUILD_DOUBLE_TEMPLATE(template void nd4j::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_4, (nd4j::DataType::INT64, Nd4jLong)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_5.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_5.cpp new file mode 100644 index 000000000..77e6bf5be --- /dev/null +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_5.cpp @@ -0,0 +1,24 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author Yurii Shyrma (iuriish@yahoo.com) +// + +#include "./IndexReductionLoops.hpp" + +BUILD_DOUBLE_TEMPLATE(template void nd4j::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_5, (nd4j::DataType::INT64, Nd4jLong)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_6.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_6.cpp new file mode 100644 index 000000000..f355d655e --- /dev/null +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_6.cpp @@ -0,0 +1,24 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author Yurii Shyrma (iuriish@yahoo.com) +// + +#include "./IndexReductionLoops.hpp" + +BUILD_DOUBLE_TEMPLATE(template void nd4j::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_6, (nd4j::DataType::INT64, Nd4jLong)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_7.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_7.cpp new file mode 100644 index 000000000..542587b18 --- /dev/null +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_7.cpp @@ -0,0 +1,24 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author Yurii Shyrma (iuriish@yahoo.com) +// + +#include "./IndexReductionLoops.hpp" + +BUILD_DOUBLE_TEMPLATE(template void nd4j::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_7, (nd4j::DataType::INT64, Nd4jLong)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_8.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_8.cpp new file mode 100644 index 000000000..bccc40219 --- /dev/null +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_8.cpp @@ -0,0 +1,24 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author Yurii Shyrma (iuriish@yahoo.com) +// + +#include "./IndexReductionLoops.hpp" + +BUILD_DOUBLE_TEMPLATE(template void nd4j::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_8, (nd4j::DataType::INT64, Nd4jLong)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_9.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_9.cpp new file mode 100644 index 000000000..85475046c --- /dev/null +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_9.cpp @@ -0,0 +1,24 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author Yurii Shyrma (iuriish@yahoo.com) +// + +#include "./IndexReductionLoops.hpp" + +BUILD_DOUBLE_TEMPLATE(template void nd4j::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_9, (nd4j::DataType::INT64, Nd4jLong)); \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/random_0.cpp b/libnd4j/include/loops/cpu/compilation_units/random_0.cpp new file mode 100644 index 000000000..6424ccb6e --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/random_0.cpp @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../random.hpp" + +namespace functions { + namespace random { + BUILD_SINGLE_TEMPLATE(template class ND4J_EXPORT RandomFunction, , FLOAT_TYPES_0); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/random_1.cpp b/libnd4j/include/loops/cpu/compilation_units/random_1.cpp new file mode 100644 index 000000000..316d55bf6 --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/random_1.cpp @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../random.hpp" + +namespace functions { + namespace random { + BUILD_SINGLE_TEMPLATE(template class ND4J_EXPORT RandomFunction, , FLOAT_TYPES_1); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/random_2.cpp b/libnd4j/include/loops/cpu/compilation_units/random_2.cpp new file mode 100644 index 000000000..90d080b63 --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/random_2.cpp @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../random.hpp" + +namespace functions { + namespace random { + BUILD_SINGLE_TEMPLATE(template class ND4J_EXPORT RandomFunction, , FLOAT_TYPES_2); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/random_3.cpp b/libnd4j/include/loops/cpu/compilation_units/random_3.cpp new file mode 100644 index 000000000..97e5211e8 --- /dev/null +++ b/libnd4j/include/loops/cpu/compilation_units/random_3.cpp @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../random.hpp" + +namespace functions { + namespace random { + BUILD_SINGLE_TEMPLATE(template class ND4J_EXPORT RandomFunction, , FLOAT_TYPES_3); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/random.cpp b/libnd4j/include/loops/cpu/random.hpp similarity index 99% rename from libnd4j/include/loops/cpu/random.cpp rename to libnd4j/include/loops/cpu/random.hpp index 6fccc6376..35674de36 100644 --- a/libnd4j/include/loops/cpu/random.cpp +++ b/libnd4j/include/loops/cpu/random.hpp @@ -281,6 +281,6 @@ namespace functions { } - BUILD_SINGLE_TEMPLATE(template class ND4J_EXPORT RandomFunction, , FLOAT_TYPES); + //BUILD_SINGLE_TEMPLATE(template class ND4J_EXPORT RandomFunction, , FLOAT_TYPES); } } \ No newline at end of file diff --git a/libnd4j/include/ops/impl/compilation_units/specials_double_0.cpp b/libnd4j/include/ops/impl/compilation_units/specials_double_0.cpp new file mode 100644 index 000000000..1a35ecd47 --- /dev/null +++ b/libnd4j/include/ops/impl/compilation_units/specials_double_0.cpp @@ -0,0 +1,26 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../specials.hpp" + +namespace nd4j { + BUILD_DOUBLE_TEMPLATE(template class DoubleMethods, , LIBND4J_TYPES, LIBND4J_TYPES_0); +} \ No newline at end of file diff --git a/libnd4j/include/ops/impl/compilation_units/specials_double_1.cpp b/libnd4j/include/ops/impl/compilation_units/specials_double_1.cpp new file mode 100644 index 000000000..be8edad04 --- /dev/null +++ b/libnd4j/include/ops/impl/compilation_units/specials_double_1.cpp @@ -0,0 +1,26 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../specials.hpp" + +namespace nd4j { + BUILD_DOUBLE_TEMPLATE(template class DoubleMethods, , LIBND4J_TYPES, LIBND4J_TYPES_1); +} \ No newline at end of file diff --git a/libnd4j/include/ops/impl/compilation_units/specials_double_2.cpp b/libnd4j/include/ops/impl/compilation_units/specials_double_2.cpp new file mode 100644 index 000000000..915983bb0 --- /dev/null +++ b/libnd4j/include/ops/impl/compilation_units/specials_double_2.cpp @@ -0,0 +1,26 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../specials.hpp" + +namespace nd4j { + BUILD_DOUBLE_TEMPLATE(template class DoubleMethods, , LIBND4J_TYPES, LIBND4J_TYPES_2); +} \ No newline at end of file diff --git a/libnd4j/include/ops/impl/compilation_units/specials_double_3.cpp b/libnd4j/include/ops/impl/compilation_units/specials_double_3.cpp new file mode 100644 index 000000000..d2f59137d --- /dev/null +++ b/libnd4j/include/ops/impl/compilation_units/specials_double_3.cpp @@ -0,0 +1,26 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../specials.hpp" + +namespace nd4j { + BUILD_DOUBLE_TEMPLATE(template class DoubleMethods, , LIBND4J_TYPES, LIBND4J_TYPES_3); +} \ No newline at end of file diff --git a/libnd4j/include/ops/impl/compilation_units/specials_double_4.cpp b/libnd4j/include/ops/impl/compilation_units/specials_double_4.cpp new file mode 100644 index 000000000..29caeae84 --- /dev/null +++ b/libnd4j/include/ops/impl/compilation_units/specials_double_4.cpp @@ -0,0 +1,26 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../specials.hpp" + +namespace nd4j { + BUILD_DOUBLE_TEMPLATE(template class DoubleMethods, , LIBND4J_TYPES, LIBND4J_TYPES_4); +} \ No newline at end of file diff --git a/libnd4j/include/ops/impl/compilation_units/specials_double_5.cpp b/libnd4j/include/ops/impl/compilation_units/specials_double_5.cpp new file mode 100644 index 000000000..489d1fc6a --- /dev/null +++ b/libnd4j/include/ops/impl/compilation_units/specials_double_5.cpp @@ -0,0 +1,26 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../specials.hpp" + +namespace nd4j { + BUILD_DOUBLE_TEMPLATE(template class DoubleMethods, , LIBND4J_TYPES, LIBND4J_TYPES_5); +} \ No newline at end of file diff --git a/libnd4j/include/ops/impl/compilation_units/specials_double_6.cpp b/libnd4j/include/ops/impl/compilation_units/specials_double_6.cpp new file mode 100644 index 000000000..6f50c4682 --- /dev/null +++ b/libnd4j/include/ops/impl/compilation_units/specials_double_6.cpp @@ -0,0 +1,26 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../specials.hpp" + +namespace nd4j { + BUILD_DOUBLE_TEMPLATE(template class DoubleMethods, , LIBND4J_TYPES, LIBND4J_TYPES_6); +} \ No newline at end of file diff --git a/libnd4j/include/ops/impl/compilation_units/specials_double_7.cpp b/libnd4j/include/ops/impl/compilation_units/specials_double_7.cpp new file mode 100644 index 000000000..03a31221f --- /dev/null +++ b/libnd4j/include/ops/impl/compilation_units/specials_double_7.cpp @@ -0,0 +1,26 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../specials.hpp" + +namespace nd4j { + BUILD_DOUBLE_TEMPLATE(template class DoubleMethods, , LIBND4J_TYPES, LIBND4J_TYPES_7); +} \ No newline at end of file diff --git a/libnd4j/include/ops/impl/compilation_units/specials_double_8.cpp b/libnd4j/include/ops/impl/compilation_units/specials_double_8.cpp new file mode 100644 index 000000000..074f09238 --- /dev/null +++ b/libnd4j/include/ops/impl/compilation_units/specials_double_8.cpp @@ -0,0 +1,26 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../specials.hpp" + +namespace nd4j { + BUILD_DOUBLE_TEMPLATE(template class DoubleMethods, , LIBND4J_TYPES, LIBND4J_TYPES_8); +} \ No newline at end of file diff --git a/libnd4j/include/ops/impl/compilation_units/specials_double_9.cpp b/libnd4j/include/ops/impl/compilation_units/specials_double_9.cpp new file mode 100644 index 000000000..8de7c663b --- /dev/null +++ b/libnd4j/include/ops/impl/compilation_units/specials_double_9.cpp @@ -0,0 +1,26 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../specials.hpp" + +namespace nd4j { + BUILD_DOUBLE_TEMPLATE(template class DoubleMethods, , LIBND4J_TYPES, LIBND4J_TYPES_9); +} \ No newline at end of file diff --git a/libnd4j/include/ops/impl/compilation_units/specials_single_0.cpp b/libnd4j/include/ops/impl/compilation_units/specials_single_0.cpp new file mode 100644 index 000000000..3e841dfae --- /dev/null +++ b/libnd4j/include/ops/impl/compilation_units/specials_single_0.cpp @@ -0,0 +1,26 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../specials.hpp" + +namespace nd4j { + BUILD_SINGLE_TEMPLATE(template class SpecialMethods, , LIBND4J_TYPES_0); +} \ No newline at end of file diff --git a/libnd4j/include/ops/impl/compilation_units/specials_single_1.cpp b/libnd4j/include/ops/impl/compilation_units/specials_single_1.cpp new file mode 100644 index 000000000..59a215c20 --- /dev/null +++ b/libnd4j/include/ops/impl/compilation_units/specials_single_1.cpp @@ -0,0 +1,26 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../specials.hpp" + +namespace nd4j { + BUILD_SINGLE_TEMPLATE(template class SpecialMethods, , LIBND4J_TYPES_1); +} \ No newline at end of file diff --git a/libnd4j/include/ops/impl/compilation_units/specials_single_2.cpp b/libnd4j/include/ops/impl/compilation_units/specials_single_2.cpp new file mode 100644 index 000000000..77617173d --- /dev/null +++ b/libnd4j/include/ops/impl/compilation_units/specials_single_2.cpp @@ -0,0 +1,26 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../specials.hpp" + +namespace nd4j { + BUILD_SINGLE_TEMPLATE(template class SpecialMethods, , LIBND4J_TYPES_2); +} \ No newline at end of file diff --git a/libnd4j/include/ops/impl/compilation_units/specials_single_3.cpp b/libnd4j/include/ops/impl/compilation_units/specials_single_3.cpp new file mode 100644 index 000000000..2c19c3bc6 --- /dev/null +++ b/libnd4j/include/ops/impl/compilation_units/specials_single_3.cpp @@ -0,0 +1,26 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../specials.hpp" + +namespace nd4j { + BUILD_SINGLE_TEMPLATE(template class SpecialMethods, , LIBND4J_TYPES_3); +} \ No newline at end of file diff --git a/libnd4j/include/ops/impl/compilation_units/specials_single_4.cpp b/libnd4j/include/ops/impl/compilation_units/specials_single_4.cpp new file mode 100644 index 000000000..cd6babb61 --- /dev/null +++ b/libnd4j/include/ops/impl/compilation_units/specials_single_4.cpp @@ -0,0 +1,26 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../specials.hpp" + +namespace nd4j { + BUILD_SINGLE_TEMPLATE(template class SpecialMethods, , LIBND4J_TYPES_4); +} \ No newline at end of file diff --git a/libnd4j/include/ops/impl/compilation_units/specials_single_5.cpp b/libnd4j/include/ops/impl/compilation_units/specials_single_5.cpp new file mode 100644 index 000000000..b54028b42 --- /dev/null +++ b/libnd4j/include/ops/impl/compilation_units/specials_single_5.cpp @@ -0,0 +1,26 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../specials.hpp" + +namespace nd4j { + BUILD_SINGLE_TEMPLATE(template class SpecialMethods, , LIBND4J_TYPES_5); +} \ No newline at end of file diff --git a/libnd4j/include/ops/impl/compilation_units/specials_single_6.cpp b/libnd4j/include/ops/impl/compilation_units/specials_single_6.cpp new file mode 100644 index 000000000..4ca54e7b1 --- /dev/null +++ b/libnd4j/include/ops/impl/compilation_units/specials_single_6.cpp @@ -0,0 +1,26 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../specials.hpp" + +namespace nd4j { + BUILD_SINGLE_TEMPLATE(template class SpecialMethods, , LIBND4J_TYPES_6); +} \ No newline at end of file diff --git a/libnd4j/include/ops/impl/compilation_units/specials_single_7.cpp b/libnd4j/include/ops/impl/compilation_units/specials_single_7.cpp new file mode 100644 index 000000000..3d843ca4c --- /dev/null +++ b/libnd4j/include/ops/impl/compilation_units/specials_single_7.cpp @@ -0,0 +1,26 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../specials.hpp" + +namespace nd4j { + BUILD_SINGLE_TEMPLATE(template class SpecialMethods, , LIBND4J_TYPES_7); +} \ No newline at end of file diff --git a/libnd4j/include/ops/impl/compilation_units/specials_single_8.cpp b/libnd4j/include/ops/impl/compilation_units/specials_single_8.cpp new file mode 100644 index 000000000..d8dc34f1c --- /dev/null +++ b/libnd4j/include/ops/impl/compilation_units/specials_single_8.cpp @@ -0,0 +1,26 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../specials.hpp" + +namespace nd4j { + BUILD_SINGLE_TEMPLATE(template class SpecialMethods, , LIBND4J_TYPES_8); +} \ No newline at end of file diff --git a/libnd4j/include/ops/impl/compilation_units/specials_single_9.cpp b/libnd4j/include/ops/impl/compilation_units/specials_single_9.cpp new file mode 100644 index 000000000..2c12f2803 --- /dev/null +++ b/libnd4j/include/ops/impl/compilation_units/specials_single_9.cpp @@ -0,0 +1,26 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../specials.hpp" + +namespace nd4j { + BUILD_SINGLE_TEMPLATE(template class SpecialMethods, , LIBND4J_TYPES_9); +} \ No newline at end of file diff --git a/libnd4j/include/ops/impl/specials.cpp b/libnd4j/include/ops/impl/specials.hpp similarity index 99% rename from libnd4j/include/ops/impl/specials.cpp rename to libnd4j/include/ops/impl/specials.hpp index ad7f4060d..207ca5964 100644 --- a/libnd4j/include/ops/impl/specials.cpp +++ b/libnd4j/include/ops/impl/specials.hpp @@ -661,7 +661,7 @@ PRAGMA_OMP_SINGLE_ARGS(nowait) samediff::Threads::parallel_tad(func, 0, numTads); } - BUILD_SINGLE_TEMPLATE(template class SpecialMethods, , LIBND4J_TYPES); - BUILD_DOUBLE_TEMPLATE(template class DoubleMethods, , LIBND4J_TYPES, LIBND4J_TYPES); + //BUILD_SINGLE_TEMPLATE(template class SpecialMethods, , LIBND4J_TYPES); + //BUILD_DOUBLE_TEMPLATE(template class DoubleMethods, , LIBND4J_TYPES, LIBND4J_TYPES); } From c9ffb6cbeca37287d0cb1449776ae4996b60456f Mon Sep 17 00:00:00 2001 From: Fariz Rahman Date: Mon, 10 Feb 2020 14:54:44 +0400 Subject: [PATCH 5/7] Python: Use memoryview instead of bytearray (#225) * memoryview * cleanup --- .../main/java/org/datavec/python/Python.java | 11 +++-- .../org/datavec/python/PythonExecutioner.java | 19 +++----- .../java/org/datavec/python/PythonObject.java | 43 +++++++++++++++++-- .../datavec/python/TestPythonExecutioner.java | 23 +++++++++- 4 files changed, 74 insertions(+), 22 deletions(-) diff --git a/datavec/datavec-python/src/main/java/org/datavec/python/Python.java b/datavec/datavec-python/src/main/java/org/datavec/python/Python.java index 80d6643e8..9dabbef2d 100644 --- a/datavec/datavec-python/src/main/java/org/datavec/python/Python.java +++ b/datavec/datavec-python/src/main/java/org/datavec/python/Python.java @@ -144,6 +144,14 @@ public class Python { return 
attr("bytearray"); } + public static PythonObject memoryview(PythonObject pythonObject) { + return attr("memoryview").call(pythonObject); + } + + public static PythonObject memoryviewType() { + return attr("memoryview"); + } + public static PythonObject bytes(PythonObject pythonObject) { return attr("bytes").call(pythonObject); } @@ -250,9 +258,6 @@ public class Python { public static void exec(String code)throws PythonException{ PythonExecutioner.exec(code); } - public static void exec(String code, PythonVariables inputs) throws PythonException{ - PythonExecutioner.exec(code, inputs); - } public static void exec(String code, PythonVariables inputs, PythonVariables outputs) throws PythonException{ PythonExecutioner.exec(code, inputs, outputs); } diff --git a/datavec/datavec-python/src/main/java/org/datavec/python/PythonExecutioner.java b/datavec/datavec-python/src/main/java/org/datavec/python/PythonExecutioner.java index a06e60e98..e2d2e5747 100644 --- a/datavec/datavec-python/src/main/java/org/datavec/python/PythonExecutioner.java +++ b/datavec/datavec-python/src/main/java/org/datavec/python/PythonExecutioner.java @@ -20,23 +20,16 @@ package org.datavec.python; import lombok.extern.slf4j.Slf4j; import org.apache.commons.io.IOUtils; -import org.bytedeco.cpython.PyThreadState; -import org.bytedeco.javacpp.BytePointer; import org.bytedeco.numpy.global.numpy; -import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.io.ClassPathResource; import java.io.File; import java.io.IOException; import java.io.InputStream; -import java.nio.ByteBuffer; import java.nio.charset.Charset; -import java.util.Arrays; -import java.util.Map; import java.util.concurrent.atomic.AtomicBoolean; import static org.bytedeco.cpython.global.python.*; -import static org.bytedeco.cpython.global.python.PyThreadState_Get; import static org.datavec.python.Python.*; /** @@ -105,6 +98,7 @@ public class PythonExecutioner { init(); } + private static synchronized void init() { if (init.get()) { 
return; @@ -204,6 +198,9 @@ public class PythonExecutioner { } public static void getVariables(PythonVariables pyVars) throws PythonException { + if (pyVars == null){ + return; + } for (String varName : pyVars.getVariables()) { pyVars.setValue(varName, getVariable(varName, pyVars.getType(varName))); } @@ -240,12 +237,6 @@ public class PythonExecutioner { throwIfExecutionFailed(); } - public static void exec(String code, PythonVariables outputVariables)throws PythonException { - simpleExec(getWrappedCode(code)); - throwIfExecutionFailed(); - getVariables(outputVariables); - } - public static void exec(String code, PythonVariables inputVariables, PythonVariables outputVariables) throws PythonException { setVariables(inputVariables); simpleExec(getWrappedCode(code)); @@ -354,7 +345,6 @@ public class PythonExecutioner { log.info("Setting python path " + path); StringBuffer sb = new StringBuffer(); File[] packages = numpy.cachePackages(); - JavaCppPathType pathAppendValue = JavaCppPathType.valueOf(System.getProperty(JAVACPP_PYTHON_APPEND_TYPE, DEFAULT_APPEND_TYPE).toUpperCase()); switch (pathAppendValue) { case BEFORE: @@ -395,4 +385,5 @@ public class PythonExecutioner { throw new IllegalStateException("Unable to reset python path. 
Already initialized."); } } + } diff --git a/datavec/datavec-python/src/main/java/org/datavec/python/PythonObject.java b/datavec/datavec-python/src/main/java/org/datavec/python/PythonObject.java index f1d54168b..c0079919c 100644 --- a/datavec/datavec-python/src/main/java/org/datavec/python/PythonObject.java +++ b/datavec/datavec-python/src/main/java/org/datavec/python/PythonObject.java @@ -69,7 +69,11 @@ public class PythonObject { } public PythonObject(BytePointer bp){ - nativePythonObject = PyByteArray_FromStringAndSize(bp, bp.capacity()); + + long address = bp.address(); + long size = bp.capacity(); + NumpyArray npArr = NumpyArray.builder().address(address).shape(new long[]{size}).strides(new long[]{1}).dtype(DataType.BYTE).build(); + nativePythonObject = Python.memoryview(new PythonObject(npArr)).nativePythonObject; } public PythonObject(NumpyArray npArray) { @@ -343,13 +347,28 @@ public class PythonObject { dtype = DataType.DOUBLE; } else if (dtypeName.equals("float32")) { dtype = DataType.FLOAT; - } else if (dtypeName.equals("int16")) { + } else if (dtypeName.equals("int8")){ + dtype = DataType.INT8; + }else if (dtypeName.equals("int16")) { dtype = DataType.SHORT; } else if (dtypeName.equals("int32")) { dtype = DataType.INT; } else if (dtypeName.equals("int64")) { dtype = DataType.LONG; - } else { + } + else if (dtypeName.equals("uint8")){ + dtype = DataType.UINT8; + } + else if (dtypeName.equals("uint16")){ + dtype = DataType.UINT16; + } + else if (dtypeName.equals("uint32")){ + dtype = DataType.UINT32; + } + else if (dtypeName.equals("uint64")){ + dtype = DataType.UINT64; + } + else { throw new RuntimeException("Unsupported array type " + dtypeName + "."); } return new NumpyArray(address, jshape, jstrides, dtype); @@ -518,6 +537,22 @@ public class PythonObject { else if (Python.isinstance(this, Python.bytearrayType())){ return PyByteArray_AsString(nativePythonObject); } + else if (Python.isinstance(this, Python.memoryviewType())){ + +// PyObject np = 
PyImport_ImportModule("numpy"); +// PyObject array = PyObject_GetAttrString(np, "asarray"); +// PyObject npArr = PyObject_CallObject(array, nativePythonObject); // Doesn't work + // Invoke interpreter: + String tempContext = "temp" + UUID.randomUUID().toString().replace('-', '_'); + String originalContext = Python.getCurrentContext(); + Python.setContext(tempContext); + PythonExecutioner.setVariable("memview", this); + PythonExecutioner.exec("import numpy as np\narr = np.array(memview)"); + BytePointer ret = new BytePointer(PythonExecutioner.getVariable("arr").toNumpy().getNd4jArray().data().pointer()); + Python.setContext(originalContext); + Python.deleteContext(tempContext); + return ret; + } else{ PyObject ctypes = PyImport_ImportModule("ctypes"); PyObject cArrType = PyObject_GetAttrString(ctypes, "Array"); @@ -542,7 +577,7 @@ public class PythonObject { return new BytePointer(ptr); } else{ - throw new PythonException("Expected bytes, bytearray or ctypesArray. Received " + Python.type(this).toString()); + throw new PythonException("Expected bytes, bytearray, memoryview or ctypesArray. 
Received " + Python.type(this).toString()); } } diff --git a/datavec/datavec-python/src/test/java/org/datavec/python/TestPythonExecutioner.java b/datavec/datavec-python/src/test/java/org/datavec/python/TestPythonExecutioner.java index bb436e808..b8916476c 100644 --- a/datavec/datavec-python/src/test/java/org/datavec/python/TestPythonExecutioner.java +++ b/datavec/datavec-python/src/test/java/org/datavec/python/TestPythonExecutioner.java @@ -24,6 +24,7 @@ import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; + import static org.junit.Assert.assertEquals; import static org.junit.Assert.fail; @@ -237,12 +238,13 @@ public class TestPythonExecutioner { PythonVariables pyOutputs= new PythonVariables(); pyOutputs.addStr("out"); - String code = "out = buff.decode()"; + String code = "out = bytes(buff).decode()"; Python.exec(code, pyInputs, pyOutputs); Assert.assertEquals("abc", pyOutputs.getStrValue("out")); } + @Test public void testByteBufferOutputNoCopy() throws Exception{ INDArray buff = Nd4j.zeros(new int[]{3}, DataType.BYTE); @@ -262,6 +264,24 @@ public class TestPythonExecutioner { Assert.assertEquals("cba", pyOutputs.getBytesValue("buff").getString()); } + @Test + public void testByteBufferInplace() throws Exception{ + INDArray buff = Nd4j.zeros(new int[]{3}, DataType.BYTE); + buff.putScalar(0, 97); // a + buff.putScalar(1, 98); // b + buff.putScalar(2, 99); // c + PythonVariables pyInputs = new PythonVariables(); + pyInputs.addBytes("buff", new BytePointer(buff.data().pointer())); + String code = "buff[0]+=2\nbuff[2]-=2"; + Python.exec(code, pyInputs, null); + Assert.assertEquals("cba", pyInputs.getBytesValue("buff").getString()); + INDArray expected = buff.dup(); + expected.putScalar(0, 99); + expected.putScalar(2, 97); + Assert.assertEquals(buff, expected); + + } + @Test public void testByteBufferOutputWithCopy() throws Exception{ INDArray buff = Nd4j.zeros(new int[]{3}, DataType.BYTE); @@ 
-302,4 +322,5 @@ public class TestPythonExecutioner { Python.setMainContext(); } + } From f3fa4fd632a6614493f74dc1848fa4c41c8c14c6 Mon Sep 17 00:00:00 2001 From: raver119 Date: Wed, 12 Feb 2020 12:38:10 +0300 Subject: [PATCH 6/7] C++ NPY (#233) * import .npy files in C++ Signed-off-by: raver119 * reuse existing method Signed-off-by: raver119 * add CPU_FEATURES to static lib Signed-off-by: raver119 --- libnd4j/blas/CMakeLists.txt | 2 +- libnd4j/blas/NDArrayFactory.h | 7 +++++ libnd4j/blas/cpu/NDArrayFactory.cpp | 27 ++++++++++++++++++ .../tests_cpu/layers_tests/NDArrayTests2.cpp | 10 +++++++ .../tests_cpu/resources/arr_3,4_float32.npy | Bin 0 -> 176 bytes 5 files changed, 45 insertions(+), 1 deletion(-) create mode 100644 libnd4j/tests_cpu/resources/arr_3,4_float32.npy diff --git a/libnd4j/blas/CMakeLists.txt b/libnd4j/blas/CMakeLists.txt index a54ad52b4..51a29e522 100755 --- a/libnd4j/blas/CMakeLists.txt +++ b/libnd4j/blas/CMakeLists.txt @@ -336,7 +336,7 @@ elseif(CPU_BLAS) if ("${LIBND4J_ALL_OPS}" AND "${LIBND4J_BUILD_MINIFIER}") message(STATUS "Building minifier...") add_executable(minifier ../minifier/minifier.cpp ../minifier/graphopt.cpp) - target_link_libraries(minifier ${LIBND4J_NAME}static ${MKLDNN_LIBRARIES} ${OPENBLAS_LIBRARIES} ${MKLDNN} ${BLAS_LIBRARIES}) + target_link_libraries(minifier ${LIBND4J_NAME}static ${MKLDNN_LIBRARIES} ${OPENBLAS_LIBRARIES} ${MKLDNN} ${BLAS_LIBRARIES} ${CPU_FEATURES}) endif() if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" AND "${CMAKE_CXX_COMPILER_VERSION}" VERSION_LESS 4.9) diff --git a/libnd4j/blas/NDArrayFactory.h b/libnd4j/blas/NDArrayFactory.h index 5e979f1d8..bff199d08 100644 --- a/libnd4j/blas/NDArrayFactory.h +++ b/libnd4j/blas/NDArrayFactory.h @@ -108,6 +108,13 @@ namespace nd4j { template static NDArray create(char order, const std::vector &shape, const std::initializer_list& data, nd4j::LaunchContext * context = nd4j::LaunchContext ::defaultContext()); + /** + * This method creates NDArray from .npy file + * @param 
fileName + * @return + */ + static NDArray fromNpyFile(const char *fileName); + /** * This factory create array from utf8 string * @return NDArray default dataType UTF8 diff --git a/libnd4j/blas/cpu/NDArrayFactory.cpp b/libnd4j/blas/cpu/NDArrayFactory.cpp index 738dccdbe..736452b48 100644 --- a/libnd4j/blas/cpu/NDArrayFactory.cpp +++ b/libnd4j/blas/cpu/NDArrayFactory.cpp @@ -24,11 +24,15 @@ #include #include #include +#include #include #include + + #include +#include namespace nd4j { @@ -688,4 +692,27 @@ template ND4J_EXPORT NDArray NDArrayFactory::create(int16_t* buffer, const char return NDArray( shape, string, dtype, context); } + + NDArray NDArrayFactory::fromNpyFile(const char *fileName) { + auto size = nd4j::graph::getFileSize(fileName); + if (size < 0) + throw std::runtime_error("File doesn't exist"); + + auto pNPY = reinterpret_cast(::numpyFromFile(std::string(fileName))); + + auto nBuffer = reinterpret_cast(::dataPointForNumpy(pNPY)); + auto shape = reinterpret_cast(::shapeBufferForNumpy(pNPY)); + + auto length = shape::length(shape); + int8_t *buffer = nullptr; + nd4j::memory::Workspace *workspace = nullptr; + auto byteLen = length * DataTypeUtils::sizeOfElement(ArrayOptions::dataType(shape)); + + ALLOCATE(buffer, workspace, byteLen, int8_t); + memcpy(buffer, nBuffer, byteLen); + + free(pNPY); + + return NDArray(buffer, shape, LaunchContext::defaultContext(), true); + } } diff --git a/libnd4j/tests_cpu/layers_tests/NDArrayTests2.cpp b/libnd4j/tests_cpu/layers_tests/NDArrayTests2.cpp index 4507086f5..e3dc1aefc 100644 --- a/libnd4j/tests_cpu/layers_tests/NDArrayTests2.cpp +++ b/libnd4j/tests_cpu/layers_tests/NDArrayTests2.cpp @@ -1294,4 +1294,14 @@ TEST_F(NDArrayTest2, test_subarray_followed_by_reshape_1) { // r.printIndexedBuffer("r"); ASSERT_EQ(e, r); +} + +TEST_F(NDArrayTest2, test_numpy_import_1) { + std::string fname("./resources/arr_3,4_float32.npy"); + auto exp = NDArrayFactory::create('c', {3, 4}); + exp.linspace(0); + + auto array = 
NDArrayFactory::fromNpyFile(fname.c_str()); + + ASSERT_EQ(exp, array); } \ No newline at end of file diff --git a/libnd4j/tests_cpu/resources/arr_3,4_float32.npy b/libnd4j/tests_cpu/resources/arr_3,4_float32.npy new file mode 100644 index 0000000000000000000000000000000000000000..ead15844c59e2c77138c84d14a805aa07d4172e4 GIT binary patch literal 176 zcmbR27wQ`j$;eQ~P_3SlTAW;@Zl$1ZlV+l>qoAIaUsO_*m=~X4l#&V(cT3DEP6dh= zXCxM+0{I%oItnJ5ItsN4WCJb+Flev`QVu`_#0@~a0EiC&@dF@caAaT*0Ad9oHgE(0 D6k#F} literal 0 HcmV?d00001 From b9411863028f8725d00145b1a254fa9612461bca Mon Sep 17 00:00:00 2001 From: Shams Ul Azeem Date: Wed, 12 Feb 2020 15:17:30 +0500 Subject: [PATCH 7/7] Making TypeName enum public (#235) --- .../src/main/java/org/datavec/python/PythonType.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datavec/datavec-python/src/main/java/org/datavec/python/PythonType.java b/datavec/datavec-python/src/main/java/org/datavec/python/PythonType.java index 60603a8e3..d0a3f488f 100644 --- a/datavec/datavec-python/src/main/java/org/datavec/python/PythonType.java +++ b/datavec/datavec-python/src/main/java/org/datavec/python/PythonType.java @@ -36,7 +36,7 @@ public abstract class PythonType { public abstract T toJava(PythonObject pythonObject) throws PythonException; private final TypeName typeName; - enum TypeName{ + public enum TypeName{ STR, INT, FLOAT,