From 69d91e272a5df21b763d80b5e11fe9f10d178fac Mon Sep 17 00:00:00 2001 From: Abdelrauf Date: Thu, 14 May 2020 14:41:55 +0400 Subject: [PATCH 01/21] - new implementations for Index Reductions (#421) * - new implementations for Index Reductions - small fix in the legacy reduction - disabled index reduction bench tests inside Playground Signed-off-by: Abdelrauf * Allow LIBND4J_TYPES Signed-off-by: Abdelrauf * index reduction stuff split into bunch of units * meh * IMax switched to new impl Signed-off-by: raver119@gmail.com * minor fix + test * minor fix * index range fix Signed-off-by: Abdelrauf * noop on empty outputs * minor fix * minor fix Signed-off-by: Abdelrauf * ArgMax replaces IMax Signed-off-by: raver119@gmail.com * argmax/argmin/argamax/argamin shape functions updated * ArgAmax/ArgAmin/ArgMin replaces IAMax/IAMin/IMin Signed-off-by: raver119@gmail.com * argmax/argmin/argamax/argamin CUDA * IMax replaced in dl4j Signed-off-by: raver119@gmail.com * Codegen output * imports fixed Signed-off-by: raver119@gmail.com * fix compilation issue Signed-off-by: Abdelrauf * Auto-generate compilation units Signed-off-by: Abdelrauf * Should fix NDArray refactored function calls in indexReductions.cu Signed-off-by: Abdelrauf Co-authored-by: raver119@gmail.com Co-authored-by: Alexander Stoyakin --- .../clustering/cluster/CentersHolder.java | 8 +- .../vectorizer/BagOfWordsVectorizerTest.java | 6 +- libnd4j/blas/CMakeLists.txt | 14 + libnd4j/include/helpers/LoopsCoordsHelper.h | 508 +++++++++- libnd4j/include/loops/cpu/indexreduce.hpp | 2 +- libnd4j/include/loops/cuda/indexreduce.cu | 15 +- .../ops/declarable/generic/reduce/argamax.cpp | 95 ++ .../ops/declarable/generic/reduce/argamin.cpp | 95 ++ .../ops/declarable/generic/reduce/argmax.cpp | 37 +- .../ops/declarable/generic/reduce/argmin.cpp | 43 +- .../ops/declarable/headers/parity_ops.h | 26 + .../cpu/compilation_units/argamax.cpp.in | 28 + .../cpu/compilation_units/argamin.cpp.in | 28 + .../cpu/compilation_units/argmax.cpp.in | 28 + .../cpu/compilation_units/argmin.cpp.in | 28 + .../crop_and_resize_0.cpp | 2 +- .../crop_and_resize_1.cpp | 2 +- .../crop_and_resize_2.cpp | 2 +- .../crop_and_resize_3.cpp | 2 +- .../crop_and_resize_4.cpp | 2 +- .../crop_and_resize_5.cpp | 2 +- .../crop_and_resize_6.cpp | 2 +- .../crop_and_resize_7.cpp | 2 +- .../crop_and_resize_8.cpp | 2 +- .../crop_and_resize_9.cpp | 2 +- .../helpers/cpu/indexReductions.cpp | 56 ++ .../helpers/cpu/indexReductions.hpp | 900 ++++++++++++++++++ .../helpers/cuda/indexReductions.cu | 106 +++ .../ops/declarable/helpers/reductions.h | 41 + .../layers_tests/DeclarableOpsTests19.cpp | 14 + .../layers_tests/PlaygroundTests.cpp | 255 ++++- .../nd4j/autodiff/samediff/ops/SDBaseOps.java | 16 +- .../nd4j/autodiff/samediff/ops/SDMath.java | 16 +- .../samediff/serde/LegacyOpMapper.java | 8 - .../autodiff/validation/OpValidation.java | 4 - .../converters/ImportClassMapping.java | 6 +- .../linalg/api/ops/impl/indexaccum/IAMax.java | 78 -- .../linalg/api/ops/impl/indexaccum/IAMin.java | 80 -- .../linalg/api/ops/impl/indexaccum/IMax.java | 87 -- .../linalg/api/ops/impl/indexaccum/IMin.java | 83 -- .../ops/impl/indexaccum/custom/ArgAmax.java | 111 +++ .../ops/impl/indexaccum/custom/ArgAmin.java | 111 +++ .../ops/impl/indexaccum/custom/ArgMax.java | 49 +- .../ops/impl/indexaccum/custom/ArgMin.java | 49 +- .../java/org/nd4j/linalg/factory/Nd4j.java | 12 +- .../org/nd4j/linalg/factory/ops/NDBase.java | 8 +- .../org/nd4j/linalg/factory/ops/NDMath.java | 8 +- .../java/org/nd4j/nativeblas/Nd4jCpu.java | 54 ++ 
.../opvalidation/ReductionOpValidation.java | 8 +- .../autodiff/samediff/NameScopeTests.java | 2 +- .../test/java/org/nd4j/linalg/Nd4jTestsC.java | 34 +- .../java/org/nd4j/linalg/crash/CrashTest.java | 4 +- .../nd4j/linalg/ops/OpExecutionerTests.java | 26 +- .../nd4j/linalg/ops/OpExecutionerTestsC.java | 20 +- .../org/nd4j/linalg/shape/EmptyTests.java | 3 +- 55 files changed, 2742 insertions(+), 488 deletions(-) create mode 100644 libnd4j/include/ops/declarable/generic/reduce/argamax.cpp create mode 100644 libnd4j/include/ops/declarable/generic/reduce/argamin.cpp create mode 100644 libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argamax.cpp.in create mode 100644 libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argamin.cpp.in create mode 100644 libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argmax.cpp.in create mode 100644 libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argmin.cpp.in rename libnd4j/include/ops/declarable/helpers/cpu/compilation_units/{ => crop_and_resize}/crop_and_resize_0.cpp (95%) rename libnd4j/include/ops/declarable/helpers/cpu/compilation_units/{ => crop_and_resize}/crop_and_resize_1.cpp (95%) rename libnd4j/include/ops/declarable/helpers/cpu/compilation_units/{ => crop_and_resize}/crop_and_resize_2.cpp (95%) rename libnd4j/include/ops/declarable/helpers/cpu/compilation_units/{ => crop_and_resize}/crop_and_resize_3.cpp (95%) rename libnd4j/include/ops/declarable/helpers/cpu/compilation_units/{ => crop_and_resize}/crop_and_resize_4.cpp (95%) rename libnd4j/include/ops/declarable/helpers/cpu/compilation_units/{ => crop_and_resize}/crop_and_resize_5.cpp (95%) rename libnd4j/include/ops/declarable/helpers/cpu/compilation_units/{ => crop_and_resize}/crop_and_resize_6.cpp (95%) rename libnd4j/include/ops/declarable/helpers/cpu/compilation_units/{ => crop_and_resize}/crop_and_resize_7.cpp (95%) rename libnd4j/include/ops/declarable/helpers/cpu/compilation_units/{ => crop_and_resize}/crop_and_resize_8.cpp (95%) rename libnd4j/include/ops/declarable/helpers/cpu/compilation_units/{ => crop_and_resize}/crop_and_resize_9.cpp (95%) create mode 100644 libnd4j/include/ops/declarable/helpers/cpu/indexReductions.cpp create mode 100644 libnd4j/include/ops/declarable/helpers/cpu/indexReductions.hpp create mode 100644 libnd4j/include/ops/declarable/helpers/cuda/indexReductions.cu create mode 100644 libnd4j/include/ops/declarable/helpers/reductions.h delete mode 100644 nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/IAMax.java delete mode 100644 nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/IAMin.java delete mode 100644 nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/IMax.java delete mode 100644 nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/IMin.java create mode 100644 nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/custom/ArgAmax.java create mode 100644 nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/custom/ArgAmin.java diff --git a/deeplearning4j/deeplearning4j-nearestneighbors-parent/nearestneighbor-core/src/main/java/org/deeplearning4j/clustering/cluster/CentersHolder.java b/deeplearning4j/deeplearning4j-nearestneighbors-parent/nearestneighbor-core/src/main/java/org/deeplearning4j/clustering/cluster/CentersHolder.java index 
e692f9bd0..25542dc8f 100644 --- a/deeplearning4j/deeplearning4j-nearestneighbors-parent/nearestneighbor-core/src/main/java/org/deeplearning4j/clustering/cluster/CentersHolder.java +++ b/deeplearning4j/deeplearning4j-nearestneighbors-parent/nearestneighbor-core/src/main/java/org/deeplearning4j/clustering/cluster/CentersHolder.java @@ -20,7 +20,7 @@ import org.deeplearning4j.clustering.algorithm.Distance; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ops.ReduceOp; -import org.nd4j.linalg.api.ops.impl.indexaccum.IMin; +import org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMin; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.common.primitives.Pair; @@ -29,7 +29,7 @@ public class CentersHolder { private long index = 0; protected transient ReduceOp op; - protected IMin imin; + protected ArgMin imin; protected transient INDArray distances; protected transient INDArray argMin; @@ -60,7 +60,7 @@ public class CentersHolder { if (op == null) { op = ClusterUtils.createDistanceFunctionOp(distanceFunction, centers, point.getArray(), 1); - imin = new IMin(distances, argMin); + imin = new ArgMin(distances, argMin); op.setZ(distances); } @@ -84,7 +84,7 @@ public class CentersHolder { if (op == null) { op = ClusterUtils.createDistanceFunctionOp(distanceFunction, centers, point.getArray(), 1); - imin = new IMin(distances, argMin); + imin = new ArgMin(distances, argMin); op.setZ(distances); } diff --git a/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/test/java/org/deeplearning4j/bagofwords/vectorizer/BagOfWordsVectorizerTest.java b/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/test/java/org/deeplearning4j/bagofwords/vectorizer/BagOfWordsVectorizerTest.java index 368b48ee9..e450e6095 100755 --- a/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/test/java/org/deeplearning4j/bagofwords/vectorizer/BagOfWordsVectorizerTest.java +++ b/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/test/java/org/deeplearning4j/bagofwords/vectorizer/BagOfWordsVectorizerTest.java @@ -23,6 +23,7 @@ import org.deeplearning4j.BaseDL4JTest; import org.junit.Rule; import org.junit.rules.TemporaryFolder; import org.nd4j.common.io.ClassPathResource; +import org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMax; import org.deeplearning4j.models.word2vec.VocabWord; import org.deeplearning4j.models.word2vec.wordstore.VocabCache; import org.deeplearning4j.text.sentenceiterator.labelaware.LabelAwareFileSentenceIterator; @@ -31,7 +32,6 @@ import org.deeplearning4j.text.tokenization.tokenizerfactory.DefaultTokenizerFac import org.deeplearning4j.text.tokenization.tokenizerfactory.TokenizerFactory; import org.junit.Test; import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.api.ops.impl.indexaccum.IMax; import org.nd4j.linalg.dataset.DataSet; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.common.util.SerializationUtils; @@ -111,7 +111,7 @@ public class BagOfWordsVectorizerTest extends BaseDL4JTest { INDArray labelz = dataSet.getLabels(); log.info("Labels array: " + labelz); - int idx2 = Nd4j.getExecutioner().exec(new IMax(labelz)).getInt(0); + int idx2 = Nd4j.getExecutioner().exec(new ArgMax(labelz))[0].getInt(0); //int idx2 = ((IndexAccumulation) Nd4j.getExecutioner().exec(new IMax(labelz))).getFinalResult().intValue(); // assertEquals(1.0, dataSet.getLabels().getDouble(0), 0.1); @@ -125,7 +125,7 @@ public class BagOfWordsVectorizerTest extends BaseDL4JTest { assertEquals(1, 
dataSet.getFeatures().getDouble(vocabCache.tokenFor("1").getIndex()), 0.1); assertEquals(0, dataSet.getFeatures().getDouble(vocabCache.tokenFor("2").getIndex()), 0.1); - int idx1 = Nd4j.getExecutioner().exec(new IMax(dataSet.getLabels())).getInt(0); + int idx1 = Nd4j.getExecutioner().exec(new ArgMax(dataSet.getLabels()))[0].getInt(0); //int idx1 = ((IndexAccumulation) Nd4j.getExecutioner().exec(new IMax(dataSet.getLabels()))).getFinalResult().intValue(); //assertEquals(0.0, dataSet.getLabels().getDouble(0), 0.1); diff --git a/libnd4j/blas/CMakeLists.txt b/libnd4j/blas/CMakeLists.txt index 8c8d5fb22..9902649f8 100755 --- a/libnd4j/blas/CMakeLists.txt +++ b/libnd4j/blas/CMakeLists.txt @@ -294,12 +294,26 @@ elseif(SD_CPU) file(GLOB_RECURSE LEGACY_SOURCES false ../include/legacy/impl/*.cpp ../include/legacy/cpu/*.cpp ../include/legacy/*.h) file(GLOB_RECURSE LOOPS_SOURCES false ../include/loops/*.cpp ../include/loops/*.h) + + file(GLOB_RECURSE COMPILATION_UNITS false ../include/ops/declarable/helpers/cpu/compilation_units/*.cpp.in) + foreach(FL_ITEM ${COMPILATION_UNITS}) + string(REGEX MATCH "^(.*)\\.cpp\.in$" dummy ${FL_ITEM}) + set(FL_ITEM_WLE ${CMAKE_MATCH_1}) + foreach(FL_TYPE_INDEX RANGE 0 9) + message( "${FL_ITEM_WLE}_${FL_TYPE_INDEX}.cpp") + configure_file( "${FL_ITEM}" "${FL_ITEM_WLE}_${FL_TYPE_INDEX}.cpp" @ONLY) + LIST(APPEND CUSTOMOPS_GENERIC_SOURCES ${FL_ITEM_WLE}_${FL_TYPE_INDEX}.cpp ) + endforeach() + endforeach() + if (SD_X86_BUILD) # we disable platform optimizations for certain files on linux/macos set_source_files_properties(cpu/NativeOps.cpp PROPERTIES COMPILE_FLAGS "-march=x86-64 -mtune=generic") set_source_files_properties(../include/helpers/impl/OpTracker.cpp PROPERTIES COMPILE_FLAGS "-march=x86-64 -mtune=generic") endif() + + if(SD_CHECK_VECTORIZATION) set(VECT_FILES cpu/NativeOps.cpp ${OPS_SOURCES} ${HELPERS_SOURCES} ${CUSTOMOPS_GENERIC_SOURCES} ${LOOPS_SOURCES}) if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
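The foreach/configure_file block above is what auto-generates the per-type compilation units: configure_file with @ONLY substitutes only @VAR@ references, and FL_TYPE_INDEX ranges over 0..9, so each *.cpp.in template under compilation_units is emitted ten times. For the argmax template shown later in this patch, a line such as

    BUILD_DOUBLE_TEMPLATE(template void argMax_, (const NDArray& input, NDArray& output, const std::vector& dimensions), LIBND4J_TYPES_@FL_TYPE_INDEX@, INDEXING_TYPES);

becomes, in the generated argmax_3.cpp for example,

    BUILD_DOUBLE_TEMPLATE(template void argMax_, (const NDArray& input, NDArray& output, const std::vector& dimensions), LIBND4J_TYPES_3, INDEXING_TYPES);

splitting the heavy type-dispatch instantiations across ten smaller translation units that compile in parallel with less memory per file.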
diff --git a/libnd4j/include/helpers/LoopsCoordsHelper.h b/libnd4j/include/helpers/LoopsCoordsHelper.h index cd578b62a..8a1160aea 100644 --- a/libnd4j/include/helpers/LoopsCoordsHelper.h +++ b/libnd4j/include/helpers/LoopsCoordsHelper.h @@ -19,12 +19,13 @@ // #ifndef LIBND4J_LOOPCOORDSHELPER_H #define LIBND4J_LOOPCOORDSHELPER_H - +#include #include #include #include #include #include +#include namespace sd { #if defined(__GNUC__) @@ -125,7 +126,7 @@ namespace sd { } - FORCEINLINE zip_size_t offset_from_coords(const Nd4jLong*& x_strides, const Nd4jLong*& z_strides, const Nd4jLong* coords, const Nd4jLong& rank) { + FORCEINLINE zip_size_t offset_from_coords(const Nd4jLong* x_strides, const Nd4jLong* z_strides, const Nd4jLong* coords, const Nd4jLong& rank) { zip_size_t offset = { 0,0 }; size_t rank_4 = rank & -4; @@ -435,6 +436,509 @@ namespace sd { return last_offset; } + + struct triple_size_t { + size_t first; + size_t second; + size_t third; + }; + + + template + FORCEINLINE triple_size_t inc_coords(const Nd4jLong* bases, const Nd4jLong* x_strides, const Nd4jLong* y_strides, const Nd4jLong* z_strides, Nd4jLong* coords, triple_size_t last_offset, const size_t rank, const size_t skip = 0) { + + Nd4jLong val = 0; + for (int i = rank - skip - 1; i >= 0; i--) { + val = coords[i] + 1; + if (likely(val < bases[i])) { + coords[i] = val; + last_offset.first += x_strides[i]; + last_offset.second += y_strides[i]; + last_offset.third += z_strides[i]; + break; + } + else { + last_offset.first -= coords[i] * x_strides[i]; + last_offset.second -= coords[i] * y_strides[i]; + last_offset.third -= coords[i] * z_strides[i]; + coords[i] = 0; + } + } + return last_offset; + } + + template<> + FORCEINLINE triple_size_t inc_coords(const Nd4jLong* bases, const Nd4jLong* x_strides, const Nd4jLong* y_strides, const Nd4jLong* z_strides, Nd4jLong* coords, triple_size_t last_offset, const size_t rank, const size_t skip) { + + Nd4jLong val = 0; + for (int i = skip; i < rank; i++) { + val = coords[i] + 1; + if (likely(val < bases[i])) { + coords[i] = val; + + last_offset.first += x_strides[i]; + last_offset.second += y_strides[i]; + last_offset.third += z_strides[i]; + break; + } + else { + last_offset.first -= coords[i] * x_strides[i]; + last_offset.second -= coords[i] * y_strides[i]; + last_offset.third -= coords[i] * z_strides[i]; + coords[i] = 0; + } + } + return last_offset; + } + + FORCEINLINE triple_size_t offset_from_coords(const Nd4jLong* x_strides, const Nd4jLong* y_strides, const Nd4jLong* z_strides, const Nd4jLong* coords, const Nd4jLong& rank) { + + triple_size_t offset = { 0,0 ,0 }; + size_t rank_4 = rank & -4; + for (int i = 0; i < rank_4; i += 4) { + offset.first = offset.first + + coords[i] * x_strides[i] + + coords[i + 1] * x_strides[i + 1] + + coords[i + 2] * x_strides[i + 2] + + coords[i + 3] * x_strides[i + 3]; + offset.second = offset.second + + coords[i] * y_strides[i] + + coords[i + 1] * y_strides[i + 1] + + coords[i + 2] * y_strides[i + 2] + + coords[i + 3] * y_strides[i + 3]; + offset.third = offset.third + + coords[i] * z_strides[i] + + coords[i + 1] * z_strides[i + 1] + + coords[i + 2] * z_strides[i + 2] + + coords[i + 3] * z_strides[i + 3]; + } + for (int i = rank_4; i < rank; i++) { + offset.first += coords[i] * x_strides[i]; + offset.second += coords[i] * y_strides[i]; + offset.third += coords[i] * z_strides[i]; + } + return offset; + } + + + template + FORCEINLINE Nd4jLong getLength(const Nd4jLong* bases, int rank, int skip = 0) + { + if (skip < 0 || skip >= rank) skip = 0; + Nd4jLong total = 1; + for (int i = 0; i < rank - skip; i++) { + total *= bases[i]; + } + return total; + } + + + template<> + FORCEINLINE Nd4jLong getLength(const Nd4jLong* bases, int rank, int skip) + { + if (skip < 0 || skip >= rank) skip = 0; + Nd4jLong total = 1; + for (int i = skip; i < rank; i++) { + total *= bases[i]; + } + + return total; + } + + + template + FORCEINLINE Nd4jLong getLength(const Nd4jLong* bases, int rank, int skip, Nd4jLong& outSkippedLength) + { + if (skip < 0 || skip >= rank) skip = 0; + Nd4jLong total = 1; + for (int i = 0; i < rank - skip; i++) { + total *= bases[i]; + } + if (skip > 0) { + outSkippedLength = 1; + for (int i = rank - skip; i < rank; i++) { + outSkippedLength *= bases[i]; + } + } + else { + outSkippedLength = 0; + } + return total; + } + + + template<> + FORCEINLINE Nd4jLong getLength(const Nd4jLong* bases, int rank, int skip, Nd4jLong& outSkippedLength) + { + if (skip < 0 || skip >= rank) skip = 0; + if (skip > 0) { + outSkippedLength = 1; + for (int i = 0; i < skip; i++) { + outSkippedLength *= bases[i]; + } + } + else { + outSkippedLength = 0; + } + Nd4jLong total = 1; + for (int i = skip; i < rank; i++) { + total *= bases[i]; + } + + return total; + } + + /* + to satisfy the ODR it will be declared inline + rePartition for reductions and the like + Indices mentioned in the dimension list will be moved to the tail + This way the shape is split into two parts: + the first part describes the output, the second (tail) part is used for reductions and other purposes + if squash is true then it will attempt to minimize the output (for both orders) and the tail +*/
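+ // A minimal usage sketch (illustrative only; the local names below are
+ // hypothetical), reducing a rank-4 array over axes {1, 3}:
+ // Nd4jLong new_bases[MAX_RANK], new_strides[MAX_RANK];
+ // int outB, outE, tailB, tailE;
+ // rePartition('c', {1, 3}, 4, bases, strides, new_bases, new_strides,
+ //              outB, outE, tailB, tailE, false, true);
+ // leaves the kept (output) axes in [outB, outE) and the reduced axes in [tailB, tailE).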
+ + FORCEINLINE void rePartition(char order, const std::vector& dimensions, const size_t rank, const Nd4jLong* bases, const Nd4jLong* strides, Nd4jLong(&new_bases)[MAX_RANK], Nd4jLong(&new_strides)[MAX_RANK], int& first_begin, int& first_end, int& second_begin, int& second_end, bool first_squash = false, bool second_squash = true) { + + bool indices[MAX_RANK] = {}; + int ind = 0; + size_t second_rank; + if (dimensions.size() == 0 || (dimensions.size() == 1 && dimensions.at(0) == sd::DataTypeUtils::max())){ + first_end = 0; + first_begin = 0; + //treat it as the whole + for (int i = 0; i < rank; i++) { + new_bases[i] = bases[i]; + new_strides[i] = strides[i]; + } + second_rank = rank; + second_end = rank; + second_begin = 0; + + } + else { + for (int index : dimensions) { + if (index < 0) index = rank + index; + if (index >= 0 && index < rank) { + indices[index] = true; + } + } + + + //move the output (kept) dimensions to the front + for (int i = 0; i < rank; i++) { + + if (!indices[i]) { + + new_bases[ind] = bases[i]; + new_strides[ind] = strides[i]; + ind++; + } + } + + + int first_rank = ind; + + first_end = ind; + first_begin = 0; + //nd4j_printf("rffrr ss & %d ind-- %d %d\n", first_rank, first_begin, first_end); + //squash output rank + if (first_squash && first_rank > 1) { + + if (order == 'c') { + int uniq_ind = first_end-1; + for (int i = first_end - 2; i >= first_begin; i--) { + if (new_strides[i] == new_bases[uniq_ind] * new_strides[uniq_ind]) { + new_bases[uniq_ind] = new_bases[i] * new_bases[uniq_ind]; + new_strides[uniq_ind] = new_strides[uniq_ind]; + --first_rank; + } + else { + --uniq_ind; + new_bases[uniq_ind] = new_bases[i]; + new_strides[uniq_ind] = new_strides[i]; + } + } + first_begin = first_end - first_rank; + } + else { + //squash fortran + int uniq_ind = 0; + for (int i = 1; i < first_end; i++) { + if (new_strides[i] == new_bases[uniq_ind] * new_strides[uniq_ind]) { + new_bases[uniq_ind] = new_bases[i] * new_bases[uniq_ind]; + new_strides[uniq_ind] = new_strides[uniq_ind]; + --first_rank; + } + else { + uniq_ind++; + new_bases[uniq_ind] = new_bases[i]; + new_strides[uniq_ind] = new_strides[i]; + } + } + first_end = first_begin + first_rank; + + } + ind = first_end; + } + + //nd4j_printf("rffrr ss & %d ind-- %d %d\n", first_rank, first_begin, first_end); + //move the reduced (processed) indices to the tail + for (int i = 0; i < rank; i++) { + if (indices[i]) { + new_bases[ind] = bases[i]; + new_strides[ind] = strides[i]; + ind++; + } + } + + second_rank = ind - first_end; + second_end = ind; + second_begin = first_end; + + } + + + if (second_squash && second_rank > 1) { + + if (order == 'c') { + int uniq_ind = second_end - 1; + for (int i = second_end - 2; i >= second_begin; i--) { + if (new_strides[i] == new_bases[uniq_ind] * new_strides[uniq_ind]) { + new_bases[uniq_ind] = new_bases[i] * new_bases[uniq_ind]; + new_strides[uniq_ind] = new_strides[uniq_ind]; + --second_rank; + } + else { + --uniq_ind; + new_bases[uniq_ind] = new_bases[i]; + new_strides[uniq_ind] = new_strides[i]; + } + } + second_begin = second_end - second_rank; + } + else { + int uniq_ind = second_begin; + for (int i = second_begin+1; i < second_end; i++) { + if (new_strides[i] == new_bases[uniq_ind] * new_strides[uniq_ind]) { + new_bases[uniq_ind] = new_bases[i] * new_bases[uniq_ind]; + new_strides[uniq_ind] = new_strides[uniq_ind]; + --second_rank; + } + else { + uniq_ind++; + new_bases[uniq_ind] = new_bases[i]; + new_strides[uniq_ind] = new_strides[i]; + } + } + second_end = second_begin + second_rank; + + } + + }
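+ // Worked example (illustrative): for a c-order shape {2, 3, 4, 5} with
+ // strides {60, 20, 5, 1} and dimensions {1, 3}, the kept axes become the
+ // first part {2, 4} / {60, 5} and the tail becomes {3, 5} / {20, 1}; that
+ // tail is not squashed because 20 != 5 * 1, whereas a contiguous tail such
+ // as {4, 5} / {5, 1} satisfies stride[i] == base[i + 1] * stride[i + 1] and
+ // collapses into the single axis {20} / {1}.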
+ + return; + } + + //basic CRTP static polymorphism classes for offset increments + + template + struct CoordsBaseMovement { + void init(const Nd4jLong* bases, const Nd4jLong* strides1, const Nd4jLong* strides2, int rank, int start = 0) { + static_cast(this)->initImpl(bases, strides1, strides2, rank, start); + } + + void increment(int skipRank = 0) { + static_cast(this)->incrementImpl(skipRank); + } + + Nd4jLong First() { return static_cast(this)->FirstImpl(); }; + Nd4jLong Second() { return static_cast(this)->SecondImpl(); }; + }; + + + struct ZipGenericCoordsRank1Stride1 : CoordsBaseMovement { + + size_t offset1; + size_t offset2; + + + void initImpl(const Nd4jLong* bases, const Nd4jLong* strides1, const Nd4jLong* strides2, int rank, int start = 0) { + offset1 = start; + offset2 = start; + } + + void incrementImpl(int skipRank = 0) { + offset1 += 1; + offset2 += 1; + } + + Nd4jLong FirstImpl() { return offset1; }; + Nd4jLong SecondImpl() { return offset2; }; + + }; + + struct ZipGenericCoordsRank1BothStrideN : CoordsBaseMovement { + size_t stride1; + size_t stride2; + size_t offset1; + size_t offset2; + + + void initImpl(const Nd4jLong* bases, const Nd4jLong* strides1, const Nd4jLong* strides2, int rank, int start = 0) { + stride1 = strides1[0]; + stride2 = strides2[0]; + offset1 = start * stride1; + offset2 = start * stride2; + } + + void incrementImpl(int skipRank = 0) { + offset1 += stride1; + offset2 += stride2; + } + + Nd4jLong FirstImpl() { return offset1; }; + Nd4jLong SecondImpl() { return offset2; }; + + }; + + template + struct ZipGenericCoordsConstMovementSecondStride1 : CoordsBaseMovement> { + sd::CoordsState cst; + Nd4jLong coords[MAX_RANK]; + size_t offset1; + size_t offset2; + int _rank; + + void initImpl(const Nd4jLong* bases, const Nd4jLong* strides1, const Nd4jLong* strides2, int rank, int start = 0) { + offset1 = sd::init_coords(cst, start, bases, strides1); + offset2 = start * 1; + } + + void incrementImpl(int skipRank = 0) { + offset1 = sd::inc_coords(cst, offset1); + offset2 += 1; + } + + Nd4jLong FirstImpl() { return offset1; }; + Nd4jLong SecondImpl() { return offset2; }; + + }; + + template + struct ZipGenericCoordsConstMovementSecondStrideN : CoordsBaseMovement> { + sd::CoordsState cst; + Nd4jLong _stride2; + Nd4jLong coords[MAX_RANK]; + size_t offset1; + size_t offset2; + int _rank; + + void initImpl(const Nd4jLong* bases, const Nd4jLong* strides1, const Nd4jLong* strides2, int rank, int start = 0) { + _stride2 = strides2[0]; + offset1 = sd::init_coords(cst, start, bases, strides1); + offset2 = start * _stride2; + } + + void incrementImpl(int skipRank = 0) { + offset1 = sd::inc_coords(cst, offset1); + offset2 += _stride2; + } + + Nd4jLong FirstImpl() { return offset1; }; + Nd4jLong SecondImpl() { return offset2; }; + + }; + + template + struct ZipGenericCoordsMovementSecondStrideN : CoordsBaseMovement> { + const Nd4jLong* _bases; + const Nd4jLong* _strides1; + Nd4jLong _stride2; + Nd4jLong coords[MAX_RANK]; + zip_size_t offset; + int _rank; + + void initImpl(const Nd4jLong* bases, const Nd4jLong* strides1, const Nd4jLong* strides2, int rank, int start = 0) { + + _bases = bases; + _strides1 = strides1; + _stride2 = strides2[0]; + _rank = rank; + if (start == 0) { + for (int i = 0; i < MAX_RANK; i++) { + coords[i] = 0; + } + offset = { 0,0 }; + + } + else { + if (LastIndexFaster) { + sd::index2coords_C(start, rank, bases, (Nd4jLong*)&coords); + } + else { + sd::index2coords_F(start, rank, bases, (Nd4jLong*)&coords); + } + offset.first = 
sd::offset_from_coords(strides1, (Nd4jLong*)&coords, rank); + offset.second = start * _stride2; + } + + } + + void incrementImpl(int skipRank = 0) { + offset.first = inc_coords(_bases, _strides1, (Nd4jLong*)&coords, offset.first, _rank, skipRank); + offset.second += _stride2; + } + + Nd4jLong FirstImpl() { return offset.first; }; + Nd4jLong SecondImpl() { return offset.second; }; + + }; + + template + struct ZipGenericCoordsMovement : CoordsBaseMovement> { + const Nd4jLong* _bases; + const Nd4jLong* _strides1; + const Nd4jLong* _strides2; + Nd4jLong coords[MAX_RANK]; + zip_size_t offset; + int _rank; + + void initImpl(const Nd4jLong* bases, const Nd4jLong* strides1, const Nd4jLong* strides2, int rank, int start = 0) { + + _bases = bases; + _strides1 = strides1; + _strides2 = strides2; + _rank = rank; + if (start == 0) { + for (int i = 0; i < MAX_RANK; i++) { + coords[i] = 0; + } + offset = { 0,0 }; + + } + else { + if (LastIndexFaster) { + sd::index2coords_C(start, rank, bases, (Nd4jLong*)&coords); + } + else { + sd::index2coords_F(start, rank, bases, (Nd4jLong*)&coords); + } + offset = sd::offset_from_coords(strides1, strides2, (Nd4jLong*)&coords, rank); + } + + } + + void incrementImpl(int skipRank = 0) { + offset = inc_coords(_bases, _strides1, _strides2, (Nd4jLong*)&coords, offset, _rank, skipRank); + } + + Nd4jLong FirstImpl() { return offset.first; }; + Nd4jLong SecondImpl() { return offset.second; }; + + }; + } + + #endif \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/indexreduce.hpp b/libnd4j/include/loops/cpu/indexreduce.hpp index 296fbcdef..9373e3feb 100644 --- a/libnd4j/include/loops/cpu/indexreduce.hpp +++ b/libnd4j/include/loops/cpu/indexreduce.hpp @@ -69,7 +69,7 @@ Nd4jLong IndexReduce::execScalar(const void *vx, const Nd4jLong *xShapeInf for (int e = 0; e < maxThreads; e++) intermediatery[e].index = -1; - if (xEws == 1) { + if (xEws == 1 && shape::order(xShapeInfo) == 'c') { auto func = PRAGMA_THREADS_FOR { intermediatery[thread_id] = OpType::startingIndexValue(x); diff --git a/libnd4j/include/loops/cuda/indexreduce.cu b/libnd4j/include/loops/cuda/indexreduce.cu index e6a52b16a..dbe03a9bf 100644 --- a/libnd4j/include/loops/cuda/indexreduce.cu +++ b/libnd4j/include/loops/cuda/indexreduce.cu @@ -188,7 +188,7 @@ namespace functions { auto reductionBuffer = static_cast(vreductionBuffer); auto order = shape::order(xShapeInfo); int tid = blockIdx.x * blockDim.x + threadIdx.x; - __shared__ volatile int resultScalar; + __shared__ volatile bool resultScalar; //shared memory space for storing intermediate results __shared__ IndexValue* sPartials; @@ -214,17 +214,10 @@ namespace functions { zLen = shape::length(zShapeInfo); else zLen = 1; - if (dimensionLength == 1) { - if (zLen == 1 && (dimension == nullptr || dimension[0] == MAX_DIMENSION)) - resultScalar = 1; - else - resultScalar = 0; - } - else - resultScalar = 0; - if (zLen == 1) - resultScalar = 1; + resultScalar = true; + else + resultScalar = false; xLength = shape::length(xShapeInfo); } diff --git a/libnd4j/include/ops/declarable/generic/reduce/argamax.cpp b/libnd4j/include/ops/declarable/generic/reduce/argamax.cpp new file mode 100644 index 000000000..5fb452227 --- /dev/null +++ b/libnd4j/include/ops/declarable/generic/reduce/argamax.cpp @@ -0,0 +1,95 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. 
+ * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + + // Created by Abdelrauf 2020 (based on argmax) + +#include +#if NOT_EXCLUDED(OP_argamax) + +#include +#include +#include +#include + +namespace sd { + namespace ops { + DECLARE_TYPES(argamax) { + getOpDescriptor() + ->setAllowedInputTypes({ ALL_FLOATS,ALL_INTS }) + ->setAllowedOutputTypes({ ALL_INTS }); + } + + CUSTOM_OP_IMPL(argamax, 1, 1, false, 0, -2) { + auto input = INPUT_VARIABLE(0); + auto output = OUTPUT_VARIABLE(0); + + if (output->isEmpty()) + return Status::OK(); + + auto axis = *block.getIArguments(); + + // axis might be dynamic (i.e. tf mode) + if (block.width() > 1 && axis.size() == 0) { + auto axisVector = INPUT_VARIABLE(1); + helpers::adjustAxis(input->rankOf(), axisVector, axis); + helpers::argAbsMax(*input, *output, axis); + } + else { + helpers::argAbsMax(*input, *output, axis); + } + + STORE_RESULT(output); + + return Status::OK(); + } + + DECLARE_SHAPE_FN(argamax) { + std::vector dims; + + if (block.width() == 1) { + dims = *block.getIArguments(); + } else { + auto y = INPUT_VARIABLE(1); + dims = y->template asVectorT(); + } + + auto keepDims = block.numB() ? B_ARG(0) : false; + auto dtype = block.numD() ? D_ARG(0) : DataType::INT64; + + // we're resolving negative axis here + helpers::adjustAxis(shape::rank(inputShape->at(0)), dims); + + auto in = inputShape->at(0); + for (auto d : dims) { + // we have special case here + if (d == sd::DataTypeUtils::max()) + continue; + + REQUIRE_TRUE(d < shape::rank(in), 0, "ArgAmax: axis can't be above rank") + REQUIRE_TRUE(in[d + 1] != 0, 0, "ArgAmax: you can't reduce along axis with 0 in shape"); + } + + // special case - output is scalar + if (dims.empty() || (dims.size() == 1 && dims.at(0) == sd::DataTypeUtils::max())) { + return SHAPELIST(ConstantShapeHelper::getInstance()->scalarShapeInfo(dtype)); + } + + return SHAPELIST(ShapeUtils::evalReduceShapeInfo('c', dims, inputShape->at(0), dtype, keepDims, false, block.getWorkspace())); + } + } +} + +#endif diff --git a/libnd4j/include/ops/declarable/generic/reduce/argamin.cpp b/libnd4j/include/ops/declarable/generic/reduce/argamin.cpp new file mode 100644 index 000000000..4f590aae8 --- /dev/null +++ b/libnd4j/include/ops/declarable/generic/reduce/argamin.cpp @@ -0,0 +1,95 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + + // Created by Abdelrauf 2020 (based on argmax) + +#include +#if NOT_EXCLUDED(OP_argamin) + +#include +#include +#include +#include + +namespace sd { + namespace ops { + DECLARE_TYPES(argamin) { + getOpDescriptor() + ->setAllowedInputTypes({ ALL_FLOATS,ALL_INTS }) + ->setAllowedOutputTypes({ ALL_INTS }); + } + + CUSTOM_OP_IMPL(argamin, 1, 1, false, 0, -2) { + auto input = INPUT_VARIABLE(0); + auto output = OUTPUT_VARIABLE(0); + + if (output->isEmpty()) + return Status::OK(); + + auto axis = *block.getIArguments(); + + // axis might be dynamic (i.e. tf mode) + if (block.width() > 1 && axis.size() == 0) { + auto axisVector = INPUT_VARIABLE(1); + helpers::adjustAxis(input->rankOf(), axisVector, axis); + helpers::argAbsMin(*input, *output, axis); + } + else { + helpers::argAbsMin(*input, *output, axis); + } + + STORE_RESULT(output); + + return Status::OK(); + } + + DECLARE_SHAPE_FN(argamin) { + std::vector dims; + + if (block.width() == 1) { + dims = *block.getIArguments(); + } else { + auto y = INPUT_VARIABLE(1); + dims = y->template asVectorT(); + } + + auto keepDims = block.numB() ? B_ARG(0) : false; + auto dtype = block.numD() ? D_ARG(0) : DataType::INT64; + + // we're resolving negative axis here + helpers::adjustAxis(shape::rank(inputShape->at(0)), dims); + + auto in = inputShape->at(0); + for (auto d : dims) { + // we have special case here + if (d == sd::DataTypeUtils::max()) + continue; + + REQUIRE_TRUE(d < shape::rank(in), 0, "ArgAmin: axis can't be above rank") + REQUIRE_TRUE(in[d + 1] != 0, 0, "ArgAmin: you can't reduce along axis with 0 in shape"); + } + + // special case - output is scalar + if (dims.empty() || (dims.size() == 1 && dims.at(0) == sd::DataTypeUtils::max())) { + return SHAPELIST(ConstantShapeHelper::getInstance()->scalarShapeInfo(dtype)); + } + + return SHAPELIST(ShapeUtils::evalReduceShapeInfo('c', dims, inputShape->at(0), dtype, keepDims, false, block.getWorkspace())); + } + } +} + +#endif diff --git a/libnd4j/include/ops/declarable/generic/reduce/argmax.cpp b/libnd4j/include/ops/declarable/generic/reduce/argmax.cpp index 928a0f7d0..9c45b4c37 100644 --- a/libnd4j/include/ops/declarable/generic/reduce/argmax.cpp +++ b/libnd4j/include/ops/declarable/generic/reduce/argmax.cpp @@ -1,6 +1,6 @@ /******************************************************************************* * Copyright (c) 2015-2018 Skymind, Inc. - * + * Copyright (c) 2019 Konduit K.K. * This program and the accompanying materials are made available under the * terms of the Apache License, Version 2.0 which is available at * https://www.apache.org/licenses/LICENSE-2.0. @@ -22,6 +22,7 @@ #if NOT_EXCLUDED(OP_argmax) #include +#include #include #include @@ -29,7 +30,7 @@ namespace sd { namespace ops { DECLARE_TYPES(argmax) { getOpDescriptor() - ->setAllowedInputTypes(sd::DataType::ANY) + ->setAllowedInputTypes({ ALL_FLOATS,ALL_INTS }) ->setAllowedOutputTypes({ALL_INTS}); } @@ -37,18 +38,19 @@ namespace sd { auto input = INPUT_VARIABLE(0); auto output = OUTPUT_VARIABLE(0); - auto axis = *block.getIArguments(); + if (output->isEmpty()) + return Status::OK(); + auto axis = *block.getIArguments(); + // axis might be dynamic (i.e. 
tf mode) if (block.width() > 1 && axis.size() == 0) { auto axisVector = INPUT_VARIABLE(1); helpers::adjustAxis(input->rankOf(), axisVector, axis); - - input->applyIndexReduce(indexreduce::IndexMax, *output, axis); + helpers::argMax(*input, *output, axis); } else { - helpers::adjustAxis(input->rankOf(), axis); + helpers::argMax(*input, *output, axis); - input->applyIndexReduce(indexreduce::IndexMax, *output, axis); } STORE_RESULT(output); @@ -66,23 +68,28 @@ namespace sd { dims = y->template asVectorT(); } + auto keepDims = block.numB() ? B_ARG(0) : false; + auto dtype = block.numD() ? D_ARG(0) : DataType::INT64; + // we're resolving negative axis here helpers::adjustAxis(shape::rank(inputShape->at(0)), dims); - if (dims.size() > 1) - std::sort(dims.begin(), dims.end()); + auto in = inputShape->at(0); + for (auto d : dims) { + // we have special case here + if (d == sd::DataTypeUtils::max()) + continue; - - for (auto d:dims) { - REQUIRE_TRUE(inputShape->at(0)[d+1] != 0, 0, "ArgMax: you can't reduce along axis with 0 in shape"); + REQUIRE_TRUE(d < shape::rank(in), 0, "ArgMax: axis can't be above rank") + REQUIRE_TRUE(in[d + 1] != 0, 0, "ArgMax: you can't reduce along axis with 0 in shape"); } // special case - output is scalar - if (dims.size() == 0 || (dims.size() == 1 && dims.at(0) == sd::DataTypeUtils::max())) { - return SHAPELIST(ConstantShapeHelper::getInstance()->scalarShapeInfo(sd::DataType::INT64)); + if (dims.empty() || (dims.size() == 1 && dims.at(0) == sd::DataTypeUtils::max())) { + return SHAPELIST(ConstantShapeHelper::getInstance()->scalarShapeInfo(dtype)); } - return SHAPELIST(ShapeUtils::evalReduceShapeInfo('c', dims, inputShape->at(0), DataType::INT64, false, false, block.getWorkspace())); + return SHAPELIST(ShapeUtils::evalReduceShapeInfo('c', dims, inputShape->at(0), dtype, keepDims, false, block.getWorkspace())); } } } diff --git a/libnd4j/include/ops/declarable/generic/reduce/argmin.cpp b/libnd4j/include/ops/declarable/generic/reduce/argmin.cpp index f4fb25daa..97430a24f 100644 --- a/libnd4j/include/ops/declarable/generic/reduce/argmin.cpp +++ b/libnd4j/include/ops/declarable/generic/reduce/argmin.cpp @@ -21,15 +21,17 @@ #include #if NOT_EXCLUDED(OP_argmin) -#include #include +#include +#include +#include namespace sd { namespace ops { DECLARE_TYPES(argmin) { getOpDescriptor() - ->setAllowedInputTypes(sd::DataType::ANY) + ->setAllowedInputTypes({ ALL_FLOATS,ALL_INTS }) ->setAllowedOutputTypes({ALL_INTS}); } @@ -39,16 +41,18 @@ namespace sd { auto output = OUTPUT_VARIABLE(0); + if (output->isEmpty()) + return Status::OK(); + // axis might be dynamic (i.e. tf mode) if (block.width() > 1 && axis.size() == 0) { auto axisVector = INPUT_VARIABLE(1); helpers::adjustAxis(input->rankOf(), axisVector, axis); + helpers::argMin(*input, *output, axis); + } + else { + helpers::argMin(*input, *output, axis); - input->applyIndexReduce(indexreduce::IndexMin, *output, axis); - } else { - helpers::adjustAxis(input->rankOf(), axis); - - input->applyIndexReduce(indexreduce::IndexMin, *output, axis); } STORE_RESULT(output); @@ -58,7 +62,7 @@ namespace sd { DECLARE_SHAPE_FN(argmin) { std::vector dims; - auto in = inputShape->at(0); + if (block.width() == 1) { dims = *block.getIArguments(); } else { @@ -66,23 +70,28 @@ namespace sd { dims = y->template asVectorT(); } + auto keepDims = block.numB() ? B_ARG(0) : false; + auto dtype = block.numD() ? 
D_ARG(0) : DataType::INT64; + // we're resolving negative axis here - helpers::adjustAxis(shape::rank(in), dims); + helpers::adjustAxis(shape::rank(inputShape->at(0)), dims); - if (dims.size() > 1) - std::sort(dims.begin(), dims.end()); + auto in = inputShape->at(0); + for (auto d : dims) { + // we have special case here + if (d == sd::DataTypeUtils::max()) + continue; - for (auto d:dims) { - REQUIRE_TRUE(inputShape->at(0)[d+1] != 0, 0, "ArgMin: you can't reduce along axis with 0 in shape"); + REQUIRE_TRUE(d < shape::rank(in), 0, "ArgMin: axis can't be above rank") + REQUIRE_TRUE(in[d + 1] != 0, 0, "ArgMin: you can't reduce along axis with 0 in shape"); } // special case - output is scalar - if (dims.size() == 0 || (dims.size() == 1 && dims.at(0) == sd::DataTypeUtils::max())) { - return SHAPELIST(ConstantShapeHelper::getInstance()->scalarShapeInfo(DataType::INT64)); + if (dims.empty() || (dims.size() == 1 && dims.at(0) == sd::DataTypeUtils::max())) { + return SHAPELIST(ConstantShapeHelper::getInstance()->scalarShapeInfo(dtype)); } - auto newShape = ShapeUtils::evalReduceShapeInfo('c', dims, in, DataType::INT64, false, false, block.getWorkspace()); - return SHAPELIST(newShape); + return SHAPELIST(ShapeUtils::evalReduceShapeInfo('c', dims, inputShape->at(0), dtype, keepDims, false, block.getWorkspace())); } } diff --git a/libnd4j/include/ops/declarable/headers/parity_ops.h b/libnd4j/include/ops/declarable/headers/parity_ops.h index 8fae1b63c..74221133c 100644 --- a/libnd4j/include/ops/declarable/headers/parity_ops.h +++ b/libnd4j/include/ops/declarable/headers/parity_ops.h @@ -52,6 +52,32 @@ namespace sd { DECLARE_CUSTOM_OP(argmin, 1, 1, false, 0, -2); #endif + /** + * This operation returns index of absolute max element in a given NDArray (optionally: along given dimension(s)) + * Expected input: + * 0: N-dimensional array + * 1: optional axis vector + * + * Int args: + * 0: optional axis + */ + #if NOT_EXCLUDED(OP_argamax) + DECLARE_CUSTOM_OP(argamax, 1, 1, false, 0, -2); + #endif + + /** + * This operation returns index of absolute min element in a given NDArray (optionally: along given dimension(s)) + * Expected input: + * 0: N-dimensional array + * 1: optional axis vector + * + * Int args: + * 0: optional axis + */ + #if NOT_EXCLUDED(OP_argamin) + DECLARE_CUSTOM_OP(argamin, 1, 1, false, 0, -2); + #endif + /** * This operation provides various normalization modes: * 0: frobenius diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argamax.cpp.in b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argamax.cpp.in new file mode 100644 index 000000000..533a94aab --- /dev/null +++ b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argamax.cpp.in @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ +// +// @author AbdelRauf +// + +#include + +namespace sd { + namespace ops { + namespace helpers { + BUILD_DOUBLE_TEMPLATE(template void argAbsMax_, (const NDArray& input, NDArray& output, const std::vector& dimensions), LIBND4J_TYPES_@FL_TYPE_INDEX@, INDEXING_TYPES); + } + } +} \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argamin.cpp.in b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argamin.cpp.in new file mode 100644 index 000000000..4f7c78505 --- /dev/null +++ b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argamin.cpp.in @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ +// +// @author AbdelRauf +// + +#include + +namespace sd { + namespace ops { + namespace helpers { + BUILD_DOUBLE_TEMPLATE(template void argAbsMin_, (const NDArray& input, NDArray& output, const std::vector& dimensions), LIBND4J_TYPES_@FL_TYPE_INDEX@, INDEXING_TYPES); + } + } +} \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argmax.cpp.in b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argmax.cpp.in new file mode 100644 index 000000000..770f155f4 --- /dev/null +++ b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argmax.cpp.in @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ +// +// @author AbdelRauf +// + +#include + +namespace sd { + namespace ops { + namespace helpers { + BUILD_DOUBLE_TEMPLATE(template void argMax_, (const NDArray& input, NDArray& output, const std::vector& dimensions), LIBND4J_TYPES_@FL_TYPE_INDEX@, INDEXING_TYPES); + } + } +} \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argmin.cpp.in b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argmin.cpp.in new file mode 100644 index 000000000..0149b890e --- /dev/null +++ b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argmin.cpp.in @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ +// +// @author AbdelRauf +// + +#include + +namespace sd { + namespace ops { + namespace helpers { + BUILD_DOUBLE_TEMPLATE(template void argMin_, (const NDArray& input, NDArray& output, const std::vector& dimensions), LIBND4J_TYPES_@FL_TYPE_INDEX@, INDEXING_TYPES); + } + } +} \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_0.cpp b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_0.cpp similarity index 95% rename from libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_0.cpp rename to libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_0.cpp index 94e74cd84..22258266b 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_0.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_0.cpp @@ -19,7 +19,7 @@ // #include -#include "../crop_and_resize.hpp" +#include "ops/declarable/helpers/cpu/crop_and_resize.hpp" namespace sd { namespace ops { diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_1.cpp b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_1.cpp similarity index 95% rename from libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_1.cpp rename to libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_1.cpp index 9820c1392..f2b891d5e 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_1.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_1.cpp @@ -19,7 +19,7 @@ // #include -#include "../crop_and_resize.hpp" +#include "ops/declarable/helpers/cpu/crop_and_resize.hpp" namespace sd { namespace ops { diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_2.cpp 
b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_2.cpp similarity index 95% rename from libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_2.cpp rename to libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_2.cpp index 2a78f285f..c475d994c 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_2.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_2.cpp @@ -19,7 +19,7 @@ // #include -#include "../crop_and_resize.hpp" +#include "ops/declarable/helpers/cpu/crop_and_resize.hpp" namespace sd { namespace ops { diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_3.cpp b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_3.cpp similarity index 95% rename from libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_3.cpp rename to libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_3.cpp index 13757997a..11175a02d 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_3.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_3.cpp @@ -19,7 +19,7 @@ // #include -#include "../crop_and_resize.hpp" +#include "ops/declarable/helpers/cpu/crop_and_resize.hpp" namespace sd { namespace ops { diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_4.cpp b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_4.cpp similarity index 95% rename from libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_4.cpp rename to libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_4.cpp index ea3043eeb..cea328084 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_4.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_4.cpp @@ -19,7 +19,7 @@ // #include -#include "../crop_and_resize.hpp" +#include "ops/declarable/helpers/cpu/crop_and_resize.hpp" namespace sd { namespace ops { diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_5.cpp b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_5.cpp similarity index 95% rename from libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_5.cpp rename to libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_5.cpp index 60c1ae906..81bb8e897 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_5.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_5.cpp @@ -19,7 +19,7 @@ // #include -#include "../crop_and_resize.hpp" +#include "ops/declarable/helpers/cpu/crop_and_resize.hpp" namespace sd { namespace ops { diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_6.cpp b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_6.cpp similarity index 95% rename from libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_6.cpp rename to libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_6.cpp index 
6e33d5546..415ab39e2 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_6.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_6.cpp @@ -19,7 +19,7 @@ // #include -#include "../crop_and_resize.hpp" +#include "ops/declarable/helpers/cpu/crop_and_resize.hpp" namespace sd { namespace ops { diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_7.cpp b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_7.cpp similarity index 95% rename from libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_7.cpp rename to libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_7.cpp index ef4a199fd..47d16e6db 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_7.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_7.cpp @@ -19,7 +19,7 @@ // #include -#include "../crop_and_resize.hpp" +#include "ops/declarable/helpers/cpu/crop_and_resize.hpp" namespace sd { namespace ops { diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_8.cpp b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_8.cpp similarity index 95% rename from libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_8.cpp rename to libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_8.cpp index 71cd2ebb8..902ade68c 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_8.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_8.cpp @@ -19,7 +19,7 @@ // #include -#include "../crop_and_resize.hpp" +#include "ops/declarable/helpers/cpu/crop_and_resize.hpp" namespace sd { namespace ops { diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_9.cpp b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_9.cpp similarity index 95% rename from libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_9.cpp rename to libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_9.cpp index e9db5c303..559564903 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_9.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_9.cpp @@ -19,7 +19,7 @@ // #include -#include "../crop_and_resize.hpp" +#include "ops/declarable/helpers/cpu/crop_and_resize.hpp" namespace sd { namespace ops { diff --git a/libnd4j/include/ops/declarable/helpers/cpu/indexReductions.cpp b/libnd4j/include/ops/declarable/helpers/cpu/indexReductions.cpp new file mode 100644 index 000000000..4665a7b6f --- /dev/null +++ b/libnd4j/include/ops/declarable/helpers/cpu/indexReductions.cpp @@ -0,0 +1,56 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. 
+ * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ +// +// @author AbdelRauf +// + +#include + +namespace sd { + namespace ops { + namespace helpers { + ////////////////////////////////////////////////////////////////////////// + template + void argMax_(const NDArray& input, NDArray& output, const std::vector& dimensions); + + template + void argMin_(const NDArray& input, NDArray& output, const std::vector& dimensions); + + template + void argAbsMax_(const NDArray& input, NDArray& output, const std::vector& dimensions); + + template + void argAbsMin_(const NDArray& input, NDArray& output, const std::vector& dimensions); + + ////////////////////////////////////////////////////////////////////////// + void argMax(const NDArray& input, NDArray& output, const std::vector& dimensions) { + BUILD_DOUBLE_SELECTOR(input.dataType(), output.dataType(), argMax_, (input, output, dimensions), LIBND4J_TYPES, INDEXING_TYPES); + } + + void argMin(const NDArray& input, NDArray& output, const std::vector& dimensions) { + BUILD_DOUBLE_SELECTOR(input.dataType(), output.dataType(), argMin_, (input, output, dimensions), LIBND4J_TYPES, INDEXING_TYPES); + } + + void argAbsMax(const NDArray& input, NDArray& output, const std::vector& dimensions) { + BUILD_DOUBLE_SELECTOR(input.dataType(), output.dataType(), argAbsMax_, (input, output, dimensions), LIBND4J_TYPES, INDEXING_TYPES); + } + + void argAbsMin(const NDArray& input, NDArray& output, const std::vector& dimensions) { + BUILD_DOUBLE_SELECTOR(input.dataType(), output.dataType(), argAbsMin_, (input, output, dimensions), LIBND4J_TYPES, INDEXING_TYPES); + } + } + } +} \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/helpers/cpu/indexReductions.hpp b/libnd4j/include/ops/declarable/helpers/cpu/indexReductions.hpp new file mode 100644 index 000000000..7d376e012 --- /dev/null +++ b/libnd4j/include/ops/declarable/helpers/cpu/indexReductions.hpp @@ -0,0 +1,900 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + // + // @author AbdelRauf + // +#include +#include +#include +#include +#include +#include +#include +#include +#if 1 +#define LOG_CALLS(X) +#else + +#define LOG_CALLS(X) nd4j_printf("___%s_________%d+\n", __PRETTY_FUNCTION__, X); +#endif +namespace sd { + namespace ops { + namespace helpers { + constexpr int threadingThreshold = 4096; + template + FORCEINLINE void indexInnerReductionRank1(const X* buffer, X& current, Z& argCurrent, const Nd4jLong& loopCount) + { + argCurrent = 0; + current = buffer[0]; + LOG_CALLS(0) + Nd4jLong j_offset = 0; + for (Z j = 0; j < loopCount; j++) { + ReductionOp::update(current, argCurrent, buffer[j], j); + } + } + + template + FORCEINLINE void indexInnerReductionRank1(const X* buffer, X& current, Z& argCurrent, const Nd4jLong& loopCount, const Nd4jLong& inner_stride) + { + argCurrent = 0; + current = buffer[0]; + LOG_CALLS(0) + Nd4jLong j_offset = 0; + for (Z j = 0; j < loopCount; j++) { + ReductionOp::update(current, argCurrent, buffer[j_offset], j); + j_offset += inner_stride; + } + } + + template + FORCEINLINE void indexInnerReductionConstRank(const X* buffer, X& current, Z& argCurrent, const Nd4jLong* bases, const Nd4jLong* strides, const Nd4jLong outerLoopCount, const Nd4jLong& innerLoopCount) + { + //skip 1 from the beginning or end depending the Order + constexpr size_t updated_index = LastIndexFaster ? 0 : 1; + constexpr size_t updated_rank = constRank - 1; + sd::CoordsState cst; + //we skip 1 + size_t offset = sd::init_coords(cst, 0, bases + updated_index, strides + updated_index); + Z startIndex = 0; + argCurrent = 0; + current = buffer[offset]; + LOG_CALLS(0) + for (Z i = 0; i < outerLoopCount; i++) { + const X* inner_buffer = &(buffer[offset]); + //typename std::make_signed::type iArgMax = -1; + for (Z j = 0; j < innerLoopCount; j++) { + ReductionOp::update(current, argCurrent, inner_buffer[j], j + startIndex); + } + //we skip 1 + offset = sd::inc_coords(cst, offset); + startIndex += innerLoopCount; + } + } + + template + FORCEINLINE void indexInnerReductionConstRank(const X* buffer, X& current, Z& argCurrent, const Nd4jLong* bases, const Nd4jLong* strides, const Nd4jLong outerLoopCount, const Nd4jLong& innerLoopCount, const Nd4jLong& inner_stride) + { + //skip 1 from the beginning or end depending the Order + constexpr size_t updated_index = LastIndexFaster ? 
0 : 1; + constexpr size_t updated_rank = constRank - 1; + sd::CoordsState cst; + //we skip 1 + size_t offset = sd::init_coords(cst, 0, bases + updated_index, strides + updated_index); + Z startIndex = 0; + argCurrent = 0; + current = buffer[offset]; + LOG_CALLS(0) + for (Z i = 0; i < outerLoopCount; i++) { + const X* inner_buffer = &(buffer[offset]); + for (Z j = 0; j < innerLoopCount; j++) { + ReductionOp::update(current, argCurrent, *inner_buffer, j + startIndex); + inner_buffer += inner_stride; + } + //we alreaddy skiped + offset = sd::inc_coords(cst, offset); + startIndex += innerLoopCount; + } + } + + template + FORCEINLINE void indexInnerReduction(const int& rank, const X* buffer, X& current, Z& argCurrent, const Nd4jLong* bases, const Nd4jLong* strides, const Nd4jLong& outerLoopStart, const Nd4jLong& outerLoopStop, const Nd4jLong& innerLoopCount) + { + size_t offset = 0; + Nd4jLong outerLoopCount = outerLoopStop - outerLoopStart; + Nd4jLong coords[MAX_RANK] = {}; + Nd4jLong* ptr_coords = (Nd4jLong*)&coords; + if (outerLoopStart > 0) { + sd::index2coords_C(outerLoopStart, rank - 1, bases, ptr_coords); + offset = sd::offset_from_coords(strides, ptr_coords, rank); + } + Z startIndex = outerLoopStart * innerLoopCount; + argCurrent = startIndex; + current = buffer[offset]; + LOG_CALLS(0) + for (Z i = 0; i < outerLoopCount; i++) { + const X* inner_buffer = &(buffer[offset]); + //typename std::make_signed::type iArgMax = -1; + for (Z j = 0; j < innerLoopCount; j++) { + //nd4j_printf("%f\n", inner_buffer[j]); + ReductionOp::update(current, argCurrent, inner_buffer[j], j + startIndex); + } + offset = inc_coords(bases, strides, ptr_coords, offset, rank, 1); + //if (iArgMax >= 0) argCurrent = startIndex + iArgMax; + startIndex += innerLoopCount; + } + } + + template + FORCEINLINE void indexInnerReduction(const int& rank, const X* buffer, X& current, Z& argCurrent, const Nd4jLong* bases, const Nd4jLong* strides, const Nd4jLong& outerLoopStart, const Nd4jLong& outerLoopStop, const Nd4jLong& innerLoopCount, const Nd4jLong& inner_stride) + { + size_t offset = 0; + Nd4jLong outerLoopCount = outerLoopStop - outerLoopStart; + Nd4jLong coords[MAX_RANK] = {}; + Nd4jLong* ptr_coords = (Nd4jLong*)&coords; + if (outerLoopStart > 0) { + sd::index2coords_C(outerLoopStart, rank - 1, bases, ptr_coords); + offset = sd::offset_from_coords(strides, ptr_coords, rank); + } + Z startIndex = outerLoopStart * innerLoopCount; + argCurrent = startIndex; + current = buffer[offset]; + LOG_CALLS(0) + for (Z i = 0; i < outerLoopCount; i++) { + const X* inner_buffer = &(buffer[offset]); + //typename std::make_signed::type iArgMax = -1; + for (Z j = 0; j < innerLoopCount; j++) { + ReductionOp::update(current, argCurrent, inner_buffer[j * inner_stride], startIndex + j); + } + offset = inc_coords(bases, strides, ptr_coords, offset, rank, 1); + //offset = inc_coords(bases, strides, ptr_coords, offset, rank, 1); + //if (iArgMax >= 0) argCurrent = startIndex + iArgMax; + startIndex += innerLoopCount; + } + } + + template + FORCEINLINE void indexInnerReductionRank1Block4WithMerge(const X* buffer, X& current, Z& argCurrent, const Nd4jLong& loopCount) + { + argCurrent = 0; + current = buffer[0]; + LOG_CALLS(0) + Nd4jLong loopCount4 = loopCount / 4; + Nd4jLong loopCountEnd = loopCount4 + (loopCount & 3); + const X* buffer1 = buffer + 1 * loopCount4; + const X* buffer2 = buffer1 + 1 * loopCount4; + const X* buffer3 = buffer2 + 1 * loopCount4; + X current1 = *buffer1; + X current2 = *buffer2; + X current3 = *buffer3; + Z argCurrent1 = 
0; + Z argCurrent2 = 0; + Z argCurrent3 = 0; + for (Z j = 0; j < loopCount4; j++) { + ReductionOp::update(current, argCurrent, buffer[j], j); + ReductionOp::update(current1, argCurrent1, buffer1[j], j); + ReductionOp::update(current2, argCurrent2, buffer2[j], j); + ReductionOp::update(current3, argCurrent3, buffer3[j], j); + } + //tail + for (Z j = loopCount4; j < loopCountEnd; j++) { + ReductionOp::update(current3, argCurrent3, buffer3[j], j); + } + //merge + argCurrent1 += loopCount4; + argCurrent2 += 2 * loopCount4; + argCurrent3 += 3 * loopCount4; + ReductionOp::update(current, argCurrent, current1, argCurrent1); + ReductionOp::update(current, argCurrent, current2, argCurrent2); + ReductionOp::update(current, argCurrent, current3, argCurrent3); + } + + template + FORCEINLINE void indexInnerReductionRank1Block4WithMerge(const X* buffer, X& current, Z& argCurrent, const Nd4jLong& loopCount, const Nd4jLong& inner_stride) + { + argCurrent = 0; + current = buffer[0]; + LOG_CALLS(0) + Nd4jLong loopCount4 = loopCount / 4; + Nd4jLong loopCountEnd = loopCount4 + (loopCount & 3); + const X* buffer1 = buffer + inner_stride * loopCount4; + const X* buffer2 = buffer1 + inner_stride * loopCount4; + const X* buffer3 = buffer2 + inner_stride * loopCount4; + X current1 = *buffer1; + X current2 = *buffer2; + X current3 = *buffer3; + Z argCurrent1 = 0; + Z argCurrent2 = 0; + Z argCurrent3 = 0; + Nd4jLong j_offset = 0; + for (Z j = 0; j < loopCount4; j++) { + ReductionOp::update(current, argCurrent, buffer[j_offset], j); + ReductionOp::update(current1, argCurrent1, buffer1[j_offset], j); + ReductionOp::update(current2, argCurrent2, buffer2[j_offset], j); + ReductionOp::update(current3, argCurrent3, buffer3[j_offset], j); + j_offset += inner_stride; + } + //tail + for (Z j = loopCount4; j < loopCountEnd; j++) { + ReductionOp::update(current3, argCurrent3, buffer3[j_offset], j); + j_offset += inner_stride; + } + //merge + argCurrent1 += loopCount4; + argCurrent2 += 2 * loopCount4; + argCurrent3 += 3 * loopCount4; + ReductionOp::update(current, argCurrent, current1, argCurrent1); + ReductionOp::update(current, argCurrent, current2, argCurrent2); + ReductionOp::update(current, argCurrent, current3, argCurrent3); + } + + template + FORCEINLINE void indexInnerReductionRank1Block4(const X* buffer, const X* buffer1, const X* buffer2, const X* buffer3, Z* output, Z* output1, Z* output2, Z* output3, const Nd4jLong& loopCount) + { + LOG_CALLS(0) + Z argCurrent = 0; + Z argCurrent1 = 0; + Z argCurrent2 = 0; + Z argCurrent3 = 0; + X current = buffer[0]; + X current1 = buffer1[0]; + X current2 = buffer2[0]; + X current3 = buffer3[0]; + for (Z j = 0; j < loopCount; j++) { + ReductionOp::update(current, argCurrent, buffer[j], j); + ReductionOp::update(current1, argCurrent1, buffer1[j], j); + ReductionOp::update(current2, argCurrent2, buffer2[j], j); + ReductionOp::update(current3, argCurrent3, buffer3[j], j); + } + *output = argCurrent; + *output1 = argCurrent1; + *output2 = argCurrent2; + *output3 = argCurrent3; + return; + } + + template + FORCEINLINE void indexInnerReductionRank1Block4(const X* buffer, const X* buffer1, const X* buffer2, const X* buffer3, Z* output, Z* output1, Z* output2, Z* output3, const Nd4jLong& loopCount, const Nd4jLong& inner_stride) + { + LOG_CALLS(0) + Z argCurrent = 0; + Z argCurrent1 = 0; + Z argCurrent2 = 0; + Z argCurrent3 = 0; + X current = buffer[0]; + X current1 = buffer1[0]; + X current2 = buffer2[0]; + X current3 = buffer3[0]; + Nd4jLong j_offset = 0; + for (Z j = 0; j < loopCount; 
j++) { + ReductionOp::update(current, argCurrent, buffer[j_offset], j); + ReductionOp::update(current1, argCurrent1, buffer1[j_offset], j); + ReductionOp::update(current2, argCurrent2, buffer2[j_offset], j); + ReductionOp::update(current3, argCurrent3, buffer3[j_offset], j); + j_offset += inner_stride; + } + *output = argCurrent; + *output1 = argCurrent1; + *output2 = argCurrent2; + *output3 = argCurrent3; + return; + } + + template + FORCEINLINE void indexInnerReductionConstRankBlock4(const X* buffer, const X* buffer1, const X* buffer2, const X* buffer3, + Z* output, Z* output1, Z* output2, Z* output3, const Nd4jLong* bases, const Nd4jLong* strides, + const Nd4jLong& outerLoopCount, const Nd4jLong& innerLoopCount) + { + LOG_CALLS(0) + //skip 1 from the beginning or end depending the Order + constexpr size_t updated_index = LastIndexFaster ? 0 : 1; + constexpr size_t updated_rank = constRank - 1; + sd::CoordsState cst; + //we skip 1 + size_t offset = sd::init_coords(cst, 0, bases + updated_index, strides + updated_index); + Z startIndex = 0; + Z argCurrent = 0; + Z argCurrent1 = 0; + Z argCurrent2 = 0; + Z argCurrent3 = 0; + X current = buffer[0]; + X current1 = buffer1[0]; + X current2 = buffer2[0]; + X current3 = buffer3[0]; + //LOG_CALLS(0) + for (Z i = 0; i < outerLoopCount; i++) { + const X* inner_buffer = &(buffer[offset]); + const X* inner_buffer1 = &(buffer1[offset]); + const X* inner_buffer2 = &(buffer2[offset]); + const X* inner_buffer3 = &(buffer3[offset]); + //typename std::make_signed::type iArgMax = -1; + for (Z j = 0; j < innerLoopCount; j++) { + ReductionOp::update(current, argCurrent, inner_buffer[j], j + startIndex); + ReductionOp::update(current1, argCurrent1, inner_buffer1[j], j + startIndex); + ReductionOp::update(current2, argCurrent2, inner_buffer2[j], j + startIndex); + ReductionOp::update(current3, argCurrent3, inner_buffer3[j], j + startIndex); + } + //we skip 1 + offset = sd::inc_coords(cst, offset); + startIndex += innerLoopCount; + } + *output = argCurrent; + *output1 = argCurrent1; + *output2 = argCurrent2; + *output3 = argCurrent3; + return; + } + + template + FORCEINLINE void indexInnerReductionConstRankBlock4(const X* buffer, const X* buffer1, const X* buffer2, const X* buffer3, + Z* output, Z* output1, Z* output2, Z* output3, const Nd4jLong* bases, const Nd4jLong* strides, + const Nd4jLong& outerLoopCount, const Nd4jLong& innerLoopCount, const Nd4jLong& inner_stride) + { + LOG_CALLS(0) + //skip 1 from the beginning or end depending the Order + constexpr size_t updated_index = LastIndexFaster ? 
0 : 1; + constexpr size_t updated_rank = constRank - 1; + sd::CoordsState cst; + //we skip 1 + size_t offset = sd::init_coords(cst, 0, bases + updated_index, strides + updated_index); + Z startIndex = 0; + Z argCurrent = 0; + Z argCurrent1 = 0; + Z argCurrent2 = 0; + Z argCurrent3 = 0; + X current = buffer[0]; + X current1 = buffer1[0]; + X current2 = buffer2[0]; + X current3 = buffer3[0]; + //LOG_CALLS(0) + for (Z i = 0; i < outerLoopCount; i++) { + const X* inner_buffer = &(buffer[offset]); + const X* inner_buffer1 = &(buffer1[offset]); + const X* inner_buffer2 = &(buffer2[offset]); + const X* inner_buffer3 = &(buffer3[offset]); + //typename std::make_signed::type iArgMax = -1; + Nd4jLong inner_offset = 0; + for (Z j = 0; j < innerLoopCount; j++) { + ReductionOp::update(current, argCurrent, inner_buffer[inner_offset], j + startIndex); + ReductionOp::update(current1, argCurrent1, inner_buffer1[inner_offset], j + startIndex); + ReductionOp::update(current2, argCurrent2, inner_buffer2[inner_offset], j + startIndex); + ReductionOp::update(current3, argCurrent3, inner_buffer3[inner_offset], j + startIndex); + inner_offset += inner_stride; + } + //we skip 1 + offset = sd::inc_coords(cst, offset); + startIndex += innerLoopCount; + } + *output = argCurrent; + *output1 = argCurrent1; + *output2 = argCurrent2; + *output3 = argCurrent3; + return; + } + + template + void argIndexCase1Scalar(const int& second_rank,const Nd4jLong* inner_bases,const Nd4jLong* inner_strides, const X* bufferX, Z* outputZ) + { + Nd4jLong inner_total; + Nd4jLong inner_last = 0; + int maxThreads = sd::Environment::getInstance()->maxMasterThreads(); + if (second_rank == 1) { + inner_total = inner_bases[0]; + if (inner_total < threadingThreshold) { + maxThreads = 1; + } + } + else { + inner_total = getLength(inner_bases, second_rank, 1, inner_last); + if (inner_total * inner_last < threadingThreshold) { + maxThreads = 1; + } + } + + + + std::unique_ptr maxValues(new X[maxThreads]); + std::unique_ptr maxIndices(new Z[maxThreads]); + X* ptrMaxValues = maxValues.get(); + Z* ptrMaxIndices = maxIndices.get(); + auto func = [ptrMaxValues, ptrMaxIndices, inner_last, second_rank, inner_bases, inner_strides, bufferX](uint64_t thread_id, int64_t start, int64_t stop, int64_t increment) -> void { + //LOG_CALLS(0) + const Nd4jLong inner_stride = LastIndexFaster ? 
inner_strides[second_rank - 1] : inner_strides[0]; + Z argCurrent; X current; + if (second_rank == 1) { + const Nd4jLong loopTotal = stop - start; + if (inner_stride == 1) { + indexInnerReductionRank1Block4WithMerge(&(bufferX[start]), current, argCurrent, loopTotal); + } + else { + indexInnerReductionRank1Block4WithMerge(&(bufferX[start * inner_stride]), current, argCurrent, loopTotal, inner_stride); + } + ptrMaxIndices[thread_id] = argCurrent + start; + } + else { + if (inner_stride == 1) { + indexInnerReduction(second_rank, bufferX, current, argCurrent, inner_bases, inner_strides, start, stop, inner_last, inner_stride); + } + else { + indexInnerReduction(second_rank, bufferX, current, argCurrent, inner_bases, inner_strides, start, stop, inner_last, inner_stride); + } + ptrMaxIndices[thread_id] = argCurrent; + } + ptrMaxValues[thread_id] = current; + }; +#if 0 + int Count = 0; + func(0, 0, inner_total, 1); +#else + int Count = samediff::Threads::parallel_tad(func, 0, inner_total, 1, maxThreads); +#endif + Z arg = 0; + X current = ptrMaxValues[0]; + + for (Z i = 1; i < Count; i++) { + ReductionOp::update(current, arg, ptrMaxValues[i], i); + } + + *outputZ = ptrMaxIndices[arg]; + } + + + template + void argReductionInnerCases(Movement& movement, Nd4jLong loopTotal, const int& second_rank,const Nd4jLong* inner_bases,const Nd4jLong* inner_strides, const X* bufferX, Z* outputZ) + { + + Nd4jLong inner_stride = true /*LastIndexFaster*/ ? inner_strides[second_rank - 1] : inner_strides[0]; + + Nd4jLong loopTotal_K = loopTotal / 4; + Nd4jLong loopTotal_Tail = loopTotal & 3; + if (inner_stride == 1) { + if (second_rank == 1) { + LOG_CALLS(0) + Nd4jLong inner_total = getLength(inner_bases, second_rank); + for (Nd4jLong i = 0; i < loopTotal_K; i++) { + const X* buffer0 = &(bufferX[movement.First()]); + Z* output0 = &(outputZ[movement.Second()]); + movement.increment(); + const X* buffer1 = &(bufferX[movement.First()]); + Z* output1 = &(outputZ[movement.Second()]); + movement.increment(); + const X* buffer2 = &(bufferX[movement.First()]); + Z* output2 = &(outputZ[movement.Second()]); + movement.increment(); + const X* buffer3 = &(bufferX[movement.First()]); + Z* output3 = &(outputZ[movement.Second()]); + movement.increment(); + indexInnerReductionRank1Block4(buffer0, buffer1, buffer2, buffer3, output0, output1, output2, output3, inner_total); + + } + if (inner_total >= 2048) { + for (Nd4jLong i = 0; i < loopTotal_Tail; i++) { + X current; + const X* buffer0 = &(bufferX[movement.First()]); + indexInnerReductionRank1Block4WithMerge(buffer0, current, outputZ[movement.Second()], inner_total); + movement.increment(); + } + } + else { + for (Nd4jLong i = 0; i < loopTotal_Tail; i++) { + X current; + const X* buffer0 = &(bufferX[movement.First()]); + indexInnerReductionRank1(buffer0, current, outputZ[movement.Second()], inner_total); + movement.increment(); + } + } + + } + else { + Nd4jLong inner_last; + Nd4jLong inner_loop = getLength(inner_bases, second_rank, 1, inner_last); + if (second_rank == 2) { + LOG_CALLS(1) + for (Nd4jLong i = 0; i < loopTotal_K; i++) { + const X* buffer0 = &(bufferX[movement.First()]); + Z* output0 = &(outputZ[movement.Second()]); + movement.increment(); + const X* buffer1 = &(bufferX[movement.First()]); + Z* output1 = &(outputZ[movement.Second()]); + movement.increment(); + const X* buffer2 = &(bufferX[movement.First()]); + Z* output2 = &(outputZ[movement.Second()]); + movement.increment(); + const X* buffer3 = &(bufferX[movement.First()]); + Z* output3 = 
&(outputZ[movement.Second()]); + movement.increment(); + indexInnerReductionConstRankBlock4(buffer0, buffer1, buffer2, buffer3, output0, output1, output2, output3, inner_bases, inner_strides, + inner_loop, inner_last); + + } + for (Nd4jLong i = 0; i < loopTotal_Tail; i++) { + X current; + const X* buffer0 = &(bufferX[movement.First()]); + indexInnerReductionConstRank(buffer0, current, outputZ[movement.Second()], inner_bases, inner_strides, inner_loop, inner_last); + movement.increment(); + } + + } + else if (second_rank == 3) { + LOG_CALLS(2) + for (Nd4jLong i = 0; i < loopTotal_K; i++) { + const X* buffer0 = &(bufferX[movement.First()]); + Z* output0 = &(outputZ[movement.Second()]); + movement.increment(); + const X* buffer1 = &(bufferX[movement.First()]); + Z* output1 = &(outputZ[movement.Second()]); + movement.increment(); + const X* buffer2 = &(bufferX[movement.First()]); + Z* output2 = &(outputZ[movement.Second()]); + movement.increment(); + const X* buffer3 = &(bufferX[movement.First()]); + Z* output3 = &(outputZ[movement.Second()]); + movement.increment(); + indexInnerReductionConstRankBlock4(buffer0, buffer1, buffer2, buffer3, output0, output1, output2, output3, inner_bases, inner_strides, + inner_loop, inner_last); + + } + for (Nd4jLong i = 0; i < loopTotal_Tail; i++) { + X current; + const X* buffer0 = &(bufferX[movement.First()]); + indexInnerReductionConstRank(buffer0, current, outputZ[movement.Second()], inner_bases, inner_strides, + inner_loop, inner_last); + movement.increment(); + } + + } + else { + LOG_CALLS(3) + //nd4j_printf("-----%d \n", loopTotal); + for (Nd4jLong i = 0; i < loopTotal; i++) { + X current; + const X* buffer0 = &(bufferX[movement.First()]); + indexInnerReduction(second_rank, buffer0, current, outputZ[movement.Second()], inner_bases, inner_strides, 0, + inner_loop, inner_last); + movement.increment(); + } + + } + } + + } + else { + if (second_rank == 1) { + LOG_CALLS(10) + Nd4jLong inner_total = getLength(inner_bases, second_rank); + for (Nd4jLong i = 0; i < loopTotal_K; i++) { + const X* buffer0 = &(bufferX[movement.First()]); + Z* output0 = &(outputZ[movement.Second()]); + movement.increment(); + const X* buffer1 = &(bufferX[movement.First()]); + Z* output1 = &(outputZ[movement.Second()]); + movement.increment(); + const X* buffer2 = &(bufferX[movement.First()]); + Z* output2 = &(outputZ[movement.Second()]); + movement.increment(); + const X* buffer3 = &(bufferX[movement.First()]); + Z* output3 = &(outputZ[movement.Second()]); + movement.increment(); + indexInnerReductionRank1Block4(buffer0, buffer1, buffer2, buffer3, output0, output1, output2, output3, inner_total, inner_stride); + + } + if (inner_total >= 2048) { + for (Nd4jLong i = 0; i < loopTotal_Tail; i++) { + X current; + const X* buffer0 = &(bufferX[movement.First()]); + indexInnerReductionRank1Block4WithMerge(buffer0, current, outputZ[movement.Second()], inner_total, inner_stride); + movement.increment(); + } + } + else { + for (Nd4jLong i = 0; i < loopTotal_Tail; i++) { + X current; + const X* buffer0 = &(bufferX[movement.First()]); + indexInnerReductionRank1(buffer0, current, outputZ[movement.Second()], inner_total, inner_stride); + movement.increment(); + } + } + + } + else { + Nd4jLong inner_last; + Nd4jLong inner_loop = getLength(inner_bases, second_rank, 1, inner_last); + if (second_rank == 2) { + LOG_CALLS(11) + for (Nd4jLong i = 0; i < loopTotal_K; i++) { + const X* buffer0 = &(bufferX[movement.First()]); + Z* output0 = &(outputZ[movement.Second()]); + movement.increment(); + const X* 
buffer1 = &(bufferX[movement.First()]); + Z* output1 = &(outputZ[movement.Second()]); + movement.increment(); + const X* buffer2 = &(bufferX[movement.First()]); + Z* output2 = &(outputZ[movement.Second()]); + movement.increment(); + const X* buffer3 = &(bufferX[movement.First()]); + Z* output3 = &(outputZ[movement.Second()]); + movement.increment(); + indexInnerReductionConstRankBlock4(buffer0, buffer1, buffer2, buffer3, output0, output1, output2, output3, inner_bases, inner_strides, + inner_loop, inner_last, inner_stride); + + } + for (Nd4jLong i = 0; i < loopTotal_Tail; i++) { + X current; + const X* buffer0 = &(bufferX[movement.First()]); + indexInnerReductionConstRank(buffer0, current, outputZ[movement.Second()], inner_bases, inner_strides, + inner_loop, inner_last, inner_stride); + movement.increment(); + } + + } + else if (second_rank == 3) { + LOG_CALLS(12) + for (Nd4jLong i = 0; i < loopTotal_K; i++) { + const X* buffer0 = &(bufferX[movement.First()]); + Z* output0 = &(outputZ[movement.Second()]); + movement.increment(); + const X* buffer1 = &(bufferX[movement.First()]); + Z* output1 = &(outputZ[movement.Second()]); + movement.increment(); + const X* buffer2 = &(bufferX[movement.First()]); + Z* output2 = &(outputZ[movement.Second()]); + movement.increment(); + const X* buffer3 = &(bufferX[movement.First()]); + Z* output3 = &(outputZ[movement.Second()]); + movement.increment(); + indexInnerReductionConstRankBlock4(buffer0, buffer1, buffer2, buffer3, output0, output1, output2, output3, inner_bases, inner_strides, + inner_loop, inner_last, inner_stride); + + } + for (Nd4jLong i = 0; i < loopTotal_Tail; i++) { + X current; + const X* buffer0 = &(bufferX[movement.First()]); + indexInnerReductionConstRank(buffer0, current, outputZ[movement.Second()], inner_bases, inner_strides, + inner_loop, inner_last, inner_stride); + movement.increment(); + } + + } + else { + LOG_CALLS(13) + //nd4j_printf("-------%d inner loop %d inner_last %d\n", loopTotal, inner_loop,inner_last); + for (Nd4jLong i = 0; i < loopTotal; i++) { + X current; + const X* buffer0 = &(bufferX[movement.First()]); + indexInnerReduction(second_rank, buffer0, current, outputZ[movement.Second()], inner_bases, inner_strides, 0, + inner_loop, inner_last, inner_stride); + movement.increment(); + } + + } + } + + } + + } + + template + void argIndexCaseNonScalar(const int& first_rank, const int& output_rank, bool squashed, const int& second_rank, + const Nd4jLong*& outer_bases,const Nd4jLong* outer_strides,const Nd4jLong* output_strides, const Nd4jLong &output_stride, + const Nd4jLong*& inner_bases,const Nd4jLong* inner_strides, const X* bufferX, Z* outputZ) + { + + Nd4jLong total = getLength(outer_bases, first_rank); + Nd4jLong inner_stride = true /*LastIndexFaster*/ ? inner_strides[second_rank - 1] : inner_strides[0]; + Nd4jLong outer_stride = LastIndexFaster ? outer_strides[second_rank - 1] : outer_strides[0]; + auto func = [first_rank, output_rank, squashed, outer_bases, outer_strides, output_strides, output_stride, second_rank, inner_bases, inner_strides, bufferX, outputZ](uint64_t thread_id, int64_t start, int64_t stop, int64_t increment) -> void { + + Nd4jLong loopTotal = stop - start; + Nd4jLong stride = LastIndexFaster ? 
outer_strides[first_rank - 1] : outer_strides[0]; + if (first_rank == 1) { + + if (stride == 1) { + ZipGenericCoordsRank1Stride1 movement; + movement.init(nullptr, nullptr, nullptr, 0, start); + argReductionInnerCases(movement, loopTotal, second_rank, inner_bases, inner_strides, bufferX, outputZ); + } + else { + ZipGenericCoordsRank1BothStrideN movement; + movement.init(nullptr, &stride, &output_stride, 0, start); + argReductionInnerCases(movement, loopTotal, second_rank, inner_bases, inner_strides, bufferX, outputZ); + + } + + } + else if (squashed && first_rank <= output_rank) { + if (first_rank == 2) { + if (output_stride == 1) { + ZipGenericCoordsConstMovementSecondStride1<2, LastIndexFaster> movement; + movement.init(outer_bases, outer_strides, nullptr, first_rank, start); + argReductionInnerCases(movement, loopTotal, second_rank, inner_bases, inner_strides, bufferX, outputZ); + + } + else { + ZipGenericCoordsConstMovementSecondStrideN<2, LastIndexFaster> movement; + movement.init(outer_bases, outer_strides, &output_stride, first_rank, start); + argReductionInnerCases(movement, loopTotal, second_rank, inner_bases, inner_strides, bufferX, outputZ); + + } + } + else if (first_rank == 3) { + if (output_stride == 1) { + ZipGenericCoordsConstMovementSecondStride1<3, LastIndexFaster> movement; + movement.init(outer_bases, outer_strides, nullptr, first_rank, start); + argReductionInnerCases(movement, loopTotal, second_rank, inner_bases, inner_strides, bufferX, outputZ); + + } + else { + ZipGenericCoordsConstMovementSecondStrideN<3, LastIndexFaster> movement; + movement.init(outer_bases, outer_strides, &output_stride, first_rank, start); + argReductionInnerCases(movement, loopTotal, second_rank, inner_bases, inner_strides, bufferX, outputZ); + + } + } + else { + ZipGenericCoordsMovementSecondStrideN< LastIndexFaster> movement; + movement.init(outer_bases, outer_strides, &output_stride, first_rank, start); + + argReductionInnerCases(movement, loopTotal, second_rank, inner_bases, inner_strides, bufferX, outputZ); + + } + + } + else { + ZipGenericCoordsMovement movement; + movement.init(outer_bases, outer_strides, output_strides, first_rank, start); + + argReductionInnerCases(movement, loopTotal, second_rank, inner_bases, inner_strides, bufferX, outputZ); + + } + + }; +#if 0 + func(0, 0, total, 1); +#else + // + uint32_t numThreads = sd::Environment::getInstance()->maxMasterThreads(); + Nd4jLong inner_total = getLength(inner_bases, second_rank); + if (total * inner_total <= threadingThreshold) { + numThreads = 1; + } + else { + if (inner_stride > outer_stride && total <= 256) { + auto desired = total > 4 ? (total / 4) : 1; + numThreads = numThreads > desired ? 
desired : numThreads; + } + } + + samediff::Threads::parallel_tad(func, 0, total, 1, numThreads); +#endif + } + + template + void argIndex_(const NDArray& input, NDArray& output, const std::vector& dimensions) { + char input_order = input.ordering(); + bool try_squash_outer = (input_order == output.ordering()) && output.ews() != 0; + const Nd4jLong* input_shapeInfo = input.shapeInfo(); + const Nd4jLong* output_shapeInfo = output.shapeInfo(); + const Nd4jLong rank = input_shapeInfo[0]; + const Nd4jLong* input_bases = &(input_shapeInfo[1]); + const Nd4jLong* input_strides = &(input_shapeInfo[rank + 1]); + const Nd4jLong output_rank = output_shapeInfo[0]; + const Nd4jLong* output_strides = &(output_shapeInfo[output_rank + 1]); + Nd4jLong new_bases[MAX_RANK]; + Nd4jLong new_strides[MAX_RANK]; + int first_begin, first_end, second_begin, second_end; + //rePartition into two parts based on the selection + rePartition(input_order, dimensions, rank, input_bases, input_strides, new_bases, new_strides, first_begin, first_end, second_begin, second_end, try_squash_outer, input_order == 'c'); + int first_rank = first_end - first_begin; //the first rank can be 0 for scalar cases + int second_rank = second_end - second_begin; + auto bufferX = input.bufferAsT(); + auto outputZ = output.bufferAsT(); + const Nd4jLong* outer_bases = &(new_bases[first_begin]); + const Nd4jLong* outer_strides = &(new_strides[first_begin]); + const Nd4jLong* inner_bases = &(new_bases[second_begin]); + const Nd4jLong* inner_strides = &(new_strides[second_begin]); + const Nd4jLong output_stride = output.ordering() == 'c' ? output_strides[output_rank-1]:output_strides[0]; + if (input_order == 'c') { + if (first_rank == 0) { + argIndexCase1Scalar(second_rank, inner_bases, inner_strides, bufferX, outputZ); + } + else { + argIndexCaseNonScalar(first_rank, output_rank, try_squash_outer, second_rank, outer_bases, outer_strides, output_strides, + output_stride,inner_bases, inner_strides, bufferX, outputZ); + } + } + else { + if (first_rank == 0) { + LOG_CALLS(0); + if (second_rank == 1) { + argIndexCase1Scalar(second_rank, inner_bases, inner_strides, bufferX, outputZ); + } + else { + argIndexCase1Scalar(second_rank, inner_bases, inner_strides, bufferX, outputZ); + } + } + else { + LOG_CALLS(1); + argIndexCaseNonScalar(first_rank, output_rank, try_squash_outer, second_rank, outer_bases, outer_strides, output_strides, + output_stride, inner_bases, inner_strides, bufferX, outputZ); + } + } + } + + template + struct IndexMax { + static FORCEINLINE void update(X& current, Z& currentIndex, const X& candidate, const Z& candidateIndex) { + if (candidate > current) { + current = candidate; + currentIndex = candidateIndex; + } + } + }; + + template + struct IndexMin { + static FORCEINLINE void update(X& current, Z& currentIndex, const X& candidate, const Z& candidateIndex) { + if (candidate < current) { + current = candidate; + currentIndex = candidateIndex; + } + } + }; + + template + struct IndexAbsMax { + static FORCEINLINE void update(X& current, Z& currentIndex, const X& candidate, const Z& candidateIndex) { + auto absCandidate = sd::math::nd4j_abs(candidate); + if (absCandidate > current) { + current = absCandidate; + currentIndex = candidateIndex; + } + } + }; + + template + struct IndexAbsMin { + static FORCEINLINE void update(X& current, Z& currentIndex, const X& candidate, const Z& candidateIndex) { + auto absCandidate = sd::math::nd4j_abs(candidate); + if (absCandidate < current) { + current = absCandidate; + currentIndex = 
candidateIndex; + } + } + }; + + + ////////////////////////////////////////////////////////////////////////// + template + void argMax_(const NDArray& input, NDArray& output, const std::vector& dimensions) { + return argIndex_>(input, output, dimensions); + } + + template + void argMin_(const NDArray& input, NDArray& output, const std::vector& dimensions) { + return argIndex_>(input, output, dimensions); + } + + template + void argAbsMax_(const NDArray& input, NDArray& output, const std::vector& dimensions) { + return argIndex_>(input, output, dimensions); + } + + template + void argAbsMin_(const NDArray& input, NDArray& output, const std::vector& dimensions) { + return argIndex_>(input, output, dimensions); + } + } + } +} diff --git a/libnd4j/include/ops/declarable/helpers/cuda/indexReductions.cu b/libnd4j/include/ops/declarable/helpers/cuda/indexReductions.cu new file mode 100644 index 000000000..9876417df --- /dev/null +++ b/libnd4j/include/ops/declarable/helpers/cuda/indexReductions.cu @@ -0,0 +1,106 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include +#include +#include + +namespace sd { + namespace ops { + namespace helpers { + ////////////////////////////////////////////////////////////////////////// + void argMax(const NDArray& input, NDArray& output, const std::vector& dimensions) { + NDArray::prepareSpecialUse({&output}, {&input}); + if (output.isScalar()) { + NativeOpExecutioner::execIndexReduceScalar(LaunchContext::defaultContext(), indexreduce::Ops::IndexMax, input.buffer(), input.shapeInfo(), input.specialBuffer(), input.specialShapeInfo(), nullptr, output.buffer(), output.shapeInfo(), output.specialBuffer(), output.specialShapeInfo()); + } + else { + auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), dimensions); + + NativeOpExecutioner::execIndexReduce(LaunchContext::defaultContext(), indexreduce::Ops::IndexMax, + input.buffer(), input.shapeInfo(), input.specialBuffer(), input.specialShapeInfo(), + nullptr, + output.buffer(), output.shapeInfo(), output.specialBuffer(), output.specialShapeInfo(), + (int*) nullptr, dimensions.size(), + tadPack.specialShapeInfo(), tadPack.specialOffsets()); + } + + NDArray::registerSpecialUse({ &output }, { &input }); + } + + void argMin(const NDArray& input, NDArray& output, const std::vector& dimensions) { + NDArray::prepareSpecialUse({ &output }, { &input }); + if (output.isScalar()) { + NativeOpExecutioner::execIndexReduceScalar(LaunchContext::defaultContext(), indexreduce::Ops::IndexMin, input.buffer(), input.shapeInfo(), input.specialBuffer(), input.specialShapeInfo(), nullptr, output.buffer(), output.shapeInfo(), output.specialBuffer(), output.specialShapeInfo()); + } + else { + auto tadPack = 
sd::ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), dimensions); + + NativeOpExecutioner::execIndexReduce(LaunchContext::defaultContext(), indexreduce::Ops::IndexMin, + input.buffer(), input.shapeInfo(), input.specialBuffer(), input.specialShapeInfo(), + nullptr, + output.buffer(), output.shapeInfo(), output.specialBuffer(), output.specialShapeInfo(), + (int*) nullptr, dimensions.size(), + tadPack.specialShapeInfo(), tadPack.specialOffsets()); + } + + NDArray::registerSpecialUse({ &output }, { &input }); + } + + void argAbsMax(const NDArray& input, NDArray& output, const std::vector& dimensions) { + NDArray::prepareSpecialUse({ &output }, { &input }); + if (output.isScalar()) { + NativeOpExecutioner::execIndexReduceScalar(LaunchContext::defaultContext(), indexreduce::Ops::IndexAbsoluteMax, input.buffer(), input.shapeInfo(), input.specialBuffer(), input.specialShapeInfo(), nullptr, output.buffer(), output.shapeInfo(), output.specialBuffer(), output.specialShapeInfo()); + } + else { + auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), dimensions); + + NativeOpExecutioner::execIndexReduce(LaunchContext::defaultContext(), indexreduce::Ops::IndexAbsoluteMax, + input.buffer(), input.shapeInfo(), input.specialBuffer(), input.specialShapeInfo(), + nullptr, + output.buffer(), output.shapeInfo(), output.specialBuffer(), output.specialShapeInfo(), + (int*) nullptr, dimensions.size(), + tadPack.specialShapeInfo(), tadPack.specialOffsets()); + } + + NDArray::registerSpecialUse({ &output }, { &input }); + } + + void argAbsMin(const NDArray& input, NDArray& output, const std::vector& dimensions) { + NDArray::prepareSpecialUse({ &output }, { &input }); + if (output.isScalar()) { + NativeOpExecutioner::execIndexReduceScalar(LaunchContext::defaultContext(), indexreduce::Ops::IndexAbsoluteMin, input.buffer(), input.shapeInfo(), input.specialBuffer(), input.specialShapeInfo(), nullptr, output.buffer(), output.shapeInfo(), output.specialBuffer(), output.specialShapeInfo()); + } + else { + auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), dimensions); + + NativeOpExecutioner::execIndexReduce(LaunchContext::defaultContext(), indexreduce::Ops::IndexAbsoluteMin, + input.buffer(), input.shapeInfo(), input.specialBuffer(), input.specialShapeInfo(), + nullptr, + output.buffer(), output.shapeInfo(), output.specialBuffer(), output.specialShapeInfo(), + (int *) nullptr, dimensions.size(), + tadPack.specialShapeInfo(), tadPack.specialOffsets()); + } + + NDArray::registerSpecialUse({&output}, {&input}); + } + } + } +} \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/helpers/reductions.h b/libnd4j/include/ops/declarable/helpers/reductions.h new file mode 100644 index 000000000..ee199fd16 --- /dev/null +++ b/libnd4j/include/ops/declarable/helpers/reductions.h @@ -0,0 +1,41 @@ + +/******************************************************************************* + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + // + // @author AbdelRauf (rauf@konduit.ai) + // + +#ifndef LIBND4J_HELPERS_REDUCTIONS_H +#define LIBND4J_HELPERS_REDUCTIONS_H + +#include +#include +#include + +namespace sd { + namespace ops { + namespace helpers { + + void argMax(const NDArray& input, NDArray& output, const std::vector& dimensions); + void argAbsMax(const NDArray& input, NDArray& output, const std::vector& dimensions); + void argMin(const NDArray& input, NDArray& output, const std::vector& dimensions); + void argAbsMin(const NDArray& input, NDArray& output, const std::vector& dimensions); + + } + } +} + +#endif \ No newline at end of file diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests19.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests19.cpp index ce5038020..f111a888a 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests19.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests19.cpp @@ -40,6 +40,19 @@ public: } }; + +TEST_F(DeclarableOpsTests19, test_argmax_maxint_vector_1) { + auto x = NDArrayFactory::create('c', {3}, {0.1f, 0.5f, 0.7f}); + auto z = NDArrayFactory::create(0); + auto e = NDArrayFactory::create(2); + + sd::ops::argmax op; + auto status = op.execute({&x}, {&z}, {DataTypeUtils::max()}); + ASSERT_EQ(Status::OK(), status); + ASSERT_EQ(e, z); +} + + TEST_F(DeclarableOpsTests19, test_threshold_encode_1) { auto x = NDArrayFactory::create('c', {3}, {1.5, 2.5, -3.5}); auto exp_encoded = NDArrayFactory::create('c', {7}, {3, 3, 1056964608, 0, 1, 2, -3}); @@ -276,6 +289,7 @@ TEST_F(DeclarableOpsTests19, test_threshold_encode_decode_2) { } + TEST_F(DeclarableOpsTests19, test_matmul_ccc) { auto x = NDArrayFactory::create('c', {10, 10}); auto y = NDArrayFactory::create('c', {10, 10}); diff --git a/libnd4j/tests_cpu/layers_tests/PlaygroundTests.cpp b/libnd4j/tests_cpu/layers_tests/PlaygroundTests.cpp index 166ba058f..f8086c9fe 100644 --- a/libnd4j/tests_cpu/layers_tests/PlaygroundTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/PlaygroundTests.cpp @@ -43,9 +43,12 @@ #include #include #include - +#include #include #include +#include +#include +#include using namespace sd; using namespace sd::graph; @@ -275,6 +278,256 @@ TEST_F(PlaygroundTests, test_one_off_ops_1) { op.execute({&x, &y}, {&z}); } +#if defined(INDEX_REDUCTIONS_BENCH_TESTS) +//temporarly, testing against the original one +void original_argmax(const NDArray& input, std::vector& axis, NDArray& output) { + sd::ops::helpers::adjustAxis(input.rankOf(), axis); + input.applyIndexReduce(sd::indexreduce::IndexMax, output, axis); +} + +template +void fill_random(sd::NDArray& arr) { + Nd4jLong coords[MAX_RANK] = {}; + std::random_device rd; + std::mt19937 gen(rd()); + //for floats + std::uniform_real_distribution dis((T)-10.0, (T)22.9); + T* x = arr.bufferAsT(); + Nd4jLong* shapeInfo = arr.getShapeInfo(); + Nd4jLong* strides = arr.stridesOf(); + Nd4jLong rank = shapeInfo[0]; + Nd4jLong* bases = &(shapeInfo[1]); + size_t t = 1; + for (size_t i = 0; i < rank ; i++) { + t *= bases[i]; + } + size_t offset = 0; + if (arr.ordering() == 'c') { + + for (size_t i = 0; i < t; i++) { + x[offset] = dis(gen) ; + offset = sd::inc_coords(bases, strides, coords, offset, rank); + } + + } + else { + + for (size_t i = 0; i < t; i++) { + x[offset] = dis(gen) ; + offset = sd::inc_coords(bases, strides, coords, offset, rank); 
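+                // inc_coords bumps the coordinate tuple by one position and returns the
+                // updated flat offset, letting this fill loop walk every element of a
+                // strided array without recomputing the offset from scratch each step.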
+ } + + } +} + +void testLegacy(bool random) { +#if 0 + int bases[] = { 3, 2, 4, 5, 7 }; + constexpr int Loop = 1; +#else + int bases[] = { 8, 32, 64, 32, 64 }; + constexpr int Loop = 10; +#endif + constexpr int N = 5; + + auto x = NDArrayFactory::create('c', { bases[0], bases[1], bases[2], bases[3], bases[4] }); + if (!random) { + x.linspace(1); + } + else{ + fill_random(x); + } + +#define COMBINATIONS 1 +#if COMBINATIONS +//https://www.rosettacode.org/wiki/Combinations#C.2B.2B +for (int k = N; k >= 1; k--) { + + std::string bitmask(k, 1); // K leading 1's + bitmask.resize(N, 0); // N-K trailing 0's + + do { + + + std::vector dimension; + + std::vector output_bases; + + for (int i = 0; i < N; ++i) // [0..N-1] integers + { + if (bitmask[i]) dimension.push_back(i); + else { + output_bases.push_back(bases[i]); + } + } +#else +std::vector dimension = { 0,1,2,3 }; +int k = 4; +#endif +auto dim = NDArrayFactory::create(dimension); + +#if 1 +nd4j_printf("C(N:%d K:%d) \n", N, k); +dim.printIndexedBuffer("Dimension"); +for (int xind : dimension) { + nd4j_printf(" %d ,", bases[xind]); +} +nd4j_printf("%s", "\n"); +#endif + + + +std::vector values; +sd::ResultSet result; +for (int e = 0; e < Loop; e++) { + auto timeStart = std::chrono::system_clock::now(); + NDArray exp = output_bases.size() > 0 ? NDArrayFactory::create('c', output_bases) : NDArrayFactory::create(0); + original_argmax(x, dimension, exp); + auto timeEnd = std::chrono::system_clock::now(); + auto outerTime = std::chrono::duration_cast(timeEnd - timeStart).count(); + values.emplace_back(outerTime); +} + +std::sort(values.begin(), values.end()); + +nd4j_printf("Time: %lld us;\n", values[values.size() / 2]); +#if COMBINATIONS + + } while (std::prev_permutation(bitmask.begin(), bitmask.end())); + +} +#endif +} + +#define DEBUG 1 + +void testNewReduction(bool random, bool checkCorrectness = false , char order ='c') { + std::vector arr_dimensions; +#if defined(DEBUG) + int bases[] = { 3, 2, 3, 3, 5 ,4,7,4,7,7 }; + constexpr int Loop = 1; + constexpr int N = 10; +#else + int bases[] = { 8, 32, 64, 32, 64 }; + constexpr int Loop = 10; + constexpr int N = 5; + +#endif + + for (int i = 0; i < N; i++) { + arr_dimensions.push_back(bases[i]); + } + auto x = NDArrayFactory::create(order,arr_dimensions); + if (!random) { + x.linspace(1); + } + else { + fill_random(x); + } + +#define COMBINATIONS 1 +#if COMBINATIONS + //https://www.rosettacode.org/wiki/Combinations#C.2B.2B + for (int k = N; k >= 1; k--) { + + std::string bitmask(k, 1); // K leading 1's + bitmask.resize(N, 0); // N-K trailing 0's + + do { + + + std::vector dimension; + + std::vector output_bases; + + for (int i = 0; i < N; ++i) // [0..N-1] integers + { + if (bitmask[i]) dimension.push_back(i); + else { + output_bases.push_back(bases[i]); + } + } +#else + std::vector dimension = { 0,1,2,3 }; + int k = 4; +#endif + auto dim = NDArrayFactory::create(dimension); + +#if 1 + nd4j_printf("C(N:%d K:%d) \n", N, k); + dim.printIndexedBuffer("Dimension"); + for (int xind : dimension) { + nd4j_printf(" %d ,", bases[xind]); + } + nd4j_printf("%s", "\n"); +#endif + + + sd::ops::argmax op; + std::vector values; + sd::ResultSet result; + for (int e = 0; e < Loop; e++) { + auto timeStart = std::chrono::system_clock::now(); + result = op.evaluate({ &x, &dim }, {}, {}); + auto timeEnd = std::chrono::system_clock::now(); + auto outerTime = std::chrono::duration_cast(timeEnd - timeStart).count(); + values.emplace_back(outerTime); + } + auto z = result.at(0); + + if (checkCorrectness) { + //check for the 
correctness + NDArray exp = output_bases.size() > 0 ? NDArrayFactory::create('c', output_bases) : NDArrayFactory::create(0); + original_argmax(x, dimension, exp); + + +#if 0// defined(DEBUG) + x.printIndexedBuffer("X"); + exp.printIndexedBuffer("Expected"); + z->printIndexedBuffer("Z"); +#endif + + ASSERT_TRUE(exp.isSameShape(z)); + ASSERT_TRUE(exp.equalsTo(z)); + } + std::sort(values.begin(), values.end()); + + nd4j_printf("Time: %lld us;\n", values[values.size() / 2]); +#if COMBINATIONS + + } while (std::prev_permutation(bitmask.begin(), bitmask.end())); + + } +#endif +} + +constexpr bool test_corr = true; +#if !defined(DEBUG) +TEST_F(PlaygroundTests, ArgMaxPerfLinspace) { + testNewReduction(false, test_corr); +} +#endif + +TEST_F(PlaygroundTests, ArgMaxPerfRandom) { + testNewReduction(true, test_corr); +} + +TEST_F(PlaygroundTests, ArgMaxPerfRandomOrderF) { + testNewReduction(true, test_corr, 'f'); +} + +#if !defined(DEBUG) +TEST_F(PlaygroundTests, ArgMaxPerfLegacyLinspace) { + testLegacy(false); +} + +TEST_F(PlaygroundTests, ArgMaxPerfLegacyRandom) { + testLegacy(true); +} + +#endif + +#endif /* diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/samediff/ops/SDBaseOps.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/samediff/ops/SDBaseOps.java index 79bd82ad3..8190c4849 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/samediff/ops/SDBaseOps.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/samediff/ops/SDBaseOps.java @@ -106,7 +106,7 @@ public class SDBaseOps { public SDVariable argmax(SDVariable in, boolean keepDims, int... dimensions) { SDValidation.validateNumerical("argmax", "in", in); Preconditions.checkArgument(dimensions.length >= 0, "dimensions has incorrect size/length. Expected: dimensions.length >= 0, got %s", dimensions.length); - return new org.nd4j.linalg.api.ops.impl.indexaccum.IMax(sd,in, keepDims, dimensions).outputVariable(); + return new org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMax(sd,in, keepDims, dimensions).outputVariable(); } /** @@ -130,7 +130,7 @@ public class SDBaseOps { public SDVariable argmax(String name, SDVariable in, boolean keepDims, int... dimensions) { SDValidation.validateNumerical("argmax", "in", in); Preconditions.checkArgument(dimensions.length >= 0, "dimensions has incorrect size/length. Expected: dimensions.length >= 0, got %s", dimensions.length); - SDVariable out = new org.nd4j.linalg.api.ops.impl.indexaccum.IMax(sd,in, keepDims, dimensions).outputVariable(); + SDVariable out = new org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMax(sd,in, keepDims, dimensions).outputVariable(); return sd.updateVariableNameAndReference(out, name); } @@ -153,7 +153,7 @@ public class SDBaseOps { public SDVariable argmax(SDVariable in, int... dimensions) { SDValidation.validateNumerical("argmax", "in", in); Preconditions.checkArgument(dimensions.length >= 0, "dimensions has incorrect size/length. Expected: dimensions.length >= 0, got %s", dimensions.length); - return new org.nd4j.linalg.api.ops.impl.indexaccum.IMax(sd,in, false, dimensions).outputVariable(); + return new org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMax(sd,in, false, dimensions).outputVariable(); } /** @@ -176,7 +176,7 @@ public class SDBaseOps { public SDVariable argmax(String name, SDVariable in, int... 
dimensions) { SDValidation.validateNumerical("argmax", "in", in); Preconditions.checkArgument(dimensions.length >= 0, "dimensions has incorrect size/length. Expected: dimensions.length >= 0, got %s", dimensions.length); - SDVariable out = new org.nd4j.linalg.api.ops.impl.indexaccum.IMax(sd,in, false, dimensions).outputVariable(); + SDVariable out = new org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMax(sd,in, false, dimensions).outputVariable(); return sd.updateVariableNameAndReference(out, name); } @@ -203,7 +203,7 @@ public class SDBaseOps { public SDVariable argmin(SDVariable in, boolean keepDims, int... dimensions) { SDValidation.validateNumerical("argmin", "in", in); Preconditions.checkArgument(dimensions.length >= 0, "dimensions has incorrect size/length. Expected: dimensions.length >= 0, got %s", dimensions.length); - return new org.nd4j.linalg.api.ops.impl.indexaccum.IMin(sd,in, keepDims, dimensions).outputVariable(); + return new org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMin(sd,in, keepDims, dimensions).outputVariable(); } /** @@ -230,7 +230,7 @@ public class SDBaseOps { public SDVariable argmin(String name, SDVariable in, boolean keepDims, int... dimensions) { SDValidation.validateNumerical("argmin", "in", in); Preconditions.checkArgument(dimensions.length >= 0, "dimensions has incorrect size/length. Expected: dimensions.length >= 0, got %s", dimensions.length); - SDVariable out = new org.nd4j.linalg.api.ops.impl.indexaccum.IMin(sd,in, keepDims, dimensions).outputVariable(); + SDVariable out = new org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMin(sd,in, keepDims, dimensions).outputVariable(); return sd.updateVariableNameAndReference(out, name); } @@ -256,7 +256,7 @@ public class SDBaseOps { public SDVariable argmin(SDVariable in, int... dimensions) { SDValidation.validateNumerical("argmin", "in", in); Preconditions.checkArgument(dimensions.length >= 0, "dimensions has incorrect size/length. Expected: dimensions.length >= 0, got %s", dimensions.length); - return new org.nd4j.linalg.api.ops.impl.indexaccum.IMin(sd,in, false, dimensions).outputVariable(); + return new org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMin(sd,in, false, dimensions).outputVariable(); } /** @@ -282,7 +282,7 @@ public class SDBaseOps { public SDVariable argmin(String name, SDVariable in, int... dimensions) { SDValidation.validateNumerical("argmin", "in", in); Preconditions.checkArgument(dimensions.length >= 0, "dimensions has incorrect size/length. Expected: dimensions.length >= 0, got %s", dimensions.length); - SDVariable out = new org.nd4j.linalg.api.ops.impl.indexaccum.IMin(sd,in, false, dimensions).outputVariable(); + SDVariable out = new org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMin(sd,in, false, dimensions).outputVariable(); return sd.updateVariableNameAndReference(out, name); } diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/samediff/ops/SDMath.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/samediff/ops/SDMath.java index 4d42b2295..15a26059f 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/samediff/ops/SDMath.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/samediff/ops/SDMath.java @@ -1875,7 +1875,7 @@ public class SDMath extends SDOps { public SDVariable iamax(SDVariable in, int... 
dimensions) { SDValidation.validateNumerical("iamax", "in", in); Preconditions.checkArgument(dimensions.length >= 1, "dimensions has incorrect size/length. Expected: dimensions.length >= 1, got %s", dimensions.length); - return new org.nd4j.linalg.api.ops.impl.indexaccum.IAMax(sd,in, false, dimensions).outputVariable(); + return new org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMax(sd,in, false, dimensions).outputVariable(); } /** @@ -1890,7 +1890,7 @@ public class SDMath extends SDOps { public SDVariable iamax(String name, SDVariable in, int... dimensions) { SDValidation.validateNumerical("iamax", "in", in); Preconditions.checkArgument(dimensions.length >= 1, "dimensions has incorrect size/length. Expected: dimensions.length >= 1, got %s", dimensions.length); - SDVariable out = new org.nd4j.linalg.api.ops.impl.indexaccum.IAMax(sd,in, false, dimensions).outputVariable(); + SDVariable out = new org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMax(sd,in, false, dimensions).outputVariable(); return sd.updateVariableNameAndReference(out, name); } @@ -1906,7 +1906,7 @@ public class SDMath extends SDOps { public SDVariable iamax(SDVariable in, boolean keepDims, int... dimensions) { SDValidation.validateNumerical("iamax", "in", in); Preconditions.checkArgument(dimensions.length >= 1, "dimensions has incorrect size/length. Expected: dimensions.length >= 1, got %s", dimensions.length); - return new org.nd4j.linalg.api.ops.impl.indexaccum.IAMax(sd,in, keepDims, dimensions).outputVariable(); + return new org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMax(sd,in, keepDims, dimensions).outputVariable(); } /** @@ -1922,7 +1922,7 @@ public class SDMath extends SDOps { public SDVariable iamax(String name, SDVariable in, boolean keepDims, int... dimensions) { SDValidation.validateNumerical("iamax", "in", in); Preconditions.checkArgument(dimensions.length >= 1, "dimensions has incorrect size/length. Expected: dimensions.length >= 1, got %s", dimensions.length); - SDVariable out = new org.nd4j.linalg.api.ops.impl.indexaccum.IAMax(sd,in, keepDims, dimensions).outputVariable(); + SDVariable out = new org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMax(sd,in, keepDims, dimensions).outputVariable(); return sd.updateVariableNameAndReference(out, name); } @@ -1937,7 +1937,7 @@ public class SDMath extends SDOps { public SDVariable iamin(SDVariable in, int... dimensions) { SDValidation.validateNumerical("iamin", "in", in); Preconditions.checkArgument(dimensions.length >= 1, "dimensions has incorrect size/length. Expected: dimensions.length >= 1, got %s", dimensions.length); - return new org.nd4j.linalg.api.ops.impl.indexaccum.IAMin(sd,in, false, dimensions).outputVariable(); + return new org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMin(sd,in, false, dimensions).outputVariable(); } /** @@ -1952,7 +1952,7 @@ public class SDMath extends SDOps { public SDVariable iamin(String name, SDVariable in, int... dimensions) { SDValidation.validateNumerical("iamin", "in", in); Preconditions.checkArgument(dimensions.length >= 1, "dimensions has incorrect size/length. 
Expected: dimensions.length >= 1, got %s", dimensions.length); - SDVariable out = new org.nd4j.linalg.api.ops.impl.indexaccum.IAMin(sd,in, false, dimensions).outputVariable(); + SDVariable out = new org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMin(sd,in, false, dimensions).outputVariable(); return sd.updateVariableNameAndReference(out, name); } @@ -1968,7 +1968,7 @@ public class SDMath extends SDOps { public SDVariable iamin(SDVariable in, boolean keepDims, int... dimensions) { SDValidation.validateNumerical("iamin", "in", in); Preconditions.checkArgument(dimensions.length >= 1, "dimensions has incorrect size/length. Expected: dimensions.length >= 1, got %s", dimensions.length); - return new org.nd4j.linalg.api.ops.impl.indexaccum.IAMin(sd,in, keepDims, dimensions).outputVariable(); + return new org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMin(sd,in, keepDims, dimensions).outputVariable(); } /** @@ -1984,7 +1984,7 @@ public class SDMath extends SDOps { public SDVariable iamin(String name, SDVariable in, boolean keepDims, int... dimensions) { SDValidation.validateNumerical("iamin", "in", in); Preconditions.checkArgument(dimensions.length >= 1, "dimensions has incorrect size/length. Expected: dimensions.length >= 1, got %s", dimensions.length); - SDVariable out = new org.nd4j.linalg.api.ops.impl.indexaccum.IAMin(sd,in, keepDims, dimensions).outputVariable(); + SDVariable out = new org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMin(sd,in, keepDims, dimensions).outputVariable(); return sd.updateVariableNameAndReference(out, name); } diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/samediff/serde/LegacyOpMapper.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/samediff/serde/LegacyOpMapper.java index 33d983f23..52f39982b 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/samediff/serde/LegacyOpMapper.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/samediff/serde/LegacyOpMapper.java @@ -682,14 +682,6 @@ public class LegacyOpMapper { public static Class indexReduceClass(int opNum){ switch (opNum){ - case 0: - return IMax.class; - case 1: - return IMin.class; - case 2: - return IAMax.class; - case 3: - return IAMin.class; case 4: return FirstIndex.class; case 5: diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/validation/OpValidation.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/validation/OpValidation.java index 756052851..386ead0b3 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/validation/OpValidation.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/validation/OpValidation.java @@ -1055,10 +1055,6 @@ public class OpValidation { IsNumericTensor.class, //Exclude index accumulations (index out, not real-valued) FirstIndex.class, - IAMax.class, - IAMin.class, - IMax.class, - IMin.class, LastIndex.class, ArgMax.class, ArgMin.class, diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/imports/converters/ImportClassMapping.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/imports/converters/ImportClassMapping.java index a053a40ab..63138719c 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/imports/converters/ImportClassMapping.java +++ 
b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/imports/converters/ImportClassMapping.java @@ -105,13 +105,11 @@ public class ImportClassMapping { org.nd4j.linalg.api.ops.impl.image.ResizeNearestNeighbor.class, org.nd4j.linalg.api.ops.impl.image.ResizeArea.class, org.nd4j.linalg.api.ops.impl.indexaccum.FirstIndex.class, - org.nd4j.linalg.api.ops.impl.indexaccum.IAMax.class, - org.nd4j.linalg.api.ops.impl.indexaccum.IAMin.class, - org.nd4j.linalg.api.ops.impl.indexaccum.IMax.class, - org.nd4j.linalg.api.ops.impl.indexaccum.IMin.class, org.nd4j.linalg.api.ops.impl.indexaccum.LastIndex.class, org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMax.class, org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMin.class, + org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgAmax.class, + org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgAmin.class, org.nd4j.linalg.api.ops.impl.layers.ExternalErrorsFunction.class, org.nd4j.linalg.api.ops.impl.layers.convolution.AvgPooling2D.class, org.nd4j.linalg.api.ops.impl.layers.convolution.AvgPooling3D.class, diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/IAMax.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/IAMax.java deleted file mode 100644 index b2e0d1192..000000000 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/IAMax.java +++ /dev/null @@ -1,78 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -package org.nd4j.linalg.api.ops.impl.indexaccum; - -import lombok.Data; -import org.nd4j.autodiff.samediff.SDVariable; -import org.nd4j.autodiff.samediff.SameDiff; -import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.api.ops.BaseIndexAccumulation; - -import java.util.Collections; -import java.util.List; - -/** - * Calculate the index of the max absolute value over a vector - * - * @author Adam Gibson - */ -@Data -public class IAMax extends BaseIndexAccumulation { - public IAMax(SameDiff sameDiff, SDVariable i_v, boolean keepDims, int[] dimensions) { - super(sameDiff, i_v, keepDims, dimensions); - } - - public IAMax() {} - - public IAMax(INDArray x, int... dimensions) { - this(x, false, dimensions); - } - - public IAMax(INDArray x, boolean keepDims, int... dimensions) { - this(x, null, dimensions); - this.keepDims = keepDims; - } - - public IAMax(INDArray x, INDArray z, int... 
dimensions) { - super(x, z, dimensions); - } - - @Override - public int opNum() { - return 2; - } - - @Override - public String opName() { - return "iamax"; - } - - @Override - public String onnxName() { - return "AbsArgMax"; - } - - @Override - public String tensorflowName() { - return "absargmax"; - } - - @Override - public List<SDVariable> doDiff(List<SDVariable> grad){ - return Collections.singletonList(sameDiff.zerosLike(arg())); - } -}
diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/IAMin.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/IAMin.java deleted file mode 100644 index f20547c1d..000000000 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/IAMin.java +++ /dev/null @@ -1,80 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -package org.nd4j.linalg.api.ops.impl.indexaccum; - -import lombok.Data; -import org.nd4j.autodiff.samediff.SDVariable; -import org.nd4j.autodiff.samediff.SameDiff; -import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.api.ops.BaseIndexAccumulation; - -import java.util.Collections; -import java.util.List; - -/** - * Calculate the index of the max absolute value over a vector - * - * @author Adam Gibson - */ -@Data -public class IAMin extends BaseIndexAccumulation { - public IAMin(SameDiff sameDiff, SDVariable i_v, boolean keepDims, int[] dimensions) { - super(sameDiff, i_v, keepDims, dimensions); - } - - public IAMin() {} - - public IAMin(INDArray x, int... dimensions) { - super(x, dimensions); - } - - public IAMin(INDArray in, boolean keepDims, int... dimnesions){ - super(in, null, dimnesions); - this.keepDims = keepDims; - } - - public IAMin(INDArray x, INDArray z, int... dimensions) { - super(x, z, dimensions); - } - - - - @Override - public int opNum() { - return 3; - } - - @Override - public String opName() { - return "iamin"; - } - - @Override - public String onnxName() { - return "AbsArgMin"; - } - - @Override - public String tensorflowName() { - return "absargmin"; - } - - @Override - public List<SDVariable> doDiff(List<SDVariable> grad){ - return Collections.singletonList(sameDiff.zerosLike(arg())); - } -}
diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/IMax.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/IMax.java deleted file mode 100644 index 127239bc7..000000000 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/IMax.java +++ /dev/null @@ -1,87 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc.
- * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -package org.nd4j.linalg.api.ops.impl.indexaccum; - -import lombok.Data; -import org.nd4j.autodiff.samediff.SDVariable; -import org.nd4j.autodiff.samediff.SameDiff; -import org.nd4j.imports.NoOpNameFoundException; -import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.api.ops.BaseIndexAccumulation; - -import java.util.Collections; -import java.util.List; - -/** - * Calculate the index - * of max value over a vector - * - * @author Alex Black - */ -@Data -public class IMax extends BaseIndexAccumulation { - public IMax(SameDiff sameDiff, SDVariable i_v, boolean keepDims, int[] dimensions) { - super(sameDiff, i_v, keepDims, dimensions); - } - - public IMax() { - } - - public IMax(INDArray x, INDArray z, int... dimensions) { - super(x, z, dimensions); - } - - public IMax(INDArray x, int... dimensions) { - super(x, null, dimensions); - } - - public IMax(INDArray x, boolean keepDims, int... dimensions) { - super(x, null, dimensions); - this.keepDims = keepDims; - } - - @Override - public int opNum() { - return 0; - } - - @Override - public String opName() { - return "imax"; - } - - @Override - public String onnxName() { - return "arg_max"; - } - - @Override - public String tensorflowName() { - throw new NoOpNameFoundException("No tensorflow op opName found for " + opName()); - } - - @Override - public Type opType() { - return Type.INDEXREDUCE; - } - - @Override - public List<SDVariable> doDiff(List<SDVariable> f1) { - //Not differentiable, but (assuming no ties) output does not change for a given infinitesimal change in the input - return Collections.singletonList(sameDiff.zerosLike(arg())); - } -}
diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/IMin.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/IMin.java deleted file mode 100644 index a459e8c9c..000000000 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/IMin.java +++ /dev/null @@ -1,83 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License.
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -package org.nd4j.linalg.api.ops.impl.indexaccum; - -import lombok.Data; -import org.nd4j.autodiff.samediff.SDVariable; -import org.nd4j.autodiff.samediff.SameDiff; -import org.nd4j.imports.NoOpNameFoundException; -import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.api.ops.BaseIndexAccumulation; - -import java.util.Collections; -import java.util.List; - -/** - * Calculate the index of min value over a vector - * - * @author Alex Black - */ -@Data -public class IMin extends BaseIndexAccumulation { - public IMin(SameDiff sameDiff, SDVariable i_v, boolean keepDims, int[] dimensions) { - super(sameDiff, i_v, keepDims, dimensions); - } - - public IMin() { - } - - public IMin(INDArray x, int... dimensions) { - super(x, dimensions); - } - - public IMin(INDArray x, boolean keepDims, int... dimensions) { - super(x, keepDims, dimensions); - } - - public IMin(INDArray x, INDArray z, int... dimensions) { - super(x, z, dimensions); - } - - - - @Override - public int opNum() { - return 1; - } - - @Override - public String opName() { - return "imin"; - } - - @Override - public String onnxName() { - return "ArgMin"; - } - - @Override - public String tensorflowName() { - throw new NoOpNameFoundException("No tensorflow op opName found for " + opName()); - } - - - @Override - public List<SDVariable> doDiff(List<SDVariable> f1) { - //Not differentiable, but (assuming no ties) output does not change for a given infinitesimal change in the input - return Collections.singletonList(sameDiff.zerosLike(arg())); - } -}
diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/custom/ArgAmax.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/custom/ArgAmax.java new file mode 100644 index 000000000..b4d74d3be --- /dev/null +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/custom/ArgAmax.java @@ -0,0 +1,111 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc.
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +package org.nd4j.linalg.api.ops.impl.indexaccum.custom; + +import lombok.Data; +import org.nd4j.autodiff.samediff.SDVariable; +import org.nd4j.autodiff.samediff.SameDiff; +import org.nd4j.common.base.Preconditions; +import org.nd4j.imports.NoOpNameFoundException; +import org.nd4j.imports.graphmapper.tf.TFGraphMapper; +import org.nd4j.linalg.api.buffer.DataType; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.api.ops.DynamicCustomOp; +import org.tensorflow.framework.AttrValue; +import org.tensorflow.framework.GraphDef; +import org.tensorflow.framework.NodeDef; + +import java.util.Collections; +import java.util.List; +import java.util.Map; + +@Data +public class ArgAmax extends DynamicCustomOp { + protected boolean keepDims = false; + private int[] dimensions; + + protected DataType outputType = DataType.INT64; + + public ArgAmax(SameDiff sameDiff, SDVariable i_v, boolean keepDims, int[] dimensions) { + super(sameDiff, i_v); + + this.keepDims = keepDims; + this.dimensions = dimensions; + + if (dimensions != null && dimensions.length > 0) + addIArgument(dimensions); + + addBArgument(keepDims); + + addDArgument(outputType); + } + + public ArgAmax() { + } + + public ArgAmax(INDArray x, INDArray z, boolean keepDims, int... dimensions) { + super(new INDArray[]{x}, z != null ? new INDArray[] {z} : new INDArray[0]); + + this.keepDims = keepDims; + this.dimensions = dimensions; + + if (dimensions != null && dimensions.length > 0) + addIArgument(dimensions); + + addBArgument(keepDims); + + addDArgument(outputType); + } + + public ArgAmax(INDArray x, INDArray z, int... dimensions) { + this(x, z, false, dimensions); + } + + public ArgAmax(INDArray x, int... dimensions) { + this(x, null, dimensions); + } + + public ArgAmax(INDArray x, boolean keepDims, int... dimensions) { + this(x, null, keepDims, dimensions); + } + + @Override + public String opName() { + return "argamax"; + } + + @Override + public String tensorflowName() { + throw new NoOpNameFoundException("No tensorflow op opName found for " + opName()); + } + + @Override + public void initFromTensorFlow(NodeDef nodeDef, SameDiff initWith, Map attributesForNode, GraphDef graph) { + if(attributesForNode.containsKey("output_type")) { + outputType = TFGraphMapper.convertType(attributesForNode.get("output_type").getType()); + } else { + outputType = DataType.LONG; + } + } + + @Override + public List calculateOutputDataTypes(List inputDataTypes){ + Preconditions.checkState(inputDataTypes != null && (inputDataTypes.size() == 1 || inputDataTypes.size() == 2), + "Expected 1 or 2 input datatype to argamax, got %s", inputDataTypes); //2nd input: axis + return Collections.singletonList(outputType == null ? DataType.LONG : outputType); + } +} diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/custom/ArgAmin.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/custom/ArgAmin.java new file mode 100644 index 000000000..530d7778e --- /dev/null +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/custom/ArgAmin.java @@ -0,0 +1,111 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. 
+ * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +package org.nd4j.linalg.api.ops.impl.indexaccum.custom; + +import lombok.Data; +import org.nd4j.autodiff.samediff.SDVariable; +import org.nd4j.autodiff.samediff.SameDiff; +import org.nd4j.common.base.Preconditions; +import org.nd4j.imports.NoOpNameFoundException; +import org.nd4j.imports.graphmapper.tf.TFGraphMapper; +import org.nd4j.linalg.api.buffer.DataType; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.api.ops.DynamicCustomOp; +import org.tensorflow.framework.AttrValue; +import org.tensorflow.framework.GraphDef; +import org.tensorflow.framework.NodeDef; +
+import java.util.Collections; +import java.util.List; +import java.util.Map; +
+@Data +public class ArgAmin extends DynamicCustomOp { + protected boolean keepDims = false; + private int[] dimensions; + + protected DataType outputType = DataType.INT64; + + public ArgAmin(SameDiff sameDiff, SDVariable i_v, boolean keepDims, int[] dimensions) { + super(sameDiff, i_v); + + this.keepDims = keepDims; + this.dimensions = dimensions; + + if (dimensions != null && dimensions.length > 0) + addIArgument(dimensions); + + addBArgument(keepDims); + + addDArgument(outputType); + } + + public ArgAmin() { + } + + public ArgAmin(INDArray x, INDArray z, boolean keepDims, int... dimensions) { + super(new INDArray[]{x}, z != null ? new INDArray[] {z} : new INDArray[0]); + + this.keepDims = keepDims; + this.dimensions = dimensions; + + if (dimensions != null && dimensions.length > 0) + addIArgument(dimensions); + + addBArgument(keepDims); + + addDArgument(outputType); + } + + public ArgAmin(INDArray x, INDArray z, int... dimensions) { + this(x, z, false, dimensions); + } + + public ArgAmin(INDArray x, int... dimensions) { + this(x, null, dimensions); + } + + public ArgAmin(INDArray x, boolean keepDims, int... dimensions) { + this(x, null, keepDims, dimensions); + } + + @Override + public String opName() { + return "argamin"; + } + + @Override + public String tensorflowName() { + throw new NoOpNameFoundException("No tensorflow op opName found for " + opName()); + } + + @Override + public void initFromTensorFlow(NodeDef nodeDef, SameDiff initWith, Map<String, AttrValue> attributesForNode, GraphDef graph) { + if(attributesForNode.containsKey("output_type")) { + outputType = TFGraphMapper.convertType(attributesForNode.get("output_type").getType()); + } else { + outputType = DataType.LONG; + } + } + + @Override + public List<DataType> calculateOutputDataTypes(List<DataType> inputDataTypes){ + Preconditions.checkState(inputDataTypes != null && (inputDataTypes.size() == 1 || inputDataTypes.size() == 2), + "Expected 1 or 2 input datatype to argamin, got %s", inputDataTypes); //2nd input: axis + return Collections.singletonList(outputType == null ?
DataType.LONG : outputType); + } +} diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/custom/ArgMax.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/custom/ArgMax.java index 1c19b82a5..799e6ec65 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/custom/ArgMax.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/custom/ArgMax.java @@ -17,10 +17,12 @@ package org.nd4j.linalg.api.ops.impl.indexaccum.custom; import lombok.Data; +import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.common.base.Preconditions; import org.nd4j.imports.graphmapper.tf.TFGraphMapper; import org.nd4j.linalg.api.buffer.DataType; +import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ops.DynamicCustomOp; import org.tensorflow.framework.AttrValue; import org.tensorflow.framework.GraphDef; @@ -32,8 +34,53 @@ import java.util.Map; @Data public class ArgMax extends DynamicCustomOp { + protected boolean keepDims = false; + private int[] dimensions; - protected DataType outputType; + protected DataType outputType = DataType.INT64; + + public ArgMax(SameDiff sameDiff, SDVariable i_v, boolean keepDims, int[] dimensions) { + super(sameDiff, i_v); + + this.keepDims = keepDims; + this.dimensions = dimensions; + + if (dimensions != null && dimensions.length > 0) + addIArgument(dimensions); + + addBArgument(keepDims); + + addDArgument(outputType); + } + + public ArgMax() { + } + + public ArgMax(INDArray x, INDArray z, boolean keepDims, int... dimensions) { + super(new INDArray[]{x}, z != null ? new INDArray[] {z} : new INDArray[0]); + + this.keepDims = keepDims; + this.dimensions = dimensions; + + if (dimensions != null && dimensions.length > 0) + addIArgument(dimensions); + + addBArgument(keepDims); + + addDArgument(outputType); + } + + public ArgMax(INDArray x, INDArray z, int... dimensions) { + this(x, z, false, dimensions); + } + + public ArgMax(INDArray x, int... dimensions) { + this(x, null, dimensions); + } + + public ArgMax(INDArray x, boolean keepDims, int... 
dimensions) { + this(x, null, keepDims, dimensions); + } @Override public String opName() { diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/custom/ArgMin.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/custom/ArgMin.java index c93bb1acf..cfd96de42 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/custom/ArgMin.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/custom/ArgMin.java @@ -17,10 +17,12 @@ package org.nd4j.linalg.api.ops.impl.indexaccum.custom; import lombok.Data; +import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.common.base.Preconditions; import org.nd4j.imports.graphmapper.tf.TFGraphMapper; import org.nd4j.linalg.api.buffer.DataType; +import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ops.DynamicCustomOp; import org.tensorflow.framework.AttrValue; import org.tensorflow.framework.GraphDef; @@ -37,8 +39,53 @@ import java.util.Map; */ @Data public class ArgMin extends DynamicCustomOp { + protected boolean keepDims = false; + private int[] dimensions; - protected DataType outputType = DataType.LONG; + protected DataType outputType = DataType.INT64; + + public ArgMin(SameDiff sameDiff, SDVariable i_v, boolean keepDims, int[] dimensions) { + super(sameDiff, i_v); + + this.keepDims = keepDims; + this.dimensions = dimensions; + + if (dimensions != null && dimensions.length > 0) + addIArgument(dimensions); + + addBArgument(keepDims); + + addDArgument(outputType); + } + + public ArgMin() { + } + + public ArgMin(INDArray x, INDArray z, boolean keepDims, int... dimensions) { + super(new INDArray[]{x}, z != null ? new INDArray[] {z} : new INDArray[0]); + + this.keepDims = keepDims; + this.dimensions = dimensions; + + if (dimensions != null && dimensions.length > 0) + addIArgument(dimensions); + + addBArgument(keepDims); + + addDArgument(outputType); + } + + public ArgMin(INDArray x, INDArray z, int... dimensions) { + this(x, z, false, dimensions); + } + + public ArgMin(INDArray x, int... dimensions) { + this(x, null, dimensions); + } + + public ArgMin(INDArray x, boolean keepDims, int... 
dimensions) { + this(x, null, keepDims, dimensions); + } @Override public String opName() { diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/factory/Nd4j.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/factory/Nd4j.java index b01c28d16..88d0cbe44 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/factory/Nd4j.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/factory/Nd4j.java @@ -17,6 +17,8 @@ package org.nd4j.linalg.factory; import lombok.extern.slf4j.Slf4j; +import org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMax; +import org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMin; import org.nd4j.linalg.factory.ops.*; import org.nd4j.shade.guava.primitives.Ints; import org.nd4j.shade.guava.primitives.Longs; @@ -50,8 +52,6 @@ import org.nd4j.linalg.api.ops.Op; import org.nd4j.linalg.api.ops.OpContext; import org.nd4j.linalg.api.ops.executioner.DefaultOpExecutioner; import org.nd4j.linalg.api.ops.executioner.OpExecutioner; -import org.nd4j.linalg.api.ops.impl.indexaccum.IMax; -import org.nd4j.linalg.api.ops.impl.indexaccum.IMin; import org.nd4j.linalg.api.ops.impl.reduce.Mmul; import org.nd4j.linalg.api.ops.impl.scalar.ReplaceNans; import org.nd4j.linalg.api.ops.impl.scatter.ScatterUpdate; @@ -627,16 +627,16 @@ public class Nd4j { * @return array of maximum values. */ public static INDArray argMax(INDArray arr, @NonNull int... dimension) { - IMax imax = new IMax(arr, dimension); - return Nd4j.getExecutioner().exec(imax); + val imax = new ArgMax(arr, dimension); + return Nd4j.getExecutioner().exec(imax)[0]; } /** * See {@link #argMax(INDArray, int...)} but return minimum values. */ public static INDArray argMin(INDArray arr, @NonNull int... dimension) { - IMin imin = new IMin(arr, dimension); - return Nd4j.getExecutioner().exec(imin); + val imin = new ArgMin(arr, dimension); + return Nd4j.getExecutioner().exec(imin)[0]; } /** diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/factory/ops/NDBase.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/factory/ops/NDBase.java index 83352cbba..1b2718e2e 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/factory/ops/NDBase.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/factory/ops/NDBase.java @@ -75,7 +75,7 @@ public class NDBase { public INDArray argmax(INDArray in, boolean keepDims, int... dimensions) { NDValidation.validateNumerical("argmax", "in", in); Preconditions.checkArgument(dimensions.length >= 0, "dimensions has incorrect size/length. Expected: dimensions.length >= 0, got %s", dimensions.length); - return Nd4j.exec(new org.nd4j.linalg.api.ops.impl.indexaccum.IMax(in, keepDims, dimensions)); + return Nd4j.exec(new org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMax(in, keepDims, dimensions))[0]; } /** @@ -97,7 +97,7 @@ public class NDBase { public INDArray argmax(INDArray in, int... dimensions) { NDValidation.validateNumerical("argmax", "in", in); Preconditions.checkArgument(dimensions.length >= 0, "dimensions has incorrect size/length. 
Expected: dimensions.length >= 0, got %s", dimensions.length); - return Nd4j.exec(new org.nd4j.linalg.api.ops.impl.indexaccum.IMax(in, false, dimensions)); + return Nd4j.exec(new org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMax(in, false, dimensions))[0]; } /** @@ -123,7 +123,7 @@ public class NDBase { public INDArray argmin(INDArray in, boolean keepDims, int... dimensions) { NDValidation.validateNumerical("argmin", "in", in); Preconditions.checkArgument(dimensions.length >= 0, "dimensions has incorrect size/length. Expected: dimensions.length >= 0, got %s", dimensions.length); - return Nd4j.exec(new org.nd4j.linalg.api.ops.impl.indexaccum.IMin(in, keepDims, dimensions)); + return Nd4j.exec(new org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMin(in, keepDims, dimensions))[0]; } /** @@ -148,7 +148,7 @@ public class NDBase { public INDArray argmin(INDArray in, int... dimensions) { NDValidation.validateNumerical("argmin", "in", in); Preconditions.checkArgument(dimensions.length >= 0, "dimensions has incorrect size/length. Expected: dimensions.length >= 0, got %s", dimensions.length); - return Nd4j.exec(new org.nd4j.linalg.api.ops.impl.indexaccum.IMin(in, false, dimensions)); + return Nd4j.exec(new org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMin(in, false, dimensions))[0]; } /** diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/factory/ops/NDMath.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/factory/ops/NDMath.java index cb8ab10c0..cf03080f0 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/factory/ops/NDMath.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/factory/ops/NDMath.java @@ -896,7 +896,7 @@ public class NDMath { public INDArray iamax(INDArray in, int... dimensions) { NDValidation.validateNumerical("iamax", "in", in); Preconditions.checkArgument(dimensions.length >= 1, "dimensions has incorrect size/length. Expected: dimensions.length >= 1, got %s", dimensions.length); - return Nd4j.exec(new org.nd4j.linalg.api.ops.impl.indexaccum.IAMax(in, false, dimensions)); + return Nd4j.exec(new org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMax(in, false, dimensions))[0]; } /** @@ -911,7 +911,7 @@ public class NDMath { public INDArray iamax(INDArray in, boolean keepDims, int... dimensions) { NDValidation.validateNumerical("iamax", "in", in); Preconditions.checkArgument(dimensions.length >= 1, "dimensions has incorrect size/length. Expected: dimensions.length >= 1, got %s", dimensions.length); - return Nd4j.exec(new org.nd4j.linalg.api.ops.impl.indexaccum.IAMax(in, keepDims, dimensions)); + return Nd4j.exec(new org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMax(in, keepDims, dimensions))[0]; } /** @@ -925,7 +925,7 @@ public class NDMath { public INDArray iamin(INDArray in, int... dimensions) { NDValidation.validateNumerical("iamin", "in", in); Preconditions.checkArgument(dimensions.length >= 1, "dimensions has incorrect size/length. Expected: dimensions.length >= 1, got %s", dimensions.length); - return Nd4j.exec(new org.nd4j.linalg.api.ops.impl.indexaccum.IAMin(in, false, dimensions)); + return Nd4j.exec(new org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMin(in, false, dimensions))[0]; } /** @@ -940,7 +940,7 @@ public class NDMath { public INDArray iamin(INDArray in, boolean keepDims, int... 
dimensions) { NDValidation.validateNumerical("iamin", "in", in); Preconditions.checkArgument(dimensions.length >= 1, "dimensions has incorrect size/length. Expected: dimensions.length >= 1, got %s", dimensions.length); - return Nd4j.exec(new org.nd4j.linalg.api.ops.impl.indexaccum.IAMin(in, keepDims, dimensions)); + return Nd4j.exec(new org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMin(in, keepDims, dimensions))[0]; } /** diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java index b97274ba1..b4ef3cb05 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java @@ -17469,6 +17469,60 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD(); } // #endif + /** + * This operation returns index of absolute max element in a given NDArray (optionally: along given dimension(s)) + * Expected input: + * 0: N-dimensional array + * 1: optional axis vector + * + * Int args: + * 0: optional axis + */ +// #if NOT_EXCLUDED(OP_argamax) + @Namespace("sd::ops") public static class argamax extends DeclarableCustomOp { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public argamax(Pointer p) { super(p); } + /** Native array allocator. Access with {@link Pointer#position(long)}. */ + public argamax(long size) { super((Pointer)null); allocateArray(size); } + private native void allocateArray(long size); + @Override public argamax position(long position) { + return (argamax)super.position(position); + } + + public argamax() { super((Pointer)null); allocate(); } + private native void allocate(); + public native ShapeList calculateOutputShape(ShapeList inputShape, @ByRef Context block); + } +// #endif + + /** + * This operation returns index of absolute min element in a given NDArray (optionally: along given dimension(s)) + * Expected input: + * 0: N-dimensional array + * 1: optional axis vector + * + * Int args: + * 0: optional axis + */ +// #if NOT_EXCLUDED(OP_argamin) + @Namespace("sd::ops") public static class argamin extends DeclarableCustomOp { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public argamin(Pointer p) { super(p); } + /** Native array allocator. Access with {@link Pointer#position(long)}. 
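Aside (not part of the patch): the two native ops documented above are exposed through the ArgAmax/ArgAmin Java wrappers added earlier in this patch. A minimal, self-contained usage sketch, assuming only that nd4j is on the classpath; the class name AbsIndexReductionExample is hypothetical, and the sample values and expected indices are taken from the updated Nd4jTestsC tests further below:

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgAmax;
import org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgAmin;
import org.nd4j.linalg.factory.Nd4j;

// Hypothetical example class, for illustration only.
public class AbsIndexReductionExample {
    public static void main(String[] args) {
        INDArray in = Nd4j.create(new double[] {-0.24, -0.26, -0.07, -0.01});
        // Nd4j.exec(CustomOp) returns the op outputs as INDArray[]; output 0 holds the indices.
        int amax = Nd4j.exec(new ArgAmax(in.dup()))[0].getInt(0); // 1: |-0.26| has the largest magnitude
        int amin = Nd4j.exec(new ArgAmin(in.dup()))[0].getInt(0); // 3: |-0.01| has the smallest magnitude
        System.out.println(amax + " " + amin);
    }
}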
*/ + public argamin(long size) { super((Pointer)null); allocateArray(size); } + private native void allocateArray(long size); + @Override public argamin position(long position) { + return (argamin)super.position(position); + } + + public argamin() { super((Pointer)null); allocate(); } + private native void allocate(); + public native ShapeList calculateOutputShape(ShapeList inputShape, @ByRef Context block); + } +// #endif + /** * This operation provides various normalization modes: * 0: frobenius diff --git a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/opvalidation/ReductionOpValidation.java b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/opvalidation/ReductionOpValidation.java index b8b5e05f4..dcd161604 100644 --- a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/opvalidation/ReductionOpValidation.java +++ b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/opvalidation/ReductionOpValidation.java @@ -32,8 +32,8 @@ import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ops.CustomOp; import org.nd4j.linalg.api.ops.DynamicCustomOp; -import org.nd4j.linalg.api.ops.impl.indexaccum.IAMax; -import org.nd4j.linalg.api.ops.impl.indexaccum.IAMin; +import org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgAmax; +import org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgAmin; import org.nd4j.linalg.api.ops.impl.loss.SoftmaxCrossEntropyWithLogitsLoss; import org.nd4j.linalg.api.ops.impl.reduce.Moments; import org.nd4j.linalg.api.ops.impl.reduce.NormalizeMoments; @@ -863,12 +863,12 @@ public class ReductionOpValidation extends BaseOpValidation { break; case 2: reduce = sd.math().iamax(s, dim); - exp = Nd4j.getExecutioner().exec(new IAMax(in.dup(), dim)); + exp = Nd4j.getExecutioner().exec(new ArgAmax(in.dup(), dim))[0]; name = "iamax"; break; case 3: reduce = sd.math().iamin(s, dim); - exp = Nd4j.getExecutioner().exec(new IAMin(in.dup(), dim)); + exp = Nd4j.getExecutioner().exec(new ArgAmin(in.dup(), dim))[0]; name = "iamin"; break; case 4: diff --git a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/samediff/NameScopeTests.java b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/samediff/NameScopeTests.java index 0d1d6a600..ca733c1e8 100644 --- a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/samediff/NameScopeTests.java +++ b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/samediff/NameScopeTests.java @@ -144,7 +144,7 @@ public class NameScopeTests extends BaseNd4jTest { scope.close(); - assertTrue("Var with name test/imax exists", SD.variableMap().containsKey("test/imax")); + assertTrue("Var with name test/argmax exists", SD.variableMap().containsKey("test/argmax")); } @Test diff --git a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/Nd4jTestsC.java b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/Nd4jTestsC.java index a70ede362..c9f5cef6f 100644 --- a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/Nd4jTestsC.java +++ b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/Nd4jTestsC.java @@ -52,10 +52,10 @@ import org.nd4j.linalg.api.ops.impl.broadcast.bool.BroadcastEqualTo; import org.nd4j.linalg.api.ops.impl.broadcast.bool.BroadcastGreaterThan; import org.nd4j.linalg.api.ops.impl.broadcast.bool.BroadcastGreaterThanOrEqual; import org.nd4j.linalg.api.ops.impl.broadcast.bool.BroadcastLessThan; -import org.nd4j.linalg.api.ops.impl.indexaccum.IAMax; -import 
org.nd4j.linalg.api.ops.impl.indexaccum.IAMin; -import org.nd4j.linalg.api.ops.impl.indexaccum.IMax; -import org.nd4j.linalg.api.ops.impl.indexaccum.IMin; +import org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgAmax; +import org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgAmin; +import org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMax; +import org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMin; import org.nd4j.linalg.api.ops.impl.layers.convolution.Conv2D; import org.nd4j.linalg.api.ops.impl.layers.convolution.Im2col; import org.nd4j.linalg.api.ops.impl.layers.convolution.config.Conv2DConfig; @@ -3765,10 +3765,10 @@ public class Nd4jTestsC extends BaseNd4jTest { Nd4j.getExecutioner().setProfilingMode(OpExecutioner.ProfilingMode.ALL); INDArray arr = Nd4j.create(new double[] {-0.24, -0.26, -0.07, -0.01}); - IMax iMax = new IMax(arr); - IAMax iaMax = new IAMax(arr.dup()); - val imax = Nd4j.getExecutioner().execAndReturn(iMax).getFinalResult().intValue(); - val iamax = Nd4j.getExecutioner().execAndReturn(iaMax).getFinalResult().intValue(); + val iMax = new ArgMax(arr); + val iaMax = new ArgAmax(arr.dup()); + val imax = Nd4j.getExecutioner().exec(iMax)[0].getInt(0); + val iamax = Nd4j.getExecutioner().exec(iaMax)[0].getInt(0); // System.out.println("IMAX: " + imax); // System.out.println("IAMAX: " + iamax); assertEquals(1, iamax); @@ -3780,10 +3780,10 @@ public class Nd4jTestsC extends BaseNd4jTest { public void testIMinIAMin() { INDArray arr = Nd4j.create(new double[] {-0.24, -0.26, -0.07, -0.01}); INDArray abs = Transforms.abs(arr); - IAMin iaMin = new IAMin(abs); - IMin iMin = new IMin(arr.dup()); - double imin = Nd4j.getExecutioner().execAndReturn(iMin).getFinalResult().doubleValue(); - double iamin = Nd4j.getExecutioner().execAndReturn(iaMin).getFinalResult().doubleValue(); + val iaMin = new ArgAmin(abs); + val iMin = new ArgMin(arr.dup()); + double imin = Nd4j.getExecutioner().exec(iMin)[0].getDouble(0); + double iamin = Nd4j.getExecutioner().exec(iaMin)[0].getDouble(0); // System.out.println("IMin: " + imin); // System.out.println("IAMin: " + iamin); assertEquals(3, iamin, 1e-12); @@ -4077,7 +4077,7 @@ public class Nd4jTestsC extends BaseNd4jTest { arr.get(NDArrayIndex.point(i), NDArrayIndex.all(), NDArrayIndex.all()).assign(Nd4j.create(slices[i])); } - INDArray out = Nd4j.getExecutioner().exec(new IMax(arr, 1,2)); + INDArray out = Nd4j.exec(new ArgMax(arr, 1,2))[0]; assertEquals(DataType.LONG, out.dataType()); @@ -4119,8 +4119,8 @@ public class Nd4jTestsC extends BaseNd4jTest { } } - INDArray actC = Nd4j.getExecutioner().exec(new IMax(arr.dup('c'), 0,1)); - INDArray actF = Nd4j.getExecutioner().exec(new IMax(arr.dup('f'), 0,1)); + INDArray actC = Nd4j.getExecutioner().exec(new ArgMax(arr.dup('c'), 0,1))[0]; + INDArray actF = Nd4j.getExecutioner().exec(new ArgMax(arr.dup('f'), 0,1))[0]; // assertEquals(exp, actC); assertEquals(exp, actF); @@ -4153,8 +4153,8 @@ public class Nd4jTestsC extends BaseNd4jTest { } } - actC = Nd4j.getExecutioner().exec(new IMax(arr.dup('c'), 2, 3)); - actF = Nd4j.getExecutioner().exec(new IMax(arr.dup('f'), 2, 3)); + actC = Nd4j.getExecutioner().exec(new ArgMax(arr.dup('c'), 2, 3))[0]; + actF = Nd4j.getExecutioner().exec(new ArgMax(arr.dup('f'), 2, 3))[0]; assertEquals(exp, actC); assertEquals(exp, actF); diff --git a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/crash/CrashTest.java b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/crash/CrashTest.java index d0bcb3975..3277ddfc7 100644 --- 
a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/crash/CrashTest.java +++ b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/crash/CrashTest.java @@ -25,7 +25,7 @@ import org.junit.runners.Parameterized; import org.nd4j.linalg.BaseNd4jTest; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ops.CustomOp; -import org.nd4j.linalg.api.ops.impl.indexaccum.IMax; +import org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMax; import org.nd4j.linalg.api.ops.impl.reduce3.ManhattanDistance; import org.nd4j.linalg.api.ops.impl.transforms.custom.LogSoftMax; import org.nd4j.linalg.api.ops.impl.transforms.custom.SoftMax; @@ -122,7 +122,7 @@ public class CrashTest extends BaseNd4jTest { float sum = x.sumNumber().floatValue(); // index reduction - Nd4j.getExecutioner().exec(new IMax(x)); + Nd4j.getExecutioner().exec(new ArgMax(x)); // casual transform Nd4j.getExecutioner().exec(new Sqrt(x, x)); diff --git a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/ops/OpExecutionerTests.java b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/ops/OpExecutionerTests.java index 0fc085abe..330c1110a 100644 --- a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/ops/OpExecutionerTests.java +++ b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/ops/OpExecutionerTests.java @@ -26,9 +26,9 @@ import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ops.CustomOp; import org.nd4j.linalg.api.ops.executioner.OpExecutioner; -import org.nd4j.linalg.api.ops.impl.indexaccum.IAMax; -import org.nd4j.linalg.api.ops.impl.indexaccum.IMax; -import org.nd4j.linalg.api.ops.impl.indexaccum.IMin; +import org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgAmax; +import org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMax; +import org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMin; import org.nd4j.linalg.api.ops.impl.reduce.floating.Mean; import org.nd4j.linalg.api.ops.impl.reduce.floating.Norm2; import org.nd4j.linalg.api.ops.impl.reduce.floating.NormMax; @@ -282,9 +282,9 @@ public class OpExecutionerTests extends BaseNd4jTest { public void testIamax2() { INDArray linspace = Nd4j.linspace(1, 4, 4, DataType.DOUBLE); assertEquals(getFailureMessage(), 3, Nd4j.getBlasWrapper().iamax(linspace)); - val op = new IAMax(linspace); + val op = new ArgAmax(linspace); - int iamax = Nd4j.getExecutioner().execAndReturn(op).getFinalResult().intValue(); + int iamax = Nd4j.getExecutioner().exec(op)[0].getInt(0); assertEquals(3, iamax); } @@ -565,24 +565,24 @@ public class OpExecutionerTests extends BaseNd4jTest { @Test public void testIMax() { INDArray arr = Nd4j.linspace(1, 10, 10, DataType.DOUBLE); - IMax imax = new IMax(arr); - assertEquals(9, Nd4j.getExecutioner().execAndReturn(imax).getFinalResult().intValue()); + ArgMax imax = new ArgMax(arr); + assertEquals(9, Nd4j.getExecutioner().exec(imax)[0].getInt(0)); arr.muli(-1); - imax = new IMax(arr); - int maxIdx = Nd4j.getExecutioner().execAndReturn(imax).getFinalResult().intValue(); + imax = new ArgMax(arr); + int maxIdx = Nd4j.getExecutioner().exec(imax)[0].getInt(0); assertEquals(0, maxIdx); } @Test public void testIMin() { INDArray arr = Nd4j.linspace(1, 10, 10, DataType.DOUBLE); - IMin imin = new IMin(arr); - assertEquals(0, Nd4j.getExecutioner().execAndReturn(imin).getFinalResult().intValue()); + ArgMin imin = new ArgMin(arr); + assertEquals(0, Nd4j.getExecutioner().exec(imin)[0].getInt(0)); arr.muli(-1); - imin = new IMin(arr); - int minIdx = 
Nd4j.getExecutioner().execAndReturn(imin).getFinalResult().intValue(); + imin = new ArgMin(arr); + int minIdx = Nd4j.getExecutioner().exec(imin)[0].getInt(0); assertEquals(9, minIdx); } diff --git a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/ops/OpExecutionerTestsC.java b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/ops/OpExecutionerTestsC.java index 66305b42a..117f8745b 100644 --- a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/ops/OpExecutionerTestsC.java +++ b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/ops/OpExecutionerTestsC.java @@ -32,8 +32,8 @@ import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ops.CustomOp; import org.nd4j.linalg.api.ops.executioner.OpExecutioner; import org.nd4j.linalg.api.ops.impl.broadcast.BroadcastMulOp; -import org.nd4j.linalg.api.ops.impl.indexaccum.IMax; -import org.nd4j.linalg.api.ops.impl.indexaccum.IMin; +import org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMax; +import org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMin; import org.nd4j.linalg.api.ops.impl.reduce.floating.Mean; import org.nd4j.linalg.api.ops.impl.reduce.floating.Norm2; import org.nd4j.linalg.api.ops.impl.reduce.floating.NormMax; @@ -478,24 +478,24 @@ public class OpExecutionerTestsC extends BaseNd4jTest { @Test public void testIMax() { INDArray arr = Nd4j.linspace(1, 10, 10, DataType.DOUBLE); - IMax imax = new IMax(arr); - assertEquals(9, Nd4j.getExecutioner().execAndReturn(imax).getFinalResult().intValue()); + ArgMax imax = new ArgMax(arr); + assertEquals(9, Nd4j.getExecutioner().exec(imax)[0].getInt(0)); arr.muli(-1); - imax = new IMax(arr); - int maxIdx = Nd4j.getExecutioner().execAndReturn(imax).getFinalResult().intValue(); + imax = new ArgMax(arr); + int maxIdx = Nd4j.getExecutioner().exec(imax)[0].getInt(0); assertEquals(0, maxIdx); } @Test public void testIMin() { INDArray arr = Nd4j.linspace(1, 10, 10, DataType.DOUBLE); - IMin imin = new IMin(arr); - assertEquals(0, Nd4j.getExecutioner().execAndReturn(imin).getFinalResult().intValue()); + ArgMin imin = new ArgMin(arr); + assertEquals(0, Nd4j.getExecutioner().exec(imin)[0].getInt(0)); arr.muli(-1); - imin = new IMin(arr); - int minIdx = Nd4j.getExecutioner().execAndReturn(imin).getFinalResult().intValue(); + imin = new ArgMin(arr); + int minIdx = Nd4j.getExecutioner().exec(imin)[0].getInt(0); assertEquals(9, minIdx); } diff --git a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/shape/EmptyTests.java b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/shape/EmptyTests.java index aa81097d1..c07fae701 100644 --- a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/shape/EmptyTests.java +++ b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/shape/EmptyTests.java @@ -26,6 +26,7 @@ import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ops.DynamicCustomOp; import org.nd4j.linalg.api.ops.impl.reduce.bool.All; +import org.nd4j.linalg.exception.ND4JIllegalStateException; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.factory.Nd4jBackend; @@ -234,7 +235,7 @@ public class EmptyTests extends BaseNd4jTest { assertEquals(e, reduced); } - @Test(expected = IllegalArgumentException.class) + @Test(expected = ND4JIllegalStateException.class) public void testEmptyReduction_4() { val x = Nd4j.create(DataType.FLOAT, 2, 0); val e = Nd4j.create(DataType.FLOAT, 0); From 22141759345fbb464e19be943c54f2000a3978b0 Mon Sep 17 00:00:00 2001 From: 
"raver119@gmail.com" Date: Thu, 14 May 2020 13:54:52 +0300 Subject: [PATCH 02/21] disable unwanted logging Signed-off-by: raver119@gmail.com --- libnd4j/blas/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libnd4j/blas/CMakeLists.txt b/libnd4j/blas/CMakeLists.txt index 9902649f8..8419cdd4c 100755 --- a/libnd4j/blas/CMakeLists.txt +++ b/libnd4j/blas/CMakeLists.txt @@ -300,7 +300,7 @@ elseif(SD_CPU) string(REGEX MATCH "^(.*)\\.cpp\.in$" dummy ${FL_ITEM}) set(FL_ITEM_WLE ${CMAKE_MATCH_1}) foreach(FL_TYPE_INDEX RANGE 0 9) - message( "${FL_ITEM_WLE}_${FL_TYPE_INDEX}.cpp") + #message( "${FL_ITEM_WLE}_${FL_TYPE_INDEX}.cpp") configure_file( "${FL_ITEM}" "${FL_ITEM_WLE}_${FL_TYPE_INDEX}.cpp" @ONLY) LIST(APPEND CUSTOMOPS_GENERIC_SOURCES ${FL_ITEM_WLE}_${FL_TYPE_INDEX}.cpp ) endforeach() From 753ce28a924053228d0d5a1df33c24e4ff22d459 Mon Sep 17 00:00:00 2001 From: Yurii Shyrma Date: Thu, 14 May 2020 18:06:13 +0300 Subject: [PATCH 03/21] Shyrma sqrtm (#429) * - start working on implementation of sqrtm op Signed-off-by: Yurii * - improving householder procedure Signed-off-by: Yurii * - further polishing householder stuff Signed-off-by: Yurii * - polishing hh pivoting qr procedure Signed-off-by: Yurii * - polishing BiDiagonalUp procedure Signed-off-by: Yurii * - polishing householder sequence class Signed-off-by: Yurii * - polishing jacobi svd class Signed-off-by: Yurii * - polishing svd stuff 1 Signed-off-by: Yurii * - polishing svd stuff 2 Signed-off-by: Yurii * - implementation and testing class which performs Hessenberg decomposition of square matrix Signed-off-by: Yurii * - add static method to JacobiSVD class which makes the continuous Givens rotation generation algorithm Signed-off-by: Yurii * - implementation and testing auxiliary methods of Schur decomp class Signed-off-by: Yurii * some references here and there Signed-off-by: raver119 * - trying figure out difference between eigen and our Schur alg Signed-off-by: Yurii * - testing fixing bugs in Schur decomposition op Signed-off-by: Yurii * - start to implement class which performs calculation of eigen values and vectors Signed-off-by: Yurii * - add to EigenValsAndVecs method which calculates complex eigen vectors Signed-off-by: Yurii * - testing and fixing bugs in EigenValsAndVecs class Signed-off-by: Yurii * - implementation and testing triangularSolver class Signed-off-by: Yurii * Added a 2D routine for triangular systems solve. Signed-off-by: shugeo * Refactored triangularSolve2D routine and tests. Signed-off-by: shugeo * Refactored another test for triangularSolve2D. Signed-off-by: shugeo * Refactored test for triangularSolve for vector-bar case. Signed-off-by: shugeo * Refactored triangularSolve2D routine and tests. Signed-off-by: shugeo * - implementation of FullPivLU class Signed-off-by: Yurii * - fix bugs in FullPivLU::solve method Signed-off-by: Yurii * - correct permutation vector in FullPivLU::solve Signed-off-by: Yurii * - correct include headers Signed-off-by: Yurii * - implementation of Sqrtm class Signed-off-by: Yurii * - testing and fixing bugs in Sqrtm class Signed-off-by: Yurii * - include sqrtm classes to cuda folder, investigate in what places synchronization doesn't work Signed-off-by: Yurii * Added implementation for cuda triangularSolve2D and also refactored triangularSolve2D for cpu. Signed-off-by: shugeo * Eliminated waste implementations. 
Signed-off-by: shugeo * - make offset calculation faster in t<> methods Signed-off-by: Yurii * - rename refference T& NDArray::t<> method Signed-off-by: Yurii * - further work on cuda sqrtm Signed-off-by: Yurii * - provide correct synchronization to device in Sqrtm class Signed-off-by: Yurii * - add tests for sqrtm op Signed-off-by: Yurii * - correct fails which appeared while testing on jenkins Signed-off-by: Yurii * - trying to find out mistake in svd::deflation method Signed-off-by: Yurii * Revert "- trying to find out mistake in svd::deflation method" This reverts commit 19d37baddbc509028e4bc67bc932fe7449becdb6. * Revert "- trying to find out mistake in svd::deflation method" This reverts commit 19d37baddbc509028e4bc67bc932fe7449becdb6. Signed-off-by: Yurii * - change call semantic of r<> and t<> methods Signed-off-by: Yurii * - ged rid of ambiguity in * operator overloads for windows buikd Signed-off-by: Yurii * - get rid of ambiguity in * operator overloads for windows build 2 Signed-off-by: Yurii * - get rid of ambiguity in * operator overloads for windows build 3 Signed-off-by: Yurii * - resolve conflicts with master Signed-off-by: Yurii * cmakelists updated Signed-off-by: raver119@gmail.com * - minor fix in merge cpu helper - make use of reference getter Signed-off-by: Yurii Co-authored-by: raver119 Co-authored-by: shugeo --- libnd4j/include/array/NDArray.h | 110 +-- libnd4j/include/array/NDArray.hXX | 45 +- libnd4j/include/array/cpu/NDArray.cpp | 31 +- libnd4j/include/array/cuda/NDArray.cu | 19 +- libnd4j/include/helpers/EigenValsAndVecs.h | 86 ++ libnd4j/include/helpers/FullPivLU.h | 52 + libnd4j/include/helpers/HessenbergAndSchur.h | 102 ++ libnd4j/include/helpers/Sqrtm.h | 45 + libnd4j/include/helpers/biDiagonalUp.h | 13 +- libnd4j/include/helpers/cpu/biDiagonalUp.cpp | 180 ---- libnd4j/include/helpers/cpu/hhColPivQR.cpp | 171 ---- libnd4j/include/helpers/cpu/householder.cpp | 221 ----- libnd4j/include/helpers/cpu/svd.cpp | 485 ++++------ libnd4j/include/helpers/hhSequence.h | 28 +- libnd4j/include/helpers/householder.h | 46 +- .../include/helpers/impl/EigenValsAndVecs.cpp | 293 ++++++ libnd4j/include/helpers/impl/FullPivLU.cpp | 170 ++++ .../helpers/impl/HessenbergAndSchur.cpp | 383 ++++++++ libnd4j/include/helpers/impl/MmulHelper.cpp | 2 +- libnd4j/include/helpers/impl/Sqrtm.cpp | 276 ++++++ libnd4j/include/helpers/impl/biDiagonalUp.cpp | 160 +++ libnd4j/include/helpers/impl/hhColPivQR.cpp | 147 +++ .../helpers/{cpu => impl}/hhSequence.cpp | 79 +- libnd4j/include/helpers/impl/householder.cpp | 218 +++++ .../helpers/{cpu => impl}/jacobiSVD.cpp | 235 ++--- libnd4j/include/helpers/jacobiSVD.h | 7 +- libnd4j/include/helpers/shape.h | 2 +- .../loops/cuda/specials/swapUnsafeKernel.cu | 28 +- .../ops/declarable/generic/linalg/sqrtm.cpp | 53 + .../generic/{blas => linalg}/svd.cpp | 0 .../generic/linalg/triangular_solve.cpp | 4 +- libnd4j/include/ops/declarable/headers/blas.h | 30 +- .../ops/declarable/helpers/cpu/betaInc.cpp | 2 +- .../helpers/cpu/extract_patches.cpp | 2 +- .../helpers/cpu/fake_quantization.cpp | 2 +- .../declarable/helpers/cpu/image_resize.cpp | 2 +- .../ops/declarable/helpers/cpu/lstsq.cpp | 4 +- .../ops/declarable/helpers/cpu/lup.cpp | 34 +- .../ops/declarable/helpers/cpu/merge.cpp | 4 +- .../ops/declarable/helpers/cpu/random.cpp | 14 +- .../declarable/helpers/cpu/randomShuffle.cpp | 8 +- .../ops/declarable/helpers/cpu/segment.cpp | 14 +- .../declarable/helpers/cpu/sequence_mask.cpp | 2 +- .../ops/declarable/helpers/cpu/solve.cpp | 6 +- 
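(Context for the sqrtm op added in this patch: given the Hessenberg/Schur and triangular-solve helpers it introduces, the construction is presumably the standard Schur method for the principal matrix square root,

\[ A = Q\,T\,Q^{*}, \qquad A^{1/2} = Q\,T^{1/2}\,Q^{*}, \]

where \(T\) is the (quasi-)triangular Schur form and \(T^{1/2} = U\) follows from the triangular recurrence \(U_{ii} = \sqrt{T_{ii}}\), \(U_{ij} = \big(T_{ij} - \sum_{i<k<j} U_{ik}U_{kj}\big)/(U_{ii}+U_{jj})\).)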
.../ops/declarable/helpers/cpu/svd.cpp | 912 +----------------- .../ops/declarable/helpers/cpu/top_k.cpp | 16 +- .../helpers/cpu/triangular_solve.cpp | 38 +- .../ops/declarable/helpers/cpu/triu.cpp | 2 +- .../helpers/cuda/triangular_solve.cu | 139 ++- .../helpers/impl/sparse_to_dense.cpp | 1 + .../ops/declarable/helpers/impl/sqrtm.cpp | 66 ++ .../include/ops/declarable/helpers/sqrtm.h | 39 + .../ops/declarable/helpers/triangular_solve.h | 4 +- .../layers_tests/DeclarableOpsTests11.cpp | 86 +- .../layers_tests/DeclarableOpsTests13.cpp | 494 ---------- .../layers_tests/DeclarableOpsTests15.cpp | 75 +- .../layers_tests/DeclarableOpsTests19.cpp | 2 + .../tests_cpu/layers_tests/HelpersTests1.cpp | 868 +++++++---------- .../tests_cpu/layers_tests/HelpersTests2.cpp | 426 ++++++++ .../tests_cpu/layers_tests/NDArrayTests.cpp | 3 + .../tests_cpu/layers_tests/NativeOpsTests.cpp | 2 +- .../layers_tests/PlaygroundTests.cpp | 523 ++++++++++ libnd4j/tests_cpu/layers_tests/RNGTests.cpp | 44 +- .../tests_cpu/libnd4j_tests/CMakeLists.txt | 13 +- 64 files changed, 4310 insertions(+), 3258 deletions(-) create mode 100644 libnd4j/include/helpers/EigenValsAndVecs.h create mode 100644 libnd4j/include/helpers/FullPivLU.h create mode 100644 libnd4j/include/helpers/HessenbergAndSchur.h create mode 100644 libnd4j/include/helpers/Sqrtm.h delete mode 100644 libnd4j/include/helpers/cpu/biDiagonalUp.cpp delete mode 100644 libnd4j/include/helpers/cpu/hhColPivQR.cpp delete mode 100644 libnd4j/include/helpers/cpu/householder.cpp create mode 100644 libnd4j/include/helpers/impl/EigenValsAndVecs.cpp create mode 100644 libnd4j/include/helpers/impl/FullPivLU.cpp create mode 100644 libnd4j/include/helpers/impl/HessenbergAndSchur.cpp create mode 100644 libnd4j/include/helpers/impl/Sqrtm.cpp create mode 100644 libnd4j/include/helpers/impl/biDiagonalUp.cpp create mode 100644 libnd4j/include/helpers/impl/hhColPivQR.cpp rename libnd4j/include/helpers/{cpu => impl}/hhSequence.cpp (59%) create mode 100644 libnd4j/include/helpers/impl/householder.cpp rename libnd4j/include/helpers/{cpu => impl}/jacobiSVD.cpp (58%) create mode 100644 libnd4j/include/ops/declarable/generic/linalg/sqrtm.cpp rename libnd4j/include/ops/declarable/generic/{blas => linalg}/svd.cpp (100%) create mode 100644 libnd4j/include/ops/declarable/helpers/impl/sqrtm.cpp create mode 100644 libnd4j/include/ops/declarable/helpers/sqrtm.h create mode 100644 libnd4j/tests_cpu/layers_tests/HelpersTests2.cpp diff --git a/libnd4j/include/array/NDArray.h b/libnd4j/include/array/NDArray.h index ae4df227d..04500a987 100644 --- a/libnd4j/include/array/NDArray.h +++ b/libnd4j/include/array/NDArray.h @@ -1163,7 +1163,7 @@ namespace sd { /** * fill target matrix with given value in one or two directions from main diagonal: - * - down from main diagonal starting at subdiagonal number "lower" if direction = 'd' (down) or 'b' (both) + * - down from main diagonal starting at subdiagonal number "lower" if direction = 'l' (down) or 'b' (both) * - up from main diagonal starting at superdiagonal number "upper"if direction = 'u' (up) or 'b' (both) * direction - in what direction to fill matrix. 
There are 3 possible directions: * 'u' - fill up, mathematically this corresponds to lower triangular matrix, subdiagonal "lower" unaffected @@ -1230,14 +1230,13 @@ namespace sd { * returns reference on array element with given index */ template - FORCEINLINE T& t(const Nd4jLong index); - + FORCEINLINE T& r(const Nd4jLong index); template - FORCEINLINE T& t(const Nd4jLong i, const Nd4jLong j); + FORCEINLINE T& r(const Nd4jLong i, const Nd4jLong j); template - FORCEINLINE T& t(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k); + FORCEINLINE T& r(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k); template - FORCEINLINE T& t(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLong w); + FORCEINLINE T& r(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLong w); /** @@ -1246,7 +1245,6 @@ namespace sd { */ template FORCEINLINE T t(const Nd4jLong i) const; - template FORCEINLINE T t(const Nd4jLong i, const Nd4jLong j) const; template @@ -1778,70 +1776,60 @@ DataType NDArray::dataType() const { //////////////////////////////////////////////////////////////////////// template -T& NDArray::t(const Nd4jLong i) { +T& NDArray::r(const Nd4jLong i) { // if (i >= _length) // throw std::invalid_argument("NDArray::t(i): input index is out of array length !"); if (DataTypeUtils::fromT() != _dataType) throw std::invalid_argument("NDArray::t(i): type of array is not equal to template type T!"); - if(!isActualOnHostSide()) - syncToHost(); - + syncToHost(); tickWriteHost(); + return *(reinterpret_cast(bufferWithOffset(getOffset(i)))); } //////////////////////////////////////////////////////////////////////// template -T& NDArray::t(const Nd4jLong i, const Nd4jLong j) { +T& NDArray::r(const Nd4jLong i, const Nd4jLong j) { if (rankOf() != 2 || i >= sizeAt(0) || j >= sizeAt(1)) throw std::invalid_argument("NDArray::t(i,j): one of input indexes is out of array length or rank!=2 !"); if (DataTypeUtils::fromT() != _dataType) throw std::invalid_argument("NDArray::t(i,j): type of array is not equal to template type T!"); - if(!isActualOnHostSide()) - syncToHost(); - - Nd4jLong coords[2] = {i, j}; - auto offset = shape::getOffset(shapeInfo(), coords); + syncToHost(); tickWriteHost(); - return *(reinterpret_cast(bufferWithOffset(offset))); + + return *(reinterpret_cast(bufferWithOffset(i * strideAt(0) + j * strideAt(1)))); } template -T& NDArray::t(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k) { +T& NDArray::r(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k) { if (rankOf() != 3 || i >= sizeAt(0) || j >= sizeAt(1) || k >= sizeAt(2)) throw std::invalid_argument("NDArray::t(i,j,k): one of input indexes is out of array length or rank!=3!"); if (DataTypeUtils::fromT() != _dataType) throw std::invalid_argument("NDArray::t(i,j,k): type of array is not equal to template type T!"); - if(!isActualOnHostSide()) - syncToHost(); - - Nd4jLong coords[3] = {i, j, k}; - auto offset = shape::getOffset(shapeInfo(), coords); + syncToHost(); tickWriteHost(); - return *(reinterpret_cast(bufferWithOffset(offset))); + + return *(reinterpret_cast(bufferWithOffset(i * strideAt(0) + j * strideAt(1) + k * strideAt(2)))); } template -T& NDArray::t(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLong w) { +T& NDArray::r(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLong w) { if (rankOf() != 4 || i >= sizeAt(0) || j >= sizeAt(1) || k >= sizeAt(2) || w >= sizeAt(3)) throw std::invalid_argument("NDArray::t(i,j,k,w): one of input indexes is out of array length or rank!=4 
!"); if (DataTypeUtils::fromT() != _dataType) throw std::invalid_argument("NDArray::t(i,j,k,w): type of array is not equal to template type T!"); - if(!isActualOnHostSide()) - syncToHost(); - - Nd4jLong coords[4] = {i, j, k, w}; - auto offset = shape::getOffset(shapeInfo(), coords); + syncToHost(); tickWriteHost(); - return *(reinterpret_cast(bufferWithOffset(offset))); + + return *(reinterpret_cast(bufferWithOffset(i * strideAt(0) + j * strideAt(1) + k * strideAt(2) + w * strideAt(3)))); } //////////////////////////////////////////////////////////////////////// @@ -1853,10 +1841,8 @@ T NDArray::t(const Nd4jLong i) const { if (DataTypeUtils::fromT() != _dataType) throw std::invalid_argument("NDArray::t(i): type of array is not equal to template type T!"); - if(!isActualOnHostSide()) - syncToHost(); + syncToHost(); - tickReadHost(); return *(reinterpret_cast(bufferWithOffset(getOffset(i)))); } @@ -1869,48 +1855,38 @@ T NDArray::t(const Nd4jLong i, const Nd4jLong j) const { if (DataTypeUtils::fromT() != _dataType) throw std::invalid_argument("NDArray::t(i,j): type of array is not equal to template type T!"); - if(!isActualOnHostSide()) - syncToHost(); + syncToHost(); - Nd4jLong coords[2] = {i, j}; - auto offset = shape::getOffset(shapeInfo(), coords); - tickReadHost(); - return *(reinterpret_cast(bufferWithOffset(offset))); + return *(reinterpret_cast(bufferWithOffset(i * strideAt(0) + j * strideAt(1)))); } - template - T NDArray::t(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k) const { +//////////////////////////////////////////////////////////////////////// +template +T NDArray::t(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k) const { - if (rankOf() != 3 || i >= sizeAt(0) || j >= sizeAt(1) || k >= sizeAt(2)) - throw std::invalid_argument("NDArray::t(i,j,k): one of input indexes is out of array length or rank!=3!"); - if (DataTypeUtils::fromT() != _dataType) - throw std::invalid_argument("NDArray::t(i,j,k): type of array is not equal to template type T!"); + if (rankOf() != 3 || i >= sizeAt(0) || j >= sizeAt(1) || k >= sizeAt(2)) + throw std::invalid_argument("NDArray::t(i,j,k): one of input indexes is out of array length or rank!=3!"); + if (DataTypeUtils::fromT() != _dataType) + throw std::invalid_argument("NDArray::t(i,j,k): type of array is not equal to template type T!"); - if(!isActualOnHostSide()) - syncToHost(); + syncToHost(); - Nd4jLong coords[3] = {i, j, k}; - auto offset = shape::getOffset(shapeInfo(), coords); - tickReadHost(); - return *(reinterpret_cast(bufferWithOffset(offset))); - } + return *(reinterpret_cast(bufferWithOffset(i * strideAt(0) + j * strideAt(1) + k * strideAt(2)))); +} - template - T NDArray::t(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLong w) const { +//////////////////////////////////////////////////////////////////////// +template +T NDArray::t(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLong w) const { - if (rankOf() != 4 || i >= sizeAt(0) || j >= sizeAt(1) || k >= sizeAt(2) || w >= sizeAt(3)) - throw std::invalid_argument("NDArray::t(i,j,k,w): one of input indexes is out of array length or rank!=4!"); - if (DataTypeUtils::fromT() != _dataType) - throw std::invalid_argument("NDArray::t(i,j,k,w): type of array is not equal to template type T!"); + if (rankOf() != 4 || i >= sizeAt(0) || j >= sizeAt(1) || k >= sizeAt(2) || w >= sizeAt(3)) + throw std::invalid_argument("NDArray::t(i,j,k,w): one of input indexes is out of array length or rank!=4!"); + if (DataTypeUtils::fromT() != _dataType) + throw 
std::invalid_argument("NDArray::t(i,j,k,w): type of array is not equal to template type T!"); - if(!isActualOnHostSide()) - syncToHost(); + syncToHost(); - Nd4jLong coords[4] = {i, j, k, w}; - auto offset = shape::getOffset(shapeInfo(), coords); - tickReadHost(); - return *(reinterpret_cast(bufferWithOffset(offset))); - } + return *(reinterpret_cast(bufferWithOffset(i * strideAt(0) + j * strideAt(1) + k * strideAt(2) + w * strideAt(3)))); +} #ifndef __JAVACPP_HACK__ //////////////////////////////////////////////////////////////////////// diff --git a/libnd4j/include/array/NDArray.hXX b/libnd4j/include/array/NDArray.hXX index 786333eec..773d845ab 100644 --- a/libnd4j/include/array/NDArray.hXX +++ b/libnd4j/include/array/NDArray.hXX @@ -2170,7 +2170,7 @@ const std::string* ND4J_EXPORT NDArray::bufferAsT() const { template const T* NDArray::bufferAsT() const { // FIXME: do we REALLY want sync here? - syncToHost(); + // syncToHost(); return reinterpret_cast(buffer()); } @@ -2597,11 +2597,9 @@ void NDArray::operator+=(const T value) { auto other = NDArrayFactory::create(this->dataType(), value, getContext()); - NDArray::prepareSpecialUse({this}, {&other}); - + NDArray::prepareSpecialUse({this}, {this, &other}); NativeOpExecutioner::execScalar(getContext(), sd::scalar::Add, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), nullptr); - - NDArray::registerSpecialUse({this}, {}); + NDArray::registerSpecialUse({this}, {this, &other}); } template ND4J_EXPORT void NDArray::operator+=(const double value); template ND4J_EXPORT void NDArray::operator+=(const float value); @@ -2619,11 +2617,9 @@ void NDArray::operator-=(const T value) { auto other = NDArrayFactory::create(dataType(), value, getContext()); - NDArray::prepareSpecialUse({this}, {&other}); - + NDArray::prepareSpecialUse({this}, {this, &other}); NativeOpExecutioner::execScalar(getContext(), sd::scalar::Subtract, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), nullptr); - - NDArray::registerSpecialUse({this}, {}); + NDArray::registerSpecialUse({this}, {this, &other}); } template ND4J_EXPORT void NDArray::operator-=(const double value); template ND4J_EXPORT void NDArray::operator-=(const float value); @@ -2640,10 +2636,9 @@ void NDArray::operator*=(const T scalar) { throw std::runtime_error("NDArray::operator*=: you can't use this method on String array!"); auto other = NDArrayFactory::create(this->dataType(), scalar, getContext()); - NDArray::prepareSpecialUse({this}, {&other}); + NDArray::prepareSpecialUse({this}, {this, &other}); NativeOpExecutioner::execScalar(getContext(), sd::scalar::Multiply, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), nullptr); - - NDArray::registerSpecialUse({this}, {}); + NDArray::registerSpecialUse({this}, {this, &other}); } template ND4J_EXPORT void NDArray::operator*=(const double scalar); template ND4J_EXPORT void NDArray::operator*=(const float scalar); @@ -2663,9 +2658,9 @@ void NDArray::operator/=(const T scalar) { throw std::runtime_error("NDArray::operator/=: you can't use this method on String array!"); auto other = 
NDArrayFactory::create(this->dataType(), scalar, getContext()); - NDArray::prepareSpecialUse({this}, {&other}); + NDArray::prepareSpecialUse({this}, {this, &other}); NativeOpExecutioner::execScalar(getContext(), sd::scalar::Divide, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), nullptr); - NDArray::registerSpecialUse({this}, {}); + NDArray::registerSpecialUse({this}, {this, &other}); } template ND4J_EXPORT void NDArray::operator/=(const double scalar); template ND4J_EXPORT void NDArray::operator/=(const float scalar); @@ -3758,8 +3753,7 @@ T NDArray::e(const Nd4jLong i, const Nd4jLong j) const { if (rankOf() != 2 || i >= shapeOf()[0] || j >= shapeOf()[1]) throw std::invalid_argument("NDArray::e(i,j): one of input indexes is out of array length or rank!=2 !"); - const Nd4jLong coords[2] = {i, j}; - const auto xOffset = shape::getOffset(shapeInfo(), coords); + const auto xOffset = i * strideAt(0) + j * strideAt(1); NDArray::preparePrimaryUse({}, {this}); NDArray::registerPrimaryUse({}, {this}); @@ -3778,8 +3772,7 @@ T NDArray::e(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k) const { if (rankOf() != 3 || i >= shapeOf()[0] || j >= shapeOf()[1] || k >= shapeOf()[2]) throw std::invalid_argument("NDArray::e(i,j,k): one of input indexes is out of array length or rank!=3 !"); - const Nd4jLong coords[3] = {i, j, k}; - const auto xOffset = shape::getOffset(shapeInfo(), coords); + const auto xOffset = i * strideAt(0) + j * strideAt(1) + k * strideAt(2); NDArray::preparePrimaryUse({}, {this}); NDArray::registerPrimaryUse({}, {this}); @@ -3798,8 +3791,7 @@ T NDArray::e(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLon if (rankOf() != 4 || i >= shapeOf()[0] || j >= shapeOf()[1] || k >= shapeOf()[2] || l >= shapeOf()[3]) throw std::invalid_argument("NDArray::e(i,j,k,l): one of input indexes is out of array length or rank!=4 !"); - const Nd4jLong coords[4] = {i, j, k, l}; - const auto xOffset = shape::getOffset(shapeInfo(), coords); + const auto xOffset = i * strideAt(0) + j * strideAt(1) + k * strideAt(2) + l * strideAt(3); NDArray::preparePrimaryUse({}, {this}); NDArray::registerPrimaryUse({}, {this}); @@ -4411,8 +4403,7 @@ void NDArray::p(const Nd4jLong i, const Nd4jLong j, const T value) { throw std::invalid_argument("NDArray:pe(i,j, value): one of input indexes is out of array length or rank!=2 !"); void *p = reinterpret_cast(const_cast(&value)); - Nd4jLong coords[2] = {i, j}; - auto xOffset = shape::getOffset(shapeInfo(), coords); + auto xOffset = i * strideAt(0) + j * strideAt(1); NDArray::preparePrimaryUse({this}, {}, true); BUILD_SINGLE_PARTIAL_SELECTOR(dataType(), templatedSet<, T>(this->buffer(), xOffset, p), LIBND4J_TYPES); @@ -4440,11 +4431,10 @@ void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const T va if (rankOf() != 3 || i >= shapeOf()[0] || j >= shapeOf()[1] || k >= shapeOf()[2]) throw std::invalid_argument("NDArray:pe(i,j,k, value): one of input indexes is out of array length or rank!=3 !"); - NDArray::preparePrimaryUse({this}, {}, true); - void *p = reinterpret_cast(const_cast(&value)); - Nd4jLong coords[3] = {i, j, k}; - auto xOffset = shape::getOffset(shapeInfo(), coords); + auto xOffset = i * strideAt(0) + j * strideAt(1) + k * strideAt(2); + + NDArray::preparePrimaryUse({this}, {}, true); BUILD_SINGLE_PARTIAL_SELECTOR(dataType(), templatedSet<, T>(this->buffer(), xOffset, p), 
LIBND4J_TYPES); NDArray::registerPrimaryUse({this}, {}); } @@ -4470,8 +4460,7 @@ void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4j throw std::invalid_argument("NDArray::p(i,j,k,l, value): one of input indexes is out of array length or rank!=4 !"); void *p = reinterpret_cast(const_cast(&value)); - Nd4jLong coords[4] = {i, j, k, l}; - auto xOffset = shape::getOffset(shapeInfo(), coords); + auto xOffset = i * strideAt(0) + j * strideAt(1) + k * strideAt(2) + l * strideAt(3); NDArray::preparePrimaryUse({this}, {}, true); BUILD_SINGLE_PARTIAL_SELECTOR(dataType(), templatedSet<, T>(this->buffer(), xOffset, p), LIBND4J_TYPES); diff --git a/libnd4j/include/array/cpu/NDArray.cpp b/libnd4j/include/array/cpu/NDArray.cpp index 87369f740..873b3fec9 100644 --- a/libnd4j/include/array/cpu/NDArray.cpp +++ b/libnd4j/include/array/cpu/NDArray.cpp @@ -153,21 +153,38 @@ void NDArray::setIdentity() { //////////////////////////////////////////////////////////////////////// template -static void templatedSwap(void *xBuffer, void *yBuffer, Nd4jLong length) { +static void templatedSwap(void *xBuffer, void *yBuffer, const Nd4jLong* xShapeInfo, const Nd4jLong* yShapeInfo, Nd4jLong length) { auto x = reinterpret_cast(xBuffer); auto y = reinterpret_cast(yBuffer); + const bool isSameOrders = shape::order(xShapeInfo) == shape::order(xShapeInfo); + + const auto xEws = shape::elementWiseStride(xShapeInfo); + const auto yEws = shape::elementWiseStride(yShapeInfo); + auto func = PRAGMA_THREADS_FOR { - for (auto i = start; i < stop; i++) { - auto temp = x[i]; - x[i] = y[i]; - y[i] = temp; + if(isSameOrders && xEws > 0 && yEws > 0) { + for(auto i = start; i < stop; i++) + sd::math::nd4j_swap(x[i*xEws], y[i*yEws]); + } + else if(shape::haveSameShapeAndStrides(xShapeInfo, yShapeInfo)) { + for(auto i = start; i < stop; i++) { + const auto ind = shape::getIndexOffset(i, xShapeInfo); + sd::math::nd4j_swap(x[ind], y[ind]); + } + } + else { + for(auto i = start; i < stop; i++) { + const auto xInd = shape::getIndexOffset(i, xShapeInfo); + const auto yInd = shape::getIndexOffset(i, yShapeInfo); + sd::math::nd4j_swap(x[xInd], y[yInd]); + } } }; samediff::Threads::parallel_for(func, 0, length); } -BUILD_SINGLE_TEMPLATE(template void templatedSwap, (void *xBuffer, void *yBuffer, Nd4jLong length), LIBND4J_TYPES); +BUILD_SINGLE_TEMPLATE(template void templatedSwap, (void *xBuffer, void *yBuffer, const Nd4jLong* xShapeInfo, const Nd4jLong* yShapeInfo, Nd4jLong length), LIBND4J_TYPES); //////////////////////////////////////////////////////////////////////// void NDArray::swapUnsafe(NDArray& other) { @@ -182,7 +199,7 @@ void NDArray::swapUnsafe(NDArray& other) { if(lengthOf() != other.lengthOf()) throw std::runtime_error("NDArray::swapUnsafe method: input arrays should have the same length!"); - BUILD_SINGLE_SELECTOR(xType, templatedSwap, (buffer(), other.buffer(), this->lengthOf()), LIBND4J_TYPES); + BUILD_SINGLE_SELECTOR(xType, templatedSwap, (buffer(), other.buffer(), shapeInfo(), other.shapeInfo(), this->lengthOf()), LIBND4J_TYPES); } //////////////////////////////////////////////////////////////////////// diff --git a/libnd4j/include/array/cuda/NDArray.cu b/libnd4j/include/array/cuda/NDArray.cu index e33e97c3b..8ed3eceeb 100644 --- a/libnd4j/include/array/cuda/NDArray.cu +++ b/libnd4j/include/array/cuda/NDArray.cu @@ -225,7 +225,13 @@ void NDArray::swapUnsafe(NDArray& other) { if(lengthOf() != other.lengthOf()) throw std::runtime_error("NDArray::swapUnsafe method: input arrays should have the same 
length!"); + PointersManager manager(getContext(), "NDArray::swapUnsafe"); + + prepareSpecialUse({&other, this}, {&other, this}); BUILD_SINGLE_SELECTOR(xType, templatedSwapUnsafe, (specialBuffer(), specialShapeInfo(), other.specialBuffer(), other.specialShapeInfo(), getContext()->getCudaStream()), LIBND4J_TYPES); + registerSpecialUse({&other, this}, {&other, this}); + + manager.synchronize(); } //////////////////////////////////////////////////////////////////////// @@ -546,21 +552,18 @@ void NDArray::printCurrentBuffer(const bool host, const char* msg, const int pre if(specialBuffer() == nullptr || _length == 0) { printf("NDArray::printSpecialBuffer: special buffer is nullptr !\n"); return; } - void* pHost = operator new(sizeof(T) * _length); + const auto sizeOfBuffer = sizeOfT() * (getOffset(_length - 1) + 1); - if (ews() != 1) { - for (uint i = 0; i < _length; i++) - cudaMemcpyAsync(reinterpret_cast(pHost) + i, specialBufferWithOffset(i), sizeof(T), cudaMemcpyDeviceToHost, *(getContext()->getCudaStream())); - } - else - cudaMemcpyAsync(pHost, specialBuffer(), sizeOfT() * _length, cudaMemcpyDeviceToHost, *getContext()->getCudaStream()); + void* pHost = operator new(sizeOfBuffer); + + cudaMemcpyAsync(pHost, specialBuffer(), sizeOfBuffer, cudaMemcpyDeviceToHost, *getContext()->getCudaStream()); cudaError_t cudaResult = cudaStreamSynchronize(*getContext()->getCudaStream()); if(cudaResult != 0) throw std::runtime_error("NDArray::printSpecialBuffer: cudaStreamSynchronize failed!"); for (uint i = 0; i < _length; i++) - printf("%.*f, ", precision, (double)reinterpret_cast(pHost)[i]); + printf("%.*f, ", precision, (double)reinterpret_cast(pHost)[getOffset(i)]); printf("\n"); operator delete(pHost); diff --git a/libnd4j/include/helpers/EigenValsAndVecs.h b/libnd4j/include/helpers/EigenValsAndVecs.h new file mode 100644 index 000000000..222b9c36e --- /dev/null +++ b/libnd4j/include/helpers/EigenValsAndVecs.h @@ -0,0 +1,86 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ ******************************************************************************/
+
+//
+// @author Yurii Shyrma (iuriish@yahoo.com)
+//
+
+#ifndef LIBND4J_EIGENVALSANDVECS_H
+#define LIBND4J_EIGENVALSANDVECS_H
+
+#include <array/NDArray.h>
+
+namespace sd {
+namespace ops {
+namespace helpers {
+
+// this class calculates eigenvalues and eigenvectors of given input matrix
+template <typename T>
+class EigenValsAndVecs {
+
+    public:
+        // suppose we got input square NxN matrix
+
+        NDArray _Vals;    // {N,2} matrix of eigenvalues, 2 means real and imaginary part
+        NDArray _Vecs;    // {N,N,2} matrix, whose columns are the eigenvectors (complex), 2 means real and imaginary part
+
+        explicit EigenValsAndVecs(const NDArray& matrix);
+
+
+        //////////////////////////////////////////////////////////////////////////
+        FORCEINLINE static void divideComplexNums(const T& a1, const T& b1, const T& a2, const T& b2, T& a3, T& b3) {
+
+            T norm2 = a2*a2 + b2*b2;
+
+            a3 = (a1*a2 + b1*b2) / norm2;
+            b3 = (a2*b1 - a1*b2) / norm2;
+        }
+
+        //////////////////////////////////////////////////////////////////////////
+        FORCEINLINE static void multiplyComplexNums(const T& a1, const T& b1, const T& a2, const T& b2, T& a3, T& b3) {
+
+            a3 = (a1*a2 - b1*b2);
+            b3 = (a1*b2 + b1*a2);
+        }
+
+        //////////////////////////////////////////////////////////////////////////
+        FORCEINLINE static void sqrtComplexNum(T& a, T& b) {
+
+            T norm = math::nd4j_sqrt<T,T>(a*a + b*b);
+
+            if(b < (T)0)
+                b = -math::nd4j_sqrt<T,T>((T)0.5 * (norm - a));
+            else
+                b = math::nd4j_sqrt<T,T>((T)0.5 * (norm - a));
+            a = math::nd4j_sqrt<T,T>((T)0.5 * (norm + a));
+        }
+
+
+    private:
+
+        void calcEigenVals(const NDArray& schurMatrixT);    // calculates _Vals
+        void calcPseudoEigenVecs(NDArray& schurMatrixT, NDArray& schurMatrixU);    // makes changes both in schurMatrixT(NxN) and schurMatrixU(NxN), also calculates and stores pseudo-eigenvectors (real) in schurMatrixU columns
+        void calcEigenVecs(const NDArray& schurMatrixU);    // calculates _Vecs
+
+};
+
+
+}
+}
+}
+
+
+#endif //LIBND4J_EIGENVALSANDVECS_H
diff --git a/libnd4j/include/helpers/FullPivLU.h b/libnd4j/include/helpers/FullPivLU.h
new file mode 100644
index 000000000..3e285b597
--- /dev/null
+++ b/libnd4j/include/helpers/FullPivLU.h
@@ -0,0 +1,52 @@
+/*******************************************************************************
+ * Copyright (c) 2020 Konduit K.K.
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Apache License, Version 2.0 which is available at
+ * https://www.apache.org/licenses/LICENSE-2.0.
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ ******************************************************************************/
+
+//
+// @author Yurii Shyrma (iuriish@yahoo.com)
+//
+
+#ifndef LIBND4J_FULLPIVLU_H
+#define LIBND4J_FULLPIVLU_H
+
+#include <array/NDArray.h>
+
+namespace sd {
+namespace ops {
+namespace helpers {
+
+// class solves equation A*x = b for x, by procedure of LU decomposition of input matrix A with complete pivoting
+// LU decomposition of a matrix is:
+// A = P^-1 * L * U * Q^-1
+// L is unit-lower-triangular,
+// U is upper-triangular,
+// and P and Q are permutation matrices for rows and columns respectively
+
+template <typename T>
+class FullPivLU {
+
+    public:
+
+        // A{M,K} * x{K,N} = b{M,N}
+        static void solve(const NDArray& A, const NDArray& b, NDArray& x);
+};
+
+
+}
+}
+}
+
+
+#endif //LIBND4J_FULLPIVLU_H
diff --git a/libnd4j/include/helpers/HessenbergAndSchur.h b/libnd4j/include/helpers/HessenbergAndSchur.h
new file mode 100644
index 000000000..9c209ea56
--- /dev/null
+++ b/libnd4j/include/helpers/HessenbergAndSchur.h
@@ -0,0 +1,102 @@
+/*******************************************************************************
+ * Copyright (c) 2020 Konduit K.K.
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Apache License, Version 2.0 which is available at
+ * https://www.apache.org/licenses/LICENSE-2.0.
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ ******************************************************************************/
+
+//
+// @author Yurii Shyrma (iuriish@yahoo.com)
+//
+
+#ifndef LIBND4J_HESSENBERGANDSCHUR_H
+#define LIBND4J_HESSENBERGANDSCHUR_H
+
+#include <array/NDArray.h>
+
+namespace sd {
+namespace ops {
+namespace helpers {
+
+// this class implements Hessenberg decomposition of square matrix using orthogonal similarity transformation
+// A = Q H Q^T
+// Q - orthogonal matrix
+// H - Hessenberg matrix
+template <typename T>
+class Hessenberg {
+    // suppose we got input square NxN matrix
+
+    public:
+
+        NDArray _Q;  // {N,N}
+        NDArray _H;  // {N,N}
+
+        explicit Hessenberg(const NDArray& matrix);
+
+    private:
+        void evalData();
+};
+
+
+// this class implements real Schur decomposition of square matrix using orthogonal similarity transformation
+// A = U T U^T
+// T - real quasi-upper-triangular matrix - block upper triangular matrix where the blocks on the diagonal are 1×1 or 2×2 with complex eigenvalues
+// U - real orthogonal matrix
+
+template <typename T>
+class Schur {
+    // suppose we got input square NxN matrix
+
+    public:
+
+        NDArray _T;  // {N,N}
+        NDArray _U;  // {N,N}
+
+        explicit Schur(const NDArray& matrix);
+
+        void splitTwoRows(const int ind, const T shift);
+
+        void calcShift(const int ind, const int iter, T& shift, NDArray& shiftInfo);
+
+        void initFrancisQR(const int ind1, const int ind2, const NDArray& shiftVec, int& ind3, NDArray& householderVec);
+
+        void doFrancisQR(const int ind1, const int ind2, const int ind3, const NDArray& householderVec);
+
+        void calcFromHessenberg();
+
+    private:
+
+        static const int _maxItersPerRow = 40;
+
+        void evalData(const NDArray& matrix);
+
+        //////////////////////////////////////////////////////////////////////////
+        FORCEINLINE int getSmallSubdiagEntry(const int inInd) {
+
+            int outInd =
inInd; + while (outInd > 0) { + T factor = math::nd4j_abs(_T.t(outInd-1, outInd-1)) + math::nd4j_abs(_T.t(outInd, outInd)); + if (math::nd4j_abs(_T.t(outInd, outInd-1)) <= DataTypeUtils::eps() * factor) + break; + outInd--; + } + return outInd; + } +}; + + +} +} +} + + +#endif //LIBND4J_HESSENBERGANDSCHUR_H diff --git a/libnd4j/include/helpers/Sqrtm.h b/libnd4j/include/helpers/Sqrtm.h new file mode 100644 index 000000000..1968bc7a5 --- /dev/null +++ b/libnd4j/include/helpers/Sqrtm.h @@ -0,0 +1,45 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author Yurii Shyrma (iuriish@yahoo.com) +// + +#ifndef LIBND4J_SQRTM_H +#define LIBND4J_SQRTM_H + +#include + +namespace sd { +namespace ops { +namespace helpers { + +template +class Sqrtm { + + + public: + + static void calc(const NDArray& in, NDArray& out); +}; + + +} +} +} + + +#endif //LIBND4J_SQRTM_H diff --git a/libnd4j/include/helpers/biDiagonalUp.h b/libnd4j/include/helpers/biDiagonalUp.h index aaf64d41d..dc44057a9 100644 --- a/libnd4j/include/helpers/biDiagonalUp.h +++ b/libnd4j/include/helpers/biDiagonalUp.h @@ -32,13 +32,14 @@ namespace helpers { class BiDiagonalUp { public: - + NDArray _HHmatrix; // 2D Householder matrix NDArray _HHbidiag; // vector which contains Householder coefficients + NDArray _hhCoeffs; // vector of Householder coefficients /** * constructor - * + * * matrix - input matrix expected to be bi-diagonalized, remains unaffected */ BiDiagonalUp(const NDArray& matrix); @@ -47,7 +48,7 @@ class BiDiagonalUp { * this method evaluates data (coeff, normX, tail) used in Householder transformation * formula for Householder matrix: P = identity_matrix - coeff * w * w^T * P * x = [normX, 0, 0 , 0, ...] - * coeff - scalar + * coeff - scalar * w = [1, w1, w2, w3, ...], "tail" is w except first unity element, that is "tail" = [w1, w2, w3, ...] * tail and coeff are stored in _HHmatrix * normX are stored in _HHbidiag @@ -59,13 +60,13 @@ class BiDiagonalUp { /** * this method evaluates product of Householder sequence matrices (transformations) acting on columns - * + * * type - type of sequence, type = 'u' (acting on columns) or type = 'v' (acting on rows) */ template - HHsequence makeHHsequence_(const char type) const; + HHsequence makeHHsequence_(const char type); - HHsequence makeHHsequence(const char type) const; + HHsequence makeHHsequence(const char type); }; diff --git a/libnd4j/include/helpers/cpu/biDiagonalUp.cpp b/libnd4j/include/helpers/cpu/biDiagonalUp.cpp deleted file mode 100644 index 4623a93ad..000000000 --- a/libnd4j/include/helpers/cpu/biDiagonalUp.cpp +++ /dev/null @@ -1,180 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. 
- * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// Created by Yurii Shyrma on 18.12.2017 -// - - -#include -#include -#include - - -namespace sd { -namespace ops { -namespace helpers { - - -////////////////////////////////////////////////////////////////////////// -BiDiagonalUp::BiDiagonalUp(const NDArray& matrix): _HHmatrix(sd::NDArrayFactory::create(matrix.ordering(), {matrix.sizeAt(0), matrix.sizeAt(1)}, matrix.dataType(), matrix.getContext())), - _HHbidiag(sd::NDArrayFactory::create(matrix.ordering(), {matrix.sizeAt(1), matrix.sizeAt(1)}, matrix.dataType(), matrix.getContext())) { - - // input validation - if(matrix.rankOf() != 2 || matrix.isScalar()) - throw std::runtime_error("ops::helpers::biDiagonalizeUp constructor: input array must be 2D matrix !"); - - _HHmatrix.assign(&matrix); - _HHbidiag.assign(0.); - - evalData(); - -} - - template - void BiDiagonalUp::_evalData() { - - const auto rows = _HHmatrix.sizeAt(0); - const auto cols = _HHmatrix.sizeAt(1); - - if(rows < cols) - throw std::runtime_error("ops::helpers::BiDiagonalizeUp::evalData method: this procedure is applicable only for input matrix with rows >= cols !"); - - NDArray* bottomRightCorner(nullptr), *column(nullptr), *row(nullptr); - T coeff, normX; - - T _x, _y; - - for(Nd4jLong i = 0; i < cols-1; ++i ) { - - // evaluate Householder matrix nullifying columns - column = new NDArray(_HHmatrix({i,rows, i,i+1}, true)); - - _x = _HHmatrix.e(i,i); - _y = _HHbidiag.e(i,i); - - Householder::evalHHmatrixDataI(*column, _x, _y); - - _HHmatrix.p(i, i, _x); - _HHbidiag.p(i, i, _y); - - // multiply corresponding matrix block on householder matrix from the left: P * bottomRightCorner - bottomRightCorner = new NDArray(_HHmatrix({i,rows, i+1,cols}, true)); // {i, cols} - Householder::mulLeft(*bottomRightCorner, _HHmatrix({i+1,rows, i,i+1}, true), _HHmatrix.e(i,i)); - - delete bottomRightCorner; - delete column; - - if(i == cols-2) - continue; // do not apply right multiplying at last iteration - - // evaluate Householder matrix nullifying rows - row = new NDArray(_HHmatrix({i,i+1, i+1,cols}, true)); - - _x = _HHmatrix.e(i,i+1); - _y = _HHbidiag.e(i,i+1); - - Householder::evalHHmatrixDataI(*row, _x, _y); - - _HHmatrix.p(i, i+1, _x); - _HHbidiag.p(i, i+1, _y); - - // multiply corresponding matrix block on householder matrix from the right: bottomRightCorner * P - bottomRightCorner = new NDArray(_HHmatrix({i+1,rows, i+1,cols}, true)); // {i, rows} - - Householder::mulRight(*bottomRightCorner, _HHmatrix({i,i+1, i+2,cols}, true), _HHmatrix.e(i,i+1)); - - delete bottomRightCorner; - delete row; - } - - row = new NDArray(_HHmatrix({cols-2,cols-1, cols-1,cols}, true)); - - _x = _HHmatrix.e(cols-2,cols-1); - _y = _HHbidiag.e(cols-2,cols-1); - - Householder::evalHHmatrixDataI(*row, _x, _y); - - _HHmatrix.p(cols-2,cols-1, _x); - _HHbidiag.p(cols-2,cols-1, _y); - - delete row; - - column = new NDArray(_HHmatrix({cols-1,rows, cols-1,cols}, true)); 
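(Aside: illustration only, not part of this patch.) The deleted file above, like its impl/ replacement, repeatedly applies the Householder step documented in biDiagonalUp.h: P = identity_matrix - coeff * w * w^T with w = [1, tail...], chosen so that P * x = [normX, 0, 0, ...]. A minimal standalone sketch of how (coeff, normX, tail) fall out of that formula, using plain std::vector instead of NDArray and an illustrative function name:

    #include <cmath>
    #include <vector>

    // Mirrors the general branch of Householder<T>::evalHHmatrixData: given x,
    // produce coeff, normX and tail such that (I - coeff*w*w^T)*x = [normX, 0, ...],
    // where w = [1, tail...]. The degenerate branch (x of length 1, or with an
    // already-zero tail) is omitted for brevity.
    static void evalReflector(const std::vector<double>& x,
                              double& coeff, double& normX, std::vector<double>& tail) {
        double norm2 = 0.0;
        for (double v : x) norm2 += v * v;      // ||x||^2
        normX = std::sqrt(norm2);
        if (x[0] >= 0.0) normX = -normX;        // opposite sign lessens roundoff error
        const double u0 = x[0] - normX;
        coeff = -u0 / normX;
        tail.assign(x.begin() + 1, x.end());
        for (double& t : tail) t /= u0;         // w = [1, tail...]
    }

For x = {3, 4} this gives normX = -5, coeff = 1.6, tail = {0.5}, and (I - coeff*w*w^T)*x is indeed {-5, 0}.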
- - _x = _HHmatrix.e(cols-1,cols-1); - _y = _HHbidiag.e(cols-1,cols-1); - - Householder::evalHHmatrixDataI(*column, _x, _y); - - _HHmatrix.p(cols-1, cols-1, _x); - _HHbidiag.p(cols-1, cols-1, _y); - - delete column; - } - -////////////////////////////////////////////////////////////////////////// -void BiDiagonalUp::evalData() { - auto xType = _HHmatrix.dataType(); - - BUILD_SINGLE_SELECTOR(xType, _evalData, ();, FLOAT_TYPES); -} - - -////////////////////////////////////////////////////////////////////////// -template -HHsequence BiDiagonalUp::makeHHsequence_(const char type) const { - - if(type == 'u') { - - const int diagSize = _HHbidiag.sizeAt(0); - auto colOfCoeffs = NDArrayFactory::create(_HHmatrix.ordering(), {diagSize, 1}, _HHmatrix.dataType(), _HHmatrix.getContext()); - - for(int i = 0; i < diagSize; ++i) - colOfCoeffs.p(i, _HHmatrix.e(i,i)); - - return HHsequence(_HHmatrix, colOfCoeffs, type); - } - else { - - const int diagUpSize = _HHbidiag.sizeAt(0) - 1; - NDArray colOfCoeffs = NDArrayFactory::create(_HHmatrix.ordering(), {diagUpSize, 1}, _HHmatrix.dataType(), _HHmatrix.getContext()); - - for(int i = 0; i < diagUpSize; ++i) - colOfCoeffs.p(i, _HHmatrix.e(i,i+1)); - - HHsequence result(_HHmatrix, colOfCoeffs, type); - result._diagSize = diagUpSize; - result._shift = 1; - - return result; - } -} - - HHsequence BiDiagonalUp::makeHHsequence(const char type) const { - auto xType = _HHmatrix.dataType(); - - BUILD_SINGLE_SELECTOR(xType, return makeHHsequence_, (type);, FLOAT_TYPES); - } - - - -BUILD_SINGLE_TEMPLATE(template void BiDiagonalUp::_evalData, (), FLOAT_TYPES); -BUILD_SINGLE_TEMPLATE(template HHsequence BiDiagonalUp::makeHHsequence_, (const char type) const, FLOAT_TYPES); - -} -} -} \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/hhColPivQR.cpp b/libnd4j/include/helpers/cpu/hhColPivQR.cpp deleted file mode 100644 index e118b0bf1..000000000 --- a/libnd4j/include/helpers/cpu/hhColPivQR.cpp +++ /dev/null @@ -1,171 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// Created by Yurii Shyrma on 11.01.2018 -// - -#include -#include -#include - -namespace sd { -namespace ops { -namespace helpers { - - -////////////////////////////////////////////////////////////////////////// -HHcolPivQR::HHcolPivQR(const NDArray& matrix) { - - _qr = matrix; - _diagSize = math::nd4j_min(matrix.sizeAt(0), matrix.sizeAt(1)); - _coeffs = NDArrayFactory::create(matrix.ordering(), {1, _diagSize}, matrix.dataType(), matrix.getContext()); - - _permut = NDArrayFactory::create(matrix.ordering(), {matrix.sizeAt(1), matrix.sizeAt(1)}, matrix.dataType(), matrix.getContext()); - - evalData(); -} - - void HHcolPivQR::evalData() { - BUILD_SINGLE_SELECTOR(_qr.dataType(), _evalData, (), FLOAT_TYPES); - } - -////////////////////////////////////////////////////////////////////////// -template -void HHcolPivQR::_evalData() { - - int rows = _qr.sizeAt(0); - int cols = _qr.sizeAt(1); - - auto transp = NDArrayFactory::create(_qr.ordering(), {1, cols}, _qr.dataType(), _qr.getContext()); - auto normsUpd = NDArrayFactory::create(_qr.ordering(), {1, cols}, _qr.dataType(), _qr.getContext()); - auto normsDir = NDArrayFactory::create(_qr.ordering(), {1, cols}, _qr.dataType(), _qr.getContext()); - - int transpNum = 0; - - for (int k = 0; k < cols; ++k) { - - T norm = _qr({0,0, k,k+1}).reduceNumber(reduce::Norm2).e(0); - normsDir.p(k, norm); - normsUpd.p(k, norm); - } - - T normScaled = (normsUpd.reduceNumber(reduce::Max)).e(0) * DataTypeUtils::eps(); - T threshold1 = normScaled * normScaled / (T)rows; - T threshold2 = math::nd4j_sqrt(DataTypeUtils::eps()); - - T nonZeroPivots = _diagSize; - T maxPivot = 0.; - - for(int k = 0; k < _diagSize; ++k) { - - int biggestColIndex = normsUpd({0,0, k,-1}).indexReduceNumber(indexreduce::IndexMax).e(0); - T biggestColNorm = normsUpd({0,0, k,-1}).reduceNumber(reduce::Max).e(0); - T biggestColSqNorm = biggestColNorm * biggestColNorm; - biggestColIndex += k; - - if(nonZeroPivots == (T)_diagSize && biggestColSqNorm < threshold1 * (T)(rows-k)) - nonZeroPivots = k; - - transp.p(k, (T)biggestColIndex); - - if(k != biggestColIndex) { - - auto temp1 = new NDArray(_qr({0,0, k,k+1}, true)); - auto temp2 = new NDArray(_qr({0,0, biggestColIndex,biggestColIndex+1}, true)); - auto temp3 = *temp1; - temp1->assign(temp2); - temp2->assign(temp3); - delete temp1; - delete temp2; - - T e0 = normsUpd.e(k); - T e1 = normsUpd.e(biggestColIndex); - normsUpd.p(k, e1); - normsUpd.p(biggestColIndex, e0); - //math::nd4j_swap(normsUpd(k), normsUpd(biggestColIndex)); - - e0 = normsDir.e(k); - e1 = normsDir.e(biggestColIndex); - normsDir.p(k, e1); - normsDir.p(biggestColIndex, e0); - //math::nd4j_swap(normsDir(k), normsDir(biggestColIndex)); - - ++transpNum; - } - - T normX; - NDArray* qrBlock = new NDArray(_qr({k,rows, k,k+1}, true)); - T c; - Householder::evalHHmatrixDataI(*qrBlock, c, normX); - _coeffs.p(k, c); - delete qrBlock; - - _qr.p(k,k, normX); - - T max = math::nd4j_abs(normX); - if(max > maxPivot) - maxPivot = max; - - if(k < rows && (k+1) < cols) { - qrBlock = new NDArray(_qr({k, rows, k+1,cols}, true)); - auto tail = new NDArray(_qr({k+1,rows, k, k+1}, true)); - Householder::mulLeft(*qrBlock, *tail, _coeffs.e(k)); - delete qrBlock; - delete tail; - } - - for (int j = k + 1; j < cols; ++j) { - - if (normsUpd.e(j) != (T)0.f) { - T temp = math::nd4j_abs(_qr.e(k, j)) / normsUpd.e(j); - temp = (1. + temp) * (1. - temp); - temp = temp < (T)0. ? (T)0. 
: temp; - T temp2 = temp * normsUpd.e(j) * normsUpd.e(j) / (normsDir.e(j)*normsDir.e(j)); - - if (temp2 <= threshold2) { - if(k+1 < rows && j < cols) - normsDir.p(j, _qr({k+1,rows, j,j+1}).reduceNumber(reduce::Norm2).e(0)); - - normsUpd.p(j, normsDir.e(j)); - } - else - normsUpd.p(j, normsUpd.e(j) * math::nd4j_sqrt(temp)); - } - } - } - - _permut.setIdentity(); - - for(int k = 0; k < _diagSize; ++k) { - - int idx = transp.e(k); - auto temp1 = new NDArray(_permut({0,0, k, k+1}, true)); - auto temp2 = new NDArray(_permut({0,0, idx,idx+1}, true)); - auto temp3 = *temp1; - temp1->assign(temp2); - temp2->assign(temp3); - delete temp1; - delete temp2; - } -} - - BUILD_SINGLE_TEMPLATE(template void HHcolPivQR::_evalData, (), FLOAT_TYPES); - -} -} -} - diff --git a/libnd4j/include/helpers/cpu/householder.cpp b/libnd4j/include/helpers/cpu/householder.cpp deleted file mode 100644 index 69d4ca3db..000000000 --- a/libnd4j/include/helpers/cpu/householder.cpp +++ /dev/null @@ -1,221 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// Created by Yurii Shyrma on 18.12.2017 -// - -#include -#include - -namespace sd { -namespace ops { -namespace helpers { - - -////////////////////////////////////////////////////////////////////////// -template -NDArray Householder::evalHHmatrix(const NDArray& x) { - - // input validation - if(!x.isVector() && !x.isScalar()) - throw std::runtime_error("ops::helpers::Householder::evalHHmatrix method: input array must be vector or scalar!"); - - auto w = NDArrayFactory::create(x.ordering(), {(int)x.lengthOf(), 1}, x.dataType(), x.getContext()); // column-vector - auto wT = NDArrayFactory::create(x.ordering(), {1, (int)x.lengthOf()}, x.dataType(), x.getContext()); // row-vector (transposed w) - - T coeff; - T normX = x.reduceNumber(reduce::Norm2).e(0); - - if(normX*normX - x.e(0) * x.e(0) <= DataTypeUtils::min() || x.lengthOf() == 1) { - - normX = x.e(0); - coeff = 0.f; - w = 0.f; - - } - else { - - if(x.e(0) >= (T)0.f) - normX = -normX; // choose opposite sign to lessen roundoff error - - T u0 = x.e(0) - normX; - coeff = -u0 / normX; - w.assign(x / u0); - } - - w.p(Nd4jLong(0), 1.f); - wT.assign(&w); - - NDArray identity = NDArrayFactory::create(x.ordering(), {(int)x.lengthOf(), (int)x.lengthOf()}, x.dataType(), x.getContext()); - identity.setIdentity(); // identity matrix - - return identity - mmul(w, wT) * coeff; -} - -////////////////////////////////////////////////////////////////////////// -template -void Householder::evalHHmatrixData(const NDArray& x, NDArray& tail, T& coeff, T& normX) { - - // input validation - if(!x.isVector() && !x.isScalar()) - throw std::runtime_error("ops::helpers::Householder::evalHHmatrixData method: input array must be vector or scalar!"); - - if(!x.isScalar() && x.lengthOf() != tail.lengthOf() + 1) - throw 
std::runtime_error("ops::helpers::Householder::evalHHmatrixData method: input tail vector must have length less than unity compared to input x vector!"); - - normX = x.reduceNumber(reduce::Norm2, nullptr).e(0); - - if(normX*normX - x.e(0) * x.e(0) <= DataTypeUtils::min() || x.lengthOf() == 1) { - - normX = x.e(0); - coeff = (T)0.f; - tail = (T)0.f; - } - else { - - if(x.e(0) >= (T)0.f) - normX = -normX; // choose opposite sign to lessen roundoff error - - T u0 = x.e(0) - normX; - coeff = -u0 / normX; - - if(x.isRowVector()) - tail.assign(static_cast(x({0,0, 1,-1})) / u0); - else - tail.assign(static_cast(x({1,-1, 0,0,})) / u0); - } -} - -////////////////////////////////////////////////////////////////////////// -template -void Householder::evalHHmatrixDataI(const NDArray& x, T& coeff, T& normX) { - - int rows = (int)x.lengthOf()-1; - int num = 1; - - if(rows == 0) { - rows = 1; - num = 0; - } - - auto tail = NDArrayFactory::create(x.ordering(), {rows, 1}, x.dataType(), x.getContext()); - evalHHmatrixData(x, tail, coeff, normX); - - if(x.isRowVector()) { - auto temp = x({0,0, num, x.sizeAt(1)}, true); - temp.assign(tail); - } - else { - auto temp = x({num,x.sizeAt(0), 0,0}, true); - temp.assign(tail); - } -} - -////////////////////////////////////////////////////////////////////////// -template -void Householder::mulLeft(NDArray& matrix, const NDArray& tail, const T coeff) { - - // if(matrix.rankOf() != 2) - // throw "ops::helpers::Householder::mulLeft method: input array must be 2D matrix !"; - - if(matrix.sizeAt(0) == 1) { - matrix *= (T) 1.f - coeff; - } - else if(coeff != (T)0.f) { - - auto bottomPart = new NDArray(matrix({1,matrix.sizeAt(0), 0,0}, true)); - auto bottomPartCopy = *bottomPart; - - if(tail.isColumnVector()) { - - auto column = tail; - auto row = tail.transpose(); - auto resultingRow = mmul(row, bottomPartCopy); - auto fistRow = matrix({0,1, 0,0}, true); - resultingRow += fistRow; - fistRow -= resultingRow * coeff; - *bottomPart -= mmul(column, resultingRow) * coeff; - } - else { - - auto row = tail; - auto column = tail.transpose(); - auto resultingRow = mmul(row, bottomPartCopy); - auto fistRow = matrix({0,1, 0,0}, true); - resultingRow += fistRow; - fistRow -= resultingRow * coeff; - *bottomPart -= mmul(column, resultingRow) * coeff; - } - delete bottomPart; - } -} - - -////////////////////////////////////////////////////////////////////////// -template -void Householder::mulRight(NDArray& matrix, const NDArray& tail, const T coeff) { - - // if(matrix.rankOf() != 2) - // throw "ops::helpers::Householder::mulRight method: input array must be 2D matrix !"; - - if(matrix.sizeAt(1) == 1) - matrix *= (T)1.f - coeff; - - else if(coeff != (T)0.f) { - - auto rightPart = new NDArray(matrix({0,0, 1,matrix.sizeAt(1)}, true)); - auto rightPartCopy = *rightPart; - auto fistCol = new NDArray(matrix({0,0, 0,1}, true)); - - if(tail.isColumnVector()) { - - auto column = tail; - auto row = tail.transpose(); - auto resultingCol = mmul(rightPartCopy, column); - resultingCol += *fistCol; - *fistCol -= resultingCol * coeff; - *rightPart -= mmul(resultingCol, row) * coeff; - } - else { - - auto row = tail; - auto column = tail.transpose(); - auto resultingCol = mmul(rightPartCopy, column); - resultingCol += *fistCol; - *fistCol -= resultingCol * coeff; - *rightPart -= mmul(resultingCol, row) * coeff; - } - delete rightPart; - delete fistCol; - } -} - - -template class ND4J_EXPORT Householder; -template class ND4J_EXPORT Householder; -template class ND4J_EXPORT Householder; -template class 
ND4J_EXPORT Householder; - - - - - - - -} -} -} diff --git a/libnd4j/include/helpers/cpu/svd.cpp b/libnd4j/include/helpers/cpu/svd.cpp index 4e257b267..8a320f6de 100644 --- a/libnd4j/include/helpers/cpu/svd.cpp +++ b/libnd4j/include/helpers/cpu/svd.cpp @@ -22,7 +22,6 @@ #include #include #include -#include namespace sd { @@ -59,19 +58,19 @@ SVD::SVD(const NDArray& matrix, const int switchSize, const bool calcU, const if (_transp) math::nd4j_swap(_calcU, _calcV); - _s = NDArrayFactory::create(matrix.ordering(), {_diagSize, 1}, matrix.getContext()); - _m = NDArrayFactory::create(matrix.ordering(), {_diagSize + 1, _diagSize}, matrix.getContext()); - _m.assign(0.); + _s = NDArray(matrix.ordering(), {_diagSize, 1}, matrix.dataType(), matrix.getContext()); + _m = NDArray(matrix.ordering(), {_diagSize + 1, _diagSize}, matrix.dataType(), matrix.getContext()); + // _m.assign(0.); if (_calcU) - _u = NDArrayFactory::create(matrix.ordering(), {_diagSize + 1, _diagSize + 1}, matrix.getContext()); + _u = NDArray(matrix.ordering(), {_diagSize + 1, _diagSize + 1}, matrix.dataType(), matrix.getContext()); else - _u = NDArrayFactory::create(matrix.ordering(), {2, _diagSize + 1}, matrix.getContext()); - _u.assign(0.); + _u = NDArray(matrix.ordering(), {2, _diagSize + 1}, matrix.dataType(), matrix.getContext()); + // _u.assign(0.); if (_calcV) { - _v = NDArrayFactory::create(matrix.ordering(), {_diagSize, _diagSize}, matrix.getContext()); - _v.assign(0.); + _v = NDArray(matrix.ordering(), {_diagSize, _diagSize}, matrix.dataType(), matrix.getContext()); + // _v.assign(0.); } evalData(matrix); @@ -106,19 +105,19 @@ SVD::SVD(const NDArray& matrix, const int switchSize, const bool calcU, const if (_transp) math::nd4j_swap(_calcU, _calcV); - _s = NDArrayFactory::create(matrix.ordering(), {_diagSize, 1}, matrix.getContext()); - _m = NDArrayFactory::create(matrix.ordering(), {_diagSize + 1, _diagSize}, matrix.getContext()); - _m.assign(0.f); + _s = NDArray(matrix.ordering(), {_diagSize, 1}, matrix.dataType(), matrix.getContext()); + _m = NDArray(matrix.ordering(), {_diagSize + 1, _diagSize}, matrix.dataType(), matrix.getContext()); + // _m.assign(0.f); if (_calcU) - _u = NDArrayFactory::create(matrix.ordering(), {_diagSize + 1, _diagSize + 1}, matrix.getContext()); + _u = NDArray(matrix.ordering(), {_diagSize + 1, _diagSize + 1}, matrix.dataType(), matrix.getContext()); else - _u = NDArrayFactory::create(matrix.ordering(), {2, _diagSize + 1}, matrix.getContext()); - _u.assign(0.); + _u = NDArray(matrix.ordering(), {2, _diagSize + 1}, matrix.dataType(), matrix.getContext()); + // _u.assign(0.); if (_calcV) { - _v = NDArrayFactory::create(matrix.ordering(), {_diagSize, _diagSize}, matrix.getContext()); - _v.assign(0.); + _v = NDArray(matrix.ordering(), {_diagSize, _diagSize}, matrix.dataType(), matrix.getContext()); + // _v.assign(0.); } } @@ -131,28 +130,27 @@ void SVD::deflation1(int col1, int shift, int ind, int size) { throw std::runtime_error("ops::helpers::SVD::deflation1 method: input int must satisfy condition ind > 0 !"); int first = col1 + shift; - T cos = _m.e(first, first); - T sin = _m.e(first+ind, first); + T cos = _m.t(first, first); + T sin = _m.t(first+ind, first); T denom = math::nd4j_sqrt(cos*cos + sin*sin); if (denom == (T)0.) 
{ - - _m.p(first+ind, first+ind, 0.f); + _m.r(first+ind, first+ind) = (T)0; return; } cos /= denom; sin /= denom; - _m.p(first,first, denom); - _m.p(first+ind, first, 0.f); - _m.p(first+ind, first+ind, 0.f); + _m.r(first,first) = denom; + _m.r(first+ind, first) = (T)0; + _m.r(first+ind, first+ind) = (T)0; - auto rotation = NDArrayFactory::create(_m.ordering(), {2, 2}, _m.getContext()); - rotation.p(0, 0, cos); - rotation.p(0, 1, -sin); - rotation.p(1, 0, sin); - rotation.p(1, 1, cos); + NDArray rotation(_m.ordering(), {2, 2}, _m.dataType(), _m.getContext()); + + rotation.r(0,0) = rotation.r(1,1) = cos; + rotation.r(0,1) = -sin; + rotation.r(1,0) = sin; if (_calcU) { auto temp = _u({col1,col1+size+1, 0,0}, true); @@ -172,28 +170,26 @@ void SVD::deflation2(int col1U , int col1M, int row1W, int col1W, int ind1, i if(size <= 0) throw std::runtime_error("ops::helpers::SVD::deflation2 method: input size must satisfy condition size > 0 !"); - T cos = _m.e(col1M+ind1, col1M); - T sin = _m.e(col1M+ind2, col1M); + T cos = _m.t(col1M+ind1, col1M); + T sin = _m.t(col1M+ind2, col1M); T denom = math::nd4j_sqrt(cos*cos + sin*sin); if (denom == (T)0.) { - - _m.p(col1M + ind1, col1M + ind1, _m.e(col1M + ind2, col1M + ind2)); + _m.r(col1M+ind1, col1M+ind1) = _m.t(col1M+ind2, col1M+ind2); return; } cos /= denom; sin /= denom; - _m.p(col1M + ind1, col1M, denom); - _m.p(col1M + ind2, col1M + ind2, _m.e(col1M + ind1, col1M + ind1)); - _m.p(col1M + ind2, col1M, 0.f); + _m.r(col1M+ind1, col1M) = denom; + _m.r(col1M+ind2, col1M+ind2) = _m.t(col1M+ind1, col1M+ind1); + _m.r(col1M+ind2, col1M) = (T)0; - auto rotation = NDArrayFactory::create(_m.ordering(), {2, 2}, _m.getContext()); - rotation.p(0,0, cos); - rotation.p(1,1, cos); + NDArray rotation(_m.ordering(), {2, 2}, _m.dataType(), _m.getContext()); - rotation.p(0,1, -sin); - rotation.p(1,0, sin); + rotation.r(0,0) = rotation.r(1,1) = cos; + rotation.r(0,1) = -sin; + rotation.r(1,0) = sin; if (_calcU) { auto temp = _u({col1U,col1U+size+1, 0,0}, true); @@ -216,40 +212,40 @@ void SVD::deflation(int col1, int col2, int ind, int row1W, int col1W, int sh const int len = col2 + 1 - col1; - auto colVec0 = new NDArray(_m({col1+shift,col1+shift+len, col1+shift,col1+shift+1}, true)); + NDArray colVec0 = _m({col1+shift,col1+shift+len, col1+shift,col1+shift+1}, true); - auto diagInterval = _m({col1+shift, col1+shift+len, col1+shift,col1+shift+len}, true).diagonal('c'); + NDArray diagInterval = _m({col1+shift,col1+shift+len, col1+shift,col1+shift+len}, true).diagonal('c'); const T almostZero = DataTypeUtils::min(); T maxElem; if(len == 1) - maxElem = math::nd4j_abs(diagInterval.template e(0)); + maxElem = math::nd4j_abs(diagInterval.template t(0)); else - maxElem = diagInterval({1,-1, 0,0}, true).reduceNumber(reduce::AMax).template e(0); - T maxElem0 = colVec0->reduceNumber(reduce::AMax).template e(0); + maxElem = diagInterval({1,-1, 0,0}, true).reduceNumber(reduce::AMax).template t(0); + T maxElem0 = colVec0.reduceNumber(reduce::AMax).template t(0); T eps = math::nd4j_max(almostZero, DataTypeUtils::eps() * maxElem); T epsBig = (T)8. 
* DataTypeUtils::eps() * math::nd4j_max(maxElem0, maxElem); - if(diagInterval.template e(0) < epsBig) - diagInterval.p(Nd4jLong(0), epsBig); + if(diagInterval.template t(0) < epsBig) + diagInterval.r(0) = epsBig; for(int i=1; i < len; ++i) - if(math::nd4j_abs(colVec0->template e(i)) < eps) - colVec0->p(i, 0.f); + if(math::nd4j_abs(colVec0.template t(i)) < eps) + colVec0.r(i) = (T)0; for(int i=1; i < len; i++) - if(diagInterval.template e(i) < epsBig) { + if(diagInterval.template t(i) < epsBig) { deflation1(col1, shift, i, len); for(int i = 0; i < len; ++i) - diagInterval.p(i, _m.e(col1+shift+i,col1+shift+i)); + diagInterval.r(i) = _m.t(col1+shift+i,col1+shift+i); } { bool totDefl = true; for(int i=1; i < len; i++) - if(colVec0->template e(i) >= almostZero) { + if(colVec0.template t(i) >= almostZero) { totDefl = false; break; } @@ -261,7 +257,7 @@ void SVD::deflation(int col1, int col2, int ind, int row1W, int col1W, int sh int p = 1; for(int i=1; i(diagInterval.template e(i)) < almostZero) + if(math::nd4j_abs(diagInterval.template t(i)) < almostZero) permut[p++] = i; int k = 1, m = ind+1; @@ -271,7 +267,7 @@ void SVD::deflation(int col1, int col2, int ind, int row1W, int col1W, int sh permut[p] = m++; else if(m >= len) permut[p] = k++; - else if(diagInterval.template e(k) < diagInterval.template e(m)) + else if(diagInterval.template t(k) < diagInterval.template t(m)) permut[p] = m++; else permut[p] = k++; @@ -281,7 +277,7 @@ void SVD::deflation(int col1, int col2, int ind, int row1W, int col1W, int sh if(totDefl) { for(int i=1; i(diagInterval.template e(ki)) < almostZero || diagInterval.template e(0) < diagInterval.template e(ki)) + if(math::nd4j_abs(diagInterval.template t(ki)) < almostZero || diagInterval.template t(0) < diagInterval.template t(ki)) permut[i-1] = permut[i]; else { permut[i-1] = 0; @@ -303,39 +299,26 @@ void SVD::deflation(int col1, int col2, int ind, int row1W, int col1W, int sh const int ki = permut[len - (totDefl ? 
i+1 : i)]; const int jac = tCol[ki]; - T _e0 = diagInterval.template e(jac); - //math::nd4j_swap(diagInterval)(i), (*diagInterval)(jac)); - diagInterval.p(jac, diagInterval.template e(i)); - diagInterval.p(i, _e0); + math::nd4j_swap(diagInterval.r(i), diagInterval.r(jac)); - if(i!=0 && jac!=0) { - _e0 = colVec0->template e(jac); - //math::nd4j_swap((*colVec0)(i), (*colVec0)(jac)); - colVec0->p(jac, colVec0->template e(i)); - colVec0->p(i, _e0); - } + if(i!=0 && jac!=0) + math::nd4j_swap(colVec0.r(i), colVec0.r(jac)); if (_calcU) { - auto temp1 = _u({col1,col1+len+1, col1+i, col1+i+1}, true); - auto temp2 = _u({col1,col1+len+1, col1+jac,col1+jac+1}, true); - auto temp3 = temp1; - temp1.assign(temp2); - temp2.assign(temp3); + auto temp1 = _u({col1,col1+len+1, col1+i, col1+i+1}); + auto temp2 = _u({col1,col1+len+1, col1+jac,col1+jac+1}); + temp1.swapUnsafe(temp2); } else { - auto temp1 = _u({0,2, col1+i, col1+i+1}, true); - auto temp2 = _u({0,2, col1+jac, col1+jac+1}, true); - auto temp3 = temp1; - temp1.assign(temp2); - temp2.assign(temp3); + auto temp1 = _u({0,2, col1+i, col1+i+1}); + auto temp2 = _u({0,2, col1+jac, col1+jac+1}); + temp1.swapUnsafe(temp2); } if(_calcV) { - auto temp1 = _v({row1W,row1W+len, col1W+i, col1W+i+1}, true); - auto temp2 = _v({row1W,row1W+len, col1W+jac, col1W+jac+1}, true); - auto temp3 = temp1; - temp1.assign(temp2); - temp2.assign(temp3); + auto temp1 = _v({row1W,row1W+len, col1W+i, col1W+i+1}); + auto temp2 = _v({row1W,row1W+len, col1W+jac, col1W+jac+1}); + temp1.swapUnsafe(temp2); } const int tI = tInd[i]; @@ -351,19 +334,17 @@ void SVD::deflation(int col1, int col2, int ind, int row1W, int col1W, int sh { int i = len-1; - while(i > 0 && (math::nd4j_abs(diagInterval.template e(i)) < almostZero || math::nd4j_abs(colVec0->template e(i)) < almostZero)) + while(i > 0 && (math::nd4j_abs(diagInterval.template t(i)) < almostZero || math::nd4j_abs(colVec0.template t(i)) < almostZero)) --i; for(; i > 1; --i) { - if( (diagInterval.template e(i) - diagInterval.template e(i-1)) < DataTypeUtils::eps()*maxElem ) { - if (math::nd4j_abs(diagInterval.template e(i) - diagInterval.template e(i-1)) >= epsBig) + if( (diagInterval.template t(i) - diagInterval.template t(i-1)) < DataTypeUtils::eps()*maxElem ) { + if (math::nd4j_abs(diagInterval.template t(i) - diagInterval.template t(i-1)) >= epsBig) throw std::runtime_error("ops::helpers::SVD::deflation: diagonal elements are not properly sorted !"); deflation2(col1, col1 + shift, row1W, col1W, i-1, i, len); } } } - - delete colVec0; } @@ -374,10 +355,10 @@ T SVD::secularEq(const T diff, const NDArray& col0, const NDArray& diag, cons auto len = permut.lengthOf(); T res = 1.; T item; - for(Nd4jLong i=0; i(i); - item = col0.e(j) / ((diagShifted.e(j) - diff) * (diag.e(j) + shift + diff)); - res += item * col0.e(j); + for(int i=0; i(i); + item = col0.t(j) / ((diagShifted.t(j) - diff) * (diag.t(j) + shift + diff)); + res += item * col0.t(j); } return res; @@ -390,34 +371,34 @@ void SVD::calcSingVals(const NDArray& col0, const NDArray& diag, const NDArra auto len = col0.lengthOf(); auto curLen = len; - while(curLen > 1 && col0.e(curLen-1) == (T)0.f) + while(curLen > 1 && col0.t(curLen-1) == (T)0.f) --curLen; for (Nd4jLong k = 0; k < len; ++k) { - if (col0.e(k) == (T)0.f || curLen==1) { + if (col0.t(k) == (T)0.f || curLen==1) { - singVals.p(k, k==0 ? col0.e(0) : diag.e(k)); - mus.p(k, 0.f); - shifts.p(k, k==0 ? col0.e(0) : diag.e(k)); + singVals.r(k) = k==0 ? col0.t(0) : diag.t(k); + mus.r(k) = (T)0; + shifts.r(k) = k==0 ? 
col0.t(0) : diag.t(k); continue; } - T left = diag.e(k); + T left = diag.t(k); T right; if(k==curLen-1) - right = diag.e(curLen-1) + col0.reduceNumber(reduce::Norm2).e(0); + right = diag.t(curLen-1) + col0.reduceNumber(reduce::Norm2).t(0); else { int l = k+1; - while(col0.e(l) == (T)0.f) { + while(col0.t(l) == (T)0.f) { ++l; if(l >= curLen) throw std::runtime_error("ops::helpers::SVD::calcSingVals method: l >= curLen !"); } - right = diag.e(l); + right = diag.t(l); } T mid = left + (right - left) / (T)2.; @@ -440,7 +421,7 @@ void SVD::calcSingVals(const NDArray& col0, const NDArray& diag, const NDArra } T fPrev = secularEq(muPrev, col0, diag, permut, diagShifted, shift); - T fCur = secularEq(muCur, col0, diag, permut, diagShifted, shift); + T fCur = secularEq(muCur, col0, diag, permut, diagShifted, shift); if (math::nd4j_abs(fPrev) < math::nd4j_abs(fCur)) { math::nd4j_swap(fPrev, fCur); @@ -464,13 +445,12 @@ void SVD::calcSingVals(const NDArray& col0, const NDArray& diag, const NDArra if (shift == left && (muCur < (T)0. || muCur > right - left)) useBisection = true; - if (shift == right && (muCur < -(right - left) || muCur > (T)0.)) + else if (shift == right && (muCur < -(right - left) || muCur > (T)0.)) useBisection = true; - if (math::nd4j_abs(fCur) > math::nd4j_abs(fPrev) && math::nd4j_abs(fCur - fPrev) > (T)16. * DataTypeUtils::eps()) + else if (math::nd4j_abs(fCur) > math::nd4j_abs(fPrev) && math::nd4j_abs(fCur - fPrev) > (T)16. * DataTypeUtils::eps()) useBisection = true; } - if (useBisection) { T leftShifted, rightShifted; @@ -479,7 +459,6 @@ void SVD::calcSingVals(const NDArray& col0, const NDArray& diag, const NDArra rightShifted = (k==curLen-1) ? right : ((right - left) * (T)0.6); } else { - leftShifted = -(right - left) * (T)0.6; rightShifted = -DataTypeUtils::min(); } @@ -502,14 +481,12 @@ void SVD::calcSingVals(const NDArray& col0, const NDArray& diag, const NDArra } muCur = (leftShifted + rightShifted) / (T)2.; } - singVals.p(k, shift + muCur); - shifts.p(k, shift); - mus.p(k, muCur); + singVals.r(k) = shift + muCur; + shifts.r(k) = shift; + mus.r(k) = muCur; } - } - ////////////////////////////////////////////////////////////////////////// template void SVD::perturb(const NDArray& col0, const NDArray& diag, const NDArray& permut, const NDArray& singVals, const NDArray& shifts, const NDArray& mus, NDArray& zhat) { @@ -517,29 +494,29 @@ void SVD::perturb(const NDArray& col0, const NDArray& diag, const NDArray& pe int n = col0.lengthOf(); int m = permut.lengthOf(); if(m==0) { - zhat.assign(0.); + zhat.nullify(); return; } - int last = permut.e(m-1); + int last = permut.t(m-1); for (int k = 0; k < n; ++k) { - if (col0.e(k) == (T)0.f) - zhat.p(k, (T)0.f); + if (col0.t(k) == (T)0.f) + zhat.r(k) = (T)0; else { - T dk = diag.e(k); - T prod = (singVals.e(last) + dk) * (mus.e(last) + (shifts.e(last) - dk)); + T dk = diag.t(k); + T prod = (singVals.t(last) + dk) * (mus.t(last) + (shifts.t(last) - dk)); for(int l = 0; l(l); + int i = (int)permut.t(l); if(i!=k) { - int j = i(l-1); - prod *= ((singVals.e(j)+dk) / ((diag.e(i)+dk))) * ((mus.e(j)+(shifts.e(j)-dk)) / ((diag.e(i)-dk))); + int j = i(l-1); + prod *= ((singVals.t(j)+dk) / ((diag.t(i)+dk))) * ((mus.t(j)+(shifts.t(j)-dk)) / ((diag.t(i)-dk))); } } T tmp = math::nd4j_sqrt(prod); - zhat.p(k, col0.e(k) > (T)0.f ? tmp : -tmp); + zhat.r(k) = col0.t(k) > (T)0 ? 
tmp : -tmp; } } } @@ -555,48 +532,46 @@ void SVD::calcSingVecs(const NDArray& zhat, const NDArray& diag, const NDArra for (int k = 0; k < n; ++k) { - auto colU = new NDArray(U({0,0, k,k+1}, true)); - *colU = 0.; - NDArray* colV = nullptr; + NDArray colU = U({0,0, k,k+1}); + colU.nullify(); + + NDArray colV; if (_calcV) { - colV = new NDArray(V({0,0, k,k+1}, true)); - *colV = 0.; + colV = V({0,0, k,k+1}); + colV.nullify(); } - if (zhat.e(k) == (T)0.f) { - colU->p(k, 1.f); + if (zhat.t(k) == (T)0.f) { + colU.r(k) = (T)1; if (_calcV) - colV->p(k, 1.f); + colV.r(k) = (T)1; } else { for(int l = 0; l < m; ++l) { - int i = perm.e(l); - U.p(i,k, zhat.e(i)/(((diag.e(i) - shifts.e(k)) - mus.e(k)) )/( (diag.e(i) + singVals.e(k)))); + int i = (int)perm.t(l); + U.r(i,k) = zhat.t(i)/(((diag.t(i) - shifts.t(k)) - mus.t(k)) )/( (diag.t(i) + singVals.t(k))); } - U.p(n,k, 0.f); - *colU /= colU->reduceNumber(reduce::Norm2); + U.r(n,k) = (T)0; + colU /= colU.reduceNumber(reduce::Norm2); if (_calcV) { for(int l = 1; l < m; ++l){ - int i = perm.e(l); - V.p(i,k, diag.e(i) * zhat.e(i) / (((diag.e(i) - shifts.e(k)) - mus.e(k)) )/( (diag.e(i) + singVals.e(k)))); + int i = perm.t(l); + V.r(i,k) = diag.t(i) * zhat.t(i) / (((diag.t(i) - shifts.t(k)) - mus.t(k)) )/( (diag.t(i) + singVals.t(k))); } - V.p(0,k, -1.f); - *colV /= colV->reduceNumber(reduce::Norm2); + V.r(0,k) = (T)-1; + colV /= colV.reduceNumber(reduce::Norm2); } } - delete colU; - if (_calcV) - delete colV; } - auto colU = U({0,0, n,n+1}, true); - colU = 0.; - colU.p(n, 1.); + NDArray colU = U({0,0, n,n+1}); + colU.nullify(); + colU.r(n) = (T)1; } @@ -608,26 +583,29 @@ void SVD::calcBlockSVD(int col1, int size, NDArray& U, NDArray& singVals, NDA auto col0 = _m({col1, col1+size, col1, col1+1}, true); auto diag = static_cast(_m({col1, col1+size, col1, col1+size}, true).diagonal('c')); - diag.p(Nd4jLong(0), T(0)); - singVals = NDArrayFactory::create(_m.ordering(), {size, 1}, _m.getContext()); - U = NDArrayFactory::create(_u.ordering(), {size+1, size+1}, _u.getContext()); + diag.r(0) = (T)0; + singVals = NDArray(_m.ordering(), {size, 1}, _m.dataType(), _m.getContext()); + U = NDArray(_u.ordering(), {size+1, size+1}, _u.dataType(), _u.getContext()); if (_calcV) - V = NDArrayFactory::create(_v.ordering(), {size, size}, _v.getContext()); + V = NDArray(_v.ordering(), {size, size}, _v.dataType(), _v.getContext()); int curSize = size; - while(curSize > 1 && diag.template e(curSize-1) == (T)0.f) + while(curSize > 1 && diag.template t(curSize-1) == (T)0.f) --curSize; int m = 0; - std::vector indices; + std::vector indices; for(int k = 0; k < curSize; ++k) - if(math::nd4j_abs(col0.template e(k)) > almostZero) - indices.push_back((T)k); + if(math::nd4j_abs(col0.template t(k)) > almostZero) + indices.push_back(k); - auto permut = NDArrayFactory::create(_m.ordering(), {1, (int)indices.size()}, indices, _m.getContext()); - auto shifts = NDArrayFactory::create(_m.ordering(), {size, 1}, _m.getContext()); - auto mus = NDArrayFactory::create(_m.ordering(), {size, 1}, _m.getContext()); - auto zhat = NDArrayFactory::create(_m.ordering(), {size, 1}, _m.getContext()); + NDArray permut(_m.ordering(), {(int)indices.size()}, _m.dataType(), _m.getContext()); + for(int k = 0; k < indices.size(); ++k) + permut.r(k) = (T)indices[k]; + + NDArray shifts(_m.ordering(), {size, 1}, _m.dataType(), _m.getContext()); + NDArray mus(_m.ordering(), {size, 1}, _m.dataType(), _m.getContext()); + NDArray zhat(_m.ordering(), {size, 1}, _m.dataType(), _m.getContext()); calcSingVals(col0, diag, permut, 
singVals, shifts, mus); perturb(col0, diag, permut, singVals, shifts, mus, zhat); @@ -635,53 +613,39 @@ void SVD::calcBlockSVD(int col1, int size, NDArray& U, NDArray& singVals, NDA for(int i=0; i(i) > singVals.e(i+1)) { - T _e0 = singVals.e(i); - T _e1 = singVals.e(i+1); - //math::nd4j_swap(singVals(i),singVals(i+1)); - singVals.p(i, _e1); - singVals.p(i+1, _e0); + if(singVals.t(i) > singVals.t(i+1)) { - auto temp1 = U({0,0, i,i+1}, true); - auto temp2 = U({0,0, i+1,i+2}, true); - auto temp3 = temp1; - temp1.assign(temp2); - temp2.assign(temp3); + math::nd4j_swap(singVals.r(i), singVals.r(i+1)); + + auto temp1 = U({0,0, i,i+1}); + auto temp2 = U({0,0, i+1,i+2}); + temp1.swapUnsafe(temp2); if(_calcV) { - auto temp1 = V({0,0, i,i+1}, true); - auto temp2 = V({0,0, i+1,i+2}, true); - auto temp3 = temp1; - temp1.assign(temp2); - temp2.assign(temp3); + auto temp1 = V({0,0, i,i+1}); + auto temp2 = V({0,0, i+1,i+2}); + temp1.swapUnsafe(temp2); } } } - auto temp1 = singVals({0,curSize, 0,0}, true); - for (int e = 0; e < curSize / 2; ++e) { - T tmp = temp1.e(e); - temp1.p(e, temp1.e(curSize-1-e)); - temp1.p(curSize-1-e, tmp); - } + auto temp1 = singVals({0,curSize, 0,0}); + for (int e = 0; e < curSize / 2; ++e) + math::nd4j_swap(temp1.r(e), temp1.r(curSize-1-e)); auto temp2 = U({0,0, 0,curSize}, true); for(int i = 0; i < curSize/2; ++i) { - auto temp3 = temp2({0,0, i,i+1}, true); - auto temp4 = temp2({0,0, curSize-1-i,curSize-i}, true); - auto temp5 = temp3; - temp3.assign(temp4); - temp4.assign(temp5); + auto temp3 = temp2({0,0, i,i+1}); + auto temp4 = temp2({0,0, curSize-1-i,curSize-i}); + temp3.swapUnsafe(temp4); } if (_calcV) { auto temp2 = V({0,0, 0,curSize}, true); for(int i = 0; i < curSize/2; ++i) { - auto temp3 = temp2({0,0, i,i+1}, true); - auto temp4 = temp2({0,0, curSize-1-i,curSize-i}, true); - auto temp5 = temp3; - temp3.assign(temp4); - temp4.assign(temp5); + auto temp3 = temp2({0,0, i,i+1}); + auto temp4 = temp2({0,0, curSize-1-i,curSize-i}); + temp3.swapUnsafe(temp4); } } } @@ -695,54 +659,45 @@ void SVD::DivideAndConquer(int col1, int col2, int row1W, int col1W, int shif const int n = col2 - col1 + 1; const int k = n/2; const T almostZero = DataTypeUtils::min(); - T alphaK; - T betaK; - T r0; - T lambda, phi, c0, s0; - auto l = NDArrayFactory::create(_u.ordering(), {1, k}, _u.getContext()); - auto f = NDArrayFactory::create(_u.ordering(), {1, n-k-1}, _u.getContext()); + T alphaK, betaK, r0, lambda, phi, c0, s0; + + NDArray l(_u.ordering(), {1, k}, _u.dataType(), _u.getContext()); + NDArray f(_u.ordering(), {1, n-k-1}, _u.dataType(), _u.getContext()); if(n < _switchSize) { JacobiSVD jac(_m({col1,col1+n+1, col1,col1+n}, true), _calcU, _calcV, _fullUV); - if (_calcU) { - auto temp = _u({col1,col1+n+1, col1,col1+n+1}, true); - temp.assign(jac._u); - } + if (_calcU) + _u({col1,col1+n+1, col1,col1+n+1}, true).assign(jac._u); else { - auto temp1 = _u({0,1, col1,col1+n+1}, true); - temp1.assign(jac._u({0,1, 0,0}, true)); - auto temp2 = _u({1,2, col1,col1+n+1}, true); - temp2.assign(jac._u({n,n+1, 0,0}, true)); + _u({0,1, col1,col1+n+1}, true).assign(jac._u({0,1, 0,0}, true)); + _u({1,2, col1,col1+n+1}, true).assign(jac._u({n,n+1, 0,0}, true)); } - if (_calcV) { - auto temp = _v({row1W,row1W+n, col1W,col1W+n}, true); - temp.assign(jac._v); - } + if (_calcV) + _v({row1W,row1W+n, col1W,col1W+n}, true).assign(jac._v); - auto temp = _m({col1+shift,col1+shift+n+1, col1+shift,col1+shift+n}, true); - temp.assign(0.); + _m({col1+shift,col1+shift+n+1, col1+shift,col1+shift+n}, true).nullify(); auto 
diag = _m.diagonal('c'); diag({col1+shift, col1+shift+n, 0,0}, true).assign(jac._s({0,n, 0,0}, true)); return; } - alphaK = _m.e(col1 + k, col1 + k); - betaK = _m.e(col1 + k + 1, col1 + k); + alphaK = _m.t(col1 + k, col1 + k); + betaK = _m.t(col1 + k + 1, col1 + k); DivideAndConquer(k + 1 + col1, col2, k + 1 + row1W, k + 1 + col1W, shift); DivideAndConquer(col1, k - 1 + col1, row1W, col1W + 1, shift + 1); if (_calcU) { - lambda = _u.e(col1 + k, col1 + k); - phi = _u.e(col1 + k + 1, col2 + 1); + lambda = _u.t(col1 + k, col1 + k); + phi = _u.t(col1 + k + 1, col2 + 1); } else { - lambda = _u.e(1, col1 + k); - phi = _u.e(0, col2 + 1); + lambda = _u.t(1, col1 + k); + phi = _u.t(0, col2 + 1); } r0 = math::nd4j_sqrt((math::nd4j_abs(alphaK * lambda) * math::nd4j_abs(alphaK * lambda)) + math::nd4j_abs(betaK * phi) * math::nd4j_abs(betaK * phi)); @@ -757,7 +712,7 @@ void SVD::DivideAndConquer(int col1, int col2, int row1W, int col1W, int shif } if (_calcV) - _v.p(row1W+k, col1W, 1.f); + _v.r(row1W+k, col1W) = (T)1; if (r0 < almostZero){ c0 = 1.; @@ -770,39 +725,37 @@ void SVD::DivideAndConquer(int col1, int col2, int row1W, int col1W, int shif if (_calcU) { - auto temp = _u({col1,col1+k+1, col1+k,col1+k+1}, true); - NDArray q1(temp); + NDArray q1 = _u({col1,col1+k+1, col1+k,col1+k+1}, true).dup(); - for (int i = col1 + k - 1; i >= col1; --i) { - auto temp = _u({col1,col1+k+1, i+1,i+2}, true); - temp.assign(_u({col1, col1+k+1, i, i+1}, true)); - } + for (int i = col1 + k - 1; i >= col1; --i) + _u({col1,col1+k+1, i+1,i+2}, true).assign(_u({col1,col1+k+1, i,i+1}, true)); + + NDArray temp1 = _u({col1+k+1,col1+n+1, col2+1,col2+2}, true); _u({col1,col1+k+1, col1,col1+1}, true).assign(q1 * c0); _u({col1,col1+k+1, col2+1,col2+2}, true).assign(q1 * (-s0)); - _u({col1+k+1,col1+n+1, col1, col1+1}, true).assign(static_cast(_u({col1+k+1, col1+n+1, col2+1, col2+2}, true)) * s0); - _u({col1+k+1,col1+n+1, col2+1,col2+2}, true) *= c0; + _u({col1+k+1,col1+n+1, col1,col1+1}, true).assign(temp1 * s0); + temp1 *= c0; } else { - T q1 = _u.e(0, col1 + k); + T q1 = _u.t(0, col1 + k); for (int i = col1 + k - 1; i >= col1; --i) - _u.p(0, i+1, _u.e(0, i)); + _u.r(0, i+1) = _u.r(0, i); - _u.p(0, col1, q1 * c0); - _u.p(0, col2+1, -q1*s0); - _u.p(1, col1, _u.e(1, col2+1) * s0); - _u.p(1, col2 + 1, _u.e(1, col2 + 1) * c0); - _u({1,2, col1+1, col1+k+1}, true) = 0.f; - _u({0,1, col1+k+1, col1+n}, true) = 0.f; + _u.r(0, col1) = q1 * c0; + _u.r(0, col2+1) = -q1*s0; + _u.r(1, col1) = _u.t(1, col2+1) * s0; + _u.r(1, col2+1) = _u.t(1, col2+1) * c0; + _u({1,2, col1+1, col1+k+1}).nullify(); + _u({0,1, col1+k+1, col1+n}).nullify(); } - _m.p(col1 + shift, col1 + shift, r0); - auto temp1 = _m({col1+shift+1,col1+shift+k+1, col1+shift,col1+shift+1}, true); - temp1.assign(l*alphaK); - auto temp2 = _m({col1+shift+k+1,col1+shift+n, col1+shift,col1+shift+1}, true); - temp2.assign(f*betaK); + _m.r(col1+shift, col1+shift) = r0; + + _m({col1+shift+1,col1+shift+k+1, col1+shift,col1+shift+1}, true).assign(l*alphaK); + _m({col1+shift+k+1,col1+shift+n, col1+shift,col1+shift+1}, true).assign(f*betaK); deflation(col1, col2, k, row1W, col1W, shift); @@ -810,26 +763,22 @@ void SVD::DivideAndConquer(int col1, int col2, int row1W, int col1W, int shif calcBlockSVD(col1 + shift, n, UofSVD, singVals, VofSVD); if(_calcU) { - auto pTemp = _u({col1, col1+n+1, col1,col1+n+1}, true); - auto temp = pTemp; - pTemp.assign(mmul(temp, UofSVD)); + auto temp = _u({col1, col1+n+1, col1,col1+n+1}, true); + temp.assign(mmul(temp, UofSVD)); } else { - auto pTemp = _u({0,0, 
col1,col1+n+1}, true); - auto temp = pTemp; - pTemp.assign(mmul(temp, UofSVD)); + auto temp = _u({0,0, col1,col1+n+1}, true); + temp.assign(mmul(temp, UofSVD)); } if (_calcV) { - auto pTemp = _v({row1W,row1W+n, row1W,row1W+n}, true); - auto temp = pTemp; - pTemp.assign(mmul(temp, VofSVD)); + auto temp = _v({row1W,row1W+n, row1W,row1W+n}, true); + temp.assign(mmul(temp, VofSVD)); } auto blockM = _m({col1+shift,col1+shift+n, col1+shift,col1+shift+n}, true); - blockM = 0.f; - auto diag = blockM.diagonal('c'); - diag.assign(singVals); + blockM.nullify(); + blockM.diagonal('c').assign(singVals); } ////////////////////////////////////////////////////////////////////////// @@ -839,24 +788,22 @@ void SVD::exchangeUV(const HHsequence& hhU, const HHsequence& hhV, const NDAr if (_calcU) { int colsU = _fullUV ? hhU.rows() : _diagSize; - auto temp1 = NDArrayFactory::create(_u.ordering(), {hhU.rows(), colsU}, _u.getContext()); + NDArray temp1(_u.ordering(), {hhU.rows(), colsU}, _u.dataType(), _u.getContext()); temp1.setIdentity(); _u = temp1; - auto temp2 = _u({0,_diagSize, 0,_diagSize}, true); - temp2.assign(V({0,_diagSize, 0,_diagSize}, true)); + _u({0,_diagSize, 0,_diagSize}, true).assign(V({0,_diagSize, 0,_diagSize}, true)); const_cast(hhU).mulLeft(_u); } if (_calcV) { int colsV = _fullUV ? hhV.rows() : _diagSize; - auto temp1 = NDArrayFactory::create(_v.ordering(), {hhV.rows(), colsV}, _v.getContext()); + NDArray temp1(_v.ordering(), {hhV.rows(), colsV}, _v.dataType(), _v.getContext()); temp1.setIdentity(); _v = temp1; - auto temp2 = _v({0,_diagSize, 0,_diagSize}, true); - temp2.assign(U({0,_diagSize, 0,_diagSize}, true)); + _v({0,_diagSize, 0,_diagSize}, true).assign(U({0,_diagSize, 0,_diagSize}, true)); const_cast(hhV).mulLeft(_v); } } @@ -881,48 +828,40 @@ void SVD::evalData(const NDArray& matrix) { return; } - T scale = matrix.reduceNumber(reduce::AMax).e(0); + T scale = matrix.reduceNumber(reduce::AMax).t(0); if(scale == (T)0.) scale = 1.; - NDArray copy; - if(_transp) - copy = matrix.transpose(); - else - copy = matrix / scale; + BiDiagonalUp biDiag(_transp ? 
matrix.transpose() : matrix / scale); - BiDiagonalUp biDiag(copy); + _u.nullify(); + _v.nullify(); - _u = 0.; - _v = 0.; + _m({0,_diagSize, 0,0}, true).assign(biDiag._HHbidiag.transpose()); - auto temp1 = biDiag._HHbidiag.transpose(); - auto temp2 = _m({0,_diagSize, 0,0}, true); - temp2.assign(temp1); - - - auto temp3 = _m({_m.sizeAt(0)-1,_m.sizeAt(0), 0,0}, true); - temp3.assign(0.); + _m({_m.sizeAt(0)-1,_m.sizeAt(0), 0,0}).nullify(); DivideAndConquer(0, _diagSize - 1, 0, 0, 0); for (int i = 0; i < _diagSize; ++i) { - T a = math::nd4j_abs(_m.e(i, i)); - _s.p(i, a * scale); + T a = math::nd4j_abs(_m.t(i, i)); + _s.r(i) = a * scale; if (a < almostZero) { - auto temp = _s({i+1,_diagSize, 0,0}, true); - temp.assign(0.); + _s({i+1,_diagSize, 0,0}).nullify(); break; } else if (i == _diagSize-1) break; } + HHsequence hhV = biDiag.makeHHsequence('v'); + HHsequence hhU = biDiag.makeHHsequence('u'); + if(_transp) - exchangeUV(biDiag.makeHHsequence('v'), biDiag.makeHHsequence('u'), _v, _u); + exchangeUV(hhV, hhU, _v, _u); else - exchangeUV(biDiag.makeHHsequence('u'), biDiag.makeHHsequence('v'), _u, _v); + exchangeUV(hhU, hhV, _u, _v); } diff --git a/libnd4j/include/helpers/hhSequence.h b/libnd4j/include/helpers/hhSequence.h index 31855a86c..1e1f8ecad 100644 --- a/libnd4j/include/helpers/hhSequence.h +++ b/libnd4j/include/helpers/hhSequence.h @@ -27,35 +27,35 @@ namespace sd { namespace ops { namespace helpers { - + class HHsequence { public: - + /* * matrix containing the Householder vectors */ - NDArray _vectors; + const NDArray& _vectors; /* * vector containing the Householder coefficients */ - NDArray _coeffs; - + const NDArray& _coeffs; + /* - * shift of the Householder sequence + * shift of the Householder sequence */ int _shift; /* * length of the Householder sequence */ - int _diagSize; + int _diagSize; - /* + /* * type of sequence, type = 'u' (acting on columns, left) or type = 'v' (acting on rows, right) */ - char _type; + char _type; /* * constructor @@ -64,18 +64,18 @@ class HHsequence { /** * this method mathematically multiplies input matrix on Householder sequence from the left H0*H1*...Hn * matrix - * + * * matrix - input matrix to be multiplied */ template - void _mulLeft(NDArray& matrix); + void mulLeft_(NDArray& matrix); void mulLeft(NDArray& matrix); NDArray getTail(const int idx) const; template - void _applyTo(NDArray& dest); + void applyTo_(NDArray& dest); void applyTo(NDArray& dest); @@ -87,8 +87,8 @@ class HHsequence { ////////////////////////////////////////////////////////////////////////// FORCEINLINE int HHsequence::rows() const { - return _type == 'u' ? _vectors.sizeAt(0) : _vectors.sizeAt(1); -} + return _type == 'u' ? _vectors.sizeAt(0) : _vectors.sizeAt(1); +} diff --git a/libnd4j/include/helpers/householder.h b/libnd4j/include/helpers/householder.h index e71769901..7811fafa0 100644 --- a/libnd4j/include/helpers/householder.h +++ b/libnd4j/include/helpers/householder.h @@ -32,74 +32,74 @@ template class Householder { public: - + /** * this method calculates Householder matrix P = identity_matrix - coeff * w * w^T * P * x = [normX, 0, 0 , 0, ...] - * coeff - scalar + * coeff - scalar * w = [1, w1, w2, w3, ...] * w = u / u0 * u = x - |x|*e0 - * u0 = x0 - |x| + * u0 = x0 - |x| * e0 = [1, 0, 0 , 0, ...] 
- * + * * x - input vector, remains unaffected - */ - static NDArray evalHHmatrix(const NDArray& x); + */ + // static NDArray evalHHmatrix(const NDArray& x); /** * this method evaluates data required for calculation of Householder matrix P = identity_matrix - coeff * w * w^T * P * x = [normX, 0, 0 , 0, ...] - * coeff - scalar + * coeff - scalar * w = [1, w1, w2, w3, ...] * w = u / u0 * u = x - |x|*e0 - * u0 = x0 - |x| + * u0 = x0 - |x| * e0 = [1, 0, 0 , 0, ...] - * + * * x - input vector, remains unaffected * tail - the essential part of the vector w: [w1, w2, w3, ...] * normX - this scalar is the first non-zero element in vector resulting from Householder transformation -> (P*x) - * coeff - scalar, scaling factor in Householder matrix formula + * coeff - scalar, scaling factor in Householder matrix formula */ static void evalHHmatrixData(const NDArray& x, NDArray& tail, T& coeff, T& normX); - static void evalHHmatrixDataI(const NDArray& x, T& coeff, T& normX); + static void evalHHmatrixDataI(NDArray& x, T& coeff, T& normX); // in-place, x to be affected /** * this method mathematically multiplies input matrix on Householder from the left P * matrix - * + * * matrix - input matrix * tail - the essential part of the Householder vector w: [w1, w2, w3, ...] - * coeff - scalar, scaling factor in Householder matrix formula + * coeff - scalar, scaling factor in Householder matrix formula */ static void mulLeft(NDArray& matrix, const NDArray& tail, const T coeff); /** * this method mathematically multiplies input matrix on Householder from the right matrix * P - * + * * matrix - input matrix * tail - the essential part of the Householder vector w: [w1, w2, w3, ...] - * coeff - scalar, scaling factor in Householder matrix formula - */ + * coeff - scalar, scaling factor in Householder matrix formula + */ static void mulRight(NDArray& matrix, const NDArray& tail, const T coeff); - + }; - + // /** // * this function reduce given matrix to upper bidiagonal form (in-place operation), matrix must satisfy following condition rows >= cols - // * - // * matrix - input 2D matrix to be reduced to upper bidiagonal from + // * + // * matrix - input 2D matrix to be reduced to upper bidiagonal from // */ // template // void biDiagonalizeUp(NDArray& matrix); - // /** + // /** // * given a matrix [m,n], this function computes its singular value decomposition matrix = u * s * v^T - // * + // * // * matrix - input 2D matrix to decompose, [m, n] // * u - unitary matrix containing left singular vectors of input matrix, [m, m] // * s - diagonal matrix with singular values of input matrix (non-negative) on the diagonal sorted in decreasing order, @@ -109,7 +109,7 @@ class Householder { // * fullUV - if false then only p (p is smaller among m and n) first columns of u and v will be calculated and their dimensions in this case are [m, p] and [n, p] // * // */ - // void svd(const NDArray& matrix, NDArray& u, NDArray& s, NDArray& v, const bool calcUV = false, const bool fullUV = false) + // void svd(const NDArray& matrix, NDArray& u, NDArray& s, NDArray& v, const bool calcUV = false, const bool fullUV = false) diff --git a/libnd4j/include/helpers/impl/EigenValsAndVecs.cpp b/libnd4j/include/helpers/impl/EigenValsAndVecs.cpp new file mode 100644 index 000000000..6eeb0c28b --- /dev/null +++ b/libnd4j/include/helpers/impl/EigenValsAndVecs.cpp @@ -0,0 +1,293 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. 
+ * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author Yurii Shyrma (iuriish@yahoo.com) +// + +#include +#include + + +namespace sd { +namespace ops { +namespace helpers { + +////////////////////////////////////////////////////////////////////////// +template +EigenValsAndVecs::EigenValsAndVecs(const NDArray& matrix) { + + if(matrix.rankOf() != 2) + throw std::runtime_error("ops::helpers::EigenValsAndVecs constructor: input matrix must be 2D !"); + + if(matrix.sizeAt(0) != matrix.sizeAt(1)) + throw std::runtime_error("ops::helpers::EigenValsAndVecs constructor: input array must be 2D square matrix !"); + + Schur schur(matrix); + + NDArray& schurMatrixU = schur._U; + NDArray& schurMatrixT = schur._T; + + _Vecs = NDArray(matrix.ordering(), {schurMatrixU.sizeAt(1), schurMatrixU.sizeAt(1), 2}, matrix.dataType(), matrix.getContext()); + _Vals = NDArray(matrix.ordering(), {matrix.sizeAt(1), 2}, matrix.dataType(), matrix.getContext()); + + // sequence of method calls matters + calcEigenVals(schurMatrixT); + calcPseudoEigenVecs(schurMatrixT, schurMatrixU); // pseudo-eigenvectors are real and will be stored in schurMatrixU + calcEigenVecs(schurMatrixU); +} + +////////////////////////////////////////////////////////////////////////// +template +void EigenValsAndVecs::calcEigenVals(const NDArray& schurMatrixT) { + + const int numOfCols = schurMatrixT.sizeAt(1); + + // calculate eigenvalues _Vals + int i = 0; + while (i < numOfCols) { + + if (i == numOfCols - 1 || schurMatrixT.t(i+1, i) == T(0.f)) { + + _Vals.r(i, 0) = schurMatrixT.t(i, i); // real part + _Vals.r(i, 1) = T(0); // imaginary part + + if(!math::nd4j_isfin(_Vals.t(i, 0))) { + throw std::runtime_error("ops::helpers::EigenValsAndVecs::calcEigenVals: got infinite eigen value !"); + return; + } + + ++i; + } + else { + + T p = T(0.5) * (schurMatrixT.t(i, i) - schurMatrixT.t(i+1, i+1)); + T z; + { + T t0 = schurMatrixT.t(i+1, i); + T t1 = schurMatrixT.t(i, i+1); + T maxval = math::nd4j_max(math::nd4j_abs(p), math::nd4j_max(math::nd4j_abs(t0), math::nd4j_abs(t1))); + t0 /= maxval; + t1 /= maxval; + T p0 = p / maxval; + z = maxval * math::nd4j_sqrt(math::nd4j_abs(p0 * p0 + t0 * t1)); + } + + _Vals.r(i, 0) = _Vals.r(i+1, 0) = schurMatrixT.t(i+1, i+1) + p; + _Vals.r(i, 1) = z; + _Vals.r(i+1,1) = -z; + + if(!(math::nd4j_isfin(_Vals.t(i,0)) && math::nd4j_isfin(_Vals.t(i+1,0)) && math::nd4j_isfin(_Vals.t(i,1)) && math::nd4j_isfin(_Vals.t(i+1,1)))) { + throw std::runtime_error("ops::helpers::EigenValsAndVecs::calcEigenVals: got infinite eigen value !"); + return; + } + + i += 2; + } + } +} + +////////////////////////////////////////////////////////////////////////// +template +void EigenValsAndVecs::calcPseudoEigenVecs(NDArray& schurMatrixT, NDArray& schurMatrixU) { + + const int numOfCols = schurMatrixU.sizeAt(1); + + T norm = 0; + for (int j = 0; j < numOfCols; ++j) + norm += schurMatrixT({j,j+1, math::nd4j_max(j-1,
0),numOfCols}).reduceNumber(reduce::ASum).template t(0); + + if (norm == T(0)) + return; + + for (int n = numOfCols-1; n >= 0; n--) { + + T p = _Vals.t(n, 0); // real part + T q = _Vals.t(n, 1); // imaginary part + + if(q == (T)0) { // not complex + + T lastr((T)0), lastw((T)0); + int l = n; + + schurMatrixT.r(n, n) = T(1); + + for (int i = n-1; i >= 0; i--) { + + T w = schurMatrixT.t(i,i) - p; + T r = mmul(schurMatrixT({i,i+1, l,n+1}, true), schurMatrixT({l,n+1, n,n+1}, true)).template t(0); // dot + + if (_Vals.t(i, 1) < T(0)) { + lastw = w; + lastr = r; + } + else { + + l = i; + if (_Vals.t(i, 1) == T(0)) { + + if (w != T(0)) + schurMatrixT.r(i, n) = -r / w; + else + schurMatrixT.r(i, n) = -r / (DataTypeUtils::eps() * norm); + } + else { + + T x = schurMatrixT.t(i, i+1); + T y = schurMatrixT.t(i+1, i); + T denom = (_Vals.t(i, 0) - p) * (_Vals.t(i, 0) - p) + _Vals.t(i, 1) * _Vals.t(i, 1); + T t = (x * lastr - lastw * r) / denom; + schurMatrixT.r(i, n) = t; + + if (math::nd4j_abs(x) > math::nd4j_abs(lastw)) + schurMatrixT.r(i+1, n) = (-r - w * t) / x; + else + schurMatrixT.r(i+1, n) = (-lastr - y * t) / lastw; + } + + + T t = math::nd4j_abs(schurMatrixT.t(i, n)); + if((DataTypeUtils::eps() * t) * t > T(1)) + schurMatrixT({schurMatrixT.sizeAt(0)-numOfCols+i,-1, n,n+1}) /= t; + } + } + } + else if(q < T(0) && n > 0) { // complex + + T lastra(0), lastsa(0), lastw(0); + int l = n - 1; + + if(math::nd4j_abs(schurMatrixT.t(n, n-1)) > math::nd4j_abs(schurMatrixT.t(n-1, n))) { + + schurMatrixT.r(n-1, n-1) = q / schurMatrixT.t(n, n-1); + schurMatrixT.r(n-1, n) = -(schurMatrixT.t(n, n) - p) / schurMatrixT.t(n, n-1); + } + else { + divideComplexNums(T(0),-schurMatrixT.t(n-1,n), schurMatrixT.t(n-1,n-1)-p,q, schurMatrixT.r(n-1,n-1),schurMatrixT.r(n-1,n)); + } + + schurMatrixT.r(n,n-1) = T(0); + schurMatrixT.r(n,n) = T(1); + + for (int i = n-2; i >= 0; i--) { + + T ra = mmul(schurMatrixT({i,i+1, l,n+1}, true), schurMatrixT({l,n+1, n-1,n}, true)).template t(0); // dot + T sa = mmul(schurMatrixT({i,i+1, l,n+1}, true), schurMatrixT({l,n+1, n,n+1}, true)).template t(0); // dot + + T w = schurMatrixT.t(i,i) - p; + + if (_Vals.t(i, 1) < T(0)) { + lastw = w; + lastra = ra; + lastsa = sa; + } + else { + + l = i; + + if (_Vals.t(i, 1) == T(0)) { + divideComplexNums(-ra,-sa, w,q, schurMatrixT.r(i,n-1),schurMatrixT.r(i,n)); + } + else { + + T x = schurMatrixT.t(i,i+1); + T y = schurMatrixT.t(i+1,i); + T vr = (_Vals.t(i, 0) - p) * (_Vals.t(i, 0) - p) + _Vals.t(i, 1) * _Vals.t(i, 1) - q * q; + T vi = (_Vals.t(i, 0) - p) * T(2) * q; + + if ((vr == T(0)) && (vi == T(0))) + vr = DataTypeUtils::eps() * norm * (math::nd4j_abs(w) + math::nd4j_abs(q) + math::nd4j_abs(x) + math::nd4j_abs(y) + math::nd4j_abs(lastw)); + + divideComplexNums(x*lastra-lastw*ra+q*sa,x*lastsa-lastw*sa-q*ra, vr,vi, schurMatrixT.r(i,n-1),schurMatrixT.r(i,n)); + + if(math::nd4j_abs(x) > (math::nd4j_abs(lastw) + math::nd4j_abs(q))) { + + schurMatrixT.r(i+1,n-1) = (-ra - w * schurMatrixT.t(i,n-1) + q * schurMatrixT.t(i,n)) / x; + schurMatrixT.r(i+1,n) = (-sa - w * schurMatrixT.t(i,n) - q * schurMatrixT.t(i,n-1)) / x; + } + else + divideComplexNums(-lastra-y*schurMatrixT.t(i,n-1),-lastsa-y*schurMatrixT.t(i,n), lastw,q, schurMatrixT.r(i+1,n-1),schurMatrixT.r(i+1,n)); + } + + T t = math::nd4j_max(math::nd4j_abs(schurMatrixT.t(i, n-1)), math::nd4j_abs(schurMatrixT.t(i,n))); + if ((DataTypeUtils::eps() * t) * t > T(1)) + schurMatrixT({i,numOfCols, n-1,n+1}) /= t; + } + } + n--; + } + else + throw 
std::runtime_error("ops::helpers::EigenValsAndVecs::calcEigenVecs: internal bug !"); + } + + for (int j = numOfCols-1; j >= 0; j--) + schurMatrixU({0,0, j,j+1}, true).assign( mmul(schurMatrixU({0,0, 0,j+1}, true), schurMatrixT({0,j+1, j,j+1}, true)) ); +} + + +////////////////////////////////////////////////////////////////////////// +template +void EigenValsAndVecs::calcEigenVecs(const NDArray& schurMatrixU) { + + const T precision = T(2) * DataTypeUtils::eps(); + + const int numOfCols = schurMatrixU.sizeAt(1); + + for (int j = 0; j < numOfCols; ++j) { + + if(math::nd4j_abs(_Vals.t(j, 1)) <= math::nd4j_abs(_Vals.t(j, 0)) * precision || j+1 == numOfCols) { // real + + _Vecs.syncToDevice(); + _Vecs({0,0, j,j+1, 0,1}).assign(schurMatrixU({0,0, j,j+1})); + _Vecs({0,0, j,j+1, 1,2}) = (T)0; + + // normalize + const T norm2 = _Vecs({0,0, j,j+1, 0,1}).reduceNumber(reduce::SquaredNorm).template t(0); + if(norm2 > (T)0) + _Vecs({0,0, j,j+1, 0,1}) /= math::nd4j_sqrt(norm2); + } + else { // complex + + for (int i = 0; i < numOfCols; ++i) { + _Vecs.r(i, j, 0) = _Vecs.r(i, j+1, 0) = schurMatrixU.t(i, j); + _Vecs.r(i, j, 1) = schurMatrixU.t(i, j+1); + _Vecs.r(i, j+1, 1) = -schurMatrixU.t(i, j+1); + } + + // normalize + T norm2 = _Vecs({0,0, j,j+1, 0,0}).reduceNumber(reduce::SquaredNorm).template t(0); + if(norm2 > (T)0) + _Vecs({0,0, j,j+1, 0,0}) /= math::nd4j_sqrt(norm2); + + // normalize + norm2 = _Vecs({0,0, j+1,j+2, 0,0}).reduceNumber(reduce::SquaredNorm).template t(0); + if(norm2 > (T)0) + _Vecs({0,0, j+1,j+2, 0,0}) /= math::nd4j_sqrt(norm2); + + ++j; + } + } +} + + +template class ND4J_EXPORT EigenValsAndVecs; +template class ND4J_EXPORT EigenValsAndVecs; +template class ND4J_EXPORT EigenValsAndVecs; +template class ND4J_EXPORT EigenValsAndVecs; + +} +} +} \ No newline at end of file diff --git a/libnd4j/include/helpers/impl/FullPivLU.cpp b/libnd4j/include/helpers/impl/FullPivLU.cpp new file mode 100644 index 000000000..efb7571ed --- /dev/null +++ b/libnd4j/include/helpers/impl/FullPivLU.cpp @@ -0,0 +1,170 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author Yurii Shyrma (iuriish@yahoo.com) +// + +#include +#include +#include + + +namespace sd { +namespace ops { +namespace helpers { + + +////////////////////////////////////////////////////////////////////////// +// A{M,K} * x{K,N} = b{M,N} +template +void FullPivLU::solve(const NDArray& A, const NDArray& b, NDArray& x) { + + if(A.rankOf() != 2) + throw std::runtime_error("FullPivLU::solve: input matrix A must be 2D !"); + + if(A.sizeAt(0) != b.sizeAt(0)) + throw std::runtime_error("FullPivLU::solve: A and b must have the same number of rows !"); + + if(A.sizeAt(1) != x.sizeAt(0)) + throw std::runtime_error("FullPivLU::solve: number of A columns must be equal to number of x rows !"); + + NDArray LU = A.dup(); + + const int rows = LU.sizeAt(0); + const int cols = LU.sizeAt(1); + const int diagLen = math::nd4j_min(rows, cols); + + std::vector rowsInds(rows), colsInds(cols); + + int numOfTranspos = 0; + int nonZeroPivots1 = diagLen; + + T maxPivot = T(0); + + for(int k = 0; k < diagLen; ++k) { + + NDArray bottomRightCorner = LU({k,rows, k,cols}, true); + const int indPivot = static_cast(bottomRightCorner.indexReduceNumber(indexreduce::IndexAbsoluteMax).t(0)); + + int colPivot = indPivot % (cols-k); + int rowPivot = indPivot / (cols-k); + + T currentMax = math::nd4j_abs(bottomRightCorner.t(rowPivot, colPivot)); + + // take into account that this was calculated in corner, not in whole LU + rowPivot += k; + colPivot += k; + + if(currentMax == T(0)) { + + nonZeroPivots1 = k; + + for(int i = k; i < diagLen; ++i) + rowsInds[i] = colsInds[i] = i; + + break; + } + + if(currentMax > maxPivot) + maxPivot = currentMax; + + rowsInds[k] = rowPivot; + colsInds[k] = colPivot; + + if(k != rowPivot) { + NDArray row1 = LU({k,k+1, 0,0}, true); + NDArray row2 = LU({rowPivot,rowPivot+1, 0,0}, true); + row1.swapUnsafe(row2); + ++numOfTranspos; + } + if(k != colPivot) { + NDArray col1 = LU({0,0, k,k+1}, true); + NDArray col2 = LU({0,0, colPivot,colPivot+1}, true); + col1.swapUnsafe(col2); + ++numOfTranspos; + } + + if(k < rows-1) + LU({k+1,rows, k,k+1}, true) /= LU.t(k, k); + + if(k < diagLen-1) + LU({k+1,rows, k+1,cols},true) -= mmul(LU({k+1,rows, k,k+1},true), LU({k,k+1, k+1,cols},true)); + } + + //***************************************************// + + const T threshold = maxPivot * DataTypeUtils::eps() * (T)diagLen; + + int nonZeroPivots2 = 0; + for(int i = 0; i < nonZeroPivots1; ++i) + nonZeroPivots2 += static_cast(math::nd4j_abs(LU.t(i,i)) > threshold); + + if(nonZeroPivots2 == 0) { + x.nullify(); + return; + } + + //***************************************************// + + std::vector rowsPermut1(rows), rowsPermut2(rows), colsPermut(cols); + std::iota(rowsPermut1.begin(), rowsPermut1.end(), 0); + std::iota(colsPermut.begin(), colsPermut.end(), 0); + + for(int k = diagLen-1; k >= 0; --k) + math::nd4j_swap(rowsPermut1[k], rowsPermut1[rowsInds[k]]); + + for(int k = 0; k < diagLen; ++k) + math::nd4j_swap(colsPermut[k], colsPermut[colsInds[k]]); + + for(int i = 0; i < rows; ++i) + for(int j = 0; j < rows; ++j) + if(i == rowsPermut1[j]) { rowsPermut2[i] = j; break; } + + //***************************************************// + + NDArray c = b.ulike(); + + for (int i = 0; i < rows; ++i) + c({i,i+1, 0,0}, true).assign(b({rowsPermut2[i],rowsPermut2[i]+1, 0,0}, true)); + + + NDArray cTopRows1 = c({0,diagLen, 0,0}, true); + // TriangularSolver::solve(LU({0,diagLen, 0,diagLen}, 
true), cTopRows1, true, true, cTopRows1); + ops::helpers::triangularSolve2D(nullptr, LU({0,diagLen, 0,diagLen}, true), cTopRows1,true,true, cTopRows1); + + if(rows > cols) + c({cols,-1, 0,0}, true) -= mmul(LU({cols,-1, 0,0},true), c({0,cols, 0,0}, true)); + + NDArray cTopRows2 = c({0,nonZeroPivots2, 0,0}, true); + // TriangularSolver::solve(LU({0,nonZeroPivots2, 0,nonZeroPivots2}, true), cTopRows2, false, false, cTopRows2); + ops::helpers::triangularSolve2D(nullptr, LU({0,nonZeroPivots2, 0,nonZeroPivots2}, true),cTopRows2,false,false, cTopRows2); + + for(int i = 0; i < nonZeroPivots2; ++i) + x({colsPermut[i],colsPermut[i]+1, 0,0}, true).assign(c({i,i+1, 0,0}, true)); + + for(int i = nonZeroPivots2; i < cols; ++i) + x({colsPermut[i],colsPermut[i]+1, 0,0}, true).nullify(); +} + +template class ND4J_EXPORT FullPivLU; +template class ND4J_EXPORT FullPivLU; +template class ND4J_EXPORT FullPivLU; +template class ND4J_EXPORT FullPivLU; + +} +} +} diff --git a/libnd4j/include/helpers/impl/HessenbergAndSchur.cpp b/libnd4j/include/helpers/impl/HessenbergAndSchur.cpp new file mode 100644 index 000000000..31495cab9 --- /dev/null +++ b/libnd4j/include/helpers/impl/HessenbergAndSchur.cpp @@ -0,0 +1,383 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author Yurii Shyrma (iuriish@yahoo.com) +// + +#include +#include +#include +#include + + +namespace sd { +namespace ops { +namespace helpers { + + +////////////////////////////////////////////////////////////////////////// +template +Hessenberg::Hessenberg(const NDArray& matrix) { + + if(matrix.rankOf() != 2) + throw std::runtime_error("ops::helpers::Hessenberg constructor: input matrix must be 2D !"); + + if(matrix.sizeAt(0) == 1) { + _Q = NDArray(matrix.ordering(), {1,1}, matrix.dataType(), matrix.getContext()); + _Q = 1; + _H = matrix.dup(); + return; + } + + if(matrix.sizeAt(0) != matrix.sizeAt(1)) + throw std::runtime_error("ops::helpers::Hessenberg constructor: input array must be 2D square matrix !"); + + _H = matrix.dup(); + _Q = matrix.ulike(); + + evalData(); +} + +////////////////////////////////////////////////////////////////////////// +template +void Hessenberg::evalData() { + + const int rows = _H.sizeAt(0); + + NDArray hhCoeffs(_H.ordering(), {rows - 1}, _H.dataType(), _H.getContext()); + + // calculate _H + for(uint i = 0; i < rows - 1; ++i) { + + T coeff, norm; + + NDArray tail1 = _H({i+1,-1, i,i+1}); + NDArray tail2 = _H({i+2,-1, i,i+1}, true); + + Householder::evalHHmatrixDataI(tail1, coeff, norm); + + _H({0,0, i,i+1}). template r(i+1) = norm; + hhCoeffs. 
template r(i) = coeff; + + NDArray bottomRightCorner = _H({i+1,-1, i+1,-1}, true); + Householder::mulLeft(bottomRightCorner, tail2, coeff); + + NDArray rightCols = _H({0,0, i+1,-1}, true); + Householder::mulRight(rightCols, tail2.transpose(), coeff); + } + + // calculate _Q + HHsequence hhSeq(_H, hhCoeffs, 'u'); + hhSeq._diagSize = rows - 1; + hhSeq._shift = 1; + hhSeq.applyTo_(_Q); + + // fill down with zeros starting at first subdiagonal + _H.fillAsTriangular(0, -1, 0, _H, 'l'); +} + +////////////////////////////////////////////////////////////////////////// +template +Schur::Schur(const NDArray& matrix) { + + if(matrix.rankOf() != 2) + throw std::runtime_error("ops::helpers::Schur constructor: input matrix must be 2D !"); + + if(matrix.sizeAt(0) != matrix.sizeAt(1)) + throw std::runtime_error("ops::helpers::Schur constructor: input array must be 2D square matrix !"); + + evalData(matrix); +} + +////////////////////////////////////////////////////////////////////////// +template +void Schur::evalData(const NDArray& matrix) { + + const T scale = matrix.reduceNumber(reduce::AMax).template t(0); + + const T almostZero = DataTypeUtils::min(); + + if(scale < DataTypeUtils::min()) { + + _T = matrix.ulike(); + _U = matrix.ulike(); + + _T.nullify(); + _U.setIdentity(); + + return; + } + + // perform Hessenberg decomposition + Hessenberg hess(matrix / scale); + + _T = std::move(hess._H); + _U = std::move(hess._Q); + + calcFromHessenberg(); + + _T *= scale; +} + +////////////////////////////////////////////////////////////////////////// +template +void Schur::splitTwoRows(const int ind, const T shift) { + + const int numCols = _T.sizeAt(1); + + T p = (T)0.5 * (_T.t(ind-1, ind-1) - _T.t(ind, ind)); + + T q = p*p + _T.t(ind, ind-1) * _T.t(ind-1, ind); + + _T.r(ind, ind) += shift; + _T.r(ind-1, ind-1) += shift; + + if (q >= (T)0) { + + T z = math::nd4j_sqrt(math::nd4j_abs(q)); + + NDArray rotation(_T.ordering(), {2, 2}, _T.dataType(), _T.getContext()); + + if (p >= (T)0) + JacobiSVD::createJacobiRotationGivens(p+z, _T.t(ind, ind-1), rotation); + else + JacobiSVD::createJacobiRotationGivens(p-z, _T.t(ind, ind-1), rotation); + + NDArray rightCols = _T({0,0, ind-1,-1}); + JacobiSVD::mulRotationOnLeft(ind-1, ind, rightCols, rotation.transpose()); + + NDArray topRows = _T({0,ind+1, 0,0}); + JacobiSVD::mulRotationOnRight(ind-1, ind, topRows, rotation); + + JacobiSVD::mulRotationOnRight(ind-1, ind, _U, rotation); + + _T.r(ind, ind-1) = (T)0; + } + + if (ind > 1) + _T.r(ind-1, ind-2) = (T)0; +} + + +////////////////////////////////////////////////////////////////////////// +template +void Schur::calcShift(const int ind, const int iter, T& shift, NDArray& shiftVec) { + + // shiftVec has length = 3 + + shiftVec.r(0) = _T.t(ind, ind); + shiftVec.r(1) = _T.t(ind-1, ind-1); + shiftVec.r(2) = _T.t(ind, ind-1) * _T.t(ind-1, ind); + + if (iter == 10) { + shift += shiftVec.t(0); + + for (int i = 0; i <= ind; ++i) + _T.r(i,i) -= shiftVec.t(0); + + T s = math::nd4j_abs(_T.t(ind, ind-1)) + math::nd4j_abs(_T.t(ind-1, ind-2)); + + shiftVec.r(0) = T(0.75) * s; + shiftVec.r(1) = T(0.75) * s; + shiftVec.r(2) = T(-0.4375) * s*s; + } + + if (iter == 30) { + + T s = (shiftVec.t(1) - shiftVec.t(0)) / T(2.0); + s = s*s + shiftVec.t(2); + + if (s > T(0)) { + + s = math::nd4j_sqrt(s); + + if (shiftVec.t(1) < shiftVec.t(0)) + s = -s; + + s = s + (shiftVec.t(1) - shiftVec.t(0)) / T(2.0); + s = shiftVec.t(0) - shiftVec.t(2) / s; + shift += s; + + for (int i = 0; i <= ind; ++i) + _T.r(i,i) -= s; + + shiftVec = T(0.964); + } + } +} + 
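For reference, calcShift above reproduces the classic ad-hoc shift strategy of the Francis QR iteration (the same scheme Eigen's RealSchur uses): the shift vector is formed from the trailing 2x2 block of _T, and "exceptional" shifts are injected at iterations 10 and 30 to break rare convergence stalls. The following is a minimal standalone sketch of the underlying Wilkinson shift, assuming double precision and a trailing block with real eigenvalues; wilkinsonShift is a hypothetical helper for illustration, not part of this patch:

#include <cmath>
#include <cstdio>

// Wilkinson shift for the trailing 2x2 block [[a, b], [c, d]] of a Hessenberg
// matrix: returns the eigenvalue of that block closer to d. Assumes
// p*p + b*c >= 0, i.e. the block has real eigenvalues.
static double wilkinsonShift(double a, double b, double c, double d) {
    const double p  = 0.5 * (a - d);
    const double bc = b * c;
    const double r  = std::sqrt(p * p + bc);
    // choose the larger-magnitude denominator to avoid cancellation
    const double denom = (p >= 0.0) ? (p + r) : (p - r);
    return (denom != 0.0) ? (d - bc / denom) : d;
}

int main() {
    // [[4, 1], [2, 3]] has eigenvalues {5, 2}; the shift picks 2, the root closer to d = 3
    std::printf("shift = %f\n", wilkinsonShift(4.0, 1.0, 2.0, 3.0));
    return 0;
}

Shifting by such a value before each implicit QR sweep is what drives the deflation loop in calcFromHessenberg further below.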
+////////////////////////////////////////////////////////////////////////// +template +void Schur::initFrancisQR(const int ind1, const int ind2, const NDArray& shiftVec, int& ind3, NDArray& householderVec) { + + // shiftVec has length = 3 + + for (ind3 = ind2-2; ind3 >= ind1; --ind3) { + + const T mm = _T.t(ind3, ind3); + const T r = shiftVec.t(0) - mm; + const T s = shiftVec.t(1) - mm; + + householderVec.r(0) = (r * s - shiftVec.t(2)) / _T.t(ind3+1, ind3) + _T.t(ind3, ind3+1); + householderVec.r(1) = _T.t(ind3+1, ind3+1) - mm - r - s; + householderVec.r(2) = _T.t(ind3+2, ind3+1); + + if (ind3 == ind1) + break; + + const T lhs = _T.t(ind3,ind3-1) * (math::nd4j_abs(householderVec.t(1)) + math::nd4j_abs(householderVec.t(2))); + const T rhs = householderVec.t(0) * (math::nd4j_abs(_T.t(ind3-1, ind3-1)) + math::nd4j_abs(mm) + math::nd4j_abs(_T.t(ind3+1, ind3+1))); + + if(math::nd4j_abs(lhs) < DataTypeUtils::eps() * rhs) + break; + } +} + +////////////////////////////////////////////////////////////////////////// +template +void Schur::doFrancisQR(const int ind1, const int ind2, const int ind3, const NDArray& householderVec) { + + if(!(ind2 >= ind1)) + throw std::runtime_error("ops::helpers::Schur::doFrancisQR: wrong input indexes, condition ind2 >= ind1 must be true !"); + if(!(ind2 <= ind3-2)) + throw std::runtime_error("ops::helpers::Schur::doFrancisQR: wrong input indexes, condition ind2 <= ind3-2 must be true !"); + + const int numCols = _T.sizeAt(1); + + for (int k = ind2; k <= ind3-2; ++k) { + + const bool firstIter = (k == ind2); + + T coeff, normX; + NDArray tail(_T.ordering(), {2, 1}, _T.dataType(), _T.getContext()); + Householder::evalHHmatrixData(firstIter ? householderVec : _T({k,k+3, k-1,k}), tail, coeff, normX); + + if (normX != T(0)) { + + if (firstIter && k > ind1) + _T.r(k, k-1) = -_T.t(k, k-1); + else if (!firstIter) + _T.r(k, k-1) = normX; + + NDArray block1 = _T({k,k+3, k,numCols}, true); + Householder::mulLeft(block1, tail, coeff); + + NDArray block2 = _T({0,math::nd4j_min(ind3,k+3)+1, k,k+3}, true); + Householder::mulRight(block2, tail, coeff); + + NDArray block3 = _U({0,numCols, k,k+3}, true); + Householder::mulRight(block3, tail, coeff); + } + } + + T coeff, normX; + NDArray tail(_T.ordering(), {1, 1}, _T.dataType(), _T.getContext()); + Householder::evalHHmatrixData(_T({ind3-1,ind3+1, ind3-2,ind3-1}), tail, coeff, normX); + + if (normX != T(0)) { + + _T.r(ind3-1, ind3-2) = normX; + + NDArray block1 = _T({ind3-1,ind3+1, ind3-1,numCols}, true); + Householder::mulLeft(block1, tail, coeff); + + NDArray block2 = _T({0,ind3+1, ind3-1,ind3+1}, true); + Householder::mulRight(block2, tail, coeff); + + NDArray block3 = _U({0,numCols, ind3-1,ind3+1}, true); + Householder::mulRight(block3, tail, coeff); + } + + for (int i = ind2+2; i <= ind3; ++i) { + _T.r(i, i-2) = T(0); + if (i > ind2+2) + _T.r(i, i-3) = T(0); + } +} + +////////////////////////////////////////////////////////////////////////// +template +void Schur::calcFromHessenberg() { + + const int maxIters = _maxItersPerRow * _T.sizeAt(0); + + const int numCols = _T.sizeAt(1); + int iu = numCols - 1; + int iter = 0; + int totalIter = 0; + + T shift = T(0); + + T norm = 0; + for (int j = 0; j < numCols; ++j) + norm += _T({0,math::nd4j_min(numCols,j+2), j,j+1}).reduceNumber(reduce::ASum).template t(0); + + if(norm != T(0)) { + + while (iu >= 0) { + + const int il = getSmallSubdiagEntry(iu); + + if (il == iu) { + + _T.r(iu,iu) = _T.t(iu,iu) + shift; + if (iu > 0) + _T.r(iu, iu-1) = T(0); + iu--; + iter = 0; + + } + else if (il ==
iu-1) { + + splitTwoRows(iu, shift); + iu -= 2; + iter = 0; + } + else { + + NDArray householderVec(_T.ordering(), {3}, _T.dataType(), _T.getContext()); + NDArray shiftVec (_T.ordering(), {3}, _T.dataType(), _T.getContext()); + + calcShift(iu, iter, shift, shiftVec); + + ++iter; + ++totalIter; + + if (totalIter > maxIters) + break; + + int im; + initFrancisQR(il, iu, shiftVec, im, householderVec); + doFrancisQR(il, im, iu, householderVec); + } + } + } +} + +template class ND4J_EXPORT Hessenberg; +template class ND4J_EXPORT Hessenberg; +template class ND4J_EXPORT Hessenberg; +template class ND4J_EXPORT Hessenberg; + +template class ND4J_EXPORT Schur; +template class ND4J_EXPORT Schur; +template class ND4J_EXPORT Schur; +template class ND4J_EXPORT Schur; + +} +} +} \ No newline at end of file diff --git a/libnd4j/include/helpers/impl/MmulHelper.cpp b/libnd4j/include/helpers/impl/MmulHelper.cpp index 8e37fd530..ba86bb1b5 100644 --- a/libnd4j/include/helpers/impl/MmulHelper.cpp +++ b/libnd4j/include/helpers/impl/MmulHelper.cpp @@ -207,7 +207,7 @@ sd::NDArray* MmulHelper::mmul(const sd::NDArray* A, const sd::NDArray* B, sd::ND const bool isBVector = shape::isCommonVector(B->shapeInfo(), lenDim); // dot product of 2 vectors - if(isAVector && isBVector && (aRank != 2 || aRank == 2 && (A->isSameShape(B) || bRank == 1 && A->sizeAt(1) == 1))) // (1x1x1 * 1x1) or (1x4 * 1*4) or (4x1 * 4x1) or (4x1 * 4) + if(A->lengthOf() == B->lengthOf() && isAVector && isBVector && (aRank != 2 || aRank == 2 && (A->isSameShape(B) || bRank == 1 && A->sizeAt(1) == 1))) // (1x1x1 * 1x1) or (1x4 * 1x4) or (4x1 * 4x1) or (4x1 * 4) return dot(A, B, C, alpha, beta); // matrix x matrix diff --git a/libnd4j/include/helpers/impl/Sqrtm.cpp b/libnd4j/include/helpers/impl/Sqrtm.cpp new file mode 100644 index 000000000..5fe45656f --- /dev/null +++ b/libnd4j/include/helpers/impl/Sqrtm.cpp @@ -0,0 +1,276 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License.
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author Yurii Shyrma (iuriish@yahoo.com) +// + +#include +#include +#include +#include +#include +#include + + +namespace sd { +namespace ops { +namespace helpers { + +////////////////////////////////////////////////////////////////////////// +template +static void sqrtmQuasiTrianDiag(const NDArray& matrixT, NDArray& sqrtT ) { + + const int rows = matrixT.sizeAt(0); + + for(int i = 0; i < rows; i++) { + + if (i == rows - 1 || matrixT.t(i+1, i) == (T)0) { + const auto elemT = matrixT.t(i, i); + if(elemT < (T)0) + throw std::runtime_error("ops::helpers::Sqrtm::sqrtmQuasiTrianDiag: can't take sqrt of negative diagonal element of T matrix !"); + sqrtT.r(i,i) = math::nd4j_sqrt(elemT); + } + else { + + EigenValsAndVecs es(matrixT({i,i+2, i,i+2}, true)); // es._Vecs {2,2,2}, es._Vals{2,2} + + const NDArray& vecs = es._Vecs; + const NDArray& vals = es._Vals; + + const T& vecsReal00 = vecs.t(0,0,0); + const T& vecsImag00 = vecs.t(0,0,1); + const T& vecsReal01 = vecs.t(0,1,0); + const T& vecsImag01 = vecs.t(0,1,1); + const T& vecsReal10 = vecs.t(1,0,0); + const T& vecsImag10 = vecs.t(1,0,1); + const T& vecsReal11 = vecs.t(1,1,0); + const T& vecsImag11 = vecs.t(1,1,1); + + // es.eigenvalues().cwiseSqrt().asDiagonal() + T eigenValsSqrt[2][2]; + eigenValsSqrt[0][0] = vals.t(0,0); + eigenValsSqrt[0][1] = vals.t(0,1); + eigenValsSqrt[1][0] = vals.t(1,0); + eigenValsSqrt[1][1] = vals.t(1,1); + EigenValsAndVecs::sqrtComplexNum(eigenValsSqrt[0][0], eigenValsSqrt[0][1]); + EigenValsAndVecs::sqrtComplexNum(eigenValsSqrt[1][0], eigenValsSqrt[1][1]); + + // es.eigenvectors() * es.eigenvalues().cwiseSqrt().asDiagonal() + T vecsElem[2][2][2]; + EigenValsAndVecs::multiplyComplexNums(vecsReal00,vecsImag00, eigenValsSqrt[0][0],eigenValsSqrt[0][1], vecsElem[0][0][0],vecsElem[0][0][1]); + EigenValsAndVecs::multiplyComplexNums(vecsReal01,vecsImag01, eigenValsSqrt[1][0],eigenValsSqrt[1][1], vecsElem[0][1][0],vecsElem[0][1][1]); + EigenValsAndVecs::multiplyComplexNums(vecsReal10,vecsImag10, eigenValsSqrt[0][0],eigenValsSqrt[0][1], vecsElem[1][0][0],vecsElem[1][0][1]); + EigenValsAndVecs::multiplyComplexNums(vecsReal11,vecsImag11, eigenValsSqrt[1][0],eigenValsSqrt[1][1], vecsElem[1][1][0],vecsElem[1][1][1]); + + // es.eigenvectors().inverse() + T vecsElemInv[2][2][2]; + + T tempReal, tempImag, divisorReal, divisorImag; + EigenValsAndVecs::multiplyComplexNums(vecsReal00,vecsImag00, vecsReal11,vecsImag11, divisorReal,divisorImag); + EigenValsAndVecs::multiplyComplexNums(vecsReal01,vecsImag01, vecsReal10,vecsImag10, tempReal,tempImag); + divisorReal -= tempReal; + divisorImag -= tempImag; + + EigenValsAndVecs::divideComplexNums(vecsReal11,vecsImag11, divisorReal,divisorImag, vecsElemInv[0][0][0],vecsElemInv[0][0][1]); + EigenValsAndVecs::divideComplexNums(-vecsReal01,-vecsImag01, divisorReal,divisorImag, vecsElemInv[0][1][0],vecsElemInv[0][1][1]); + EigenValsAndVecs::divideComplexNums(-vecsReal10,-vecsImag10, divisorReal,divisorImag, vecsElemInv[1][0][0],vecsElemInv[1][0][1]); + EigenValsAndVecs::divideComplexNums(vecsReal00,vecsImag00, divisorReal,divisorImag, vecsElemInv[1][1][0],vecsElemInv[1][1][1]); + + // result + T result[2][2][2]; + + EigenValsAndVecs::multiplyComplexNums(vecsElem[0][0][0],vecsElem[0][0][1], vecsElemInv[0][0][0],vecsElemInv[0][0][1], tempReal,tempImag); + EigenValsAndVecs::multiplyComplexNums(vecsElem[0][1][0],vecsElem[0][1][1], 
vecsElemInv[1][0][0],vecsElemInv[1][0][1], result[0][0][0],result[0][0][1]); + result[0][0][0] += tempReal; + + EigenValsAndVecs::multiplyComplexNums(vecsElem[0][0][0],vecsElem[0][0][1], vecsElemInv[0][1][0],vecsElemInv[0][1][1], tempReal,tempImag); + EigenValsAndVecs::multiplyComplexNums(vecsElem[0][1][0],vecsElem[0][1][1], vecsElemInv[1][1][0],vecsElemInv[1][1][1], result[0][1][0],result[0][1][1]); + result[0][1][0] += tempReal; + + EigenValsAndVecs::multiplyComplexNums(vecsElem[1][0][0],vecsElem[1][0][1], vecsElemInv[0][0][0],vecsElemInv[0][0][1], tempReal,tempImag); + EigenValsAndVecs::multiplyComplexNums(vecsElem[1][1][0],vecsElem[1][1][1], vecsElemInv[1][0][0],vecsElemInv[1][0][1], result[1][0][0],result[1][0][1]); + result[1][0][0] += tempReal; + + EigenValsAndVecs::multiplyComplexNums(vecsElem[1][0][0],vecsElem[1][0][1], vecsElemInv[0][1][0],vecsElemInv[0][1][1], tempReal,tempImag); + EigenValsAndVecs::multiplyComplexNums(vecsElem[1][1][0],vecsElem[1][1][1], vecsElemInv[1][1][0],vecsElemInv[1][1][1], result[1][1][0],result[1][1][1]); + result[1][1][0] += tempReal; + + sqrtT.r(i,i) = result[0][0][0]; + sqrtT.r(i,i+1) = result[0][1][0]; + sqrtT.r(i+1,i) = result[1][0][0]; + sqrtT.r(i+1,i+1) = result[1][1][0]; + + ++i; + } + } +} + +////////////////////////////////////////////////////////////////////////// +// all matrices are {2,2} here +template +static void sqrtmQuasiTrianAuxEq(const NDArray& A, const NDArray& B, const NDArray& C, NDArray& X) { + + NDArray tempMatrix(A.ordering(), {4,4}, A.dataType(), A.getContext()); + + tempMatrix.r(0,0) = A.t(0,0) + B.t(0,0); + tempMatrix.r(1,1) = A.t(0,0) + B.t(1,1); + tempMatrix.r(2,2) = A.t(1,1) + B.t(0,0); + tempMatrix.r(3,3) = A.t(1,1) + B.t(1,1); + tempMatrix.r(0,1) = B.t(1,0); + tempMatrix.r(0,2) = A.t(0,1); + tempMatrix.r(1,0) = B.t(0,1); + tempMatrix.r(1,3) = A.t(0,1); + tempMatrix.r(2,0) = A.t(1,0); + tempMatrix.r(2,3) = B.t(1,0); + tempMatrix.r(3,1) = A.t(1,0); + tempMatrix.r(3,2) = B.t(0,1); + tempMatrix.r(0,3) = (T)0; + tempMatrix.r(1,2) = (T)0; + tempMatrix.r(2,1) = (T)0; + tempMatrix.r(3,0) = (T)0; + + NDArray result(A.ordering(), {4,1}, A.dataType(), A.getContext()); + result.r(0,0) = C.t(0,0); + result.r(1,0) = C.t(0,1); + result.r(2,0) = C.t(1,0); + result.r(3,0) = C.t(1,1); + + FullPivLU::solve(tempMatrix, result, result); + + X.r(0,0) = result.t(0); + X.r(0,1) = result.t(1); + X.r(1,0) = result.t(2); + X.r(1,1) = result.t(3); +} + + +////////////////////////////////////////////////////////////////////////// +template +static void sqrtmQuasiTrianOffDiag(const NDArray& matrixT, NDArray& sqrtT ) { + + const int rows = matrixT.sizeAt(0); + + for (int j = 1; j < rows; j++) { + + if (matrixT.t(j, j-1) != (T)0) + continue; + + for (int i = j - 1; i >= 0; i--) { + + if (i > 0 && matrixT.t(i, i-1) != (T)0) + continue; + + const bool iBlockIs2x2 = (i < rows - 1) && (matrixT.t(i+1, i) != (T)0); + const bool jBlockIs2x2 = (j < rows - 1) && (matrixT.t(j+1, j) != (T)0); + + if (iBlockIs2x2 && jBlockIs2x2) { + + NDArray A = sqrtT({i,i+2, i,i+2}, true); + NDArray B = sqrtT({j,j+2, j,j+2}, true); + NDArray X = matrixT({i,i+2, j,j+2}, true);//.dup(); + + if (j - i > 2) + X -= mmul(sqrtT({i,i+2, i+2,j}, true), sqrtT({i+2,j, j,j+2}, true)); + + sqrtmQuasiTrianAuxEq(A, B, X, X); + + sqrtT.syncToDevice(); + sqrtT({i,i+2, j,j+2}, true).assign(X); + } + else if (iBlockIs2x2 && !jBlockIs2x2) { + + NDArray rhs = matrixT({i,i+2, j,j+1}, true);//.dup(); + + if (j - i > 2) + rhs -= mmul(sqrtT({i,i+2, i+2,j}, true), sqrtT({i+2,j, j,j+1}, true)); + + 
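+            // Illustrative note, not in the original patch: this branch pairs a 2x2
+            // diagonal block at i with a 1x1 block at j. Matching the (i,j) block of
+            // sqrtT * sqrtT = matrixT gives (S_ii + s_jj * I) * X = rhs, and A below
+            // assembles that 2x2 coefficient matrix for the full-pivot LU solve.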
NDArray A(matrixT.ordering(), {2,2}, matrixT.dataType(), matrixT.getContext()); + A.r(0,0) = A.r(1,1) = sqrtT.t(j,j); + A.r(0,1) = A.r(1,0) = T(0); + A += sqrtT({i,i+2, i,i+2}, true); + + FullPivLU::solve(A,rhs,rhs); + + // sqrtT.syncToDevice(); + sqrtT({i,i+2, j,j+1}, true).assign(rhs); + } + else if (!iBlockIs2x2 && jBlockIs2x2) { + + NDArray rhs = matrixT({i,i+1, j,j+2}, true);//.dup(); + + if (j - i > 1) + rhs -= mmul(sqrtT({i,i+1, i+1,j}, true), sqrtT({i+1,j, j,j+2}, true)); + + NDArray A(matrixT.ordering(), {2,2}, matrixT.dataType(), matrixT.getContext()); + A.r(0,0) = A.r(1,1) = sqrtT.t(i,i); + A.r(0,1) = A.r(1,0) = T(0); + A += sqrtT({j,j+2, j,j+2}, true).transpose(); + + NDArray rhsT = rhs.transpose(); + FullPivLU::solve(A,rhsT,rhsT); + + // sqrtT.syncToDevice(); + sqrtT({i,i+1, j,j+2}, true).assign(rhs); + } + else if (!iBlockIs2x2 && !jBlockIs2x2) { + + T temp = mmul(sqrtT({i,i+1, i+1,j}), sqrtT({i+1,j, j,j+1})).t(0); // dot + sqrtT.r(i,j) = (matrixT.t(i,j) - temp ) / (sqrtT.t(i,i) + sqrtT.t(j,j)); + } + } + } +} + +////////////////////////////////////////////////////////////////////////// +template +void Sqrtm::calc(const NDArray& in, NDArray& out) { + + if(in.rankOf() != 2 || in.sizeAt(0) != in.sizeAt(1)) + throw std::runtime_error("ops::helpers::Sqrtm::calc: input matrix must have rank 2 and be square !"); + if(!out.isSameShape(in)) + throw std::runtime_error("ops::helpers::Sqrtm::calc: output matrix must have the same shape as input one!"); + + if(in.lengthOf() == 1) { + out.r(0) = math::nd4j_sqrt(in.t(0)); + return; + } + + ops::helpers::Schur schur(in); + + const NDArray& t1 = schur._T; + const NDArray& t2 = schur._U; + + NDArray sqrtT = in.ulike(); + sqrtT.nullify(); + + sqrtmQuasiTrianDiag(schur._T, sqrtT); + sqrtmQuasiTrianOffDiag(schur._T, sqrtT); + + // out = U * sqrtT * U^T; + NDArray temp = mmul(sqrtT, schur._U.transpose()); + MmulHelper::mmul(&schur._U, &temp, &out); +} + +template class ND4J_EXPORT Sqrtm; +template class ND4J_EXPORT Sqrtm; +template class ND4J_EXPORT Sqrtm; +template class ND4J_EXPORT Sqrtm; + + +} +} +} \ No newline at end of file diff --git a/libnd4j/include/helpers/impl/biDiagonalUp.cpp b/libnd4j/include/helpers/impl/biDiagonalUp.cpp new file mode 100644 index 000000000..d5326c21a --- /dev/null +++ b/libnd4j/include/helpers/impl/biDiagonalUp.cpp @@ -0,0 +1,160 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// Created by Yurii Shyrma on 18.12.2017 +// + + +#include +#include + + +namespace sd { +namespace ops { +namespace helpers { + + +////////////////////////////////////////////////////////////////////////// +BiDiagonalUp::BiDiagonalUp(const NDArray& matrix): _HHmatrix(NDArray(matrix.ordering(), {matrix.sizeAt(0), matrix.sizeAt(1)}, matrix.dataType(), matrix.getContext())), + _HHbidiag(NDArray(matrix.ordering(), {matrix.sizeAt(1), matrix.sizeAt(1)}, matrix.dataType(), matrix.getContext())) { + + // input validation + if(matrix.rankOf() != 2 || matrix.isScalar()) + throw std::runtime_error("ops::helpers::biDiagonalizeUp constructor: input array must be 2D matrix !"); + + _HHmatrix.assign(&matrix); + _HHbidiag.assign(0.); + + evalData(); +} + +template +void BiDiagonalUp::_evalData() { + + const auto rows = _HHmatrix.sizeAt(0); + const auto cols = _HHmatrix.sizeAt(1); + + if(rows < cols) + throw std::runtime_error("ops::helpers::BiDiagonalizeUp::evalData method: this procedure is applicable only for input matrix with rows >= cols !"); + + T coeff, normX; + + T x, y; + + for(Nd4jLong i = 0; i < cols-1; ++i ) { + + // evaluate Householder matrix nullifying columns + NDArray column1 = _HHmatrix({i,rows, i,i+1}); + + x = _HHmatrix.t(i,i); + y = _HHbidiag.t(i,i); + + Householder::evalHHmatrixDataI(column1, x, y); + + _HHmatrix.r(i, i) = x; + _HHbidiag.r(i, i) = y; + + // multiply corresponding matrix block on householder matrix from the left: P * bottomRightCorner + NDArray bottomRightCorner1 = _HHmatrix({i,rows, i+1,cols}, true); // {i, cols} + Householder::mulLeft(bottomRightCorner1, _HHmatrix({i+1,rows, i,i+1}, true), _HHmatrix.t(i,i)); + + if(i == cols-2) + continue; // do not apply right multiplying at last iteration + + // evaluate Householder matrix nullifying rows + NDArray row1 = _HHmatrix({i,i+1, i+1,cols}); + + x = _HHmatrix.t(i,i+1); + y = _HHbidiag.t(i,i+1); + + Householder::evalHHmatrixDataI(row1, x, y); + + _HHmatrix.r(i, i+1) = x; + _HHbidiag.r(i, i+1) = y; + + // multiply corresponding matrix block on householder matrix from the right: bottomRightCorner * P + NDArray bottomRightCorner2 = _HHmatrix({i+1,rows, i+1,cols}, true); // {i, rows} + + Householder::mulRight(bottomRightCorner2, _HHmatrix({i,i+1, i+2,cols}, true), _HHmatrix.t(i,i+1)); + } + + NDArray row2 =_HHmatrix({cols-2,cols-1, cols-1,cols}); + + x = _HHmatrix.t(cols-2,cols-1); + y = _HHbidiag.t(cols-2,cols-1); + + Householder::evalHHmatrixDataI(row2, x, y); + + _HHmatrix.r(cols-2,cols-1) = x; + _HHbidiag.r(cols-2,cols-1) = y; + + NDArray column2 = _HHmatrix({cols-1,rows, cols-1,cols}); + + x = _HHmatrix.t(cols-1,cols-1); + y = _HHbidiag.t(cols-1,cols-1); + + Householder::evalHHmatrixDataI(column2, x, y); + + _HHmatrix.r(cols-1, cols-1) = x; + _HHbidiag.r(cols-1, cols-1) = y; +} + +////////////////////////////////////////////////////////////////////////// +void BiDiagonalUp::evalData() { + auto xType = _HHmatrix.dataType(); + BUILD_SINGLE_SELECTOR(xType, _evalData, ();, FLOAT_TYPES); +} + +////////////////////////////////////////////////////////////////////////// +template +HHsequence BiDiagonalUp::makeHHsequence_(const char type) { + + const int diagSize = type == 'u' ? 
_HHbidiag.sizeAt(0) : _HHbidiag.sizeAt(0) - 1; + + _hhCoeffs = NDArray(_HHmatrix.ordering(), {diagSize}, _HHmatrix.dataType(), _HHmatrix.getContext()); + + if(type == 'u') + for(int i = 0; i < diagSize; ++i) + _hhCoeffs.r(i) = _HHmatrix.t(i,i); + else + for(int i = 0; i < diagSize; ++i) + _hhCoeffs.r(i) = _HHmatrix.t(i,i+1); + + HHsequence result(_HHmatrix, _hhCoeffs, type); + + if(type != 'u') { + result._diagSize = diagSize; + result._shift = 1; + } + + return result; +} + +////////////////////////////////////////////////////////////////////////// +HHsequence BiDiagonalUp::makeHHsequence(const char type) { + auto xType = _HHmatrix.dataType(); + BUILD_SINGLE_SELECTOR(xType, return makeHHsequence_, (type);, FLOAT_TYPES); +} + + + +BUILD_SINGLE_TEMPLATE(template void BiDiagonalUp::_evalData, (), FLOAT_TYPES); +BUILD_SINGLE_TEMPLATE(template HHsequence BiDiagonalUp::makeHHsequence_, (const char type), FLOAT_TYPES); + +} +} +} \ No newline at end of file diff --git a/libnd4j/include/helpers/impl/hhColPivQR.cpp b/libnd4j/include/helpers/impl/hhColPivQR.cpp new file mode 100644 index 000000000..6f4bbebc9 --- /dev/null +++ b/libnd4j/include/helpers/impl/hhColPivQR.cpp @@ -0,0 +1,147 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
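In matrix terms, BiDiagonalUp above computes the Golub–Kahan upper bidiagonalization: for rows >= cols it builds Householder sequences Q_U and Q_V with

\[ Q_U^{\mathsf T} A\, Q_V = \begin{bmatrix} B \\ 0 \end{bmatrix}, \]

where B is upper bidiagonal (its two diagonals kept in _HHbidiag) and the reflector tails are packed below and to the right of the diagonal of _HHmatrix; makeHHsequence then exposes either factor as an HHsequence without ever forming it densely.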
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// Created by Yurii Shyrma on 11.01.2018 +// + +#include +#include + +namespace sd { +namespace ops { +namespace helpers { + + +////////////////////////////////////////////////////////////////////////// +HHcolPivQR::HHcolPivQR(const NDArray& matrix) { + + _qr = matrix.dup(); + _diagSize = math::nd4j_min(matrix.sizeAt(0), matrix.sizeAt(1)); + _coeffs = NDArray(matrix.ordering(), {1, _diagSize}, matrix.dataType(), matrix.getContext()); + + _permut = NDArray(matrix.ordering(), {matrix.sizeAt(1), matrix.sizeAt(1)}, matrix.dataType(), matrix.getContext()); + + evalData(); +} + + void HHcolPivQR::evalData() { + BUILD_SINGLE_SELECTOR(_qr.dataType(), _evalData, (), FLOAT_TYPES); + } + +////////////////////////////////////////////////////////////////////////// +template +void HHcolPivQR::_evalData() { + + const int rows = _qr.sizeAt(0); + const int cols = _qr.sizeAt(1); + + NDArray transp(_qr.ordering(), {cols}/*{1, cols}*/, _qr.dataType(), _qr.getContext()); + NDArray normsUpd(_qr.ordering(), {cols}/*{1, cols}*/, _qr.dataType(), _qr.getContext()); + NDArray normsDir(_qr.ordering(), {cols}/*{1, cols}*/, _qr.dataType(), _qr.getContext()); + + int transpNum = 0; + + for (int k = 0; k < cols; ++k) + normsDir.r(k) = normsUpd.r(k) = _qr({0,0, k,k+1}).reduceNumber(reduce::Norm2).t(0); + + T normScaled = (normsUpd.reduceNumber(reduce::Max)).t(0) * DataTypeUtils::eps(); + T threshold1 = normScaled * normScaled / (T)rows; + T threshold2 = math::nd4j_sqrt(DataTypeUtils::eps()); + + T nonZeroPivots = _diagSize; + T maxPivot = 0.; + + for(int k = 0; k < _diagSize; ++k) { + + int biggestColIndex = normsUpd({k,-1}).indexReduceNumber(indexreduce::IndexMax).e(0); + T biggestColNorm = normsUpd({k,-1}).reduceNumber(reduce::Max).t(0); + T biggestColSqNorm = biggestColNorm * biggestColNorm; + biggestColIndex += k; + + if(nonZeroPivots == (T)_diagSize && biggestColSqNorm < threshold1 * (T)(rows-k)) + nonZeroPivots = k; + + transp.r(k) = (T)biggestColIndex; + + if(k != biggestColIndex) { + + NDArray temp1(_qr({0,0, k,k+1})); + NDArray temp2(_qr({0,0, biggestColIndex,biggestColIndex+1})); + temp1.swapUnsafe(temp2); + + math::nd4j_swap(normsUpd.r(k), normsUpd.r(biggestColIndex)); + math::nd4j_swap(normsDir.r(k), normsDir.r(biggestColIndex)); + + ++transpNum; + } + + T normX, c; + NDArray qrBlock = _qr({k,rows, k,k+1}); + Householder::evalHHmatrixDataI(qrBlock, c, normX); + + _coeffs.r(k) = c; + + _qr.r(k,k) = normX; + + T max = math::nd4j_abs(normX); + if(max > maxPivot) + maxPivot = max; + + if(k < rows && (k+1) < cols) { + NDArray qrBlock = _qr({k,rows, k+1,cols}, true); + NDArray tail = _qr({k+1,rows, k, k+1}, true); + Householder::mulLeft(qrBlock, tail, _coeffs.t(k)); + } + + for (int j = k + 1; j < cols; ++j) { + + if (normsUpd.t(j) != (T)0.f) { + + T temp = math::nd4j_abs(_qr.t(k, j)) / normsUpd.t(j); + temp = ((T)1. + temp) * ((T)1. - temp); + temp = temp < (T)0. ? (T)0. 
: temp; + T temp2 = temp * normsUpd.t(j) * normsUpd.t(j) / (normsDir.t(j)*normsDir.t(j)); + + if (temp2 <= threshold2) { + if(k+1 < rows && j < cols) + normsDir.r(j) = _qr({k+1,rows, j,j+1}).reduceNumber(reduce::Norm2).t(0); + + normsUpd.r(j) = normsDir.t(j); + } + else + normsUpd.r(j) = normsUpd.t(j) * math::nd4j_sqrt(temp); + } + } + } + + _permut.setIdentity(); + + for(int k = 0; k < _diagSize; ++k) { + + int idx = transp.e(k); + NDArray temp1 = _permut({0,0, k, k+1}); + NDArray temp2 = _permut({0,0, idx,idx+1}); + temp1.swapUnsafe(temp2); + } +} + +BUILD_SINGLE_TEMPLATE(template void HHcolPivQR::_evalData, (), FLOAT_TYPES); + +} +} +} + diff --git a/libnd4j/include/helpers/cpu/hhSequence.cpp b/libnd4j/include/helpers/impl/hhSequence.cpp similarity index 59% rename from libnd4j/include/helpers/cpu/hhSequence.cpp rename to libnd4j/include/helpers/impl/hhSequence.cpp index 8a2a35329..dc038dfc8 100644 --- a/libnd4j/include/helpers/cpu/hhSequence.cpp +++ b/libnd4j/include/helpers/impl/hhSequence.cpp @@ -20,7 +20,6 @@ #include #include -#include namespace sd { namespace ops { @@ -29,40 +28,32 @@ namespace helpers { ////////////////////////////////////////////////////////////////////////// HHsequence::HHsequence(const NDArray& vectors, const NDArray& coeffs, const char type): _vectors(vectors), _coeffs(coeffs) { - + _diagSize = sd::math::nd4j_min(_vectors.sizeAt(0), _vectors.sizeAt(1)); - _shift = 0; + _shift = 0; _type = type; } ////////////////////////////////////////////////////////////////////////// template -void HHsequence::_mulLeft(NDArray& matrix) { +void HHsequence::mulLeft_(NDArray& matrix) { const int rows = _vectors.sizeAt(0); const int cols = _vectors.sizeAt(1); - const int inRows = matrix.sizeAt(0); + const int inRows = matrix.sizeAt(0); - NDArray* block(nullptr); + for(int i = _diagSize - 1; i >= 0; --i) { - for(int i = _diagSize - 1; i >= 0; --i) { - if(_type == 'u') { - - block = new NDArray(matrix({inRows-rows+_shift+ i,inRows, 0,0}, true)); - T _x = _coeffs.e(i); - Householder::mulLeft(*block, _vectors({i + 1 + _shift, rows, i, i+1}, true), _x); - _coeffs.p(i, _x); + + NDArray block = matrix({inRows-rows+_shift+ i,inRows, 0,0}, true); + Householder::mulLeft(block, _vectors({i + 1 + _shift, rows, i, i+1}, true), _coeffs.t(i)); } else { - block = new NDArray(matrix({inRows-cols+_shift+i,inRows, 0,0}, true)); - T _x = _coeffs.e(i); - Householder::mulLeft(*block, _vectors({i, i+1, i + 1 + _shift, cols}, true), _x); - _coeffs.p(i, _x); + NDArray block = matrix({inRows-cols+_shift+i,inRows, 0,0}, true); + Householder::mulLeft(block, _vectors({i, i+1, i + 1 + _shift, cols}, true), _coeffs.t(i)); } - - delete block; } } @@ -70,55 +61,51 @@ void HHsequence::_mulLeft(NDArray& matrix) { ////////////////////////////////////////////////////////////////////////// NDArray HHsequence::getTail(const int idx) const { - + int first = idx + 1 + _shift; - + if(_type == 'u') return _vectors({first, -1, idx, idx+1}, true); else - return _vectors({idx, idx+1, first, -1}, true); + return _vectors({idx, idx+1, first, -1}, true); } - ////////////////////////////////////////////////////////////////////////// template -void HHsequence::_applyTo(NDArray& dest) { - +void HHsequence::applyTo_(NDArray& dest) { + int size = _type == 'u' ? 
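The update loop near the end of HHcolPivQR::_evalData above is the classical pivoted-QR column-norm downdate: after step k eliminates row k, the remaining norm of column j satisfies

\[ \big\|a_j^{(k+1)}\big\|^2 = \big\|a_j^{(k)}\big\|^2 - r_{kj}^2 = \big\|a_j^{(k)}\big\|^2\left(1 - \left(\frac{|r_{kj}|}{\|a_j^{(k)}\|}\right)^{2}\right), \]

and the threshold2 = sqrt(eps) test is the usual LINPACK-style guard: once cancellation may have destroyed about half the digits of the running estimate (normsUpd), the norm is recomputed directly from the trailing part of the column into normsDir.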
_vectors.sizeAt(0) : _vectors.sizeAt(1); if(dest.rankOf() != 2 || (dest.sizeAt(0) != size && dest.sizeAt(1) != size)) - dest = NDArrayFactory::create(dest.ordering(), {size, size}, dest.dataType(), dest.getContext()); + dest = NDArray(dest.ordering(), {size, size}, dest.dataType(), dest.getContext()); dest.setIdentity(); - + for(int k = _diagSize - 1; k >= 0; --k) { - + int curNum = size - k - _shift; if(curNum < 1 || (k + 1 + _shift) >= size ) continue; auto block = dest({dest.sizeAt(0)-curNum,dest.sizeAt(0), dest.sizeAt(1)-curNum,dest.sizeAt(1)}, true); - T _x = _coeffs.e(k); - Householder::mulLeft(block, getTail(k), _x); - - _coeffs.p(k, _x); - } -} - - - void HHsequence::applyTo(NDArray& dest) { - auto xType = _coeffs.dataType(); - - BUILD_SINGLE_SELECTOR(xType, _applyTo, (dest), FLOAT_TYPES); + Householder::mulLeft(block, getTail(k), _coeffs.t(k)); } +} - void HHsequence::mulLeft(NDArray& matrix) { - auto xType = _coeffs.dataType(); +////////////////////////////////////////////////////////////////////////// +void HHsequence::applyTo(NDArray& dest) { + auto xType = _coeffs.dataType(); + BUILD_SINGLE_SELECTOR(xType, applyTo_, (dest), FLOAT_TYPES); +} - BUILD_SINGLE_SELECTOR(xType, _mulLeft, (matrix), FLOAT_TYPES); - } +////////////////////////////////////////////////////////////////////////// +void HHsequence::mulLeft(NDArray& matrix) { + auto xType = _coeffs.dataType(); + BUILD_SINGLE_SELECTOR(xType, mulLeft_, (matrix), FLOAT_TYPES); +} + +BUILD_SINGLE_TEMPLATE(template void HHsequence::applyTo_, (sd::NDArray &dest), FLOAT_TYPES); +BUILD_SINGLE_TEMPLATE(template void HHsequence::mulLeft_, (NDArray& matrix), FLOAT_TYPES); - BUILD_SINGLE_TEMPLATE(template void HHsequence::_applyTo, (sd::NDArray &dest), FLOAT_TYPES); - BUILD_SINGLE_TEMPLATE(template void HHsequence::_mulLeft, (NDArray& matrix), FLOAT_TYPES); } } } diff --git a/libnd4j/include/helpers/impl/householder.cpp b/libnd4j/include/helpers/impl/householder.cpp new file mode 100644 index 000000000..e9572f9f6 --- /dev/null +++ b/libnd4j/include/helpers/impl/householder.cpp @@ -0,0 +1,218 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// Created by Yurii Shyrma on 18.12.2017 +// + +#include + +namespace sd { +namespace ops { +namespace helpers { + + +////////////////////////////////////////////////////////////////////////// +// template +// NDArray Householder::evalHHmatrix(const NDArray& x) { + +// // input validation +// if(x.rankOf() != 1 && !x.isScalar()) +// throw std::runtime_error("ops::helpers::Householder::evalHHmatrix method: iinput array must have rank = 1 or to be scalar!"); + +// const auto xLen = x.lengthOf(); + +// NDArray w(x.ordering(), {xLen, 1}, x.dataType(), x.getContext()); // column-vector + +// NDArray xTail = xLen > 1 ? x({1,-1}) : NDArray(); +// T tailXnorm = xLen > 1 ? 
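The HHsequence refactored above represents an orthogonal factor implicitly as a product of reflectors,

\[ Q = H_0 H_1 \cdots H_{d-1}, \qquad H_i = I - c_i\, v_i v_i^{\mathsf T}, \]

with each v_i of the form (0, ..., 0, 1, tail_i), the coefficients c_i held in _coeffs, and the tails read out of _vectors (columns for type 'u', rows for type 'v'); mulLeft and applyTo apply the factors one at a time instead of materializing Q.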
xTail.reduceNumber(reduce::SquaredNorm).t(0) : (T)0; + +// const auto xFirstElem = x.t(0); + +// T coeff, normX; + +// if(tailXnorm <= DataTypeUtils::min()) { + +// normX = xFirstElem; +// coeff = 0.f; +// if(xLen > 1) +// w({1,-1, 0,0}) = 0.f; +// } +// else { + +// normX = math::nd4j_sqrt(xFirstElem*xFirstElem + tailXnorm); + +// if(xFirstElem >= (T)0.f) +// normX = -normX; // choose opposite sign to lessen roundoff error + +// coeff = (normX - xFirstElem) / normX; + +// if(xLen > 1) +// w({1,-1, 0,0}).assign(xTail / (xFirstElem - normX)); +// } + +// w.t(0) = (T)1; + +// NDArray identity(x.ordering(), {xLen, xLen}, x.dataType(), x.getContext()); +// identity.setIdentity(); // identity matrix + +// return identity - mmul(w, w.transpose()) * coeff; +// } + +////////////////////////////////////////////////////////////////////////// +template <typename T> +void Householder<T>::evalHHmatrixData(const NDArray& x, NDArray& tail, T& coeff, T& normX) { + + // input validation + if(x.rankOf() != 1 && !x.isScalar()) + throw std::runtime_error("ops::helpers::Householder::evalHHmatrixData method: input array must have rank = 1 or to be scalar!"); + + if(!x.isScalar() && x.lengthOf() != tail.lengthOf() + 1) + throw std::runtime_error("ops::helpers::Householder::evalHHmatrixData method: input tail vector must be one element shorter than input x vector!"); + + const auto xLen = x.lengthOf(); + + const NDArray xTail = xLen > 1 ? x({1,-1}) : NDArray(); + + T tailXnorm = xLen > 1 ? xTail.reduceNumber(reduce::SquaredNorm).t(0) : (T)0; + + const auto xFirstElem = x.t(0); + + if(tailXnorm <= DataTypeUtils::min()) { + + normX = xFirstElem; + coeff = (T)0.f; + tail = (T)0.f; + } + else { + + normX = math::nd4j_sqrt(xFirstElem*xFirstElem + tailXnorm); + + if(xFirstElem >= (T)0.f) + normX = -normX; // choose opposite sign to lessen roundoff error + + coeff = (normX - xFirstElem) / normX; + + tail.assign(xTail / (xFirstElem - normX)); + } +} + +////////////////////////////////////////////////////////////////////////// +template <typename T> +void Householder<T>::evalHHmatrixDataI(NDArray& x, T& coeff, T& normX) { + + // input validation + if(x.rankOf() != 1 && !x.isScalar()) + throw std::runtime_error("ops::helpers::Householder::evalHHmatrixDataI method: input array must have rank = 1 or to be scalar!"); + + int rows = (int)x.lengthOf()-1; + int num = 1; + + if(rows == 0) { + rows = 1; + num = 0; + } + + NDArray tail = x({num, -1}); + + evalHHmatrixData(x, tail, coeff, normX); +} + +////////////////////////////////////////////////////////////////////////// +template <typename T> +void Householder<T>::mulLeft(NDArray& matrix, const NDArray& tail, const T coeff) { + + // if(matrix.rankOf() != 2) + // throw "ops::helpers::Householder::mulLeft method: input array must be 2D matrix !"; + + if(matrix.sizeAt(0) == 1 && coeff != (T)0) { + + matrix *= (T) 1.f - coeff; + } + else if(coeff != (T)0.f) { + + NDArray bottomPart = matrix({1,matrix.sizeAt(0), 0,0}, true); + NDArray firstRow = matrix({0,1, 0,0}, true); + + if(tail.isColumnVector()) { + + auto resultingRow = mmul(tail.transpose(), bottomPart); + resultingRow += firstRow; + resultingRow *= coeff; + firstRow -= resultingRow; + bottomPart -= mmul(tail, resultingRow); + } + else { + + auto resultingRow = mmul(tail, bottomPart); + resultingRow += firstRow; + resultingRow *= coeff; + firstRow -= resultingRow; + bottomPart -= mmul(tail.transpose(), resultingRow); + } + } +} + + +////////////////////////////////////////////////////////////////////////// +template <typename T> +void Householder<T>::mulRight(NDArray& matrix,
const NDArray& tail, const T coeff) { + + // if(matrix.rankOf() != 2) + // throw "ops::helpers::Householder::mulRight method: input array must be 2D matrix !"; + + if(matrix.sizeAt(1) == 1 && coeff != (T)0) { + matrix *= (T)1.f - coeff; + } + else if(coeff != (T)0.f) { + + NDArray rightPart = matrix({0,0, 1,matrix.sizeAt(1)}, true); + NDArray firstCol = matrix({0,0, 0,1}, true); + + if(tail.isColumnVector()) { + + auto resultingCol = mmul(rightPart, tail); + resultingCol += firstCol; + resultingCol *= coeff; + firstCol -= resultingCol; + rightPart -= mmul(resultingCol, tail.transpose()); + } + else { + + auto resultingCol = mmul(rightPart, tail.transpose()); + resultingCol += firstCol; + resultingCol *= coeff; + firstCol -= resultingCol; + rightPart -= mmul(resultingCol, tail); + } + } +} + + +template class ND4J_EXPORT Householder<float>; +template class ND4J_EXPORT Householder<float16>; +template class ND4J_EXPORT Householder<bfloat16>; +template class ND4J_EXPORT Householder<double>; + + + + + + + +} +} +} diff --git a/libnd4j/include/helpers/cpu/jacobiSVD.cpp b/libnd4j/include/helpers/impl/jacobiSVD.cpp similarity index 58% rename from libnd4j/include/helpers/cpu/jacobiSVD.cpp rename to libnd4j/include/helpers/impl/jacobiSVD.cpp index 372a2a409..7fbf183b2 100644 --- a/libnd4j/include/helpers/cpu/jacobiSVD.cpp +++ b/libnd4j/include/helpers/impl/jacobiSVD.cpp @@ -20,8 +20,7 @@ #include #include -#include - +#include namespace sd { namespace ops { namespace helpers { @@ -43,27 +42,27 @@ JacobiSVD<T>::JacobiSVD(const NDArray& matrix, const bool calcU, const bool calc _calcV = calcV; _fullUV = fullUV; - _s = NDArrayFactory::create(matrix.ordering(), {_diagSize, 1}, matrix.dataType(), matrix.getContext()); + _s = NDArray(matrix.ordering(), {_diagSize, 1}, matrix.dataType(), matrix.getContext()); if(_calcU) { if(_fullUV) - _u = NDArrayFactory::create(matrix.ordering(), {_rows, _rows}, matrix.dataType(), matrix.getContext()); + _u = NDArray(matrix.ordering(), {_rows, _rows}, matrix.dataType(), matrix.getContext()); else - _u = NDArrayFactory::create(matrix.ordering(), {_rows, _diagSize}, matrix.dataType(), matrix.getContext()); + _u = NDArray(matrix.ordering(), {_rows, _diagSize}, matrix.dataType(), matrix.getContext()); } else - _u = NDArrayFactory::create(matrix.ordering(), {_rows, 1}, matrix.dataType(), matrix.getContext()); + _u = NDArray(matrix.ordering(), {_rows, 1}, matrix.dataType(), matrix.getContext()); if(_calcV) { if(_fullUV) - _v = NDArrayFactory::create(matrix.ordering(), {_cols, _cols}, matrix.dataType(), matrix.getContext()); + _v = NDArray(matrix.ordering(), {_cols, _cols}, matrix.dataType(), matrix.getContext()); else - _v = NDArrayFactory::create(matrix.ordering(), {_cols, _diagSize}, matrix.dataType(), matrix.getContext()); + _v = NDArray(matrix.ordering(), {_cols, _diagSize}, matrix.dataType(), matrix.getContext()); } else - _v = NDArrayFactory::create(matrix.ordering(), {_cols, 1}, matrix.dataType(), matrix.getContext()); + _v = NDArray(matrix.ordering(), {_cols, 1}, matrix.dataType(), matrix.getContext()); - _m = NDArrayFactory::create(matrix.ordering(), {_diagSize, _diagSize}, matrix.dataType(), matrix.getContext()); + _m = NDArray(matrix.ordering(), {_diagSize, _diagSize}, matrix.dataType(), matrix.getContext()); evalData(matrix); } @@ -77,16 +76,19 @@ void JacobiSVD<T>::mulRotationOnLeft(const int i, const int j, NDArray& block, c if(j+1 > block.sizeAt(0)) throw std::runtime_error("ops::helpers::JacobiSVD mulRotationOnLeft: second argument is out of array row range !"); - auto pTemp = block({i,j+1,j-i, 0,0,0}, true, true); - auto
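As a sanity check of the reflector formulas in evalHHmatrixData above, here is a minimal standalone sketch in plain C++ (std::vector instead of NDArray; the helper name and the hard-coded tolerance are illustrative stand-ins, not library API):

    #include <cmath>
    #include <cstdio>
    #include <vector>

    // Fills 'tail', 'coeff', 'normX' so that with v = [1, tail...] the
    // reflector H = I - coeff * v * v^T maps x onto [normX, 0, ..., 0]^T.
    static void householder(const std::vector<double>& x, std::vector<double>& tail,
                            double& coeff, double& normX) {
        const double x0 = x[0];
        double tailSq = 0.0;
        for (std::size_t i = 1; i < x.size(); ++i) tailSq += x[i] * x[i];
        tail.assign(x.size() - 1, 0.0);
        if (tailSq <= 1e-300) {          // tail numerically zero: nothing to reflect
            normX = x0;
            coeff = 0.0;
            return;
        }
        normX = std::sqrt(x0 * x0 + tailSq);
        if (x0 >= 0.0) normX = -normX;   // opposite sign lessens roundoff error
        coeff = (normX - x0) / normX;
        for (std::size_t i = 1; i < x.size(); ++i) tail[i - 1] = x[i] / (x0 - normX);
    }

    int main() {
        std::vector<double> x{3.0, 4.0, 0.0}, tail;
        double coeff, normX;
        householder(x, tail, coeff, normX);
        // Apply H = I - coeff * v * v^T with v = [1, tail...] and print H x.
        std::vector<double> v{1.0, tail[0], tail[1]};
        double vTx = 0.0;
        for (std::size_t i = 0; i < x.size(); ++i) vTx += v[i] * x[i];
        for (std::size_t i = 0; i < x.size(); ++i) x[i] -= coeff * v[i] * vTx;
        std::printf("normX=%g  Hx=[%g %g %g]\n", normX, x[0], x[1], x[2]);
        return 0;
    }

For x = (3, 4, 0) this prints normX = -5 and Hx = [-5, 0, 0]: the reflector annihilates the tail as intended.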
temp = pTemp; - pTemp.assign(mmul(rotation, temp)); + auto temp = block({i,j+1,j-i, 0,0,0}, true, true); + temp.assign(mmul(rotation, temp)); + + // auto pTemp = block({i,j+1,j-i, 0,0,0}, true, true); + // auto temp = pTemp.dup(); + // pTemp.assign(mmul(rotation, temp)); } else { if(j+1 > block.sizeAt(0) || i+1 > block.sizeAt(0)) throw std::runtime_error("ops::helpers::JacobiSVD mulRotationOnLeft: some or both integer arguments are out of array row range !"); - auto temp = NDArrayFactory::create(block.ordering(), {2, block.sizeAt(1)}, block.dataType(), block.getContext()); + NDArray temp(block.ordering(), {2, block.sizeAt(1)}, block.dataType(), block.getContext()); auto row1 = block({i,i+1, 0,0}, true); auto row2 = block({j,j+1, 0,0}, true); auto rowTemp1 = temp({0,1, 0,0}, true); @@ -108,16 +110,19 @@ void JacobiSVD::mulRotationOnRight(const int i, const int j, NDArray& block, if(j+1 > block.sizeAt(1)) throw std::runtime_error("ops::helpers::JacobiSVD mulRotationOnRight: second argument is out of array column range !"); - auto pTemp = block({0,0,0, i,j+1,j-i}, true, true); - auto temp = pTemp; - pTemp.assign(mmul(temp, rotation)); + auto temp = block({0,0,0, i,j+1,j-i}, true, true); + temp.assign(mmul(temp, rotation)); + + // auto pTemp = block({0,0,0, i,j+1,j-i}, true, true); + // auto temp = pTemp.dup(); + // pTemp.assign(mmul(temp, rotation)); } else { if(j+1 > block.sizeAt(1) || i+1 > block.sizeAt(1)) throw std::runtime_error("ops::helpers::JacobiSVD mulRotationOnRight: some or both integer arguments are out of array column range !"); - auto temp = NDArrayFactory::create(block.ordering(), {block.sizeAt(0), 2}, block.dataType(), block.getContext()); + NDArray temp(block.ordering(), {block.sizeAt(0), 2}, block.dataType(), block.getContext()); auto col1 = block({0,0, i,i+1}, true); auto col2 = block({0,0, j,j+1}, true); auto colTemp1 = temp({0,0, 0,1}, true); @@ -134,123 +139,148 @@ void JacobiSVD::mulRotationOnRight(const int i, const int j, NDArray& block, template bool JacobiSVD::isBlock2x2NotDiag(NDArray& block, int p, int q, T& maxElem) { - auto rotation = NDArrayFactory::create(_m.ordering(), {2, 2}, _m.dataType(), _m.getContext()); - T n = math::nd4j_sqrt(block.e(p,p) * block.e(p,p) + block.e(q,p) * block.e(q,p)); + NDArray rotation(_m.ordering(), {2, 2}, _m.dataType(), _m.getContext()); + + T n = math::nd4j_sqrt(block.t(p, p) * block.t(p, p) + block.t(q, p)*block.t(q, p)); const T almostZero = DataTypeUtils::min(); const T precision = DataTypeUtils::eps(); if(n == (T)0.f) { - block.p(p, p, 0.f); - block.p(q, p, 0.f); + block.r(p, p) = (T)0; + block.r(q, p) = (T)0; } else { - T v = block.e(p, p) / n; + T v = block.t(p, p) / n; - rotation.p(0, 0, v); - rotation.p(1,1, v); + rotation.r(0,0) = rotation.r(1,1) = v; - v = block.e(q,p) / n; - rotation.p(0, 1, v); + v = block.t(q, p) / n; + rotation.r(0,1) = v; - rotation.p(1,0, -rotation.template e(0, 1)); + rotation.r(1,0) = -rotation.template t(0,1); mulRotationOnLeft(p, q, block, rotation); - if(_calcU) { - auto temp2 = rotation.transpose(); - mulRotationOnRight(p, q, _u, temp2); - } + if(_calcU) + mulRotationOnRight(p, q, _u, rotation.transpose()); } - maxElem = math::nd4j_max(maxElem, math::nd4j_max(math::nd4j_abs(block.e(p,p)), math::nd4j_abs(block.e(q,q)))); + maxElem = math::nd4j_max(maxElem, math::nd4j_max(math::nd4j_abs(block.t(p, p)), math::nd4j_abs(block.t(q, q)))); T threshold = math::nd4j_max(almostZero, precision * maxElem); - const bool condition1 = math::nd4j_abs(block.e(p,q)) > threshold; - const bool condition2 = 
math::nd4j_abs(block.e(q,p)) > threshold; - return condition1 || condition2; + return math::nd4j_abs(block.t(p, q)) > threshold || math::nd4j_abs(block.t(q, p)) > threshold; } ////////////////////////////////////////////////////////////////////////// template bool JacobiSVD::createJacobiRotation(const T& x, const T& y, const T& z, NDArray& rotation) { - T denom = 2.* math::nd4j_abs(y); + T denom = (T)(2.f)* math::nd4j_abs(y); if(denom < DataTypeUtils::min()) { - rotation.p(0,0, 1.f); - rotation.p(1,1, 1.f); - rotation.p(0,1, 0.f); - rotation.p(1,0, 0.f); + rotation.r(0,0) = rotation.r(1,1) = (T)1.f; + rotation.r(0,1) = rotation.r(1,0) = (T)0.f; + return false; } else { T tau = (x-z)/denom; - T w = math::nd4j_sqrt(tau*tau + 1.); + T w = math::nd4j_sqrt(tau*tau + (T)1.f); T t; if(tau > (T)0.) - t = 1. / (tau + w); + t = (T)1.f / (tau + w); else - t = 1. / (tau - w); + t = (T)1.f / (tau - w); - T sign = t > (T)0. ? 1. : -1.; - T n = 1. / math::nd4j_sqrt(t*t + 1.f); - rotation.p(0,0, n); - rotation.p(1,1, n); + T sign = t > (T)0. ? (T)1.f : (T)-1.f; - rotation.p(0,1, -sign * (y / math::nd4j_abs(y)) * math::nd4j_abs(t) * n); - rotation.p(1,0, -rotation.e(0,1)); + T cos = (T)1.f / math::nd4j_sqrt(t*t + (T)1.f); + T sin = -sign * (y / math::nd4j_abs(y)) * math::nd4j_abs(t) * cos; + + rotation.r(0,1) = sin; + rotation.r(1,0) = -sin; + rotation.r(0,0) = rotation.r(1,1) = cos; return true; } } + +////////////////////////////////////////////////////////////////////////// +template +void JacobiSVD::createJacobiRotationGivens(const T& p, const T& q, NDArray& rotation) { + + T cos, sin; + + if(q == (T)0) { + + cos = p < (T)0 ? (T)-1 : (T)1; + sin = (T)0; + } + else if(p == (T)0) { + + cos = (T)0; + sin = q < (T)0 ? (T)1 : (T)-1; + } + else if(math::nd4j_abs(p) > math::nd4j_abs(q)) { + + T t = q / p; + T u = math::nd4j_sqrt((T)1 + t*t); + if(p < (T)0) + u = -u; + cos = (T)1 / u; + sin = -t * cos; + } + else { + T t = p / q; + T u = math::nd4j_sqrt((T)1 + t*t); + if(q < (T)0) + u = -u; + sin = -(T)1 / u; + cos = -t * sin; + } + + rotation.r(0,1) = sin; + rotation.r(1,0) = -sin; + rotation.r(0,0) = rotation.r(1,1) = cos; +} + + ////////////////////////////////////////////////////////////////////////// template void JacobiSVD::svd2x2(const NDArray& block, int p, int q, NDArray& left, NDArray& right) { - auto m = NDArrayFactory::create(block.ordering(), {2, 2}, block.dataType(), block.getContext()); - m.p(0,0, block.e(p,p)); - m.p(0,1, block.e(p,q)); - m.p(1,0, block.e(q,p)); - m.p(1,1, block.e(q,q)); + NDArray m(block.ordering(), {2, 2}, block.dataType(), block.getContext()); + m.r(0,0) = block.t(p,p); + m.r(0,1) = block.t(p,q); + m.r(1,0) = block.t(q,p); + m.r(1,1) = block.t(q,q); - auto rotation = NDArrayFactory::create(block.ordering(), {2, 2}, block.dataType(), block.getContext()); - T t = m.e(0,0) + m.e(1,1); - T d = m.e(1,0) - m.e(0,1); + NDArray rotation(block.ordering(), {2, 2}, block.dataType(), block.getContext()); + T t = m.t(0,0) + m.t(1,1); + T d = m.t(1,0) - m.t(0,1); if(math::nd4j_abs(d) < DataTypeUtils::min()) { - rotation.p(0,0, 1.f); - rotation.p(1,1, 1.f); - rotation.p(0,1, 0.f); - rotation.p(1,0, 0.f); + rotation.r(0,0) = rotation.r(1,1) = (T)1; + rotation.r(0,1) = rotation.r(1,0) = (T)0; } else { T u = t / d; - T tmp = math::nd4j_sqrt(1. 
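createJacobiRotation above is the textbook symmetric 2x2 Jacobi (Schur) rotation. For the block [[x, y], [y, z]] it selects, in the code's own notation,

\[ \tau = \frac{x - z}{2|y|}, \qquad t = \frac{\operatorname{sign}(\tau)}{|\tau| + \sqrt{1 + \tau^{2}}}, \qquad c = \frac{1}{\sqrt{1 + t^{2}}}, \qquad s = -\frac{y}{|y|}\, t\, c, \]

taking the smaller-magnitude root t of t² + 2τt − 1 = 0, which keeps the rotation angle below π/4 and is what makes the sweep numerically stable. The new createJacobiRotationGivens is the companion plain Givens rotation, zeroing the second component of (p, q)ᵀ.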
+ u*u); - rotation.p(0,0, u / tmp); - rotation.p(1,1, u / tmp); - rotation.p(0,1, 1.f / tmp); - rotation.p(1,0, -rotation.e(0,1)); + T tmp = math::nd4j_sqrt((T)1.f + u*u); + rotation.r(0,0) = rotation.r(1,1) = u / tmp; + rotation.r(0,1) = (T)1.f / tmp; + rotation.r(1,0) = -rotation.t(0,1); } m.assign(mmul(rotation, m)); - auto _x = m.e(0,0); - auto _y = m.e(0,1); - auto _z = m.e(1,1); + createJacobiRotation(m.t(0,0), m.t(0,1), m.t(1,1), right); - createJacobiRotation(_x, _y, _z, right); - - m.p(0, 0, _x); - m.p(0, 1, _y); - m.p(1, 1, _z); - - auto temp = right.transpose(); - left.assign(mmul(rotation, temp)); + left.assign(mmul(rotation, right.transpose())); } @@ -261,7 +291,7 @@ void JacobiSVD::evalData(const NDArray& matrix) { const T precision = (T)2.f * DataTypeUtils::eps(); const T almostZero = DataTypeUtils::min(); - T scale = matrix.reduceNumber(reduce::AMax).e(0); + T scale = matrix.reduceNumber(reduce::AMax).template t(0); if(scale== (T)0.f) scale = (T)1.f; @@ -285,13 +315,12 @@ void JacobiSVD::evalData(const NDArray& matrix) { } else if(_rows < _cols) { - auto matrixT = matrix.transpose(); - HHcolPivQR qr(matrixT / scale); + HHcolPivQR qr(matrix.transpose() / scale); _m.assign(qr._qr({0,_rows, 0,_rows})); _m.fillAsTriangular(0., 0, 0, _m, 'l'); _m.transposei(); - HHsequence hhSeg(qr._qr, qr._coeffs, 'u'); // type = 'u' is not mistake here ! + HHsequence hhSeg(qr._qr, qr._coeffs, 'u'); // type = 'u' is not mistake here ! if(_fullUV) hhSeg.applyTo(_v); @@ -305,7 +334,7 @@ void JacobiSVD::evalData(const NDArray& matrix) { } else { - _m.assign(static_cast(matrix({0,_diagSize, 0,_diagSize})) / scale); + _m.assign(matrix({0,_diagSize, 0,_diagSize}) / scale); if(_calcU) _u.setIdentity(); @@ -316,7 +345,7 @@ void JacobiSVD::evalData(const NDArray& matrix) { T maxDiagElem = 0.; for(int i = 0; i < _diagSize; ++i) { - T current = math::nd4j_abs(_m.e(i,i)); + T current = math::nd4j_abs(_m.t(i,i)); if(maxDiagElem < current ) maxDiagElem = current; } @@ -333,29 +362,27 @@ void JacobiSVD::evalData(const NDArray& matrix) { T threshold = math::nd4j_max(almostZero, precision * maxDiagElem); - if(math::nd4j_abs(_m.e(p,q)) > threshold || math::nd4j_abs(_m.e(q,p)) > threshold){ + if(math::nd4j_abs(_m.t(p,q)) > threshold || math::nd4j_abs(_m.t(q,p)) > threshold){ stop = false; // if(isBlock2x2NotDiag(_m, p, q, maxDiagElem)) { - auto rotLeft = NDArrayFactory::create(_m.ordering(), {2, 2}, _m.dataType(), _m.getContext()); - auto rotRight = NDArrayFactory::create(_m.ordering(), {2, 2}, _m.dataType(), _m.getContext()); + NDArray rotLeft(_m.ordering(), {2, 2}, _m.dataType(), _m.getContext()); + NDArray rotRight(_m.ordering(), {2, 2}, _m.dataType(), _m.getContext()); svd2x2(_m, p, q, rotLeft, rotRight); mulRotationOnLeft(p, q, _m, rotLeft); - if(_calcU) { - auto temp = rotLeft.transpose(); - mulRotationOnRight(p, q, _u, temp); - } + if(_calcU) + mulRotationOnRight(p, q, _u, rotLeft.transpose()); mulRotationOnRight(p, q, _m, rotRight); if(_calcV) mulRotationOnRight(p, q, _v, rotRight); - maxDiagElem = math::nd4j_max(maxDiagElem, math::nd4j_max(math::nd4j_abs(_m.e(p,p)), math::nd4j_abs(_m.e(q,q)))); + maxDiagElem = math::nd4j_max(maxDiagElem, math::nd4j_max(math::nd4j_abs(_m.t(p,p)), math::nd4j_abs(_m.t(q,q)))); } } } @@ -363,8 +390,10 @@ void JacobiSVD::evalData(const NDArray& matrix) { } for(int i = 0; i < _diagSize; ++i) { - _s.p(i, math::nd4j_abs(_m.e(i,i))); - if(_calcU && _m.e(i,i) < (T)0.) { + + _s.r(i) = math::nd4j_abs(_m.t(i,i)); + + if(_calcU && _m.t(i,i) < (T)0.) 
{ auto temp = _u({0,0, i,i+1}, true); temp.applyTransform(transform::Neg, temp, nullptr); } @@ -375,7 +404,7 @@ void JacobiSVD::evalData(const NDArray& matrix) { for(int i = 0; i < _diagSize; i++) { int pos = (_s({i,-1, 0,0}).indexReduceNumber(indexreduce::IndexMax, nullptr)).template e(0); - T maxSingVal = _s({i,-1, 0,0}).reduceNumber(reduce::Max).template e(0); + T maxSingVal = _s({i,-1, 0,0}).reduceNumber(reduce::Max).template t(0); if(maxSingVal == (T)0.) break; @@ -384,34 +413,24 @@ void JacobiSVD::evalData(const NDArray& matrix) { pos += i; - T _e0 = _s.e(i); - T _e1 = _s.e(pos); - _s.p(pos, _e0); - _s.p(i, _e1); - //math::nd4j_swap(_s(i), _s(pos)); + math::nd4j_swap(_s.r(i), _s.r(pos)); if(_calcU) { auto temp1 = _u({0,0, pos,pos+1}, true); auto temp2 = _u({0,0, i,i+1}, true); - auto temp3 = temp1; - temp1.assign(temp2); - temp2.assign(temp3); + temp1.swapUnsafe(temp2); } if(_calcV) { auto temp1 = _v({0,0, pos, pos+1}, true); auto temp2 = _v({0,0, i, i+1}, true); - auto temp3 = temp1; - temp1.assign(temp2); - temp2.assign(temp3); + temp1.swapUnsafe(temp2); } } } } - - template class ND4J_EXPORT JacobiSVD; template class ND4J_EXPORT JacobiSVD; template class ND4J_EXPORT JacobiSVD; diff --git a/libnd4j/include/helpers/jacobiSVD.h b/libnd4j/include/helpers/jacobiSVD.h index f6f161bbb..615811e9a 100644 --- a/libnd4j/include/helpers/jacobiSVD.h +++ b/libnd4j/include/helpers/jacobiSVD.h @@ -31,13 +31,13 @@ namespace helpers { template class JacobiSVD { - public: + public: NDArray _m; NDArray _s; // vector with singular values NDArray _u; NDArray _v; - + int _diagSize; int _rows; int _cols; @@ -52,7 +52,8 @@ class JacobiSVD { bool isBlock2x2NotDiag(NDArray& block, int p, int q, T& maxElem); static bool createJacobiRotation(const T& x, const T& y, const T& z, NDArray& rotation); - + static void createJacobiRotationGivens(const T& p, const T& q, NDArray& rotation); + static void svd2x2(const NDArray& block, int p, int q, NDArray& left, NDArray& right); static void mulRotationOnLeft(const int i, const int j, NDArray& block, const NDArray& rotation); diff --git a/libnd4j/include/helpers/shape.h b/libnd4j/include/helpers/shape.h index 8cde62ea1..65cf29b66 100644 --- a/libnd4j/include/helpers/shape.h +++ b/libnd4j/include/helpers/shape.h @@ -528,7 +528,7 @@ namespace shape { * Returns the element wise stride for this information * buffer */ - ND4J_EXPORT _CUDA_HD Nd4jLong elementWiseStride(const Nd4jLong *buffer); + ND4J_EXPORT _CUDA_HD Nd4jLong elementWiseStride(const Nd4jLong *shapeInfo); /** diff --git a/libnd4j/include/loops/cuda/specials/swapUnsafeKernel.cu b/libnd4j/include/loops/cuda/specials/swapUnsafeKernel.cu index 334584fab..6d2bcadf5 100644 --- a/libnd4j/include/loops/cuda/specials/swapUnsafeKernel.cu +++ b/libnd4j/include/loops/cuda/specials/swapUnsafeKernel.cu @@ -31,23 +31,37 @@ namespace sd { auto tid = blockIdx.x * blockDim.x + threadIdx.x; int totalThreads = gridDim.x * blockDim.x; - __shared__ Nd4jLong resultLength; + __shared__ Nd4jLong resultLength, xEws, yEws; + __shared__ bool sameOffsets, sameOrders; __shared__ T* input; __shared__ T* output; + if (0 == threadIdx.x) { resultLength = shape::length(theFirstShape); input = reinterpret_cast(theSecondBuffer); output = reinterpret_cast(theFirstBuffer); + + sameOffsets = shape::haveSameShapeAndStrides(theFirstShape, theSecondShape); + sameOrders = shape::order(theFirstShape) == shape::order(theSecondShape); + + xEws = shape::elementWiseStride(theFirstShape); + yEws = shape::elementWiseStride(theSecondShape); } __syncthreads(); 
for (int i = tid; i < resultLength; i += totalThreads) { - auto xEws = shape::order(theFirstShape) == 'c'? shape::elementWiseStride(theFirstShape) :1; - auto yEws = shape::order(theSecondShape) == 'c'? shape::elementWiseStride(theSecondShape):1; - - auto xOffset = shape::getIndexOffset(i * xEws, theFirstShape); - auto yOffset = shape::getIndexOffset(i * yEws, theSecondShape); - sd::math::nd4j_swap(output[xOffset], input[yOffset]); + if(sameOrders && xEws > 0 && yEws > 0) { + sd::math::nd4j_swap(output[i*xEws], input[i*yEws]); + } + else if(sameOffsets) { + const auto offset = shape::getIndexOffset(i, theFirstShape); + sd::math::nd4j_swap(output[offset], input[offset]); + } + else{ + const auto xOffset = shape::getIndexOffset(i, theFirstShape); + const auto yOffset = shape::getIndexOffset(i, theSecondShape); + sd::math::nd4j_swap(output[xOffset], input[yOffset]); + } } } diff --git a/libnd4j/include/ops/declarable/generic/linalg/sqrtm.cpp b/libnd4j/include/ops/declarable/generic/linalg/sqrtm.cpp new file mode 100644 index 000000000..37472008d --- /dev/null +++ b/libnd4j/include/ops/declarable/generic/linalg/sqrtm.cpp @@ -0,0 +1,53 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
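The rewritten swapUnsafeKernel above computes the shape predicates and element-wise strides once per block (thread 0, shared memory) instead of per element, and it also stops feeding i*ews into getIndexOffset, which conflated linear indices with buffer offsets. A CPU-side sketch of the same three-path dispatch (illustrative names, not library API):

    #include <algorithm>
    #include <cstdint>

    // Three-path element swap mirroring the kernel's dispatch:
    // 1) both buffers expressible via a constant element-wise stride (ews),
    // 2) identical shape + strides, so one offset computation serves both,
    // 3) fully general: one offset computation per buffer.
    template <typename T, typename OffsetFn>
    void swapBuffers(T* x, int64_t xEws, T* y, int64_t yEws,
                     bool sameOrders, bool sameOffsets, int64_t len,
                     OffsetFn xOffset, OffsetFn yOffset) {
        for (int64_t i = 0; i < len; ++i) {
            if (sameOrders && xEws > 0 && yEws > 0)
                std::swap(x[i * xEws], y[i * yEws]);      // fast path
            else if (sameOffsets)
                std::swap(x[xOffset(i)], y[xOffset(i)]);  // one offset for both
            else
                std::swap(x[xOffset(i)], y[yOffset(i)]);  // general path
        }
    }

    int main() {
        double a[4] = {1, 2, 3, 4}, b[4] = {5, 6, 7, 8};
        auto identity = [](int64_t i) { return i; };      // contiguous buffers
        swapBuffers(a, 1, b, 1, true, true, 4, identity, identity);
        return 0;
    }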
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author Yurii Shyrma (iuriish@yahoo.com) +// + +#include +#if NOT_EXCLUDED(OP_sqrtm) +#include +#include + + +namespace sd { +namespace ops { + +CONFIGURABLE_OP_IMPL(sqrtm, 1, 1, false, 0, 0) { + + auto input = INPUT_VARIABLE(0); + auto output = OUTPUT_VARIABLE(0); + + REQUIRE_TRUE(input->rankOf() > 1, 0, "CONFIGURABLE_OP sqrtm: input array rank is required to be > 1, but got %i instead !", input->rankOf()); + REQUIRE_TRUE(input->sizeAt(-2) == input->sizeAt(-1), 0, "CONFIGURABLE_OP sqrtm: last two dimensions of input array should be square matrices, but got such wrong shape instead: %s!", ShapeUtils::shapeAsString(input).c_str()); + + helpers::sqrtm(block.launchContext(), input, output); + + return Status::OK(); +} + +////////////////////////////////////////////////////////////////////////// +DECLARE_TYPES(sqrtm) { + getOpDescriptor()->setAllowedInputTypes(sd::DataType::ANY)->setAllowedOutputTypes({ALL_FLOATS}); +} + + + +} +} + +#endif \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/generic/blas/svd.cpp b/libnd4j/include/ops/declarable/generic/linalg/svd.cpp similarity index 100% rename from libnd4j/include/ops/declarable/generic/blas/svd.cpp rename to libnd4j/include/ops/declarable/generic/linalg/svd.cpp diff --git a/libnd4j/include/ops/declarable/generic/linalg/triangular_solve.cpp b/libnd4j/include/ops/declarable/generic/linalg/triangular_solve.cpp index c9d23753c..49ec1e135 100644 --- a/libnd4j/include/ops/declarable/generic/linalg/triangular_solve.cpp +++ b/libnd4j/include/ops/declarable/generic/linalg/triangular_solve.cpp @@ -55,13 +55,13 @@ namespace sd { isLower = !isLower; }; - auto res = helpers::triangularSolveFunctor(block.launchContext(), input, b, isLower, useAdjoint, z); + auto res = helpers::triangularSolveFunctor(block.launchContext(), input, b, isLower, false, z); if (input != a) delete input; return Status::OK(); } - + DECLARE_SHAPE_FN(triangular_solve) { auto in0 = inputShape->at(1); auto in1 = inputShape->at(1); diff --git a/libnd4j/include/ops/declarable/headers/blas.h b/libnd4j/include/ops/declarable/headers/blas.h index 09215e113..6fd5a3894 100644 --- a/libnd4j/include/ops/declarable/headers/blas.h +++ b/libnd4j/include/ops/declarable/headers/blas.h @@ -24,7 +24,7 @@ namespace sd { namespace ops { - + /** * This op is a general matmul implementation. Depending on inputs dimensionality output result might be different. * matrix x matrix = BLAS gemm * @@ -75,11 +75,11 @@ * alpha: vector of T * beta: vector of T * ...: A, B matrices sequentially. i.e: AAAAABBBBB - * + * * Integer arguments: * transA, transB, M, N, K, ldA, ldB, ldC - usual BLAS gemm arguments * batchCount - number of operations in this batch - * + * * PLEASE NOTE: M, N, K, ldA, ldB, ldC should be equal for all matrices within batch.
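A minimal smoke test for the new sqrtm op, sketched in the idiom of the libnd4j test suite (the evaluate/ResultSet conventions are assumed from that harness, and the diagonal input is chosen so the expected root is obvious):

    #include <ops/declarable/CustomOperations.h>

    // Sketch only: [[4,0],[0,9]] has the principal square root [[2,0],[0,3]],
    // so the op's output z should satisfy z * z == x up to rounding.
    void sqrtmSmokeTest() {
        auto x   = sd::NDArrayFactory::create<double>('c', {2, 2}, {4., 0., 0., 9.});
        auto exp = sd::NDArrayFactory::create<double>('c', {2, 2}, {2., 0., 0., 3.});

        sd::ops::sqrtm op;
        auto results = op.evaluate({&x}, {}, {});
        // results.at(0) is expected to equal 'exp' here.
    }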
*/ #if NOT_EXCLUDED(OP_batched_gemm) @@ -88,25 +88,39 @@ /** * performs singular value decomposition (SVD) of one or more matrices, evaluates the SVD of each inner-most 2D matrix in input array: - * x[..., :, :] = u[..., :, :] * s[...,:] * transpose(v[..., :, :]) + * x[..., :, :] = u[..., :, :] * s[...,:] * transpose(v[..., :, :]) * * Input array: * x[..., Rows, Cols], the necessary condition is: rank of x >= 2 - * + * * Output arrays: * s[..., diagSize] - array with singular values which are stored in decreasing order, diagSize is the smaller of Rows and Cols * u[..., Rows, Rows] if IArgs[1] is true, else u[..., Rows, diagSize] - array with left singular vectors * v[..., Cols, Cols] if IArgs[1] is true, else v[..., Cols, diagSize] - array with right singular vectors - * + * * Integer arguments: * IArgs[0] - bool, whether to calculate u and v, s is calculated in any case * IArgs[1] - bool, whether to calculate full-sized u and v * IArgs[2] - the number of cols or rows which determines what algorithm to use. More precisely: * if diagSize < IArgs[2] then Jacobi algorithm is used, in opposite case the Divide-And-Conquer is applied - * Recommended value is 16. + * Recommended value is 16. */ #if NOT_EXCLUDED(OP_svd) - DECLARE_CUSTOM_OP(svd, 1, 1, false, 0, 3); + DECLARE_CUSTOM_OP(svd, 1, 1, false, 0, 3); + #endif + + /** + * calculates the square root of a matrix such that + * x[..., M, M] = z[..., M, M] * z[..., M, M] + * + * Input array: + * x[..., M, M], the necessary condition is: rank of x >= 2 and equality of last two dimensions + * + * Output arrays: + * z - same shape as x + */ + #if NOT_EXCLUDED(OP_sqrtm) + DECLARE_CONFIGURABLE_OP(sqrtm, 1, 1, false, 0, 0); #endif } } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/betaInc.cpp b/libnd4j/include/ops/declarable/helpers/cpu/betaInc.cpp index ec06610b8..0056fec6d 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/betaInc.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/betaInc.cpp @@ -118,7 +118,7 @@ static void betaIncForArray(sd::LaunchContext * context, const NDArray& a, const auto func = PRAGMA_THREADS_FOR { for (auto i = start; i < stop; i++) - output.t(i) = betaIncCore(a.t(i), b.t(i), x.t(i)); + output.r(i) = betaIncCore(a.t(i), b.t(i), x.t(i)); }; samediff::Threads::parallel_for(func, 0, xLen); diff --git a/libnd4j/include/ops/declarable/helpers/cpu/extract_patches.cpp b/libnd4j/include/ops/declarable/helpers/cpu/extract_patches.cpp index 15ea569e8..ba04fd9aa 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/extract_patches.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/extract_patches.cpp @@ -73,7 +73,7 @@ namespace helpers { bool setUp = (theSame && row >= 0 && col >= 0 && row < rowDim && col < colDim) || (!theSame); if (setUp) { - outMatrix->t(i, j, pos) = patch->e(row, col, pixel); + outMatrix->r(i, j, pos) = patch->e(row, col, pixel); } pos++; } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/fake_quantization.cpp b/libnd4j/include/ops/declarable/helpers/cpu/fake_quantization.cpp index d2c918da9..7317f8a73 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/fake_quantization.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/fake_quantization.cpp @@ -73,7 +73,7 @@ namespace helpers { else if (val >= nudged_max) val = nudged_max; // quantization itself - output->t(e + i) = math::nd4j_floor((val - nudged_min)/scale + T(0.5)) * scale + nudged_min; + output->r(e + i) = math::nd4j_floor((val - nudged_min)/scale + T(0.5)) * scale + nudged_min; } } } diff --git
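Given the IArgs contract documented above, a typical call looks like this sketch (IArgs = {calcUV, fullUV, switchSize}; test-harness conventions assumed, as in the sqrtm sketch earlier):

    #include <ops/declarable/CustomOperations.h>

    // Sketch: compute s, u, v with economy-size u/v and the recommended
    // Jacobi / Divide-And-Conquer switch point of 16.
    void svdUsageSketch() {
        auto x = sd::NDArrayFactory::create<float>('c', {4, 3});
        x.linspace(1.f);
        sd::ops::svd op;
        auto results = op.evaluate({&x}, {}, {1, 0, 16});  // IArgs: calcUV, fullUV, switch
        // results.at(0) -> s [3], results.at(1) -> u [4,3], results.at(2) -> v [3,3]
    }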
a/libnd4j/include/ops/declarable/helpers/cpu/image_resize.cpp b/libnd4j/include/ops/declarable/helpers/cpu/image_resize.cpp index 2f0f00779..68b2130ac 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/image_resize.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/image_resize.cpp @@ -318,7 +318,7 @@ namespace helpers { } // copy pixel over all channels for (Nd4jLong e = 0; e < channels; e++) - output->t(b, y, x, e) = images->t(b, inY, inX, e); + output->r(b, y, x, e) = images->t(b, inY, inX, e); } } } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/lstsq.cpp b/libnd4j/include/ops/declarable/helpers/cpu/lstsq.cpp index 675fb2794..204b05530 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/lstsq.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/lstsq.cpp @@ -40,7 +40,7 @@ namespace helpers { for (auto x = 0; x < lastDims.size(); x++) { for (auto r = 0; r < rows; r++) { - lastDims[x]->t(r,r) = (T)value; + lastDims[x]->r(r,r) = (T)value; } } @@ -71,7 +71,7 @@ namespace helpers { if (err) return err; // alternate moment: inverse lower triangular matrix to solve equation A'x = b' => L^Tx = L^-1 * b' // solve one upper triangular system (to avoid float problems) - + // 5. Solve two triangular systems: auto rightB = rightOutput.ulike(); helpers::triangularSolveFunctor(context, &leftOutput, &rightOutput, true, false, &rightB); diff --git a/libnd4j/include/ops/declarable/helpers/cpu/lup.cpp b/libnd4j/include/ops/declarable/helpers/cpu/lup.cpp index 0f435cfdb..482709455 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/lup.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/lup.cpp @@ -34,7 +34,7 @@ namespace helpers { if (theFirst != theSecond) for (int i = 0; i < matrix->columns(); i++) { - math::nd4j_swap(matrix->t(theFirst, i), matrix->t(theSecond, i)); + math::nd4j_swap(matrix->r(theFirst, i), matrix->r(theSecond, i)); } } BUILD_SINGLE_TEMPLATE(template void swapRows_, (NDArray* matrix, int theFirst, int theSecond), FLOAT_TYPES); @@ -71,12 +71,12 @@ namespace helpers { auto invertDiagonals = PRAGMA_THREADS_FOR { for (int i = start; i < stop; i += increment) - invertedMatrix->t(i, i) /= inputMatrix->t(i, i); + invertedMatrix->r(i, i) /= inputMatrix->t(i, i); }; auto invertSubDiagonals = PRAGMA_THREADS_FOR { for (int i = start; i < stop; i += increment) - invertedMatrix->t(i, i - 1) -= (inputMatrix->t(i, i - 1) * invertedMatrix->t(i - 1, i - 1) / inputMatrix->t(i, i)); + invertedMatrix->r(i, i - 1) -= (inputMatrix->t(i, i - 1) * invertedMatrix->t(i - 1, i - 1) / inputMatrix->t(i, i)); }; samediff::Threads::parallel_for(invertDiagonals, 0, n, 1); @@ -86,7 +86,7 @@ namespace helpers { for (int i = 1; i < n; i++) { for (int j = 0; j < i - 1 ; j++) for (int k = 0; k < i; k++) - invertedMatrix->t(i, j) -= ((invertedMatrix->t(k, j) * inputMatrix->t(i, k) / inputMatrix->t(i, i))); + invertedMatrix->r(i, j) -= ((invertedMatrix->t(k, j) * inputMatrix->t(i, k) / inputMatrix->t(i, i))); } } @@ -108,13 +108,13 @@ namespace helpers { auto invertDiagonals = PRAGMA_THREADS_FOR { for (auto i = start; i < stop; i += increment) - invertedMatrix->t(i, i) /= inputMatrix->t(i, i); + invertedMatrix->r(i, i) /= inputMatrix->t(i, i); }; //PRAGMA_OMP_PARALLEL_FOR_IF(n > Environment::getInstance()->elementwiseThreshold()) auto invertUpDiagonals = PRAGMA_THREADS_FOR { for (auto i = start; i < stop; i += increment) - invertedMatrix->t(i, i + 1) -= (inputMatrix->t(i, i + 1) * invertedMatrix->t(i + 1, i + 1) / + invertedMatrix->r(i, i + 1) -= (inputMatrix->t(i, i + 1) * invertedMatrix->t(i + 1, 
i + 1) / inputMatrix->t(i, i)); }; @@ -125,7 +125,7 @@ namespace helpers { for (auto i = n - 2; i >= 0; i--) { for (auto j = i + 2; j < n; j++) for (auto k = i; k < n; k++) - invertedMatrix->t(i, j) -= ((invertedMatrix->t(k, j) * inputMatrix->t(i, k) / inputMatrix->t(i, i))); + invertedMatrix->r(i, j) -= ((invertedMatrix->t(k, j) * inputMatrix->t(i, k) / inputMatrix->t(i, i))); } } @@ -169,10 +169,10 @@ namespace helpers { swapCount++; for( int j = i + 1; j < rowNum; j++ ) { - compoundMatrix.t(j, i) /= compoundMatrix.t(i, i); + compoundMatrix.r(j, i) /= compoundMatrix.t(i, i); //PRAGMA_OMP_PARALLEL_FOR for( int k = i + 1; k < rowNum; k++ ) { - compoundMatrix.t(j, k) -= compoundMatrix.t(j, i) * compoundMatrix.t(i, k); + compoundMatrix.r(j, k) -= compoundMatrix.t(j, i) * compoundMatrix.t(i, k); } } } @@ -190,7 +190,7 @@ namespace helpers { for (auto i = 0; i < rowNum; i++) { for (auto j = 0; j < columnNum; j++) { if (permutationMatrix.t(i, j) != 0) { - permutaionVector.template t(i) = j; + permutaionVector.template r(i) = j; } } } @@ -268,7 +268,7 @@ namespace helpers { sum += compound->t(i,j) * compound->t(j,k); // Evaluating U(i, k) - compound->t(i, k) = input.t(i, k) - sum; + compound->r(i, k) = input.t(i, k) - sum; } // Lower Triangular @@ -279,7 +279,7 @@ namespace helpers { sum += compound->t(k,j) * compound->t(j, i); // Evaluating L(k, i) - compound->t(k, i) = (input.t(k, i) - sum) / compound->t(i,i); + compound->r(k, i) = (input.t(k, i) - sum) / compound->t(i,i); } } } @@ -412,12 +412,12 @@ template lowerMatrix.setIdentity(); // set up U to identity matrix for (int k = 1; k < n; k++) { // and then put all values under main diagonal on to it for (int j = 0; j < k; j++) - lowerMatrix.template t(k, j) = compound.template t(k, j); + lowerMatrix.template r(k, j) = compound.template t(k, j); } upperMatrix.setIdentity(); // set up U to identity matrix for (int k = 0; k < n; k++) { // and then put all values under main diagonal on to it for (int j = k; j < n; j++) - upperMatrix.template t(k, j) = compound.template e(k, j); + upperMatrix.template r(k, j) = compound.template t(k, j); } invertUpperMatrix(&upperMatrix, &matrix); @@ -426,7 +426,7 @@ template sd::MmulHelper::mmul(&matrix, &upperMatrix, &compound, 1.0, 0.0); sd::MmulHelper::mmul(&compound, &permutation, &matrix, 1.0, 0.0); for (int k = e * n2, row = 0; k < (e + 1) * n2; k++) { - output->t(k) = matrix.template t(row++); + output->r(k) = matrix.template t(row++); } } @@ -470,7 +470,7 @@ template invertLowerMatrix(&matrix, &lowerMatrix); for (int k = e * n2, row = 0; k < (e + 1) * n2; k++) { - output->t(k) = lowerMatrix.template t(row++); + output->r(k) = lowerMatrix.template t(row++); } } @@ -597,7 +597,7 @@ template for (Nd4jLong e = 0; e < totalCount; e++) { for (size_t i = 0; i < n; ++i) - output->t(e) += sd::math::nd4j_log(sd::math::nd4j_pow(matricies.at(e)->t(i, i), T(2))); + output->r(e) += sd::math::nd4j_log(sd::math::nd4j_pow(matricies.at(e)->t(i, i), T(2))); } return ND4J_STATUS_OK; } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/merge.cpp b/libnd4j/include/ops/declarable/helpers/cpu/merge.cpp index d748aa6b0..2a0c5af95 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/merge.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/merge.cpp @@ -47,8 +47,8 @@ static void mergeMaxIndex_(const std::vector& inArrs, NDArray& o idx = static_cast(i); } } - // FIXME, use .r(e) - output.t(e) = static_cast(idx); + + output.r(e) = static_cast(idx); } }; diff --git a/libnd4j/include/ops/declarable/helpers/cpu/random.cpp 
b/libnd4j/include/ops/declarable/helpers/cpu/random.cpp index 1e96211b3..b0e1553e4 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/random.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/random.cpp @@ -68,7 +68,7 @@ namespace helpers { beta != nullptr ? copyBeta->t(e) * u : u); } else { - output->t(pos + e) = math::nd4j_igamma(copyAlpha->t(e), + output->r(pos + e) = math::nd4j_igamma(copyAlpha->t(e), beta != nullptr ? copyBeta->t(e) * u : u); } } @@ -121,7 +121,7 @@ namespace helpers { if (directOut) outputBuf[pos + e] = x; else - output->t(pos + e) = x; + output->r(pos + e) = x; } } } @@ -146,7 +146,7 @@ namespace helpers { else { PRAGMA_OMP_PARALLEL_FOR for (Nd4jLong i = 0; i < output->lengthOf(); i++) { - output->t(i) = rng.relativeT(i, minVal, maxVal); + output->r(i) = rng.relativeT(i, minVal, maxVal); } } } @@ -159,12 +159,12 @@ namespace helpers { // methods: gumbel trick + softmax + argmax template void fillRandomMultiNomial_(LaunchContext* context, graph::RandomGenerator& rng, NDArray& input, NDArray& output, const Nd4jLong numOfSamples, const int dimC) { - + const Tx* x = input.bufferAsT(); Tz* z = output.bufferAsT(); - + Tx minVal = DataTypeUtils::min(); - Tx maxVal = 1.0; + Tx maxVal = 1.0; auto dimA = (0 == dimC) ? 1 : 0; const Nd4jLong batchValue = output.sizeAt(dimC); @@ -178,7 +178,7 @@ namespace helpers { auto func = PRAGMA_THREADS_FOR_2D{ for (auto nBatchIndex = start_x; nBatchIndex < stop_x; nBatchIndex += inc_x) { for (auto nSampleIndexInBatch = start_y; nSampleIndexInBatch < stop_y; nSampleIndexInBatch += inc_y) { - + const Tx* xTad = x + (nBatchIndex * xDimCstride); Tz* zTad = z + (nBatchIndex * zDimCstride); Tz& arg = zTad[nSampleIndexInBatch * zDimAstride]; diff --git a/libnd4j/include/ops/declarable/helpers/cpu/randomShuffle.cpp b/libnd4j/include/ops/declarable/helpers/cpu/randomShuffle.cpp index 2e336da23..a7f40899a 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/randomShuffle.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/randomShuffle.cpp @@ -54,8 +54,8 @@ void randomShuffle_(NDArray& input, NDArray& output, sd::graph::RandomGenerator& T t0 = input.t(i); T t1 = input.t(r); //math::nd4j_swap(input(i), input(r)); - input.t(i) = t1; - input.t(r) = t0; + input.r(i) = t1; + input.r(r) = t0; } } else { @@ -66,11 +66,11 @@ void randomShuffle_(NDArray& input, NDArray& output, sd::graph::RandomGenerator& // FIXME: parallelism!! 
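The recurring t(...) -> r(...) substitutions in these helper hunks all follow one rule: t is the read-only typed accessor and r returns a writable reference, so writes no longer go through a read API. A toy model of the split (illustrative only, not the real NDArray):

    #include <cstdio>
    #include <vector>

    // Toy model of the accessor split behind the t() -> r() renames:
    // t(i) is the read-only typed accessor (usable on const arrays),
    // r(i) hands out a writable reference.
    template <typename T>
    class ToyArray {
        std::vector<T> buf;
    public:
        explicit ToyArray(std::size_t n) : buf(n, T(0)) {}
        T t(std::size_t i) const { return buf[i]; }  // read path
        T& r(std::size_t i) { return buf[i]; }       // write path
    };

    int main() {
        ToyArray<float> a(3);
        a.r(1) = 42.f;                  // writes go through r()
        std::printf("%f\n", a.t(1));    // reads stay on t()
        return 0;
    }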
for(int i = firstDim-1; i > 0; --i) { int r = rng.relativeInt(i) % i; - output.t(i) = input.t(indices[r]); + output.r(i) = input.t(indices[r]); if(i == r) continue; - output.t(r) = input.t(indices[i]); + output.r(r) = input.t(indices[i]); math::nd4j_swap(indices[i], indices[r]); } rng.rewindH(firstDim-1); diff --git a/libnd4j/include/ops/declarable/helpers/cpu/segment.cpp b/libnd4j/include/ops/declarable/helpers/cpu/segment.cpp index e57264e66..50ff79679 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/segment.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/segment.cpp @@ -46,7 +46,7 @@ namespace helpers { idx = indices->e(e); val = input->t(e); } - output->t(idx) = val; + output->r(idx) = val; } } else { @@ -65,7 +65,7 @@ namespace helpers { if (indices->e(i) == idx) { for (Nd4jLong e = 0; e < maxT->lengthOf(); e++) { - maxT->t(e) = sd::math::nd4j_max(maxT->t(e), listOfTensors.at(i)->t(e)); + maxT->r(e) = sd::math::nd4j_max(maxT->t(e), listOfTensors.at(i)->t(e)); } } else { @@ -96,7 +96,7 @@ namespace helpers { idx = indices->e(e); val = input->t(e); } - output->t(idx) = val; + output->r(idx) = val; } } else { @@ -417,7 +417,7 @@ namespace helpers { for (size_t idx = 1; idx < fi->second.size(); ++idx) { val = sd::math::nd4j_min(val, input->t(fi->second.at(idx))); } - output->t(fi->first) = val; + output->r(fi->first) = val; } } else { @@ -436,7 +436,7 @@ namespace helpers { auto minT = listOfTensors.at(fi->second.at(idx)); for (Nd4jLong e = 0; e < outputT->lengthOf(); ++e) { - outputT->t(e) = sd::math::nd4j_min(minT->t(e), outputT->t(e)); + outputT->r(e) = sd::math::nd4j_min(minT->t(e), outputT->t(e)); } } //outputT->assign(maxT); @@ -890,7 +890,7 @@ namespace helpers { for (auto e = start; e < stop; e++) { auto classNum = indices->e(e); if (sd::math::nd4j_abs(tempRes.t(classNum) - input->t(e)) < 1.e-6) - output->t(e) = gradOut->t(classNum); + output->r(e) = gradOut->t(classNum); } }; @@ -913,7 +913,7 @@ namespace helpers { for (Nd4jLong e = 0; e < current->lengthOf(); e++) { if (sd::math::nd4j_abs(listOfBPTensors.at(classNum)->t(e) - current->t(e)) < 1.e-6) - currentOut->t(e) = currentGradOut->t(e); + currentOut->r(e) = currentGradOut->t(e); } } //}; diff --git a/libnd4j/include/ops/declarable/helpers/cpu/sequence_mask.cpp b/libnd4j/include/ops/declarable/helpers/cpu/sequence_mask.cpp index 8e25c4690..3c8ce573e 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/sequence_mask.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/sequence_mask.cpp @@ -31,7 +31,7 @@ namespace helpers { for (auto i = start_x; i < stop_x; i += inc_x) for (auto k = start_y; k < stop_y; k += inc_y) if (i < input->t(k)) - output->t(k * maxIndex + i) = B(true); //, T(1.0f)); + output->r(k * maxIndex + i) = B(true); //, T(1.0f)); }; samediff::Threads::parallel_for(func, 0, maxIndex, 1, 0, input->lengthOf(), 1); diff --git a/libnd4j/include/ops/declarable/helpers/cpu/solve.cpp b/libnd4j/include/ops/declarable/helpers/cpu/solve.cpp index 9a06975aa..a0034bb5d 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/solve.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/solve.cpp @@ -43,7 +43,7 @@ namespace helpers { for (auto batch = start; batch < stop; batch++) { for (Nd4jLong r = 0; r < rows; r++) { for (Nd4jLong c = 0; c < r; c++) { - math::nd4j_swap(outputPart[batch]->t(r, c) , outputPart[batch]->t(c, r)); + math::nd4j_swap(outputPart[batch]->r(r, c) , outputPart[batch]->r(c, r)); } } } @@ -67,7 +67,7 @@ namespace helpers { for (auto batch = 0; batch < permutationsPart.size(); ++batch) { for 
(Nd4jLong row = 0; row < PPart[batch]->rows(); ++row) { - PPart[batch]->t(row, permutationsPart[batch]->t(row)) = T(1.f); + PPart[batch]->r(row, permutationsPart[batch]->t(row)) = T(1.f); } } @@ -78,7 +78,7 @@ namespace helpers { ResultSet leftLowerPart = leftLower.allTensorsAlongDimension({-2, -1}); for (auto i = 0; i < leftLowerPart.size(); i++) { for (Nd4jLong r = 0; r < leftLowerPart[i]->rows(); r++) - leftLowerPart[i]->t(r,r) = (T)1.f; + leftLowerPart[i]->r(r,r) = (T)1.f; } // stage 2: triangularSolveFunctor for Lower with given b helpers::triangularSolveFunctor(context, &leftLower, &rightPermuted, true, false, &rightOutput); diff --git a/libnd4j/include/ops/declarable/helpers/cpu/svd.cpp b/libnd4j/include/ops/declarable/helpers/cpu/svd.cpp index c4f99af3f..6910960ef 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/svd.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/svd.cpp @@ -27,911 +27,6 @@ namespace sd { namespace ops { namespace helpers { - -////////////////////////////////////////////////////////////////////////// -template -SVD::SVD(const NDArray& matrix, const int switchSize, const bool calcU, const bool calcV, const bool fullUV ) { - - if(matrix.rankOf() != 2 || matrix.isScalar()) - throw std::runtime_error("ops::helpers::SVD constructor: input array must be 2D matrix !"); - - const int rows = matrix.sizeAt(0); - const int cols = matrix.sizeAt(1); - - if(cols > rows) { - - _transp = true; - _diagSize = rows; - } - else { - - _transp = false; - _diagSize = cols; - } - - _switchSize = switchSize; - _calcU = calcU; - _calcV = calcV; - _fullUV = fullUV; - - if (_transp) - math::nd4j_swap(_calcU, _calcV); - - _s = NDArrayFactory::create(matrix.ordering(), {_diagSize, 1}, matrix.getContext()); - _m = NDArrayFactory::create(matrix.ordering(), {_diagSize + 1, _diagSize}, matrix.getContext()); - _m.assign(0.); - - if (_calcU) - _u = NDArrayFactory::create(matrix.ordering(), {_diagSize + 1, _diagSize + 1}, matrix.getContext()); - else - _u = NDArrayFactory::create(matrix.ordering(), {2, _diagSize + 1}, matrix.getContext()); - _u.assign(0.); - - if (_calcV) { - _v = NDArrayFactory::create(matrix.ordering(), {_diagSize, _diagSize}, matrix.getContext()); - _v.assign(0.); - } - - evalData(matrix); -} - -////////////////////////////////////////////////////////////////////////// -template -SVD::SVD(const NDArray& matrix, const int switchSize, const bool calcU, const bool calcV, const bool fullUV, const char t) { - - if(matrix.rankOf() != 2 || matrix.isScalar()) - throw std::runtime_error("ops::helpers::SVD constructor: input array must be 2D matrix !"); - - const int rows = matrix.sizeAt(0); - const int cols = matrix.sizeAt(1); - - if(cols > rows) { - - _transp = true; - _diagSize = rows; - } - else { - - _transp = false; - _diagSize = cols; - } - - _switchSize = switchSize; - _calcU = calcU; - _calcV = calcV; - _fullUV = fullUV; - - if (_transp) - math::nd4j_swap(_calcU, _calcV); - - _s = NDArrayFactory::create(matrix.ordering(), {_diagSize, 1}, matrix.getContext()); - _m = NDArrayFactory::create(matrix.ordering(), {_diagSize + 1, _diagSize}, matrix.getContext()); - _m.assign(0.f); - - if (_calcU) - _u = NDArrayFactory::create(matrix.ordering(), {_diagSize + 1, _diagSize + 1}, matrix.getContext()); - else - _u = NDArrayFactory::create(matrix.ordering(), {2, _diagSize + 1}, matrix.getContext()); - _u.assign(0.); - - if (_calcV) { - _v = NDArrayFactory::create(matrix.ordering(), {_diagSize, _diagSize}, matrix.getContext()); - _v.assign(0.); - } -} - - 
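Editor's note on the solve.cpp hunk earlier in this patch: the helper rebuilds the permutation matrix P from the LU pivot vector, forces a unit diagonal onto the lower factor, and then chains two triangular solves. For reference, the standard solve of A*x = b from a packed LU factorization (P*A = L*U) looks like the sketch below; it is illustrative plain C++ under assumed conventions (row-major storage, LAPACK-style packed LU), not the libnd4j API. The unit diagonal of L is why the forward pass needs no division, which is exactly what the new unitsOnDiag flag expresses.

    #include <cstddef>
    #include <vector>

    // Solve A*x = b given packed LU (L strictly below the diagonal with an implicit
    // unit diagonal, U on and above it) and pivot permutation perm, i.e. P*A = L*U.
    std::vector<double> luSolve(const std::vector<double>& LU, const std::vector<int>& perm,
                                const std::vector<double>& b, std::size_t n) {
        std::vector<double> y(n), x(n);
        for (std::size_t r = 0; r < n; ++r) {            // forward substitution: L*y = P*b
            double sum = b[perm[r]];
            for (std::size_t c = 0; c < r; ++c) sum -= LU[r * n + c] * y[c];
            y[r] = sum;                                  // unit diagonal, no division
        }
        for (std::size_t r = n; r > 0; --r) {            // back substitution: U*x = y
            double sum = y[r - 1];
            for (std::size_t c = r; c < n; ++c) sum -= LU[(r - 1) * n + c] * x[c];
            x[r - 1] = sum / LU[(r - 1) * n + (r - 1)];
        }
        return x;
    }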
-////////////////////////////////////////////////////////////////////////// -template -void SVD::deflation1(int col1, int shift, int ind, int size) { - - if(ind <= 0) - throw std::runtime_error("ops::helpers::SVD::deflation1 method: input int must satisfy condition ind > 0 !"); - - int first = col1 + shift; - T cos = _m.e(first, first); - T sin = _m.e(first+ind, first); - T denom = math::nd4j_sqrt(cos*cos + sin*sin); - - if (denom == (T)0.) { - - _m.p(first+ind, first+ind, 0.f); - return; - } - - cos /= denom; - sin /= denom; - - _m.p(first,first, denom); - _m.p(first+ind, first, 0.f); - _m.p(first+ind, first+ind, 0.f); - - auto rotation = NDArrayFactory::create(_m.ordering(), {2, 2}, _m.getContext()); - rotation.p(0, 0, cos); - rotation.p(0, 1, -sin); - rotation.p(1, 0, sin); - rotation.p(1, 1, cos); - - if (_calcU) { - auto temp = _u({col1,col1+size+1, 0,0}, true); - JacobiSVD::mulRotationOnRight(col1, col1+ind, temp, rotation); - } - else - JacobiSVD::mulRotationOnRight(col1, col1+ind, _u, rotation); -} - -////////////////////////////////////////////////////////////////////////// -template -void SVD::deflation2(int col1U , int col1M, int row1W, int col1W, int ind1, int ind2, int size) { - - if(ind1 >= ind2) - throw std::runtime_error("ops::helpers::SVD::deflation2 method: input intes must satisfy condition ind1 < ind2 !"); - - if(size <= 0) - throw std::runtime_error("ops::helpers::SVD::deflation2 method: input size must satisfy condition size > 0 !"); - - T cos = _m.e(col1M+ind1, col1M); - T sin = _m.e(col1M+ind2, col1M); - T denom = math::nd4j_sqrt(cos*cos + sin*sin); - - if (denom == (T)0.) { - - _m.p(col1M + ind1, col1M + ind1, _m.e(col1M + ind2, col1M + ind2)); - return; - } - - cos /= denom; - sin /= denom; - _m.p(col1M + ind1, col1M, denom); - _m.p(col1M + ind2, col1M + ind2, _m.e(col1M + ind1, col1M + ind1)); - _m.p(col1M + ind2, col1M, 0.f); - - auto rotation = NDArrayFactory::create(_m.ordering(), {2, 2}, _m.getContext()); - rotation.p(0,0, cos); - rotation.p(1,1, cos); - - rotation.p(0,1, -sin); - rotation.p(1,0, sin); - - if (_calcU) { - auto temp = _u({col1U,col1U+size+1, 0,0}, true); - JacobiSVD::mulRotationOnRight(col1U+ind1, col1U+ind2, temp, rotation); - } - else - JacobiSVD::mulRotationOnRight(col1U+ind1, col1U+ind2, _u, rotation); - - if (_calcV) { - auto temp = _v({row1W,row1W+size, 0,0}, true); - JacobiSVD::mulRotationOnRight(col1W+ind1, col1W+ind2, temp, rotation); - } -} - -////////////////////////////////////////////////////////////////////////// -// has effect on block from (col1+shift, col1+shift) to (col2+shift, col2+shift) inclusively -template -void SVD::deflation(int col1, int col2, int ind, int row1W, int col1W, int shift) -{ - - const int len = col2 + 1 - col1; - - auto colVec0 = new NDArray(_m({col1+shift,col1+shift+len, col1+shift,col1+shift+1}, true)); - - auto diagInterval = _m({col1+shift, col1+shift+len, col1+shift,col1+shift+len}, true).diagonal('c'); - - const T almostZero = DataTypeUtils::min(); - T maxElem; - if(len == 1) - maxElem = math::nd4j_abs(diagInterval.template e(0)); - else - maxElem = diagInterval({1,-1, 0,0}, true).reduceNumber(reduce::AMax).template e(0); - T maxElem0 = colVec0->reduceNumber(reduce::AMax).template e(0); - - T eps = math::nd4j_max(almostZero, DataTypeUtils::eps() * maxElem); - T epsBig = (T)8. 
* DataTypeUtils::eps() * math::nd4j_max(maxElem0, maxElem); - - if(diagInterval.template e(0) < epsBig) - diagInterval.p(Nd4jLong(0), epsBig); - - for(int i=1; i < len; ++i) - if(math::nd4j_abs(colVec0->template e(i)) < eps) - colVec0->p(i, 0.f); - - for(int i=1; i < len; i++) - if(diagInterval.template e(i) < epsBig) { - deflation1(col1, shift, i, len); - for(int i = 0; i < len; ++i) - diagInterval.p(i, _m.e(col1+shift+i,col1+shift+i)); - } - - { - - bool totDefl = true; - for(int i=1; i < len; i++) - if(colVec0->template e(i) >= almostZero) { - totDefl = false; - break; - } - - int* permut = nullptr; - ALLOCATE(permut, _m.getContext()->getWorkspace(), 3*_diagSize, int); - { - permut[0] = 0; - int p = 1; - - for(int i=1; i(diagInterval.template e(i)) < almostZero) - permut[p++] = i; - - int k = 1, m = ind+1; - - for( ; p < len; ++p) { - if(k > ind) - permut[p] = m++; - else if(m >= len) - permut[p] = k++; - else if(diagInterval.template e(k) < diagInterval.template e(m)) - permut[p] = m++; - else - permut[p] = k++; - } - } - - if(totDefl) { - for(int i=1; i(diagInterval.template e(ki)) < almostZero || diagInterval.template e(0) < diagInterval.template e(ki)) - permut[i-1] = permut[i]; - else { - permut[i-1] = 0; - break; - } - } - } - - int *tInd = permut + len; - int *tCol = permut + 2*len; - - for(int m = 0; m < len; m++) { - tCol[m] = m; - tInd[m] = m; - } - - for(int i = totDefl ? 0 : 1; i < len; i++) { - - const int ki = permut[len - (totDefl ? i+1 : i)]; - const int jac = tCol[ki]; - - T _e0 = diagInterval.template e(jac); - //math::nd4j_swap(diagInterval)(i), (*diagInterval)(jac)); - diagInterval.p(jac, diagInterval.template e(i)); - diagInterval.p(i, _e0); - - if(i!=0 && jac!=0) { - _e0 = colVec0->template e(jac); - //math::nd4j_swap((*colVec0)(i), (*colVec0)(jac)); - colVec0->p(jac, colVec0->template e(i)); - colVec0->p(i, _e0); - } - - if (_calcU) { - auto temp1 = _u({col1,col1+len+1, col1+i, col1+i+1}, true); - auto temp2 = _u({col1,col1+len+1, col1+jac,col1+jac+1}, true); - auto temp3 = temp1; - temp1.assign(temp2); - temp2.assign(temp3); - } - else { - auto temp1 = _u({0,2, col1+i, col1+i+1}, true); - auto temp2 = _u({0,2, col1+jac, col1+jac+1}, true); - auto temp3 = temp1; - temp1.assign(temp2); - temp2.assign(temp3); - } - - if(_calcV) { - auto temp1 = _v({row1W,row1W+len, col1W+i, col1W+i+1}, true); - auto temp2 = _v({row1W,row1W+len, col1W+jac, col1W+jac+1}, true); - auto temp3 = temp1; - temp1.assign(temp2); - temp2.assign(temp3); - } - - const int tI = tInd[i]; - tCol[tI] = jac; - tCol[ki] = i; - tInd[jac] = tI; - tInd[i] = ki; - } - - RELEASE(permut, _m.getContext()); - } - - { - int i = len-1; - - while(i > 0 && (math::nd4j_abs(diagInterval.template e(i)) < almostZero || math::nd4j_abs(colVec0->template e(i)) < almostZero)) - --i; - - for(; i > 1; --i) { - if( (diagInterval.template e(i) - diagInterval.template e(i-1)) < DataTypeUtils::eps()*maxElem ) { - if (math::nd4j_abs(diagInterval.template e(i) - diagInterval.template e(i-1)) >= epsBig) - throw std::runtime_error("ops::helpers::SVD::deflation: diagonal elements are not properly sorted !"); - deflation2(col1, col1 + shift, row1W, col1W, i-1, i, len); - } - } - } - - delete colVec0; -} - - -////////////////////////////////////////////////////////////////////////// -template -T SVD::secularEq(const T diff, const NDArray& col0, const NDArray& diag, const NDArray& permut, const NDArray& diagShifted, const T shift) { - - auto len = permut.lengthOf(); - T res = 1.; - T item; - for(int i=0; i(i); - item = col0.e(j) / 
((diagShifted.e(j) - diff) * (diag.e(j) + shift + diff)); - res += item * col0.e(j); - } - - return res; -} - - -////////////////////////////////////////////////////////////////////////// -template -void SVD::calcSingVals(const NDArray& col0, const NDArray& diag, const NDArray& permut, NDArray& singVals, NDArray& shifts, NDArray& mus) { - - auto len = col0.lengthOf(); - auto curLen = len; - - while(curLen > 1 && col0.e(curLen-1) == (T)0.f) - --curLen; - - for (int k = 0; k < len; ++k) { - - if (col0.e(k) == (T)0.f || curLen==1) { - - singVals.p(k, k==0 ? col0.e(0) : diag.e(k)); - mus.p(k, 0.f); - shifts.p(k, k==0 ? col0.e(0) : diag.e(k)); - continue; - } - - T left = diag.e(k); - T right; - - if(k==curLen-1) - right = diag.e(curLen-1) + col0.reduceNumber(reduce::Norm2).e(0); - else { - - int l = k+1; - while(col0.e(l) == (T)0.f) { - ++l; - if(l >= curLen) - throw std::runtime_error("ops::helpers::SVD::calcSingVals method: l >= curLen !"); - } - - right = diag.e(l); - } - - T mid = left + (right - left) / (T)2.; - T fMid = secularEq(mid, col0, diag, permut, diag, 0.); - T shift = (k == curLen-1 || fMid > (T)0.) ? left : right; - - auto diagShifted = diag - shift; - - T muPrev, muCur; - if (shift == left) { - muPrev = (right - left) * 0.1; - if (k == curLen-1) - muCur = right - left; - else - muCur = (right - left) * 0.5; - } - else { - muPrev = -(right - left) * 0.1; - muCur = -(right - left) * 0.5; - } - - T fPrev = secularEq(muPrev, col0, diag, permut, diagShifted, shift); - T fCur = secularEq(muCur, col0, diag, permut, diagShifted, shift); - - if (math::nd4j_abs(fPrev) < math::nd4j_abs(fCur)) { - math::nd4j_swap(fPrev, fCur); - math::nd4j_swap(muPrev, muCur); - } - - bool useBisection = fPrev * fCur > (T)0.; - while (fCur != (T).0 && - math::nd4j_abs(muCur - muPrev) > (T)8. * DataTypeUtils::eps() * math::nd4j_max(math::nd4j_abs(muCur), math::nd4j_abs(muPrev)) - && math::nd4j_abs(fCur - fPrev) > DataTypeUtils::eps() && !useBisection) { - - T a = (fCur - fPrev) / ((T)1./muCur - (T)1./muPrev); - T jac = fCur - a / muCur; - T muZero = -a/jac; - T fZero = secularEq(muZero, col0, diag, permut, diagShifted, shift); - - muPrev = muCur; - fPrev = fCur; - muCur = muZero; - fCur = fZero; - - if (shift == left && (muCur < (T)0. || muCur > right - left)) - useBisection = true; - if (shift == right && (muCur < -(right - left) || muCur > (T)0.)) - useBisection = true; - if (math::nd4j_abs(fCur) > math::nd4j_abs(fPrev) && math::nd4j_abs(fCur - fPrev) > (T)16. * DataTypeUtils::eps()) - useBisection = true; - } - - - if (useBisection) { - - T leftShifted, rightShifted; - if (shift == left) { - leftShifted = DataTypeUtils::min(); - rightShifted = (k==curLen-1) ? right : ((right - left) * (T)0.6); - } - else { - - leftShifted = -(right - left) * (T)0.6; - rightShifted = -DataTypeUtils::min(); - } - - T fLeft = secularEq(leftShifted, col0, diag, permut, diagShifted, shift); - T fRight = secularEq(rightShifted, col0, diag, permut, diagShifted, shift); - // if(fLeft * fRight >= (T)0.) - // throw "ops::helpers::SVD::calcSingVals method: fLeft * fRight >= (T)0. !"; - - while (rightShifted - leftShifted > (T)2.f * DataTypeUtils::eps() * math::nd4j_max(math::nd4j_abs(leftShifted), math::nd4j_abs(rightShifted))) { - - T midShifted = (leftShifted + rightShifted) / (T)2.; - fMid = secularEq(midShifted, col0, diag, permut, diagShifted, shift); - if (fLeft * fMid < (T)0.) 
- rightShifted = midShifted; - else { - leftShifted = midShifted; - fLeft = fMid; - } - } - muCur = (leftShifted + rightShifted) / (T)2.; - } - singVals.p(k, shift + muCur); - shifts.p(k, shift); - mus.p(k, muCur); - } - -} - - -////////////////////////////////////////////////////////////////////////// -template -void SVD::perturb(const NDArray& col0, const NDArray& diag, const NDArray& permut, const NDArray& singVals, const NDArray& shifts, const NDArray& mus, NDArray& zhat) { - - int n = col0.lengthOf(); - int m = permut.lengthOf(); - if(m==0) { - zhat.assign(0.); - return; - } - - int last = permut.e(m-1); - - for (int k = 0; k < n; ++k) { - - if (col0.e(k) == (T)0.f) - zhat.p(k, (T)0.f); - else { - T dk = diag.e(k); - T prod = (singVals.e(last) + dk) * (mus.e(last) + (shifts.e(last) - dk)); - - for(int l = 0; l(l); - if(i!=k) { - int j = i(l-1); - prod *= ((singVals.e(j)+dk) / ((diag.e(i)+dk))) * ((mus.e(j)+(shifts.e(j)-dk)) / ((diag.e(i)-dk))); - } - } - T tmp = math::nd4j_sqrt(prod); - zhat.p(k, col0.e(k) > (T)0.f ? tmp : -tmp); - } - } -} - - -////////////////////////////////////////////////////////////////////////// -template -void SVD::calcSingVecs(const NDArray& zhat, const NDArray& diag, const NDArray& perm, const NDArray& singVals, - const NDArray& shifts, const NDArray& mus, NDArray& U, NDArray& V) { - - int n = zhat.lengthOf(); - int m = perm.lengthOf(); - - for (int k = 0; k < n; ++k) { - - auto colU = new NDArray(U({0,0, k,k+1}, true)); - *colU = 0.; - NDArray* colV = nullptr; - - if (_calcV) { - colV = new NDArray(V({0,0, k,k+1}, true)); - *colV = 0.; - } - - if (zhat.e(k) == (T)0.f) { - colU->p(k, 1.f); - - if (_calcV) - colV->p(k, 1.f); - } - else { - - for(int l = 0; l < m; ++l) { - int i = perm.e(l); - U.p(i,k, zhat.e(i)/(((diag.e(i) - shifts.e(k)) - mus.e(k)) )/( (diag.e(i) + singVals.e(k)))); - } - U.p(n,k, 0.f); - *colU /= colU->reduceNumber(reduce::Norm2); - - if (_calcV) { - - for(int l = 1; l < m; ++l){ - int i = perm.e(l); - V.p(i,k, diag.e(i) * zhat.e(i) / (((diag.e(i) - shifts.e(k)) - mus.e(k)) )/( (diag.e(i) + singVals.e(k)))); - } - V.p(0,k, -1.f); - *colV /= colV->reduceNumber(reduce::Norm2); - } - } - delete colU; - if (_calcV) - delete colV; - } - - auto colU = U({0,0, n,n+1}, true); - colU = 0.; - colU.p(n, 1.); -} - - -////////////////////////////////////////////////////////////////////////// -template -void SVD::calcBlockSVD(int col1, int size, NDArray& U, NDArray& singVals, NDArray& V) { - - const T almostZero = DataTypeUtils::min(); - auto col0 = _m({col1, col1+size, col1, col1+1}, true); - auto diag = static_cast(_m({col1, col1+size, col1, col1+size}, true).diagonal('c')); - - diag.p(Nd4jLong(0), T(0)); - singVals = NDArrayFactory::create(_m.ordering(), {size, 1}, _m.getContext()); - U = NDArrayFactory::create(_u.ordering(), {size+1, size+1}, _u.getContext()); - if (_calcV) - V = NDArrayFactory::create(_v.ordering(), {size, size}, _v.getContext()); - - int curSize = size; - while(curSize > 1 && diag.template e(curSize-1) == (T)0.f) - --curSize; - - int m = 0; - std::vector indices; - for(int k = 0; k < curSize; ++k) - if(math::nd4j_abs(col0.template e(k)) > almostZero) - indices.push_back((T)k); - - auto permut = NDArrayFactory::create(_m.ordering(), {1, (int)indices.size()}, indices, _m.getContext()); - auto shifts = NDArrayFactory::create(_m.ordering(), {size, 1}, _m.getContext()); - auto mus = NDArrayFactory::create(_m.ordering(), {size, 1}, _m.getContext()); - auto zhat = NDArrayFactory::create(_m.ordering(), {size, 1}, _m.getContext()); - - 
calcSingVals(col0, diag, permut, singVals, shifts, mus); - perturb(col0, diag, permut, singVals, shifts, mus, zhat); - calcSingVecs(zhat, diag, permut, singVals, shifts, mus, U, V); - - for(int i=0; i(i) > singVals.e(i+1)) { - T _e0 = singVals.e(i); - T _e1 = singVals.e(i+1); - //math::nd4j_swap(singVals(i),singVals(i+1)); - singVals.p(i, _e1); - singVals.p(i+1, _e0); - - auto temp1 = U({0,0, i,i+1}, true); - auto temp2 = U({0,0, i+1,i+2}, true); - auto temp3 = temp1; - temp1.assign(temp2); - temp2.assign(temp3); - - if(_calcV) { - auto temp1 = V({0,0, i,i+1}, true); - auto temp2 = V({0,0, i+1,i+2}, true); - auto temp3 = temp1; - temp1.assign(temp2); - temp2.assign(temp3); - } - } - } - - auto temp1 = singVals({0,curSize, 0,0}, true); - for (int e = 0; e < curSize / 2; ++e) { - T tmp = temp1.e(e); - temp1.p(e, temp1.e(curSize-1-e)); - temp1.p(curSize-1-e, tmp); - } - - auto temp2 = U({0,0, 0,curSize}, true); - for(int i = 0; i < curSize/2; ++i) { - auto temp3 = temp2({0,0, i,i+1}, true); - auto temp4 = temp2({0,0, curSize-1-i,curSize-i}, true); - auto temp5 = temp3; - temp3.assign(temp4); - temp4.assign(temp5); - } - - if (_calcV) { - auto temp2 = V({0,0, 0,curSize}, true); - for(int i = 0; i < curSize/2; ++i) { - auto temp3 = temp2({0,0, i,i+1}, true); - auto temp4 = temp2({0,0, curSize-1-i,curSize-i}, true); - auto temp5 = temp3; - temp3.assign(temp4); - temp4.assign(temp5); - } - } -} - - -////////////////////////////////////////////////////////////////////////// -template -void SVD::DivideAndConquer(int col1, int col2, int row1W, int col1W, int shift) { - - // requires rows = cols + 1; - const int n = col2 - col1 + 1; - const int k = n/2; - const T almostZero = DataTypeUtils::min(); - T alphaK; - T betaK; - T r0; - T lambda, phi, c0, s0; - auto l = NDArrayFactory::create(_u.ordering(), {1, k}, _u.getContext()); - auto f = NDArrayFactory::create(_u.ordering(), {1, n-k-1}, _u.getContext()); - - if(n < _switchSize) { - - JacobiSVD jac(_m({col1,col1+n+1, col1,col1+n}, true), _calcU, _calcV, _fullUV); - - if (_calcU) { - auto temp = _u({col1,col1+n+1, col1,col1+n+1}, true); - temp.assign(jac._u); - } - else { - auto temp1 = _u({0,1, col1,col1+n+1}, true); - temp1.assign(jac._u({0,1, 0,0}, true)); - auto temp2 = _u({1,2, col1,col1+n+1}, true); - temp2.assign(jac._u({n,n+1, 0,0}, true)); - } - - if (_calcV) { - auto temp = _v({row1W,row1W+n, col1W,col1W+n}, true); - temp.assign(jac._v); - } - - auto temp = _m({col1+shift,col1+shift+n+1, col1+shift,col1+shift+n}, true); - temp.assign(0.); - auto diag = _m.diagonal('c'); - diag({col1+shift, col1+shift+n, 0,0}, true).assign(jac._s({0,n, 0,0}, true)); - - return; - } - - alphaK = _m.e(col1 + k, col1 + k); - betaK = _m.e(col1 + k + 1, col1 + k); - - DivideAndConquer(k + 1 + col1, col2, k + 1 + row1W, k + 1 + col1W, shift); - DivideAndConquer(col1, k - 1 + col1, row1W, col1W + 1, shift + 1); - - if (_calcU) { - lambda = _u.e(col1 + k, col1 + k); - phi = _u.e(col1 + k + 1, col2 + 1); - } - else { - lambda = _u.e(1, col1 + k); - phi = _u.e(0, col2 + 1); - } - - r0 = math::nd4j_sqrt((math::nd4j_abs(alphaK * lambda) * math::nd4j_abs(alphaK * lambda)) + math::nd4j_abs(betaK * phi) * math::nd4j_abs(betaK * phi)); - - if(_calcU) { - l.assign(_u({col1+k, col1+k+1, col1,col1+k}, true)); - f.assign(_u({col1+k+1,col1+k+2, col1+k+1,col1+n}, true)); - } - else { - l.assign(_u({1,2, col1, col1+k}, true)); - f.assign(_u({0,1, col1+k+1, col1+n}, true)); - } - - // UofSVD.printIndexedBuffer(); - // VofSVD.printIndexedBuffer(); - // singVals.printIndexedBuffer(); - 
// printf("!! \n"); - - if (_calcV) - _v.p(row1W+k, col1W, 1.f); - - if (r0 < almostZero){ - c0 = 1.; - s0 = 0.; - } - else { - c0 = alphaK * lambda / r0; - s0 = betaK * phi / r0; - } - - if (_calcU) { - - auto temp = _u({col1,col1+k+1, col1+k,col1+k+1}, true); - NDArray q1(temp); - - for (int i = col1 + k - 1; i >= col1; --i) { - auto temp = _u({col1,col1+k+1, i+1,i+2}, true); - temp.assign(_u({col1, col1+k+1, i, i+1}, true)); - } - - _u({col1,col1+k+1, col1,col1+1}, true).assign(q1 * c0); - _u({col1,col1+k+1, col2+1,col2+2}, true).assign(q1 * (-s0)); - _u({col1+k+1,col1+n+1, col1, col1+1}, true).assign(static_cast(_u({col1+k+1, col1+n+1, col2+1, col2+2}, true)) * s0); - _u({col1+k+1,col1+n+1, col2+1,col2+2}, true) *= c0; - } - else { - - T q1 = _u.e(0, col1 + k); - - for (int i = col1 + k - 1; i >= col1; --i) - _u.p(0, i+1, _u.e(0, i)); - - _u.p(0, col1, q1 * c0); - _u.p(0, col2+1, -q1*s0); - _u.p(1, col1, _u.e(1, col2+1) * s0); - _u.p(1, col2 + 1, _u.e(1, col2 + 1) * c0); - _u({1,2, col1+1, col1+k+1}, true) = 0.f; - _u({0,1, col1+k+1, col1+n}, true) = 0.f; - } - - _m.p(col1 + shift, col1 + shift, r0); - auto temp1 = _m({col1+shift+1,col1+shift+k+1, col1+shift,col1+shift+1}, true); - temp1.assign(l*alphaK); - auto temp2 = _m({col1+shift+k+1,col1+shift+n, col1+shift,col1+shift+1}, true); - temp2.assign(f*betaK); - - deflation(col1, col2, k, row1W, col1W, shift); - - NDArray UofSVD, VofSVD, singVals; - calcBlockSVD(col1 + shift, n, UofSVD, singVals, VofSVD); - - if(_calcU) { - auto pTemp = _u({col1, col1+n+1, col1,col1+n+1}, true); - auto temp = pTemp; - pTemp.assign(mmul(temp, UofSVD)); - } - else { - auto pTemp = _u({0,0, col1,col1+n+1}, true); - auto temp = pTemp; - pTemp.assign(mmul(temp, UofSVD)); - } - - if (_calcV) { - auto pTemp = _v({row1W,row1W+n, row1W,row1W+n}, true); - auto temp = pTemp; - pTemp.assign(mmul(temp, VofSVD)); - } - - auto blockM = _m({col1+shift,col1+shift+n, col1+shift,col1+shift+n}, true); - blockM = 0.f; - auto diag = blockM.diagonal('c'); - diag.assign(singVals); -} - -////////////////////////////////////////////////////////////////////////// -template -void SVD::exchangeUV(const HHsequence& hhU, const HHsequence& hhV, const NDArray& U, const NDArray& V) { - - if (_calcU) { - - int colsU = _fullUV ? hhU.rows() : _diagSize; - auto temp1 = NDArrayFactory::create(_u.ordering(), {hhU.rows(), colsU}, _u.getContext()); - temp1.setIdentity(); - _u = temp1; - - auto temp2 = _u({0,_diagSize, 0,_diagSize}, true); - temp2.assign(V({0,_diagSize, 0,_diagSize}, true)); - const_cast(hhU).mulLeft(_u); - } - - if (_calcV) { - - int colsV = _fullUV ? hhV.rows() : _diagSize; - auto temp1 = NDArrayFactory::create(_v.ordering(), {hhV.rows(), colsV}, _v.getContext()); - temp1.setIdentity(); - _v = temp1; - - auto temp2 = _v({0,_diagSize, 0,_diagSize}, true); - temp2.assign(U({0,_diagSize, 0,_diagSize}, true)); - const_cast(hhV).mulLeft(_v); - } -} - -////////////////////////////////////////////////////////////////////////// -template -void SVD::evalData(const NDArray& matrix) { - - const T almostZero = DataTypeUtils::min(); - - if(matrix.sizeAt(1) < _switchSize) { - - JacobiSVD jac(matrix, _calcU, _calcV, _fullUV); - - if(_calcU) - _u = jac._u; - if(_calcV) - _v = jac._v; - - _s.assign(jac._s); - - return; - } - - T scale = matrix.reduceNumber(reduce::AMax).e(0); - - if(scale == (T)0.) 
- scale = 1.; - - NDArray copy; - if(_transp) - copy = matrix.transpose(); - else - copy = matrix / scale; - - BiDiagonalUp biDiag(copy); - - _u = 0.; - _v = 0.; - - auto temp1 = biDiag._HHbidiag.transpose(); - auto temp2 = _m({0,_diagSize, 0,0}, true); - temp2.assign(temp1); - - auto temp3 = _m({_m.sizeAt(0)-1,_m.sizeAt(0), 0,0}, true); - temp3.assign(0.); - - DivideAndConquer(0, _diagSize - 1, 0, 0, 0); - - for (int i = 0; i < _diagSize; ++i) { - T a = math::nd4j_abs(_m.e(i, i)); - _s.p(i, a * scale); - if (a < almostZero) { - auto temp = _s({i+1,_diagSize, 0,0}, true); - temp.assign(0.); - break; - } - else if (i == _diagSize-1) - break; - } - - if(_transp) - exchangeUV(biDiag.makeHHsequence('v'), biDiag.makeHHsequence('u'), _v, _u); - else - exchangeUV(biDiag.makeHHsequence('u'), biDiag.makeHHsequence('v'), _u, _v); -} - - -BUILD_SINGLE_TEMPLATE(template class ND4J_EXPORT SVD,,FLOAT_TYPES); - - ////////////////////////////////////////////////////////////////////////// // svd operation, this function is not method of SVD class, it is standalone function template @@ -972,9 +67,10 @@ static void svd_(const NDArray* x, const std::vector& outArrs, const b } } - void svd(sd::LaunchContext * context, const NDArray* x, const std::vector& outArrs, const bool fullUV, const bool calcUV, const int switchNum) { - BUILD_SINGLE_SELECTOR(x->dataType(), svd_, (x, outArrs, fullUV, calcUV, switchNum), FLOAT_TYPES); - } +////////////////////////////////////////////////////////////////////////// +void svd(sd::LaunchContext * context, const NDArray* x, const std::vector& outArrs, const bool fullUV, const bool calcUV, const int switchNum) { + BUILD_SINGLE_SELECTOR(x->dataType(), svd_, (x, outArrs, fullUV, calcUV, switchNum), FLOAT_TYPES); +} } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/top_k.cpp b/libnd4j/include/ops/declarable/helpers/cpu/top_k.cpp index fdab43261..65edeb71b 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/top_k.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/top_k.cpp @@ -73,8 +73,8 @@ namespace helpers { NDArray sortedVals = NDArrayFactory::create('c', {k}, input->getContext()); NDArray topIndices = NDArrayFactory::create('c', {k}, input->getContext()); for (uint pos = 0; pos < k; ++pos) { - topIndices.t(pos) = pos; - topValues.t(pos) = trial.t(pos); + topIndices.r(pos) = pos; + topValues.r(pos) = trial.t(pos); } //std::vector sortedVals(topValues); sortedVals.assign(topValues);// = NDArrayFactory::create('c', {k}); @@ -93,9 +93,9 @@ namespace helpers { T* topBegin = reinterpret_cast(topValues.buffer()); T* topEnd = topBegin + k; auto exchangePos = std::distance(topBegin, std::find(topBegin, topEnd, sortedVals.t(0))); - topValues.t(exchangePos) = val; //*exchangeIt = val; - topIndices.t(exchangePos) = i; - sortedVals.t(0) = val; // suppress in sorted + topValues.r(exchangePos) = val; //*exchangeIt = val; + topIndices.r(exchangePos) = i; + sortedVals.r(0) = val; // suppress in sorted //std::sort(sortedVals.begin(), sortedVals.end()); // sorted in ascending order SpecialMethods::sortGeneric(sortedVals.buffer(), sortedVals.shapeInfo(), false); } @@ -107,7 +107,7 @@ namespace helpers { for (Nd4jLong j = 0; j < width; j++) for (uint pos = 0; pos < k; ++pos) if (topValues.t(pos) == trial.t(j)) - topIndices.t(pos) = j; + topIndices.r(pos) = j; } else { // else sort by indices std::map sortValsMap; @@ -121,8 +121,8 @@ namespace helpers { //}); Nd4jLong e = 0; for (auto it = sortValsMap.begin(); it != sortValsMap.end(); ++it, e++) { - topIndices.t(e) = it->first; - 
topValues.t(e) = it->second; + topIndices.r(e) = it->first; + topValues.r(e) = it->second; } } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/triangular_solve.cpp b/libnd4j/include/ops/declarable/helpers/cpu/triangular_solve.cpp index bcf406392..86847da16 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/triangular_solve.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/triangular_solve.cpp @@ -39,17 +39,17 @@ namespace helpers { * * */ template - static void lowerTriangularSolve(sd::LaunchContext * context, NDArray* leftInput, NDArray* rightInput, bool adjoint, NDArray* output) { + static void lowerTriangularSolve(sd::LaunchContext * context, NDArray const * leftInput, NDArray const* rightInput, bool const unitsOnDiag, NDArray* output) { auto rows = leftInput->rows(); auto cols = rightInput->columns(); - //output->t(0,0) = rightInput->t(0,0) / leftInput->t(0,0); + //output->r(0,0) = rightInput->t(0,0) / leftInput->t(0,0); for (Nd4jLong r = 0; r < rows; r++) { for (Nd4jLong j = 0; j < cols; j++) { auto sum = rightInput->t(r, j); for (Nd4jLong c = 0; c < r; c++) { sum -= leftInput->t(r, c) * output->t(c, j); } - output->t(r, j) = sum / leftInput->t(r, r); + output->r(r, j) = unitsOnDiag?sum: sum / leftInput->t(r, r); } } } @@ -69,7 +69,7 @@ namespace helpers { * */ template - static void upperTriangularSolve(sd::LaunchContext * context, NDArray* leftInput, NDArray* rightInput, bool adjoint, NDArray* output) { + static void upperTriangularSolve(sd::LaunchContext* context, NDArray const* leftInput, NDArray const* rightInput, bool const unitsOnDiag, NDArray* output) { auto rows = leftInput->rows(); auto cols = rightInput->columns(); for (Nd4jLong r = rows; r > 0; r--) { @@ -78,11 +78,31 @@ namespace helpers { for (Nd4jLong c = r; c < rows; c++) { sum -= leftInput->t(r - 1, c) * output->t(c, j); } - output->t(r - 1, j) = sum / leftInput->t(r - 1, r - 1); + output->r(r - 1, j) = unitsOnDiag? 
sum : sum / leftInput->t(r - 1, r - 1); } } } + /// triangularSolve2D - 2D implementation of triangularSolveFunctor + /// \tparam T - type of NDArray output + /// \param context - launch context pointer + /// \param leftInput - T matrix of equation Tx = b + /// \param rightInput - b vector of equation Tx = b + /// \param lower - lower or upper triangular matrix + /// \param unitsOnDiag - solve for case when only units (1.0) on diagonal is assumed + /// \param output - output vector (x on equation Tx = b) + /// + template + void triangularSolve2D(sd::LaunchContext* context, NDArray const& leftInput, NDArray const& rightInput, bool const lower, bool const unitsOnDiag, NDArray& output) { + if (lower) { + lowerTriangularSolve(context, &leftInput, &rightInput, unitsOnDiag, &output); + } + else { + upperTriangularSolve(context, &leftInput, &rightInput, unitsOnDiag, &output); + } + } + BUILD_SINGLE_TEMPLATE(template void triangularSolve2D, (sd::LaunchContext* context, NDArray const& leftInput, NDArray const& rightInput, bool const lower, bool const unitsOnDiag, NDArray& output), FLOAT_TYPES); + template static int triangularSolveFunctor_(sd::LaunchContext * context, NDArray* leftInput, NDArray* rightInput, bool lower, bool adjoint, NDArray* output) { auto leftPart = leftInput->allTensorsAlongDimension({-2, -1}); @@ -92,9 +112,9 @@ namespace helpers { auto batchLoop = PRAGMA_THREADS_FOR { for (auto i = start; i < stop; i++) { if (lower) { - lowerTriangularSolve(context, leftPart[i], rightPart[i], adjoint, outputPart[i]); + lowerTriangularSolve(context, leftPart[i], rightPart[i], false, outputPart[i]); } else { - upperTriangularSolve(context, leftPart[i], rightPart[i], adjoint, outputPart[i]); + upperTriangularSolve(context, leftPart[i], rightPart[i], false, outputPart[i]); } } }; @@ -116,13 +136,13 @@ namespace helpers { if (!lower) { for (Nd4jLong r = 0; r < rows; r++) { for (Nd4jLong c = 0; c <= r; c++) { - outputPart[batch]->t(r, c) = inputPart[batch]->t(c, r); + outputPart[batch]->r(r, c) = inputPart[batch]->t(c, r); } } } else { for (Nd4jLong r = 0; r < rows; r++) { for (Nd4jLong c = r; c < cols; c++) { - outputPart[batch]->t(r, c) = inputPart[batch]->t(c, r); + outputPart[batch]->r(r, c) = inputPart[batch]->t(c, r); } } } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/triu.cpp b/libnd4j/include/ops/declarable/helpers/cpu/triu.cpp index 4194e976c..eb2074865 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/triu.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/triu.cpp @@ -38,7 +38,7 @@ static void triuBP_(sd::LaunchContext * context, const NDArray& input, const NDA auto func = PRAGMA_THREADS_FOR { for (auto i = start; i < stop; i++) { if (dOdI.t(i) != static_cast(0.f)) - dOdI.t(i) = static_cast(1.f); + dOdI.r(i) = static_cast(1.f); } }; samediff::Threads::parallel_for(func, 0, dLen); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/triangular_solve.cu b/libnd4j/include/ops/declarable/helpers/cuda/triangular_solve.cu index c8f26de6f..6302262be 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/triangular_solve.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/triangular_solve.cu @@ -41,9 +41,9 @@ namespace sd { * * */ template - static __device__ void lowerTriangularSolve(T const* leftInput, Nd4jLong const* leftInputShape, + static _CUDA_HD void lowerTriangularSolve(T const* leftInput, Nd4jLong const* leftInputShape, T const* rightInput, Nd4jLong const* rightInputShape, - bool const adjoint, T* output, Nd4jLong const* outputShape, + bool const unitOnDiag, T* 
output, const Nd4jLong* outputShape, Nd4jLong rows, Nd4jLong cols) { for (auto r = 0; r < rows; r++) { @@ -62,7 +62,7 @@ namespace sd { auto zcIndex = shape::getOffset(outputShape, posZ, 0); sum -= leftInput[xcIndex] * output[zcIndex]; } - output[zIndex] = sum / leftInput[xIndex]; + output[zIndex] = unitOnDiag?sum:sum / leftInput[xIndex]; } } } @@ -82,9 +82,9 @@ namespace sd { * */ template - static __device__ void upperTriangularSolve(T const* leftInput, Nd4jLong const* leftInputShape, - T const* rightInput, Nd4jLong const* rightInputShape, bool const adjoint, T* output, - Nd4jLong const* outputShape, Nd4jLong rows, Nd4jLong cols) { + static _CUDA_HD void upperTriangularSolve(T const* leftInput, Nd4jLong const* leftInputShape, + T const* rightInput, Nd4jLong const* rightInputShape, bool const unitOnDiag, T* output, + const Nd4jLong* outputShape, Nd4jLong rows, Nd4jLong cols) { for (auto r = rows; r > 0; r--) { for (auto j = 0; j < cols; j++) { @@ -101,16 +101,16 @@ namespace sd { auto xcIndex = shape::getOffset(leftInputShape, pos, 0); sum -= leftInput[xcIndex] * output[zcIndex]; } - output[zIndex] = sum / leftInput[xIndex]; + output[zIndex] = unitOnDiag?sum:sum / leftInput[xIndex]; } } } template static __global__ void triangularSolveKernel(T const* leftInput, Nd4jLong const* leftPartShape, - T const* rightInput, Nd4jLong const* rightPartShape, bool const lower, bool const adjoint, T* output, - Nd4jLong const* outputShape, Nd4jLong const* tadLeftShape, Nd4jLong const* tadLeftOffset, Nd4jLong const* tadRightShape, - Nd4jLong const* tadRightOffset, Nd4jLong const* tadOutputShape, Nd4jLong const* tadOutputOffset, Nd4jLong batchNum) { + T const* rightInput, Nd4jLong const* rightPartShape, bool const lower, bool const unitsOnDiag, T* output, + const Nd4jLong* outputShape, const Nd4jLong* tadLeftShape, const Nd4jLong* tadLeftOffset, const Nd4jLong* tadRightShape, + const Nd4jLong* tadRightOffset, const Nd4jLong* tadOutputShape, const Nd4jLong* tadOutputOffset, Nd4jLong batchNum) { __shared__ Nd4jLong rows; __shared__ Nd4jLong cols; @@ -130,16 +130,16 @@ namespace sd { auto pRightPart = rightInput + tadRightOffset[i]; auto pOutputPart = output + tadOutputOffset[i]; if (lower) { - lowerTriangularSolve(pLeftPart, tadLeftShape, pRightPart, tadRightShape, adjoint, pOutputPart, tadOutputShape, rows, cols); + lowerTriangularSolve(pLeftPart, tadLeftShape, pRightPart, tadRightShape, unitsOnDiag, pOutputPart, tadOutputShape, rows, cols); } else { - upperTriangularSolve(pLeftPart, tadLeftShape, pRightPart, tadRightShape, adjoint, pOutputPart, tadOutputShape, rows, cols); + upperTriangularSolve(pLeftPart, tadLeftShape, pRightPart, tadRightShape, unitsOnDiag, pOutputPart, tadOutputShape, rows, cols); } } } template static int triangularSolveFunctor_(sd::LaunchContext * context, NDArray* leftInput, NDArray* rightInput, - bool lower, bool adjoint, NDArray* output) { + bool lower, bool unitsOnDiag, NDArray* output) { NDArray::prepareSpecialUse({output}, {leftInput, rightInput}); auto leftTads = ConstantTadHelper::getInstance()->tadForDimensions(leftInput->shapeInfo(), {-2, -1}); auto rightTads = ConstantTadHelper::getInstance()->tadForDimensions(rightInput->shapeInfo(), {-2, -1}); @@ -150,7 +150,7 @@ namespace sd { T const* rightBuf = reinterpret_cast(rightInput->specialBuffer()); T* outputBuf = reinterpret_cast(output->specialBuffer()); triangularSolveKernel<<<128, 128, 256, *stream>>>(leftBuf, leftInput->specialShapeInfo(), - rightBuf, rightInput->specialShapeInfo(), lower, adjoint, outputBuf, 
output->specialShapeInfo(), + rightBuf, rightInput->specialShapeInfo(), lower, unitsOnDiag, outputBuf, output->specialShapeInfo(), leftTads.specialShapeInfo(), leftTads.specialOffsets(), rightTads.specialShapeInfo(), rightTads.specialOffsets(), outputTads.specialShapeInfo(), outputTads.specialOffsets(), leftTads.numberOfTads()); @@ -161,8 +161,41 @@ namespace sd { } - int triangularSolveFunctor(sd::LaunchContext * context, NDArray* leftInput, NDArray* rightInput, bool lower, bool adjoint, NDArray* output) { - BUILD_SINGLE_SELECTOR(leftInput->dataType(), return triangularSolveFunctor_, (context, leftInput, rightInput, lower, adjoint, output), FLOAT_NATIVE); + /// triangularSolve2D - 2D implementation of triangularSolveFunctor + /// \tparam T - type of NDArray output + /// \param context - launch context pointer + /// \param leftInput - T matrix of equation Tx = b + /// \param rightInput - b vector of equation Tx = b + /// \param lower - lower or upper triangular matrix + /// \param unitsOnDiag - solve for case when only units (1.0) on diagonal is assumed + /// \param output - output vector (x on equation Tx = b) + /// + template + void triangularSolve2D(sd::LaunchContext* context, const NDArray& leftInput, const NDArray& rightInput, bool const lower, bool const unitsOnDiag, NDArray& output) { + + triangularSolveFunctor_(context, const_cast(&leftInput), const_cast(&rightInput), lower, unitsOnDiag, &output); + + // leftInput.syncToHost(); rightInput.syncToHost(); output.syncToHost(); + // T const* pLeftPart = (T const*)leftInput.getBuffer(); + // T const* pRightPart = (T const*)rightInput.getBuffer(); + // T* pOutputPart = (T*)output.buffer(); + // auto rows = leftInput.rows(); + // auto cols = leftInput.columns(); + // if (lower) { + // lowerTriangularSolve(pLeftPart, leftInput.shapeInfo(), pRightPart, rightInput.shapeInfo(), unitsOnDiag, pOutputPart, output.shapeInfo(), rows, cols); + // } else { + // upperTriangularSolve(pLeftPart, leftInput.shapeInfo(), pRightPart, rightInput.shapeInfo(), unitsOnDiag, pOutputPart, output.shapeInfo(), rows, cols); + // } + // output.syncToDevice(); + } + BUILD_SINGLE_TEMPLATE(template void triangularSolve2D, (sd::LaunchContext* context, NDArray const& leftInput, NDArray const& rightInput, bool const lower, bool const unitsOnDiag, NDArray& output), FLOAT_TYPES); +// template void triangularSolve2D(sd::LaunchContext* context, NDArray const& leftInput, NDArray const& rightInput, bool const lower, bool const unitsOnDiag, NDArray& output); +// template void triangularSolve2D(sd::LaunchContext* context, NDArray const& leftInput, NDArray const& rightInput, bool const lower, bool const unitsOnDiag, NDArray& output); +// template void triangularSolve2D(sd::LaunchContext* context, NDArray const& leftInput, NDArray const& rightInput, bool const lower, bool const unitsOnDiag, NDArray& output); +// template void triangularSolve2D(sd::LaunchContext* context, NDArray const& leftInput, NDArray const& rightInput, bool const lower, bool const unitsOnDiag, NDArray& output); + + int triangularSolveFunctor(sd::LaunchContext * context, NDArray* leftInput, NDArray* rightInput, bool lower, bool unitsOnDiag, NDArray* output) { + BUILD_SINGLE_SELECTOR(leftInput->dataType(), return triangularSolveFunctor_, (context, leftInput, rightInput, lower, unitsOnDiag, output), FLOAT_NATIVE); } template @@ -229,6 +262,76 @@ namespace sd { BUILD_SINGLE_SELECTOR(input->dataType(), adjointTriangularMatrix_, (context, input, lower, output), FLOAT_NATIVE); } - } - } +/* + 
////////////////////////////////////////////////////////////////////////// + template + void triangularSolve2D(sd::LaunchContext* context, NDArray const& A, NDArray const& b, bool const lower, bool const unitsOnDiag, NDArray& x) { + + if(A.rankOf() != 2) + throw std::runtime_error("triangularSolve2D: input matrix A must be 2D !"); + + int temp; + + const bool isBvector = b.isCommonVector(temp); + const bool isXvector = x.isCommonVector(temp); + + if(A.sizeAt(0) != (isBvector ? b.lengthOf() : b.sizeAt(0))) + throw std::runtime_error("triangularSolve2D: A and b must have the same number of rows !"); + + if(A.sizeAt(1) != (isXvector ? x.lengthOf() : x.sizeAt(0))) + throw std::runtime_error("triangularSolve2D: columns number of array A must be equal to rows number of array x !"); + + if(isBvector) { + + if(lower) { + + for (int i = 0; i < A.sizeAt(0); ++i) { + T sum = b.t(i); + for (int j = 0; j < i; ++j) + sum -= A.t(i,j) * x.t(j); + x.r(i) = unitsOnDiag ? sum : sum / A.t(i,i); + } + } + else { + + for (int i = A.sizeAt(0) - 1; i >= 0; --i) { + T sum = b.t(i); + for (int j = i + 1; j < A.sizeAt(1); ++j) + sum -= A.t(i,j) * x.t(j); + x.r(i) = unitsOnDiag ? sum : sum / A.t(i,i); + } + } + } + else { + + if(lower) { + + for (int bCol = 0; bCol < b.sizeAt(1); ++bCol) { + for (int i = 0; i < A.sizeAt(0); ++i) { + T sum = b.t(i, bCol); + for (int j = 0; j < i; ++j) + sum -= A.t(i,j) * x.t(j, bCol); + x.r(i, bCol) = unitsOnDiag ? sum : sum / A.t(i,i); + } + } + } + else { + + for (int bCol = 0; bCol < b.sizeAt(1); ++bCol) { + for (int i = A.sizeAt(0) - 1; i >= 0; --i) { + T sum = b.t(i, bCol); + for (int j = i + 1; j < A.sizeAt(1); ++j) + sum -= A.t(i,j) * x.t(j, bCol); + x.r(i, bCol) = unitsOnDiag ? sum : sum / A.t(i,i); + } + } + } + } + } + BUILD_SINGLE_TEMPLATE(template void triangularSolve2D, (sd::LaunchContext* context, NDArray const& leftInput, NDArray const& rightInput, bool const lower, bool const unitsOnDiag, NDArray& output), FLOAT_TYPES); +*/ + + +} +} } diff --git a/libnd4j/include/ops/declarable/helpers/impl/sparse_to_dense.cpp b/libnd4j/include/ops/declarable/helpers/impl/sparse_to_dense.cpp index bbcb1eca3..4baa36d65 100644 --- a/libnd4j/include/ops/declarable/helpers/impl/sparse_to_dense.cpp +++ b/libnd4j/include/ops/declarable/helpers/impl/sparse_to_dense.cpp @@ -50,6 +50,7 @@ namespace sd { // make sure host buffer is updated values.syncToHost(); indices.syncToHost(); + output.syncToHost(); auto rank = output.rankOf(); diff --git a/libnd4j/include/ops/declarable/helpers/impl/sqrtm.cpp b/libnd4j/include/ops/declarable/helpers/impl/sqrtm.cpp new file mode 100644 index 000000000..b8cc6d8ac --- /dev/null +++ b/libnd4j/include/ops/declarable/helpers/impl/sqrtm.cpp @@ -0,0 +1,66 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License.
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author Yurii Shyrma (iuriish@yahoo.com) +// + + +#include +#include + +namespace sd { +namespace ops { +namespace helpers { + +////////////////////////////////////////////////////////////////////////// +template +static void sqrtm_(const NDArray* x, NDArray* z) { + + + if(x->rankOf() == 2) { + + ops::helpers::Sqrtm::calc(*x, *z); + } + else { + + auto listX = x->allTensorsAlongDimension({-2, -1}); + auto listZ = z->allTensorsAlongDimension({-2, -1}); + + auto func = PRAGMA_THREADS_FOR { + + for (auto i = start; i < stop; i++) + ops::helpers::Sqrtm::calc(*listX.at(i), *listZ.at(i)); + }; + + samediff::Threads::parallel_tad(func, 0, listX.size()); + } +} + + +////////////////////////////////////////////////////////////////////////// +void sqrtm(sd::LaunchContext* context, const NDArray* x, NDArray* z) { + + x->syncToHost(); + BUILD_SINGLE_SELECTOR(z->dataType(), sqrtm_, (x, z), FLOAT_TYPES); + z->syncToDevice(); +} + + + +} +} +} diff --git a/libnd4j/include/ops/declarable/helpers/sqrtm.h b/libnd4j/include/ops/declarable/helpers/sqrtm.h new file mode 100644 index 000000000..2a123d420 --- /dev/null +++ b/libnd4j/include/ops/declarable/helpers/sqrtm.h @@ -0,0 +1,39 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author Yurii Shyrma (iuriish@yahoo.com) +// + +#ifndef LIBND4J_SQRTM_HELPER_H +#define LIBND4J_SQRTM_HELPER_H + +#include +#include "array/NDArray.h" + +namespace sd { +namespace ops { +namespace helpers { + +////////////////////////////////////////////////////////////////////////// +void sqrtm(sd::LaunchContext* context, const NDArray* x, NDArray* z); + + +} +} +} + +#endif //LIBND4J_SQRTM_HELPER_H diff --git a/libnd4j/include/ops/declarable/helpers/triangular_solve.h b/libnd4j/include/ops/declarable/helpers/triangular_solve.h index 73965f8c5..94e0198af 100644 --- a/libnd4j/include/ops/declarable/helpers/triangular_solve.h +++ b/libnd4j/include/ops/declarable/helpers/triangular_solve.h @@ -26,7 +26,9 @@ namespace sd { namespace ops { namespace helpers { - int triangularSolveFunctor(sd::LaunchContext* context, NDArray* leftInput, NDArray* rightInput, bool lower, bool adjoint, NDArray* output); + int triangularSolveFunctor(sd::LaunchContext* context, NDArray* leftInput, NDArray* rightInput, bool lower, bool unitsOnDiag, NDArray* output); + template + void triangularSolve2D(sd::LaunchContext* context, const NDArray& leftInput, const NDArray& rightInput, const bool lower, const bool unitsOnDiag, NDArray& output); void adjointMatrix(sd::LaunchContext* context, NDArray const* input, bool const lower, NDArray* output); } } diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests11.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests11.cpp index 4139e9785..e4391c688 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests11.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests11.cpp @@ -47,7 +47,7 @@ TEST_F(DeclarableOpsTests11, test_listdiff_1) { auto result = op.evaluate({&x, &y}, {}, {}); ASSERT_EQ(Status::OK(), result.status()); - + } /////////////////////////////////////////////////////////////////// @@ -392,10 +392,10 @@ TEST_F(DeclarableOpsTests11, log_loss_grad_test12) { predictions.linspace(0.04, 0.04); labels.linspace(1); weights.assign(0.5); - weights.t(0) = 0.; - weights.t(1) = 0.; - weights.t(2) = 0.; - weights.t(3) = 0.; + weights.r(0) = 0.; + weights.r(1) = 0.; + weights.r(2) = 0.; + weights.r(3) = 0.; sd::ops::log_loss_grad op; @@ -431,9 +431,9 @@ TEST_F(DeclarableOpsTests11, log_loss_grad_test13) { predictions.linspace(0.04, 0.04); labels.linspace(1); weights.assign(0.5); - weights.t(0) = 0.; - weights.t(1) = 0.; - weights.t(2) = 0.; + weights.r(0) = 0.; + weights.r(1) = 0.; + weights.r(2) = 0.; sd::ops::log_loss_grad op; auto results = op.evaluate({&predictions, &weights, &labels}, {1e-7}, {3}); @@ -1608,7 +1608,7 @@ TEST_F(DeclarableOpsTests11, Solve_Test_2) { // z->printIndexedBuffer("Solve 4x4"); ASSERT_TRUE(exp.equalsTo(z)); - + } //////////////////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests11, Solve_Test_3) { @@ -1645,7 +1645,7 @@ TEST_F(DeclarableOpsTests11, Solve_Test_3) { // z->printIndexedBuffer("Solve 4x4"); ASSERT_TRUE(exp.equalsTo(z)); - + } //////////////////////////////////////////////////////////////////////////////// @@ -1678,7 +1678,7 @@ TEST_F(DeclarableOpsTests11, Solve_Test_4) { // exp.printBuffer("4 Expec 4x4"); ASSERT_TRUE(exp.equalsTo(z)); - + } //////////////////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests11, Solve_Test_4_1) { @@ -1707,7 +1707,7 @@ TEST_F(DeclarableOpsTests11, Solve_Test_4_1) { // 
exp.printBuffer("4 Expec 4x4"); ASSERT_TRUE(exp.equalsTo(z)); - + } //////////////////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests11, Solve_Test_4_2) { @@ -1740,7 +1740,7 @@ TEST_F(DeclarableOpsTests11, Solve_Test_4_2) { // exp.printBuffer("4_2 Triangular_Expec 3x3"); ASSERT_TRUE(exp.equalsTo(z)); - + } //////////////////////////////////////////////////////////////////////////////// @@ -1774,7 +1774,7 @@ TEST_F(DeclarableOpsTests11, Solve_Test_4_3) { // exp.printBuffer("4_3 Triangular_Expec 3x3"); ASSERT_TRUE(exp.equalsTo(z)); - + } //////////////////////////////////////////////////////////////////////////////// @@ -1808,7 +1808,7 @@ TEST_F(DeclarableOpsTests11, Solve_Test_4_4) { // exp.printBuffer("4_4 Expec 3x3"); ASSERT_TRUE(exp.equalsTo(z)); - + } //////////////////////////////////////////////////////////////////////////////// @@ -1842,7 +1842,7 @@ TEST_F(DeclarableOpsTests11, Solve_Test_4_5) { // exp.printBuffer("4_5 Expec 3x3"); ASSERT_TRUE(exp.equalsTo(z)); - + } //////////////////////////////////////////////////////////////////////////////// @@ -1876,7 +1876,7 @@ TEST_F(DeclarableOpsTests11, Solve_Test_4_6) { // exp.printBuffer("4_6 Expec 3x3"); ASSERT_TRUE(exp.equalsTo(z)); - + } //////////////////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests11, Solve_Test_4_7) { @@ -1913,7 +1913,7 @@ TEST_F(DeclarableOpsTests11, Solve_Test_4_7) { // exp.printBuffer("4_7 Expec 3x3"); ASSERT_TRUE(exp.equalsTo(z)); - + } //////////////////////////////////////////////////////////////////////////////// @@ -1947,7 +1947,7 @@ TEST_F(DeclarableOpsTests11, Solve_Test_5) { // exp.printBuffer("4 Expec 4x4"); ASSERT_TRUE(exp.equalsTo(z)); - + } //////////////////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests11, SolveLS_Test_1) { @@ -2399,10 +2399,10 @@ TEST_F(DeclarableOpsTests11, mean_sqerr_loss_grad_test12) { predictions.linspace(0.04, 0.04); labels.linspace(1); weights.assign(0.5); - weights.t(0) = 0.; - weights.t(1) = 0.; - weights.t(2) = 0.; - weights.t(3) = 0.; + weights.r(0) = 0.; + weights.r(1) = 0.; + weights.r(2) = 0.; + weights.r(3) = 0.; sd::ops::mean_sqerr_loss_grad op; auto results = op.evaluate({&predictions, &weights, &labels}, {}, {3}); @@ -2436,9 +2436,9 @@ TEST_F(DeclarableOpsTests11, mean_sqerr_loss_grad_test13) { predictions.linspace(0.04, 0.04); labels.linspace(1); weights.assign(0.5); - weights.t(0) = 0.; - weights.t(1) = 0.; - weights.t(2) = 0.; + weights.r(0) = 0.; + weights.r(1) = 0.; + weights.r(2) = 0.; sd::ops::mean_sqerr_loss_grad op; auto results = op.evaluate({&predictions, &weights, &labels}, {}, {3}); @@ -2467,7 +2467,7 @@ TEST_F(DeclarableOpsTests11, SquaredSubtractTest_Test1) { ASSERT_EQ(Status::OK(), result.status()); ASSERT_TRUE(exp.equalsTo(result.at(0))); - + } TEST_F(DeclarableOpsTests11, SquaredSubtractTest_Test2) { @@ -2478,7 +2478,7 @@ TEST_F(DeclarableOpsTests11, SquaredSubtractTest_Test2) { auto result = op.evaluate({&x, &y}, {}, {}); ASSERT_EQ(Status::OK(), result.status()); ASSERT_TRUE(exp.equalsTo(result.at(0))); - + } TEST_F(DeclarableOpsTests11, SquaredSubtractTest_Test3) { @@ -2490,7 +2490,7 @@ TEST_F(DeclarableOpsTests11, SquaredSubtractTest_Test3) { auto result = op.evaluate({&x, &y, &eps}, {}, {}); ASSERT_EQ(Status::OK(), result.status()); ASSERT_TRUE(exp.equalsTo(result.at(0))); - + } /////////////////////////////////////////////////////////////////// @@ -2830,10 +2830,10 @@ TEST_F(DeclarableOpsTests11, 
absolute_difference_loss_grad_test12) { predictions.linspace(0.04, 0.04); labels.linspace(1); weights.assign(0.5); - weights.t(0) = 0.; - weights.t(1) = 0.; - weights.t(2) = 0.; - weights.t(3) = 0.; + weights.r(0) = 0.; + weights.r(1) = 0.; + weights.r(2) = 0.; + weights.r(3) = 0.; sd::ops::absolute_difference_loss_grad op; auto results = op.evaluate({&predictions, &weights, &labels}, {}, {3}); @@ -2867,9 +2867,9 @@ TEST_F(DeclarableOpsTests11, absolute_difference_loss_grad_test13) { predictions.linspace(0.04, 0.04); labels.linspace(1); weights.assign(0.5); - weights.t(0) = 0.; - weights.t(1) = 0.; - weights.t(2) = 0.; + weights.r(0) = 0.; + weights.r(1) = 0.; + weights.r(2) = 0.; sd::ops::absolute_difference_loss_grad op; auto results = op.evaluate({&predictions, &weights, &labels}, {}, {3}); @@ -3305,10 +3305,10 @@ TEST_F(DeclarableOpsTests11, sigm_cross_entropy_loss_grad_test12) { logits.linspace(-0.08, 0.04); labels.linspace(1); weights.assign(0.5); - weights.t(0) = 0.; - weights.t(1) = 0.; - weights.t(2) = 0.; - weights.t(3) = 0.; + weights.r(0) = 0.; + weights.r(1) = 0.; + weights.r(2) = 0.; + weights.r(3) = 0.; sd::ops::sigm_cross_entropy_loss_grad op; @@ -3344,9 +3344,9 @@ TEST_F(DeclarableOpsTests11, sigm_cross_entropy_loss_grad_test13) { logits.linspace(-0.08, 0.04); labels.linspace(1); weights.assign(0.5); - weights.t(0) = 0.; - weights.t(1) = 0.; - weights.t(2) = 0.; + weights.r(0) = 0.; + weights.r(1) = 0.; + weights.r(2) = 0.; sd::ops::sigm_cross_entropy_loss_grad op; auto results = op.evaluate({&logits, &weights, &labels}, {0.3}, {3}); diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests13.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests13.cpp index c37f3fe4a..c7222e6f7 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests13.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests13.cpp @@ -2065,500 +2065,6 @@ TEST_F(DeclarableOpsTests13, lstmLayer_12) { #endif } -/////////////////////////////////////////////////////////////////// -TEST_F(DeclarableOpsTests13, lstmLayer_bp_1) { - - const int sL = 3; - const int bS = 2; - const int nIn = 2; - const int nOut = 3; - - const int dataFormat = 0; // [sL,bS,nIn] - const int directionMode = 0; // forward - const int gateAct = 2; // sigmoid activation for input (i), forget (f) and output (o) gates - const int cellAct = 0; // tanh activation for cell state - const int outAct = 0; // tanh activation for output - - const bool hasBiases = true; // biases array is provided - const bool hasSeqLen = false; // seqLen array is not provided - const auto hasInitH = true; // initial output is provided - const auto hasInitC = true; // initial cell state is provided - const auto hasPH = true; // peephole connections are absent - const auto retFullSeq = true; // dLdh per each time step - const auto retLastH = true; // output at last time step - const auto retLastC = true; // cells state at last time step - - const double cellClip = 0.5; // clipping - - NDArray x('c', {sL, bS, nIn}, sd::DataType::DOUBLE); - NDArray Wx('c', {nIn, 4*nOut}, sd::DataType::DOUBLE); - NDArray Wr('c', {nOut, 4*nOut}, sd::DataType::DOUBLE); - NDArray b('c', {4*nOut}, sd::DataType::DOUBLE); - NDArray hI('c', {bS, nOut}, sd::DataType::DOUBLE); - NDArray cI('c', {bS, nOut}, sd::DataType::DOUBLE); - NDArray Wp('c', {3*nOut}, sd::DataType::DOUBLE); - NDArray dLdh('c', {sL, bS, nOut}, sd::DataType::DOUBLE); - NDArray dLdhL('c', {bS, nOut}, sd::DataType::DOUBLE); - NDArray dLdcL('c', {bS, nOut}, sd::DataType::DOUBLE); - - x.linspace(-2,0.1); - 
hI.linspace(-1.5,0.1); - cI.linspace(0.7,-0.1); - Wx.linspace(1,-0.1); - Wr.linspace(-1,0.1); - Wp.linspace(0.2,0.2); - b.linspace(1,-0.15); - - std::vector tArgs = {cellClip}; - std::vector iArgs = {dataFormat, directionMode, gateAct, cellAct, outAct}; - std::vector bArgs = {hasBiases, hasSeqLen, hasInitH, hasInitC, hasPH, retFullSeq, retLastH, retLastC}; - - const OpArgsHolder argsHolderFF({&x, &Wx, &Wr, &b, &hI, &cI, &Wp}, tArgs, iArgs, bArgs); - const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &hI, &cI, &Wp, &dLdh, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); - - sd::ops::lstmLayer opFF; - sd::ops::lstmLayer_bp opBP; - - const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP); - - ASSERT_TRUE(isGradCorrect); -} - -/////////////////////////////////////////////////////////////////// -TEST_F(DeclarableOpsTests13, lstmLayer_bp_2) { - - const int sL = 3; - const int bS = 2; - const int nIn = 2; - const int nOut = 3; - - const int dataFormat = 1; // [bS,sL,nIn] - const int directionMode = 0; // forward - const int gateAct = 2; // sigmoid activation for input (i), forget (f) and output (o) gates - const int cellAct = 0; // tanh activation for cell state - const int outAct = 0; // tanh activation for output - - const bool hasBiases = true; // biases array is provided - const bool hasSeqLen = false; // seqLen array is not provided - const auto hasInitH = true; // initial output is provided - const auto hasInitC = true; // initial cell state is provided - const auto hasPH = true; // peephole connections are absent - const auto retFullSeq = true; // return whole h {h_0, h_1, ... , h_sL-1}, [sL,bS,nOut] - const auto retLastH = false; // output at last time step - const auto retLastC = true; // cells state at last time step - - const double cellClip = 0.5; // clipping - - NDArray x('c', {bS, sL, nIn}, sd::DataType::DOUBLE); - NDArray Wx('c', {nIn, 4*nOut}, sd::DataType::DOUBLE); - NDArray Wr('c', {nOut, 4*nOut}, sd::DataType::DOUBLE); - NDArray b('c', {4*nOut}, sd::DataType::DOUBLE); - NDArray hI('c', {bS, nOut}, sd::DataType::DOUBLE); - NDArray cI('c', {bS, nOut}, sd::DataType::DOUBLE); - NDArray Wp('c', {3*nOut}, sd::DataType::DOUBLE); - NDArray dLdh('c', {bS, sL, nOut}, sd::DataType::DOUBLE); - NDArray dLdcL('c', {bS, nOut}, sd::DataType::DOUBLE); - - x.linspace(-2,0.1); - hI.linspace(-1.5,0.1); - cI.linspace(0.7,-0.1); - Wx.linspace(1,-0.1); - Wr.linspace(-1,0.1); - Wp.linspace(0.2,0.2); - b.linspace(1,-0.15); - - std::vector tArgs = {cellClip}; - std::vector iArgs = {dataFormat, directionMode, gateAct, cellAct, outAct}; - std::vector bArgs = {hasBiases, hasSeqLen, hasInitH, hasInitC, hasPH, retFullSeq, retLastH, retLastC}; - - const OpArgsHolder argsHolderFF({&x, &Wx, &Wr, &b, &hI, &cI, &Wp}, tArgs, iArgs, bArgs); - const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &hI, &cI, &Wp, &dLdh, &dLdcL}, tArgs, iArgs, bArgs); - - sd::ops::lstmLayer opFF; - sd::ops::lstmLayer_bp opBP; - - const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP, std::vector(), {0., 1.}, GradCheck::LossFunc::MEAN); - - ASSERT_TRUE(isGradCorrect); -} - -/////////////////////////////////////////////////////////////////// -TEST_F(DeclarableOpsTests13, lstmLayer_bp_3) { - - const int sL = 4; - const int bS = 3; - const int nIn = 3; - const int nOut = 2; - - const int dataFormat = 2; // [bS, nIn, sL] - const int directionMode = 0; // forward - const int gateAct = 2; // sigmoid activation for input (i), forget (f) and output (o) gates - const int cellAct = 0; // tanh 
activation for cell state - const int outAct = 0; // tanh activation for output - - const bool hasBiases = true; // biases array is provided - const bool hasSeqLen = true; // seqLen array is not provided - const auto hasInitH = true; // initial output is provided - const auto hasInitC = true; // initial cell state is provided - const auto hasPH = true; // peephole connections are absent - const auto retFullSeq = true; // dLdh per each time step - const auto retLastH = true; // output at last time step - const auto retLastC = true; // cells state at last time step - - const double cellClip = 0.5; // clipping - - NDArray x('c', {bS, nIn, sL}, sd::DataType::DOUBLE); - NDArray Wx('c', {nIn, 4*nOut}, sd::DataType::DOUBLE); - NDArray Wr('c', {nOut, 4*nOut}, sd::DataType::DOUBLE); - NDArray b('c', {4*nOut}, sd::DataType::DOUBLE); - NDArray seqLen('c', {bS}, {2,0,4}, sd::DataType::DOUBLE); - NDArray hI('c', {bS, nOut}, sd::DataType::DOUBLE); - NDArray cI('c', {bS, nOut}, sd::DataType::DOUBLE); - NDArray Wp('c', {3*nOut}, sd::DataType::DOUBLE); - NDArray dLdh('c', {bS, nOut, sL}, sd::DataType::DOUBLE); - NDArray dLdhL('c', {bS, nOut}, sd::DataType::DOUBLE); - NDArray dLdcL('c', {bS, nOut}, sd::DataType::DOUBLE); - - x.linspace(-2,0.1); - hI.linspace(-1.5,0.1); - cI.linspace(0.7,-0.1); - Wx.linspace(1,-0.1); - Wr.linspace(-1,0.1); - Wp.linspace(0.2,0.2); - b.linspace(1,-0.15); - - std::vector tArgs = {cellClip}; - std::vector iArgs = {dataFormat, directionMode, gateAct, cellAct, outAct}; - std::vector bArgs = {hasBiases, hasSeqLen, hasInitH, hasInitC, hasPH, retFullSeq, retLastH, retLastC}; - - const OpArgsHolder argsHolderFF({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp}, tArgs, iArgs, bArgs); - const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); - - sd::ops::lstmLayer opFF; - sd::ops::lstmLayer_bp opBP; - - const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP, {true, true, true, true, false, true, true, true}); - - ASSERT_TRUE(isGradCorrect); -} - -/////////////////////////////////////////////////////////////////// -TEST_F(DeclarableOpsTests13, lstmLayer_bp_4) { - - const int sL = 3; - const int bS = 2; - const int nIn = 2; - const int nOut = 3; - - const int dataFormat = 1; // [bS,sL,nIn] - const int directionMode = 1; // backward - const int gateAct = 2; // sigmoid activation for input (i), forget (f) and output (o) gates - const int cellAct = 0; // tanh activation for cell state - const int outAct = 0; // tanh activation for output - - const bool hasBiases = true; // biases array is provided - const bool hasSeqLen = false; // seqLen array is not provided - const auto hasInitH = true; // initial output is provided - const auto hasInitC = true; // initial cell state is provided - const auto hasPH = true; // peephole connections are absent - const auto retFullSeq = true; // dLdh per each time step - const auto retLastH = true; // output at last time step - const auto retLastC = true; // cells state at last time step - - const double cellClip = 0.5; // clipping - - NDArray x('c', {bS, sL, nIn}, sd::DataType::DOUBLE); - NDArray Wx('c', {nIn, 4*nOut}, sd::DataType::DOUBLE); - NDArray Wr('c', {nOut, 4*nOut}, sd::DataType::DOUBLE); - NDArray b('c', {4*nOut}, sd::DataType::DOUBLE); - NDArray hI('c', {bS, nOut}, sd::DataType::DOUBLE); - NDArray cI('c', {bS, nOut}, sd::DataType::DOUBLE); - NDArray Wp('c', {3*nOut}, sd::DataType::DOUBLE); - NDArray dLdh('c', {bS, sL, nOut}, sd::DataType::DOUBLE); - NDArray 
dLdhL('c', {bS, nOut}, sd::DataType::DOUBLE); - NDArray dLdcL('c', {bS, nOut}, sd::DataType::DOUBLE); - - x.linspace(-2,0.1); - hI.linspace(-1.5,0.1); - cI.linspace(0.7,-0.1); - Wx.linspace(1,-0.1); - Wr.linspace(-1,0.1); - Wp.linspace(0.2,0.2); - b.linspace(1,-0.15); - - std::vector tArgs = {cellClip}; - std::vector iArgs = {dataFormat, directionMode, gateAct, cellAct, outAct}; - std::vector bArgs = {hasBiases, hasSeqLen, hasInitH, hasInitC, hasPH, retFullSeq, retLastH, retLastC}; - - const OpArgsHolder argsHolderFF({&x, &Wx, &Wr, &b, &hI, &cI, &Wp}, tArgs, iArgs, bArgs); - const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &hI, &cI, &Wp, &dLdh, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); - - sd::ops::lstmLayer opFF; - sd::ops::lstmLayer_bp opBP; - - const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP); - - ASSERT_TRUE(isGradCorrect); -} - -/////////////////////////////////////////////////////////////////// -TEST_F(DeclarableOpsTests13, lstmLayer_bp_5) { - - const int sL = 3; - const int bS = 2; - const int nIn = 2; - const int nOut = 2; - - const int dataFormat = 2; // [bS, nIn, sL] - const int directionMode = 1; // backward - const int gateAct = 2; // sigmoid activation for input (i), forget (f) and output (o) gates - const int cellAct = 0; // tanh activation for cell state - const int outAct = 0; // tanh activation for output - - const bool hasBiases = true; // biases array is provided - const bool hasSeqLen = true; // seqLen array is not provided - const auto hasInitH = true; // initial output is provided - const auto hasInitC = true; // initial cell state is provided - const auto hasPH = true; // peephole connections are absent - const auto retFullSeq = true; // dLdh per each time step - const auto retLastH = true; // output at last time step - const auto retLastC = true; // cells state at last time step - - const double cellClip = 0.5; // clipping - - NDArray x('c', {bS, nIn, sL}, sd::DataType::DOUBLE); - NDArray Wx('c', {nIn, 4*nOut}, sd::DataType::DOUBLE); - NDArray Wr('c', {nOut, 4*nOut}, sd::DataType::DOUBLE); - NDArray b('c', {4*nOut}, sd::DataType::DOUBLE); - NDArray seqLen('c', {bS}, {0,2}, sd::DataType::DOUBLE); - NDArray hI('c', {bS, nOut}, sd::DataType::DOUBLE); - NDArray cI('c', {bS, nOut}, sd::DataType::DOUBLE); - NDArray Wp('c', {3*nOut}, sd::DataType::DOUBLE); - NDArray dLdh('c', {bS, nOut, sL}, sd::DataType::DOUBLE); - NDArray dLdhL('c', {bS, nOut}, sd::DataType::DOUBLE); - NDArray dLdcL('c', {bS, nOut}, sd::DataType::DOUBLE); - - x.linspace(-2,0.1); - hI.linspace(-1.5,0.1); - cI.linspace(0.7,-0.1); - Wx.linspace(1,-0.1); - Wr.linspace(-1,0.1); - Wp.linspace(0.2,0.2); - b.linspace(1,-0.15); - - std::vector tArgs = {cellClip}; - std::vector iArgs = {dataFormat, directionMode, gateAct, cellAct, outAct}; - std::vector bArgs = {hasBiases, hasSeqLen, hasInitH, hasInitC, hasPH, retFullSeq, retLastH, retLastC}; - - const OpArgsHolder argsHolderFF({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp}, tArgs, iArgs, bArgs); - const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); - - sd::ops::lstmLayer opFF; - sd::ops::lstmLayer_bp opBP; - - const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP, {true, true, true, true, false, true, true, true}); - - ASSERT_TRUE(isGradCorrect); -} - -/////////////////////////////////////////////////////////////////// -TEST_F(DeclarableOpsTests13, lstmLayer_bp_6) { - - const int sL = 3; - const int bS = 2; - const int nIn = 2; - 
const int nOut = 2; - - const int dataFormat = 2; // [bS, nIn, sL] - const int directionMode = 2; // bidirectional sum - const int gateAct = 2; // sigmoid activation for input (i), forget (f) and output (o) gates - const int cellAct = 0; // tanh activation for cell state - const int outAct = 0; // tanh activation for output - - const bool hasBiases = true; // biases array is provided - const bool hasSeqLen = true; // seqLen array is not provided - const auto hasInitH = true; // initial output is provided - const auto hasInitC = true; // initial cell state is provided - const auto hasPH = true; // peephole connections are absent - const auto retFullSeq = true; // dLdh per each time step - const auto retLastH = true; // output at last time step - const auto retLastC = true; // cells state at last time step - - const double cellClip = 0.5; // clipping - - NDArray x('c', {bS, nIn, sL}, sd::DataType::DOUBLE); - NDArray Wx('c', {2, nIn, 4*nOut}, sd::DataType::DOUBLE); - NDArray Wr('c', {2, nOut, 4*nOut}, sd::DataType::DOUBLE); - NDArray b('c', {2, 4*nOut}, sd::DataType::DOUBLE); - NDArray seqLen('c', {bS}, {0,2}, sd::DataType::DOUBLE); - NDArray hI('c', {2, bS, nOut}, sd::DataType::DOUBLE); - NDArray cI('c', {2, bS, nOut}, sd::DataType::DOUBLE); - NDArray Wp('c', {2, 3*nOut}, sd::DataType::DOUBLE); - NDArray dLdh('c', {bS, nOut, sL}, sd::DataType::DOUBLE); - NDArray dLdhL('c', {2, bS, nOut}, sd::DataType::DOUBLE); - NDArray dLdcL('c', {2, bS, nOut}, sd::DataType::DOUBLE); - - x.linspace(-2,0.1); - hI.linspace(-1.5,0.1); - cI.linspace(0.7,-0.1); - Wx.linspace(1,-0.1); - Wr.linspace(-1,0.1); - Wp.linspace(0.2,0.2); - b.linspace(1,-0.15); - - std::vector tArgs = {cellClip}; - std::vector iArgs = {dataFormat, directionMode, gateAct, cellAct, outAct}; - std::vector bArgs = {hasBiases, hasSeqLen, hasInitH, hasInitC, hasPH, retFullSeq, retLastH, retLastC}; - - const OpArgsHolder argsHolderFF({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp}, tArgs, iArgs, bArgs); - const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); - // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdhL}, tArgs, iArgs, bArgs); - // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdcL}, tArgs, iArgs, bArgs); - // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); - // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh}, tArgs, iArgs, bArgs); - // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdhL}, tArgs, iArgs, bArgs); - // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdcL}, tArgs, iArgs, bArgs); - - sd::ops::lstmLayer opFF; - sd::ops::lstmLayer_bp opBP; - - const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP, {true, true, true, true, false, true, true, true}); - - ASSERT_TRUE(isGradCorrect); -} - -/////////////////////////////////////////////////////////////////// -TEST_F(DeclarableOpsTests13, lstmLayer_bp_7) { - - const int sL = 3; - const int bS = 2; - const int nIn = 2; - const int nOut = 2; - - const int dataFormat = 1; // [bS,sL,nIn] - const int directionMode = 3; // bidirectional concat - const int gateAct = 2; // sigmoid activation for input (i), forget (f) and output (o) gates - const int cellAct = 0; // tanh activation for cell state - const int outAct = 0; // tanh activation for output - - const bool 
hasBiases = true; // biases array is provided - const bool hasSeqLen = true; // seqLen array is not provided - const auto hasInitH = true; // initial output is provided - const auto hasInitC = true; // initial cell state is provided - const auto hasPH = true; // peephole connections are absent - const auto retFullSeq = true; // dLdh per each time step - const auto retLastH = true; // output at last time step - const auto retLastC = true; // cells state at last time step - - const double cellClip = 0.5; // clipping - - NDArray x('c', {bS,sL,nIn}, sd::DataType::DOUBLE); - NDArray Wx('c', {2, nIn, 4*nOut}, sd::DataType::DOUBLE); - NDArray Wr('c', {2, nOut, 4*nOut}, sd::DataType::DOUBLE); - NDArray b('c', {2, 4*nOut}, sd::DataType::DOUBLE); - NDArray seqLen('c', {bS}, {0,2}, sd::DataType::DOUBLE); - NDArray hI('c', {2, bS, nOut}, sd::DataType::DOUBLE); - NDArray cI('c', {2, bS, nOut}, sd::DataType::DOUBLE); - NDArray Wp('c', {2, 3*nOut}, sd::DataType::DOUBLE); - NDArray dLdh('c', {bS,sL,2*nOut}, sd::DataType::DOUBLE); - NDArray dLdhL('c', {2, bS, nOut}, sd::DataType::DOUBLE); - NDArray dLdcL('c', {2, bS, nOut}, sd::DataType::DOUBLE); - - x.linspace(-2,0.1); - hI.linspace(-1.5,0.1); - cI.linspace(0.7,-0.1); - Wx.linspace(1,-0.1); - Wr.linspace(-1,0.1); - Wp.linspace(0.2,0.2); - b.linspace(1,-0.15); - - std::vector tArgs = {cellClip}; - std::vector iArgs = {dataFormat, directionMode, gateAct, cellAct, outAct}; - std::vector bArgs = {hasBiases, hasSeqLen, hasInitH, hasInitC, hasPH, retFullSeq, retLastH, retLastC}; - - const OpArgsHolder argsHolderFF({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp}, tArgs, iArgs, bArgs); - const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); - // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdhL}, tArgs, iArgs, bArgs); - // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdcL}, tArgs, iArgs, bArgs); - // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); - // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh}, tArgs, iArgs, bArgs); - // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdhL}, tArgs, iArgs, bArgs); - // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdcL}, tArgs, iArgs, bArgs); - - sd::ops::lstmLayer opFF; - sd::ops::lstmLayer_bp opBP; - - const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP, {true, true, true, true, false, true, true, true}); - - ASSERT_TRUE(isGradCorrect); -} - -/////////////////////////////////////////////////////////////////// -TEST_F(DeclarableOpsTests13, lstmLayer_bp_8) { - - const int sL = 3; - const int bS = 2; - const int nIn = 2; - const int nOut = 2; - - const int dataFormat = 3; // [sL, bS, nIn] - const int directionMode = 4; // bidirectional extra output dim - const int gateAct = 2; // sigmoid activation for input (i), forget (f) and output (o) gates - const int cellAct = 0; // tanh activation for cell state - const int outAct = 0; // tanh activation for output - - const bool hasBiases = true; // biases array is provided - const bool hasSeqLen = true; // seqLen array is not provided - const auto hasInitH = true; // initial output is provided - const auto hasInitC = true; // initial cell state is provided - const auto hasPH = true; // peephole connections are absent - const auto retFullSeq = true; // 
dLdh per each time step - const auto retLastH = true; // output at last time step - const auto retLastC = true; // cells state at last time step - - const double cellClip = 0.5; // clipping - - NDArray x('c', {sL, bS, nIn}, sd::DataType::DOUBLE); - NDArray Wx('c', {2, nIn, 4*nOut}, sd::DataType::DOUBLE); - NDArray Wr('c', {2, nOut, 4*nOut}, sd::DataType::DOUBLE); - NDArray b('c', {2, 4*nOut}, sd::DataType::DOUBLE); - NDArray seqLen('c', {bS}, {0,2}, sd::DataType::DOUBLE); - NDArray hI('c', {2, bS, nOut}, sd::DataType::DOUBLE); - NDArray cI('c', {2, bS, nOut}, sd::DataType::DOUBLE); - NDArray Wp('c', {2, 3*nOut}, sd::DataType::DOUBLE); - NDArray dLdh('c', {sL, 2, bS, nOut}, sd::DataType::DOUBLE); - NDArray dLdhL('c', {2, bS, nOut}, sd::DataType::DOUBLE); - NDArray dLdcL('c', {2, bS, nOut}, sd::DataType::DOUBLE); - - x.linspace(-2,0.1); - hI.linspace(-1.5,0.1); - cI.linspace(0.7,-0.1); - Wx.linspace(1,-0.1); - Wr.linspace(-1,0.1); - Wp.linspace(0.2,0.2); - b.linspace(1,-0.15); - - std::vector tArgs = {cellClip}; - std::vector iArgs = {dataFormat, directionMode, gateAct, cellAct, outAct}; - std::vector bArgs = {hasBiases, hasSeqLen, hasInitH, hasInitC, hasPH, retFullSeq, retLastH, retLastC}; - - const OpArgsHolder argsHolderFF({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp}, tArgs, iArgs, bArgs); - const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); - // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdhL}, tArgs, iArgs, bArgs); - // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdcL}, tArgs, iArgs, bArgs); - // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); - // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh}, tArgs, iArgs, bArgs); - // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdhL}, tArgs, iArgs, bArgs); - // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdcL}, tArgs, iArgs, bArgs); - - sd::ops::lstmLayer opFF; - sd::ops::lstmLayer_bp opBP; - - const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP, {true, true, true, true, false, true, true, true}); - - ASSERT_TRUE(isGradCorrect); -} - //////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests13, batchnorm_test1) { diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests15.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests15.cpp index 3d86cd92b..e01900e87 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests15.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests15.cpp @@ -1923,7 +1923,6 @@ TEST_F(DeclarableOpsTests15, TestTensorMmul_BP17) { ASSERT_TRUE(isGradCorrect); } - ////////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests15, gru_1) { @@ -1960,31 +1959,67 @@ TEST_F(DeclarableOpsTests15, gru_1) { } ////////////////////////////////////////////////////////////////////// -TEST_F(DeclarableOpsTests15, gru_bp_1) { +TEST_F(DeclarableOpsTests15, sqrtm_1) { - const int sL = 3; - const int bS = 2; - const int nIn = 5; - const int nOut = 4; + NDArray x1('c', {1,1}, {4.}, sd::DataType::DOUBLE); + NDArray x2('c', {2,2}, {1.3,2,0.3,.5}, sd::DataType::DOUBLE); + NDArray x3('c', {3,3}, {0.5 ,-0.4 ,1.2 ,-2.8 ,-0.2 ,-2.1 ,-2.4 ,-2.0 ,1.1}, sd::DataType::DOUBLE); + NDArray x4('c', {4,4}, {0.33 ,-7.25 ,1.71 ,6.20 ,1.34 
,5.38 ,-2.76 ,-8.51 ,7.59 ,3.44 ,2.24 ,-6.82 ,-1.15 ,4.80 ,-4.67 ,2.14}, sd::DataType::DOUBLE); + NDArray x5('c', {5,5}, {2.4 ,0.3 ,0.0 ,1.1 ,1.8 ,0.1 ,1.7 ,2.7 ,1.5 ,2.6 ,0.6 ,2.1 ,2.2 ,1.0 ,0.2 ,1.2 ,2.8 ,1.9 ,0.8 ,2.0 ,0.5 ,1.6 ,0.9 ,1.4 ,2.5}, sd::DataType::DOUBLE); + NDArray exp1('c', {1,1}, {2.}, sd::DataType::DOUBLE); + NDArray exp2('c', {2,2}, {1.0163674, 1.3341597,0.200124, 0.4827035}, sd::DataType::DOUBLE); + NDArray exp3('c', {3,3}, {6.5692188, 2.6273616,-0.1387864,-16.8404762,-7.0296495, 0.9204148,-11.4664296,-5.834273 , 2.2087478}, sd::DataType::DOUBLE); + NDArray exp4('c', {4,4}, {1.161387 ,-1.9343154, 0.230372 , 0.8660897,0.80588 , 3.4045446,-1.0152824,-2.0369467,2.2589629, 1.9674252, 1.5109997,-1.4283141,0.0226356, 1.3032279,-1.00396 , 1.8278487}, sd::DataType::DOUBLE); + NDArray exp5('c', {5,5}, {1.4175046,-0.4425298, 0.1846149, 0.3166522, 0.9140631,-0.1929139, 0.2889113, 1.4045273, 0.2600026, 1.552021 , 0.1372758, 0.5703854, 1.3336126, 0.3869317,-0.082492 , + 0.8607272, 3.1792474,-0.9499947, 0.8541668,-1.4243879, 0.0081136,-0.0622248, 0.4534325, 0.4641865, 1.8132138}, sd::DataType::DOUBLE); - NDArray x('c', {sL, bS, nIn}, {0.5, 1. , 1.5, 2. , 2.5, 3. , 3.5, 4. , 4.5, 5. , 5.5, 6. , 6.5, 7. , 7.5, 8. , 8.5, 9. , 9.5, 10. , 10.5, 11. , 11.5, 12. , 12.5, 13. , 13.5, 14. , 14.5, 15.}, sd::DataType::DOUBLE); - NDArray hI('c', {bS, nOut}, {-3,-2,-1,0,1,2,3,4}, sd::DataType::DOUBLE); - NDArray Wx('c', {nIn, 3*nOut}, sd::DataType::DOUBLE); - NDArray Wh('c', {nOut, 3*nOut}, sd::DataType::DOUBLE); - NDArray b('c', {3*nOut}, sd::DataType::DOUBLE); + sd::ops::sqrtm op; - NDArray dLdh('c', {sL, bS, nOut}, sd::DataType::DOUBLE); + auto results = op.evaluate({&x1}, {}, {}); + ASSERT_EQ(ND4J_STATUS_OK, results.status()); + ASSERT_TRUE(exp1.isSameShape(results.at(0))); + ASSERT_TRUE(exp1.equalsTo(results.at(0))); - Wx.linspace(1,-0.1); - Wh.linspace(0.2,0.2); - b.linspace(1,-0.15); + results = op.evaluate({&x2}, {}, {}); + ASSERT_EQ(ND4J_STATUS_OK, results.status()); + ASSERT_TRUE(exp2.isSameShape(results.at(0))); + ASSERT_TRUE(exp2.equalsTo(results.at(0))); - const OpArgsHolder argsHolderFF({&x, &hI, &Wx, &Wh, &b}, {}, {}); - const OpArgsHolder argsHolderBP({&x, &hI, &Wx, &Wh, &b, &dLdh}, {}, {}); + results = op.evaluate({&x3}, {}, {}); + ASSERT_EQ(ND4J_STATUS_OK, results.status()); + ASSERT_TRUE(exp3.isSameShape(results.at(0))); + ASSERT_TRUE(exp3.equalsTo(results.at(0))); - sd::ops::gru opFF; - sd::ops::gru_bp opBP; + results = op.evaluate({&x4}, {}, {}); + ASSERT_EQ(ND4J_STATUS_OK, results.status()); + ASSERT_TRUE(exp4.isSameShape(results.at(0))); + ASSERT_TRUE(exp4.equalsTo(results.at(0))); - const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP); + results = op.evaluate({&x5}, {}, {}); + ASSERT_EQ(ND4J_STATUS_OK, results.status()); + ASSERT_TRUE(exp5.isSameShape(results.at(0))); + ASSERT_TRUE(exp5.equalsTo(results.at(0))); +} + +////////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests15, sqrtm_2) { + + NDArray x('c', {10,10}, {-0.3 ,2.7 ,4.9 ,7.0 ,7.3 ,-1.3 ,0.5 ,9.9 ,-9.4 ,8.4 ,2.2 ,5.2 ,7.6 ,1.2 ,2.0 ,-3.8 ,2.1 ,6.1 ,1.6 ,6.9 ,5.1 ,5.3 ,6.4 ,8.7 ,0.1 ,8.5 , + 3.3 ,1.0 ,6.8 ,0.4 ,0.7 ,3.2 ,7.4 ,6.7 ,1.1 ,7.2 ,6.0 ,7.5 ,9.7 ,5.4 ,9.0 ,6.3 ,0.0 ,4.5 ,8.3 ,7.9 ,3.0 ,6.5 ,0.6 ,8.0 ,9.5 ,3.6 ,1.9 ,6.2 ,0.9 ,4.0 ,4.1 , + 8.1 ,3.9 ,4.3 ,4.7 ,3.7 ,3.4 ,5.8 ,10.0 ,8.6 ,9.3 ,9.1 ,4.6 ,1.4 ,7.8 ,1.5 ,7.7 ,4.2 ,9.6 ,8.2 ,-7.1 ,5.7 ,5.5 ,2.6 ,8.8 ,2.9 ,0.2 ,5.6 ,-2.5 ,8.9 ,2.8 ,0.8 ,1.5 ,3.1 ,3.5 ,4.4 ,2.4 ,9.2 ,-4.8 ,1.7 
,6.6 ,9.8 ,1.8 ,5.9}, sd::DataType::DOUBLE);
+
+ NDArray expZ('c', {10,10}, {1.2779038, 0.0333321, 0.8215617, 0.5736392, 1.3973911, -1.1757741,0.1990005, 1.5893778, -3.0159568, 2.5829108,0.5692253, 2.219431 , 1.022612 , -0.3131795, -0.1957848, -1.7805065,
+ 0.6668489, 1.1968921, 0.9781974, 1.2007764,0.7028634, 0.7496937, 2.2511438, 2.1945378, 0.2559353, 2.8948612,-0.4306994, -0.9922216, 0.3884369, -1.4174481,
+ -1.6060233, 0.1571057, 1.432471 , 0.4508346, 0.0618069, -2.4511742,2.0641709, 2.4751085, 1.84787 , 3.4146313,0.7774219, 0.768369 , -0.1417226, -0.3970577, 2.9512879, 0.5474537,
+ 0.4991412, 0.7604095, 0.4523091, 1.7813704,2.5998339, 0.9402402, -0.82775 , 2.3637147, -0.6394584, 4.6181937,-0.1762181, -0.2820475, 0.9280713, -2.1876918,
+ 0.1576249, 0.336376 , 0.2017592, 0.851786 , 1.3542577, 1.2752901,2.9718476, 1.1102557, 0.0067319, -0.2652283,0.8839235, -0.2637131, 1.5687876, 0.5156139, 1.9015886, 0.9087172,
+ -1.5607482, 2.4216275, 1.0399745, -0.4930439,1.3044354, 0.1690006, 0.2106909, -0.2683631, -0.4193939, 1.0233265,0.4571777, -0.2024148, 2.3564855, 1.0442339,
+ 1.1073322, 1.0728525, -0.5917566, 2.2267418, -1.6096582, 2.0685315,0.6800798, 0.4451858, -0.4048465, 1.2347676}, sd::DataType::DOUBLE);
+ sd::ops::sqrtm op;
+
+ auto results = op.evaluate({&x}, {}, {});
+ ASSERT_EQ(ND4J_STATUS_OK, results.status());
+ ASSERT_TRUE(expZ.isSameShape(results.at(0)));
+ ASSERT_TRUE(expZ.equalsTo(results.at(0)));
 }
diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests19.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests19.cpp
index f111a888a..5f1aefe36 100644
--- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests19.cpp
+++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests19.cpp
@@ -241,6 +241,7 @@ TEST_F(DeclarableOpsTests19, test_threshold_encode_decode) {
 ASSERT_EQ(exp, initial);
 }
+#ifdef _RELEASE
 TEST_F(DeclarableOpsTests19, test_threshold_encode_decode_2) {
 // [2,1,135079944,1,1,8192,1,99]
 auto initial = NDArrayFactory::create<float>('c', {1, 135079944});
@@ -287,6 +288,7 @@ TEST_F(DeclarableOpsTests19, test_threshold_encode_decode_2) {
 ASSERT_EQ(exp, initial);
 }
+#endif
diff --git a/libnd4j/tests_cpu/layers_tests/HelpersTests1.cpp b/libnd4j/tests_cpu/layers_tests/HelpersTests1.cpp
index e25bd0144..fae8c4918 100644
--- a/libnd4j/tests_cpu/layers_tests/HelpersTests1.cpp
+++ b/libnd4j/tests_cpu/layers_tests/HelpersTests1.cpp
@@ -45,61 +45,41 @@ public:
 };
-#ifndef __CUDABLAS__
-
-TEST_F(HelpersTests1, test_binary_search_1) {
- std::array<int, 10> array = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
-
- auto idx = sd::ops::helpers::binarySearch(array.data(), 2, 10);
- ASSERT_EQ(2, idx);
-}
-
-TEST_F(HelpersTests1, test_binary_search_2) {
- std::array<int, 10> array = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
-
- auto idx = sd::ops::helpers::binarySearch(array.data(), 18, 10);
- ASSERT_EQ(-1, idx);
-}
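The constants asserted in evalHHmatrixData_test1 below are consistent with the textbook Householder construction. For x = {14, 17, 3, 1}: normX = -||x|| = -22.2486 (sign chosen opposite to x[0]), tail = x[1:] / (x[0] - normX), and coeff = (x[0] - normX) / (-normX) = 1.62925; likewise, the first column of the expected matrix in the commented-out evalHHmatrix_test1 equals x / normX. A minimal sketch reproducing those numbers under these assumed formulas (the helper's own implementation is not part of this patch):

#include <cmath>
#include <cstdio>

int main() {
    const double x[4] = {14, 17, 3, 1};          // input of evalHHmatrixData_test1

    double norm2 = 0;
    for (int i = 0; i < 4; ++i) norm2 += x[i] * x[i];
    const double normX = -std::sqrt(norm2);      // -22.2486, matches normXExpected
    const double u0    = x[0] - normX;           //  36.2486
    const double coeff = u0 / -normX;            //  1.62925, matches coeffExpected

    std::printf("normX = %.4f coeff = %.5f tail =", normX, coeff);
    for (int i = 1; i < 4; ++i)
        std::printf(" %.7g", x[i] / u0);         // 0.468984 0.0827618 0.0275873
    std::printf("\n");
    return 0;
}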
-///////////////////////////////////////////////////////////////////
-TEST_F(HelpersTests1, evalHHmatrix_test1) {
+// ///////////////////////////////////////////////////////////////////
+// TEST_F(HelpersTests1, evalHHmatrix_test1) {
- auto x = NDArrayFactory::create<double>('c', {1,4}, {14,17,3,1});
- auto exp = NDArrayFactory::create<double>('c', {4,4}, {-0.629253, -0.764093, -0.13484, -0.0449467, -0.764093, 0.641653, -0.0632377, -0.0210792, -0.13484,-0.0632377, 0.98884,-0.00371987, -0.0449467,-0.0210792,-0.00371987, 0.99876});
+// auto x = NDArrayFactory::create<double>('c', {4}, {14,17,3,1});
+// auto exp = NDArrayFactory::create<double>('c', {4,4}, {-0.629253, -0.764093, -0.13484, -0.0449467, -0.764093, 0.641653, -0.0632377, -0.0210792, -0.13484,-0.0632377, 0.98884,-0.00371987, -0.0449467,-0.0210792,-0.00371987, 0.99876});
- auto result = ops::helpers::Householder<double>::evalHHmatrix(x);
- ASSERT_TRUE(result.isSameShape(&exp));
- ASSERT_TRUE(result.equalsTo(&exp));
+// auto result = ops::helpers::Householder<double>::evalHHmatrix(x);
+// ASSERT_TRUE(result.isSameShape(&exp));
+// ASSERT_TRUE(result.equalsTo(&exp));
-}
+// }
-///////////////////////////////////////////////////////////////////
-TEST_F(HelpersTests1, evalHHmatrix_test2) {
+// ///////////////////////////////////////////////////////////////////
+// TEST_F(HelpersTests1, evalHHmatrix_test2) {
- #ifdef __CUDABLAS__
- return;
- #endif
- auto x = NDArrayFactory::create<double>('c', {1,3}, {14,-4,3});
- auto exp = NDArrayFactory::create<double>('c', {3,3}, {-0.941742, 0.269069,-0.201802, 0.269069, 0.962715,0.0279639, -0.201802,0.0279639, 0.979027});
+// #ifdef __CUDABLAS__
+// return;
+// #endif
+// auto x = NDArrayFactory::create<double>('c', {3}, {14,-4,3});
+// auto exp = NDArrayFactory::create<double>('c', {3,3}, {-0.941742, 0.269069,-0.201802, 0.269069, 0.962715,0.0279639, -0.201802,0.0279639, 0.979027});
- auto result = ops::helpers::Householder<double>::evalHHmatrix(x);
+// auto result = ops::helpers::Householder<double>::evalHHmatrix(x);
- ASSERT_TRUE(result.isSameShape(&exp));
- ASSERT_TRUE(result.equalsTo(&exp));
-
-}
+// ASSERT_TRUE(result.isSameShape(&exp));
+// ASSERT_TRUE(result.equalsTo(&exp));
+// }
 /////////////////////////////////////////////////////////////////
 TEST_F(HelpersTests1, evalHHmatrixData_test1) {
- #ifdef __CUDABLAS__
- return;
- #endif
- auto x = NDArrayFactory::create<double>('c', {1,4}, {14,17,3,1});
- auto tail = NDArrayFactory::create<double>('c', {1,3});
- auto expTail = NDArrayFactory::create<double>('c', {1,3}, {0.468984, 0.0827618, 0.0275873});
+ auto x = NDArrayFactory::create<double>('c', {4}, {14,17,3,1});
+ auto tail = NDArrayFactory::create<double>('c', {3});
+ auto expTail = NDArrayFactory::create<double>('c', {3}, {0.468984, 0.0827618, 0.0275873});
 const double normXExpected = -22.2486;
 const double coeffExpected = 1.62925;
@@ -110,34 +90,24 @@ TEST_F(HelpersTests1, evalHHmatrixData_test1) {
 ASSERT_NEAR(coeff, coeffExpected, 1e-5);
 ASSERT_TRUE(tail.isSameShapeStrict(expTail));
 ASSERT_TRUE(tail.equalsTo(&expTail));
-
 }
-
 /////////////////////////////////////////////////////////////////
 TEST_F(HelpersTests1, Householder_mulLeft_test1) {
- #ifdef __CUDABLAS__
- return;
- #endif
 auto x = NDArrayFactory::create<double>('c', {4,4}, {12 ,19 ,14 ,3 ,10 ,4 ,17 ,19 ,19 ,18 ,5 ,3 ,6 ,4 ,2 ,16});
 auto tail = NDArrayFactory::create<double>('c', {1,3}, {0.5,0.5,0.5});
 auto exp = NDArrayFactory::create<double>('c', {4,4}, {9.05,15.8,11.4, 0.8, 8.525, 2.4,15.7,17.9, 17.525,16.4, 3.7, 1.9, 4.525, 2.4, 0.7,14.9});
 ops::helpers::Householder<double>::mulLeft(x, tail, 0.1);
- // expTail.printShapeInfo();
 ASSERT_TRUE(x.isSameShapeStrict(exp));
 ASSERT_TRUE(x.equalsTo(&exp));
-
 }
 /////////////////////////////////////////////////////////////////
 TEST_F(HelpersTests1, Householder_mulLeft_test2) {
- #ifdef __CUDABLAS__
- return;
- #endif
 auto x = NDArrayFactory::create<double>('c', {4,4}, {12 ,19 ,14 ,3 ,10 ,4 ,17 ,19 ,19 ,18 ,5 ,3 ,6 ,4 ,2 ,16});
 auto tail = NDArrayFactory::create<double>('c', {3,1}, {0.5,0.5,0.5});
 auto exp = NDArrayFactory::create<double>('c', {4,4}, {9.05,15.8,11.4, 0.8, 8.525, 2.4,15.7,17.9, 17.525,16.4, 3.7, 1.9, 4.525, 2.4, 0.7,14.9});
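The expected outputs of the two mulLeft tests above are consistent with applying the reflector H = I - beta * w * w^T from the left without ever materializing H: with the implicit Householder vector w = [1, tail], the update is x <- x - beta * w * (w^T * x), applied to each column. A minimal sketch over the first column of x in mulLeft_test1, assuming that formula:

#include <cstdio>

int main() {
    const double x0[4] = {12, 10, 19, 6};    // first column of x in mulLeft_test1
    const double w[4]  = {1, 0.5, 0.5, 0.5}; // implicit vector [1, tail]
    const double beta  = 0.1;                // the coefficient passed to mulLeft

    double dot = 0;                          // w^T * x0 = 12 + 5 + 9.5 + 3 = 29.5
    for (int i = 0; i < 4; ++i) dot += w[i] * x0[i];

    for (int i = 0; i < 4; ++i)              // x0 - beta * dot * w
        std::printf("%g ", x0[i] - beta * dot * w[i]);
    std::printf("\n");                       // 9.05 8.525 17.525 4.525 = column 0 of exp
    return 0;
}

The remaining columns of exp come out the same way, and the expected values of mulRight_test1 below match the mirror-image update applied from the right, x <- x - beta * (x * w) * w^T.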
@@ -152,9 +122,6 @@ TEST_F(HelpersTests1, Householder_mulLeft_test2) {
 /////////////////////////////////////////////////////////////////
 TEST_F(HelpersTests1, Householder_mulRight_test1) {
- #ifdef __CUDABLAS__
- return;
- #endif
 auto x = NDArrayFactory::create<double>('c', {4,4}, {12 ,19 ,14 ,3 ,10 ,4 ,17 ,19 ,19 ,18 ,5 ,3 ,6 ,4 ,2 ,16});
 auto tail = NDArrayFactory::create<double>('c', {1,3}, {0.5,0.5,0.5});
 auto exp = NDArrayFactory::create<double>('c', {4,4}, {9,17.5,12.5, 1.5, 7, 2.5,15.5, 17.5, 15.8,16.4, 3.4, 1.4, 4.3,3.15,1.15,15.15});
@@ -163,16 +130,11 @@ TEST_F(HelpersTests1, Householder_mulRight_test1) {
 ASSERT_TRUE(x.isSameShapeStrict(exp));
 ASSERT_TRUE(x.equalsTo(&exp));
-
 }
-
 /////////////////////////////////////////////////////////////////
 TEST_F(HelpersTests1, BiDiagonalizeUp_test1) {
- #ifdef __CUDABLAS__
- return;
- #endif
 auto matrix = NDArrayFactory::create<double>('c', {4,4}, {9,13,3,6,13,11,7,6,3,7,4,7,6,6,7,10});
 auto hhMatrixExp = NDArrayFactory::create<double>('c', {4,4}, {1.524000, 1.75682,0.233741,0.289458, 0.496646, 1.5655, 1.02929,0.971124, 0.114611,-0.451039, 1.06367,0, 0.229221,-0.272237,0.938237,0});
 auto hhBidiagExp = NDArrayFactory::create<double>('c', {4,4}, {-17.1756, 24.3869, 0, 0, 0,-8.61985,-3.89823, 0, 0, 0, 4.03047,4.13018, 0, 0, 0,1.21666});
@@ -189,15 +151,11 @@ TEST_F(HelpersTests1, BiDiagonalizeUp_test1) {
 ///////////////////////////////////////////////////////////////////
 TEST_F(HelpersTests1, BiDiagonalizeUp_test2) {
- #ifdef __CUDABLAS__
- return;
- #endif
 auto matrix = NDArrayFactory::create<double>('c', {5,4}, {9,-13,3,6, 13,11,7,-6, 3,7,4,7, -6,6,7,10, 2,17,9,12});
 auto hhMatrixExp = NDArrayFactory::create<double>('c', {5,4}, {1.52048, 1.37012, 0.636326, -0.23412, 0.494454, 1.66025, 1.66979,-0.444696, 0.114105,0.130601, 1.58392, 0, -0.22821, 0.215638,0.0524781, 1.99303, 0.0760699,0.375605, 0.509835,0.0591568});
 auto hhBidiagExp = NDArrayFactory::create<double>('c', {4,4}, {-17.2916,7.03123, 0, 0, 0, 16.145,-22.9275, 0, 0, 0, -9.9264,-11.5516, 0, 0, 0,-12.8554});
 ops::helpers::BiDiagonalUp object(matrix);
- // object._HHmatrix.printBuffer();
 ASSERT_TRUE(hhMatrixExp.isSameShapeStrict(object._HHmatrix));
 ASSERT_TRUE(hhMatrixExp.equalsTo(&object._HHmatrix));
@@ -208,9 +166,6 @@ TEST_F(HelpersTests1, BiDiagonalizeUp_test3) {
- #ifdef __CUDABLAS__
- return;
- #endif
 auto matrix = NDArrayFactory::create<double>('c', {6,4}, {9,-13,3,6, 13,11,7,-6, 3,7,4,7, -6,6,7,10, 2,17,9,12, 0,-15,10,2});
 auto hhMatrixExp = NDArrayFactory::create<double>('c', {6,4}, {1.52048, 1.37012, 0.636326, -0.23412, 0.494454, 1.65232, 1.59666,-0.502606, 0.114105, 0.129651, 1.35075, 0, -0.22821, 0.214071, 0.103749, 1.61136, 0.0760699, 0.372875, 0.389936, 0.2398, 0,0.0935171,-0.563777, 0.428587});
 auto hhBidiagExp = NDArrayFactory::create<double>('c', {4,4}, {-17.2916,7.03123, 0, 0, 0,16.3413,-20.7828, 0, 0, 0,-18.4892,4.13261, 0, 0, 0,-21.323});
@@ -227,9 +182,6 @@
 ///////////////////////////////////////////////////////////////////
 TEST_F(HelpersTests1, HHsequence_test1) {
- #ifdef __CUDABLAS__
- return;
- #endif
 auto matrix = NDArrayFactory::create<double>('c', {5,4}, {9,-13,3,6, 13,11,7,-6, 3,7,4,7, -6,6,7,10, 2,17,9,12});
 auto vectorsUseqExp = NDArrayFactory::create<double>('c', {5,4}, {1.52048, 1.37012, 0.636326, -0.23412, 0.494454, 1.66025, 1.66979,-0.444696, 0.114105,0.130601, 1.58392, 0, -0.22821,0.215638,0.0524781, 1.99303, 0.0760699,0.375605, 0.509835,0.0591568});
 auto vectorsVseqExp = NDArrayFactory::create<double>('c', {5,4}, {1.52048, 1.37012, 0.636326, -0.23412, 0.494454, 1.66025, 1.66979,-0.444696, 0.114105,0.130601, 1.58392, 0, -0.22821,0.215638,0.0524781, 1.99303, 0.0760699,0.375605, 0.509835,0.0591568});
@@ -254,9 +206,6 @@ TEST_F(HelpersTests1, 
HHsequence_test1) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, HHsequence_test2) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {6,4}, {9,-13,3,6, 13,11,7,-6, 3,7,4,7, -6,6,7,10, 2,17,9,12 ,0,-15,10,2}); auto vectorsUseqExp = NDArrayFactory::create('c', {6,4}, {1.52048, 1.37012, 0.636326, -0.23412, 0.494454, 1.65232, 1.59666,-0.502606, 0.114105, 0.129651, 1.35075, 0, -0.22821, 0.214071, 0.103749, 1.61136, 0.0760699, 0.372875, 0.389936, 0.2398, 0,0.0935171,-0.563777, 0.428587}); auto vectorsVseqExp = NDArrayFactory::create('c', {6,4}, {1.52048, 1.37012, 0.636326, -0.23412, 0.494454, 1.65232, 1.59666,-0.502606, 0.114105, 0.129651, 1.35075, 0, -0.22821, 0.214071, 0.103749, 1.61136, 0.0760699, 0.372875, 0.389936, 0.2398, 0,0.0935171,-0.563777, 0.428587}); @@ -281,9 +230,6 @@ TEST_F(HelpersTests1, HHsequence_test2) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, HHsequence_test3) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {4,4}, {9,13,3,6, 13,11,7,6, 3,7,4,7, 6,6,7,10}); auto vectorsUseqExp = NDArrayFactory::create('c', {4,4}, {1.524, 1.75682,0.233741,0.289458, 0.496646, 1.5655, 1.02929,0.971124, 0.114611,-0.451039, 1.06367, 0, 0.229221,-0.272237,0.938237, 0}); auto vectorsVseqExp = NDArrayFactory::create('c', {4,4}, {1.524, 1.75682,0.233741,0.289458, 0.496646, 1.5655, 1.02929,0.971124, 0.114611,-0.451039, 1.06367, 0, 0.229221,-0.272237,0.938237, 0}); @@ -308,9 +254,6 @@ TEST_F(HelpersTests1, HHsequence_test3) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, HHsequence_test4) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {4,4}, {9,13,3,6, 13,11,7,6, 3,7,4,7, 6,6,7,10}); auto exp = NDArrayFactory::create('c', {4,4}, {2.49369, 2.62176, 5.88386, 7.69905, -16.0588,-18.7319,-9.15007,-12.6164, 4.7247, 3.46252, 1.02038, -1.4533, 2.9279,-2.29178, 1.90139,-0.66187}); @@ -325,9 +268,6 @@ TEST_F(HelpersTests1, HHsequence_test4) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, HHsequence_test5) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,4}, {9,-13,3,6, 13,11,7,-6, 3,7,4,7, -6,6,7,10, 2,17,9,12}); auto exp = NDArrayFactory::create('c', {5,4}, {4.52891, 8.09473,-2.73704,-13.0302, -11.0752, 7.41549,-3.75125,0.815252, -7.76818,-15.9102,-9.90869,-11.8677, 1.63942,-17.0312,-9.05102,-4.49088, -9.63311,0.540226,-1.52764, 5.79111}); @@ -342,9 +282,6 @@ TEST_F(HelpersTests1, HHsequence_test5) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, HHsequence_test6) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,4}, {9,-13,3,6, 13,11,7,-6, 3,7,4,7, -6,6,7,10, 2,17,9,12}); auto matrix2 = NDArrayFactory::create('c',{6,4}, {9,-1,3,9, 10,11,-7,-5, 3,2,4,7, -1,6,7,19, 2,17,9,15, 2,17,-9,15}); auto exp = NDArrayFactory::create('c', {6,4}, {9,-1,3,9, -4.43019,-15.1713, -3.2854,-7.65743, -9.39162,-7.03599, 8.03827, 9.48453, -2.97785, -16.424, 5.35265,-20.1171, -0.0436177, -13.118,-8.37287,-17.3012, -1.14074, 4.18282,-10.0914,-5.69014}); @@ -360,9 +297,6 @@ TEST_F(HelpersTests1, HHsequence_test6) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, HHsequence_test7) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {4,4}, {9,13,3,6, 13,11,7,6, 
3,7,4,7, 6,6,7,10}); auto exp = NDArrayFactory::create('c', {4,4}, {9,13,3,6,-5.90424,-2.30926,-0.447417, 3.05712, -10.504,-9.31339, -8.85493,-10.8886, -8.29494,-10.6737, -5.94895,-7.55591}); @@ -376,9 +310,6 @@ TEST_F(HelpersTests1, HHsequence_test7) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, HHsequence_test8) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,4}, {9,-13,3,6, 13,11,7,-6, 3,7,4,7, -6,6,7,10, 2,17,9,12}); auto exp = NDArrayFactory::create('c', {5,4}, {9, -13, 3, 6, 13, 11, 7, -6, -6.90831,-5.01113, 0.381677,0.440128, -0.80107,0.961605,-0.308019,-1.96153, -0.795985, 18.6538, 12.0731, 16.9988}); @@ -392,9 +323,6 @@ TEST_F(HelpersTests1, HHsequence_test8) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, HHsequence_test9) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {6,4}, {9,-13,3,6, 13,11,7,-6, 3,7,4,7, -6,6,7,10, 2,17,9,12 ,0,-15,10,2}); auto exp = NDArrayFactory::create('c', {6,4}, {9, -13, 3, 6, 13, 11, 7, -6, 3, 7, 4, 7, 3.77597, 18.6226,-0.674868, 4.61365, 5.02738,-14.1486, -2.22877,-8.98245, -0.683766, 1.73722, 14.9859, 12.0843}); @@ -408,9 +336,6 @@ TEST_F(HelpersTests1, HHsequence_test9) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, HHsequence_test10) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {4,4}, {9,13,3,6, 13,11,7,6, 3,7,4,7, 6,6,7,10}); auto matrix2 = NDArrayFactory::create('c',{6,4}, {9,-1,3,9, 10,11,-7,-5, 3,2,4,7, -1,6,7,19, 2,17,9,15, 2,17,-9,15}); auto exp = NDArrayFactory::create('c', {6,4}, {9, -1, 3, 9, 10, 11, -7, -5, 3, 2, 4, 7, 2.58863, 11.0295,-4.17483,-0.641012, -1.21892,-16.3151, 6.12049, -20.0239, -0.901799,-15.0389,-12.4944, -20.2394}); @@ -425,9 +350,6 @@ TEST_F(HelpersTests1, HHsequence_test10) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, HHsequence_test11) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,4}, {9,-13,3,6, 13,11,7,-6, 3,7,4,7, -6,6,7,10, 2,17,9,12}); auto matrix2 = NDArrayFactory::create('c',{6,4}, {9,-1,3,9, 10,11,-7,-5, 3,2,4,7, -1,6,7,19, 2,17,9,15, 2,17,-9,15}); auto exp = NDArrayFactory::create('c', {6,4}, {9, -1, 3, 9, 10, 11, -7, -5, 3, 2, 4, 7, 1.14934, 4.40257, 8.70127,-1.18824, 1.5132,0.220419,-11.6285,-11.7549, 2.32148, 24.3838,0.256531, 25.9116}); @@ -442,9 +364,6 @@ TEST_F(HelpersTests1, HHsequence_test11) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, HHsequence_test12) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,3}, {9,-13,3, 13,11,7, 3,7,4, -6,6,7, 2,17,9}); auto matrix2 = NDArrayFactory::create('c',{6,4}, {9,-1,3,9, 10,11,-7,-5, 3,2,4,7, -1,6,7,19, 2,17,9,15, 2,17,-9,15}); auto exp = NDArrayFactory::create('c', {6,4}, {9, -1, 3, 9, 10, 11, -7, -5, 3, 2, 4, 7, -1, 6, 7, 19, -2.62252,-22.2914, 4.76743,-19.6689, -1.05943,-9.00514,-11.8013,-7.94571}); @@ -459,9 +378,6 @@ TEST_F(HelpersTests1, HHsequence_test12) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, HHsequence_test13) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,3}, {9,-13,3, 13,11,7, 3,7,4, -6,6,7, 2,17,9}); auto matrix2 = NDArrayFactory::create('c',{6,4}, {9,-1,3,9, 10,11,-7,-5, 3,2,4,7, -1,6,7,19, 2,17,9,15, 2,17,-9,15}); auto exp = 
NDArrayFactory::create('c', {6,4}, {9 , -1 , 3 , 9, -4.65167, 3.44652, 7.83593, 22.6899, -9.48514, -21.902, 5.66559,-13.0533, -0.343184, 15.2895, 7.2888, 14.0489, 0.289638,-1.87752, 3.944,-1.49707, -2.48845, 3.18285,-10.6685,0.406502}); @@ -476,9 +392,6 @@ TEST_F(HelpersTests1, HHsequence_test13) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, HHsequence_test14) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,3}, {9,-13,3, 13,11,7, 3,7,4, -6,6,7, 2,17,9}); auto matrix2 = NDArrayFactory::create('c',{5,5}, {9,-1,3,9,10, 11,-7,-5,3, 2, 4,7,-1,6,7, 19,2,17,9,15, 2,17,-9,15,2}); auto exp = NDArrayFactory::create('c', {5,5}, {1.78958, 8.06962,-6.13687, 4.36267, 1.06472, -14.9578, -8.1522, 1.30442,-18.3343,-13.2578, 13.5536, 5.50764, 15.7859, 7.60831, 11.7871, -1.3626,-0.634986, 7.60934, -2.1841, 5.62694, -13.0577, 15.1554, -7.6511, 3.76365,-5.87368}); @@ -494,9 +407,6 @@ TEST_F(HelpersTests1, HHsequence_test14) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, HHsequence_test15) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,3}, {9,-13,3, 13,11,7, 3,7,4, -6,6,7, 2,17,9}); auto matrix2 = NDArrayFactory::create('c',{5,5}, {9,-1,3,9,10, 11,-7,-5,3, 2, 4,7,-1,6,7, 19,2,17,9,15, 2,17,-9,15,2}); auto exp = NDArrayFactory::create('c', {5,5}, {9, -1, 3, 9, 10, 11, -7, -5, 3, 2, 4, 7, -1, 6, 7, -9.26566,-16.4298, 1.64125,-17.3243,-7.70257, -16.7077, 4.80216,-19.1652,-2.42279,-13.0258}); @@ -511,9 +421,6 @@ TEST_F(HelpersTests1, HHsequence_test15) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, HHsequence_test16) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,5}, {9,-1,3,9,10, 11,-7,-5,3, 2, 4,7,-1,6,7, 19,2,17,9,15, 2,17,-9,15,2}); auto matrix2 = NDArrayFactory::create('c', {10,10}); matrix2 = 100.; @@ -529,9 +436,6 @@ TEST_F(HelpersTests1, HHsequence_test16) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, HHsequence_test17) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,5}, {9,-1,3,9,10, 11,-7,-5,3, 2, 4,7,-1,6,7, 19,2,17,9,15, 2,17,-9,15,2}); auto matrix2 = NDArrayFactory::create('c', {10,10}); matrix2 = 100.; @@ -547,9 +451,6 @@ TEST_F(HelpersTests1, HHsequence_test17) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, HHsequence_test18) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c',{6,4}, {9,-1,3,9, 10,11,-7,-5, 3,2,4,7, -1,6,7,19, 2,17,9,15, 2,17,-9,15}); auto matrix2 = NDArrayFactory::create('c', {10,10}); matrix2 = 100.; @@ -565,9 +466,6 @@ TEST_F(HelpersTests1, HHsequence_test18) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, HHsequence_test19) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c',{6,4}, {9,-1,3,9, 10,11,-7,-5, 3,2,4,7, -1,6,7,19, 2,17,9,15, 2,17,-9,15}); auto matrix2 = NDArrayFactory::create('c', {10,10}); matrix2 = 100.; @@ -581,305 +479,48 @@ TEST_F(HelpersTests1, HHsequence_test19) { } /////////////////////////////////////////////////////////////////// -TEST_F(HelpersTests1, SVD_test1) { +TEST_F(HelpersTests1, HHcolPivQR_1) { - #ifdef __CUDABLAS__ - return; - #endif - auto matrix = NDArrayFactory::create('c', {5,5}, {-17 ,14 ,9 ,-12 ,-12 ,5 ,-4 ,-19 ,-7 ,-12 ,15 ,16 ,17 ,-6 ,8 ,-10 ,14 ,-15 ,6 
,-10 ,-14 ,12 ,-1 ,-16 ,3}); - auto matrix2 = NDArrayFactory::create('c', {5,5}, {18 ,3 ,2 ,7 ,-11 ,7 ,7 ,10 ,-13 ,-8 ,13 ,20 ,-4 ,-16 ,-9 ,-17 ,-5 ,-7 ,-19 ,-8 ,-9 ,9 ,6 ,14 ,-11}); - auto expM = NDArrayFactory::create('c', {5,5}, {-17,14,9,-12,-12, 5,-4, -19, -7,-12, 15,16,17.0294, -6, 8, -10,14, -15, 6,-10, -14,12, 0,-16, 0}); - auto expU = NDArrayFactory::create('c', {5,5}, {18,3, 2,7,-11, 7, 7.75131,10,-12.5665, -8, 13, 20.905,-4,-14.7979, -9, -17,-3.87565,-7,-19.2608, -8, -9, 9, 6, 14,-11}); + auto matrix1 = NDArrayFactory::create('c', {5,6}, {12 ,20 ,19 ,-18 ,-6 ,3 ,6 ,2 ,-7 ,-7 ,14 ,8 ,18 ,-17 ,18 ,-14 ,-15 ,1 ,2 ,2 ,-3 ,-18 ,8 ,-17 ,-19 ,12 ,18 ,6 ,-2 ,-17}); - ops::helpers::SVD svd(matrix, 4, true, true, true, 't'); - svd._m = matrix; - svd._u = matrix2; - svd.deflation1(1,1,2,2); + auto expQR = NDArrayFactory::create('c', {5,6}, {-32.6649659, -4.9594419, -8.2657365, 7.2248659, 16.5927006, 11.7251002, -0.1354883, -29.0586293, 10.9775804, -14.6886248, 4.1884104, 20.7115773, 0.3483986, 0.3236753, 25.5376258, 1.6432380, 9.6395914, -9.0237996, -0.0580664, 0.0798999, -0.0799029, 19.5280665, -4.9773587, 16.0968604, 0.3483986, -0.6667832, 0.0252425, 0.0159188, 10.6978354, -4.6919842}); + auto expCoeffs = NDArrayFactory::create('c', {1,5}, {1.58166, 1.28555, 1.98605, 1.99949, 0}); + auto expPermut = NDArrayFactory::create('c', {6,6}, {0,1,0,0,0,0, 0,0,1,0,0,0, 1,0,0,0,0,0, 0,0,0,0,0,1, 0,0,0,0,1,0, 0,0,0,1,0,0}); - ASSERT_TRUE(expM.equalsTo(&svd._m)); - ASSERT_TRUE(expU.equalsTo(&svd._u)); + ops::helpers::HHcolPivQR qr(matrix1); + + ASSERT_TRUE(expQR.equalsTo(&qr._qr)); + ASSERT_TRUE(expCoeffs.equalsTo(&qr._coeffs)); + ASSERT_TRUE(expPermut.equalsTo(&qr._permut)); + + ASSERT_TRUE(expQR.isSameShapeStrict(qr._qr)); + ASSERT_TRUE(expCoeffs.isSameShapeStrict(qr._coeffs)); + ASSERT_TRUE(expPermut.isSameShapeStrict(qr._permut)); } /////////////////////////////////////////////////////////////////// -TEST_F(HelpersTests1, SVD_test2) { +TEST_F(HelpersTests1, HHcolPivQR_2) { - #ifdef __CUDABLAS__ - return; - #endif - auto matrix= NDArrayFactory::create('c', {5,5}, {-17 ,14 ,9 ,-12 ,-12 ,5 ,-4 ,-19 ,-7 ,-12 ,15 ,16 ,17 ,-6 ,8 ,-10 ,14 ,-15 ,6 ,-10 ,-14 ,12 ,-1 ,-16 ,3}); - auto matrix2 = NDArrayFactory::create('c', {5,5}, {18 ,3 ,2 ,7 ,-11 ,7 ,7 ,10 ,-13 ,-8 ,13 ,20 ,-4 ,-16 ,-9 ,-17 ,-5 ,-7 ,-19 ,-8 ,-9 ,9 ,6 ,14 ,-11}); - auto expM = NDArrayFactory::create('c', {5,5}, {22.6716,14, 9,-12,-12, 5,-4,-19, -7,-12, 0,16, 0, -6, 8, -10,14,-15, 6,-10, -14,12, -1,-16, 3}); - auto expU = NDArrayFactory::create('c', {5,5}, {-12.1738, 3, -13.4089, 7,-11, 1.36735, 7, -12.1297,-13, -8, -12.3944,20, -5.60173,-16, -9, -17,-5,-7,-19, -8, -9, 9, 6, 14,-11}); + auto matrix1 = NDArrayFactory::create('c', {6,6}, {-10 ,-16 ,-20 ,13 ,20 ,-10 ,-9 ,-1 ,-7 ,-20 ,-4 ,20 ,-11 ,19 ,-5 ,-18 ,12 ,-19 ,18 ,-18 ,17 ,-10 ,-19 ,14 ,-2 ,-7 ,-17 ,-14 ,-4 ,-16 ,18 ,-6 ,-18 ,1 ,-15 ,-12}); - ops::helpers::SVD svd(matrix, 4, true, true, true); - svd._m = matrix; - svd._u = matrix2; - svd.deflation1(0,0,2,2); + auto expQR = NDArrayFactory::create('c', {6,6}, {38.1707, -3.03898, 5.16103, 23.0805, -7.57126, -13.885, -0.41519, 34.3623, 3.77403, 2.62327, -8.17784, 9.10312, 0.394431, 0.509952,-30.2179, -6.78341, 12.8421, 28.5491, -0.290633, 0.111912,0.450367, 28.1139, 15.5195, 2.60562, 0.332152, 0.405161,0.308163,0.0468127, 22.294,-2.94931, 0.249114,0.0627956,0.657873, 0.76767,-0.752594,-7.46986}); + auto expCoeffs = NDArrayFactory::create('c', {1,6}, {1.26198, 1.38824, 1.15567, 1.25667, 1.27682, 0}); + auto expPermut = 
NDArrayFactory::create('c', {6,6}, {0,0,1,0,0,0, 0,0,0,0,1,0, 0,0,0,1,0,0, 0,1,0,0,0,0, 0,0,0,0,0,1, 1,0,0,0,0,0}); - ASSERT_TRUE(expM.equalsTo(&svd._m)); - ASSERT_TRUE(expU.equalsTo(&svd._u)); + ops::helpers::HHcolPivQR qr(matrix1); + + ASSERT_TRUE(expQR.equalsTo(&qr._qr)); + ASSERT_TRUE(expCoeffs.equalsTo(&qr._coeffs)); + ASSERT_TRUE(expPermut.equalsTo(&qr._permut)); + + ASSERT_TRUE(expQR.isSameShapeStrict(qr._qr)); + ASSERT_TRUE(expCoeffs.isSameShapeStrict(qr._coeffs)); + ASSERT_TRUE(expPermut.isSameShapeStrict(qr._permut)); } /////////////////////////////////////////////////////////////////// -TEST_F(HelpersTests1, SVD_test3) { +TEST_F(HelpersTests1, HHcolPivQR_3) { - #ifdef __CUDABLAS__ - return; - #endif - auto matrix= NDArrayFactory::create('c', {5,5}, {-17 ,14 ,9 ,-12 ,-12 ,5 ,-4 ,-19 ,-7 ,-12 ,15 ,16 ,17 ,-6 ,8 ,-10 ,14 ,-15 ,6 ,-10 ,-14 ,12 ,-1 ,-16 ,3}); - auto matrix2 = NDArrayFactory::create('c', {2,6}, {18 ,3 ,2 ,7 ,-11 ,7 ,7 ,10 ,-13 ,-8 ,13 ,20}); - auto expM = NDArrayFactory::create('c', {5,5}, {-17,14,9,-12,-12, 5,-4, -19, -7,-12, 15,16,17.0294, -6, 8, -10,14, -15, 6,-10, -14,12, 0,-16, 0}); - auto expU = NDArrayFactory::create('c', {2,6}, {18, 2.58377, 2, 7.16409,-11, 7, 7 ,10.4525 ,-13, -7.39897 ,13 ,20}); - - ops::helpers::SVD svd(matrix, 4, false, true, true, 't'); - svd._m = matrix; - svd._u = matrix2; - svd.deflation1(1,1,2,2); - - ASSERT_TRUE(expM.equalsTo(&svd._m)); - ASSERT_TRUE(expU.equalsTo(&svd._u)); -} - -/////////////////////////////////////////////////////////////////// -TEST_F(HelpersTests1, SVD_test4) { - - #ifdef __CUDABLAS__ - return; - #endif - auto matrix1 = NDArrayFactory::create('c', {6,5}, {12 ,20 ,19 ,-18 ,-6 ,3 ,6 ,2 ,-7 ,-7 ,14 ,8 ,18 ,-17 ,18 ,-14 ,-15 ,1 ,2 ,2 ,-3 ,-18 ,8 ,-17 ,-19 ,12 ,18 ,6 ,-2 ,-17}); - auto matrix2 = NDArrayFactory::create('c', {6,6}, {-10 ,-16 ,-20 ,13 ,20 ,-10 ,-9 ,-1 ,-7 ,-20 ,-4 ,20 ,-11 ,19 ,-5 ,-18 ,12 ,-19 ,18 ,-18 ,17 ,-10 ,-19 ,14 ,-2 ,-7 ,-17 ,-14 ,-4 ,-16 ,18 ,-6 ,-18 ,1 ,-15 ,-12}); - auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); - auto expM = NDArrayFactory::create('c', {6,5}, {12, 20, 19,-18, -6, 3, 6, 2, -7, -7, 14, 8, 18,-17, 18, -14,-15,8.06226, 2, 2, -3,-18, 0,-17, 2, 12, 18, 6, -2,-17}); - auto expU = NDArrayFactory::create('c', {6,6}, {-10,-16, -20, 13, 20,-10, -9, -1,-20.7138,4.46525, -4, 20, -11, 19,-18.4812,2.72876, 12,-19, 18,-18, 17, -10,-19, 14, -2, -7, -17, -14, -4,-16, 18, -6, -18, 1,-15,-12}); - auto expV = NDArrayFactory::create('c', {5,5}, {-18, 1, 19, -7, 1, 2,-18, -13, 14, 2, -2,-11,2.97683,-7.69015,-6, -3, -8, 8, -2, 7, 16, 15, -3, 7, 0}); - - ops::helpers::SVD svd(matrix3, 4, true, true, true, 't'); - svd._m = matrix1; - svd._u = matrix2; - svd._v = matrix3; - svd.deflation2(1, 2, 2, 1, 1, 2, 1); - - ASSERT_TRUE(expM.equalsTo(&svd._m)); - ASSERT_TRUE(expU.equalsTo(&svd._u)); - ASSERT_TRUE(expV.equalsTo(&svd._v)); -} - -/////////////////////////////////////////////////////////////////// -TEST_F(HelpersTests1, SVD_test5) { - - #ifdef __CUDABLAS__ - return; - #endif - auto matrix1 = NDArrayFactory::create('c', {6,5}, {12 ,20 ,19 ,-18 ,-6 ,3 ,6 ,2 ,-7 ,-7 ,14 ,8 ,18 ,-17 ,18 ,-14 ,-15 ,1 ,2 ,2 ,-3 ,-18 ,8 ,-17 ,-19 ,12 ,18 ,6 ,-2 ,-17}); - auto matrix2 = NDArrayFactory::create('c', {6,6}, {-10 ,-16 ,-20 ,13 ,20 ,-10 ,-9 ,-1 ,-7 ,-20 ,-4 ,20 ,-11 ,19 ,-5 ,-18 ,12 ,-19 ,18 ,-18 ,17 ,-10 ,-19 ,14 ,-2 ,-7 ,-17 ,-14 ,-4 ,-16 ,18 ,-6 ,-18 ,1 ,-15 ,-12}); - auto matrix3 = NDArrayFactory::create('c', 
{5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); - auto expM = NDArrayFactory::create('c', {6,5}, {18.4391, 20, 19,-18, -6, 3, 6, 2, -7, -7, 0, 8,18.4391,-17, 18, -14,-15, 1, 2, 2, -3,-18, 8,-17,-19, 12, 18, 6, -2,-17}); - auto expU = NDArrayFactory::create('c', {6,6}, {-10,-16,-20,13, 20,-10, -9,-15.8359, -7,-12.2566, -4, 20, -11,-1.30158, -5,-26.1401, 12,-19, 18,-19.3068, 17, 7.15871,-19, 14, -2, -7,-17, -14, -4,-16, 18, -6,-18, 1,-15,-12}); - auto expV = NDArrayFactory::create('c', {5,5}, {-18, 1, 19, -7, 1, 2,-1.08465,-13,22.7777, 2, -2,-5.64019, 8,9.65341,-6, -3, -8, 8, -2, 7, 16, 15, -3, 7, 0}); - - ops::helpers::SVD svd(matrix3, 4, true, true, true, 't'); - svd._m = matrix1; - svd._u = matrix2; - svd._v = matrix3; - svd.deflation2(1, 0, 1, 1, 0, 2, 2); - - ASSERT_TRUE(expM.equalsTo(&svd._m)); - ASSERT_TRUE(expU.equalsTo(&svd._u)); - ASSERT_TRUE(expV.equalsTo(&svd._v)); -} - -/////////////////////////////////////////////////////////////////// -TEST_F(HelpersTests1, SVD_test6) { - - #ifdef __CUDABLAS__ - return; - #endif - auto matrix1 = NDArrayFactory::create('c', {6,5}, {12 ,20 ,19 ,-18 ,-6 ,3 ,6 ,2 ,-7 ,-7 ,14 ,8 ,18 ,-17 ,18 ,-14 ,-15 ,1 ,2 ,2 ,-3 ,-18 ,8 ,-17 ,-19 ,12 ,18 ,6 ,-2 ,-17}); - auto matrix2 = NDArrayFactory::create('c', {2,6}, {-10 ,-16 ,-20 ,13 ,20 ,-10 ,-9 ,-1 ,-7 ,-20 ,-4 ,20}); - auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); - auto expM = NDArrayFactory::create('c', {6,5}, {18.4391, 20, 19,-18, -6, 3, 6, 2, -7, -7, 0, 8,18.4391,-17, 18, -14,-15, 1, 2, 2, -3,-18, 8,-17,-19, 12, 18, 6, -2,-17}); - auto expU = NDArrayFactory::create('c', {2,6}, {-10, -0.542326,-20, 20.6084,20,-10, -9, -15.8359, -7,-12.2566,-4, 20}); - auto expV = NDArrayFactory::create('c', {5,5}, {-18, 1, 19, -7, 1, 2,-1.08465,-13,22.7777, 2, -2,-5.64019, 8,9.65341,-6, -3, -8, 8, -2, 7, 16, 15, -3, 7, 0}); - - ops::helpers::SVD svd(matrix3, 4, false, true, true, 't'); - svd._m = matrix1; - svd._u = matrix2; - svd._v = matrix3; - svd.deflation2(1, 0, 1, 1, 0, 2, 2); - - ASSERT_TRUE(expM.equalsTo(&svd._m)); - ASSERT_TRUE(expU.equalsTo(&svd._u)); - ASSERT_TRUE(expV.equalsTo(&svd._v)); -} - -/////////////////////////////////////////////////////////////////// -TEST_F(HelpersTests1, SVD_test7) { - - #ifdef __CUDABLAS__ - return; - #endif - auto matrix1 = NDArrayFactory::create('c', {6,5}, {12 ,20 ,19 ,-18 ,-6 ,3 ,6 ,2 ,-7 ,-7 ,14 ,8 ,18 ,-17 ,18 ,-14 ,-15 ,1 ,2 ,2 ,-3 ,-18 ,8 ,-17 ,-19 ,12 ,18 ,6 ,-2 ,-17}); - auto matrix2 = NDArrayFactory::create('c', {6,6}, {-10 ,-16 ,-20 ,13 ,20 ,-10 ,-9 ,-1 ,-7 ,-20 ,-4 ,20 ,-11 ,19 ,-5 ,-18 ,12 ,-19 ,18 ,-18 ,17 ,-10 ,-19 ,14 ,-2 ,-7 ,-17 ,-14 ,-4 ,-16 ,18 ,-6 ,-18 ,1 ,-15 ,-12}); - auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); - - auto expM = NDArrayFactory::create('c', {6,5}, {12, 20, 19,-18, -6, 3, 6, 2, -7, -7, 14, 8,19.6977,-17, 18, -14,-15, 1, 2, 2, -3,-18, 0,-17, 0, 12, 18, 6, -2,-17}); - auto expU = NDArrayFactory::create('c', {6,6}, {-10, -16,-20, 13, 20,-10, -9,-9.03658, -7,-17.8701, -4, 20, -11, 10.0519, -5,-24.1652, 12,-19, 18, -20.51, 17,-1.82762,-19, 14, -2,-12.0826,-17,-9.95039, -4,-16, 18, -6,-18, 1,-15,-12}); - auto expV = NDArrayFactory::create('c', {5,5}, {-18, 1, 19,-7, 1, 2,-18,-13,14, 2, -2,-11, 8, 2,-6, -3, -8, 8,-2, 7, 16, 15, -3, 7, 0}); - - ops::helpers::SVD svd(matrix3, 4, true, true, 
true, 't'); - svd._m = matrix1; - svd._u = matrix2; - svd._v = matrix3; - svd.deflation(1, 3, 1, 1, 2, 1); - - ASSERT_TRUE(expM.equalsTo(&svd._m)); - ASSERT_TRUE(expU.equalsTo(&svd._u)); - ASSERT_TRUE(expV.equalsTo(&svd._v)); -} - -/////////////////////////////////////////////////////////////////// -TEST_F(HelpersTests1, SVD_test8) { - - #ifdef __CUDABLAS__ - return; - #endif - auto matrix1 = NDArrayFactory::create('c', {6,5}, {12 ,20 ,19 ,-18 ,-6 ,3 ,6 ,2 ,-7 ,-7 ,14 ,8 ,18 ,-17 ,18 ,-14 ,-15 ,1 ,2 ,2 ,-3 ,-18 ,8 ,-17 ,-19 ,12 ,18 ,6 ,-2 ,-17}); - auto matrix2 = NDArrayFactory::create('c', {6,6}, {-10 ,-16 ,-20 ,13 ,20 ,-10 ,-9 ,-1 ,-7 ,-20 ,-4 ,20 ,-11 ,19 ,-5 ,-18 ,12 ,-19 ,18 ,-18 ,17 ,-10 ,-19 ,14 ,-2 ,-7 ,-17 ,-14 ,-4 ,-16 ,18 ,-6 ,-18 ,1 ,-15 ,-12}); - auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); - - auto expM = NDArrayFactory::create('c', {6,5}, {12, 20,19,-18, -6, 3, 6, 2, -7, -7, 14,-15, 2,-17, 18, -14, 8, 1, 18, 2, -3,-18, 8,-17,-19, 12, 18, 6, -2,-17}); - auto expU = NDArrayFactory::create('c', {6,6}, {-10,-20,-16, 13, 20,-10, -9, -7, -1,-20, -4, 20, -11, -5, 19,-18, 12,-19, 18, 17,-18,-10,-19, 14, -2, -7,-17,-14, -4,-16, 18, -6,-18, 1,-15,-12}); - auto expV = NDArrayFactory::create('c', {5,5}, {-18, 1, 19,-7, 1, 2,-18,-13, 2,14, -2,-11, 8,-6, 2, -3, -8, 8, 7,-2, 16, 15, -3, 7, 0}); - - ops::helpers::SVD svd(matrix3, 4, true, true, true, 't'); - svd._m = matrix1; - svd._u = matrix2; - svd._v = matrix3; - svd.deflation(0, 2, 2, 1, 2, 1); - - ASSERT_TRUE(expM.equalsTo(&svd._m)); - ASSERT_TRUE(expU.equalsTo(&svd._u)); - ASSERT_TRUE(expV.equalsTo(&svd._v)); -} - -/////////////////////////////////////////////////////////////////// -TEST_F(HelpersTests1, SVD_test9) { - - #ifdef __CUDABLAS__ - return; - #endif - auto col0 = NDArrayFactory::create('c', {10,1}, {12 ,20 ,19 ,-18 ,-6 ,3 ,6 ,2 ,-7 ,14}); - auto diag = NDArrayFactory::create('c', {10,1}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2}); - auto permut = NDArrayFactory::create('c', {1,10}, {8 ,1 ,4 ,0, 5 ,2 ,9 ,3 ,7 ,6}); - auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); - - auto expSingVals = NDArrayFactory::create('c', {10,1}, {-2, 15.304323, 11.2, -1, 1.73489, -12, -15.3043, -12.862, 5.6, 41.4039}); - auto expShifts = NDArrayFactory::create('c', {10,1}, {1, 19, 19, 1, 2, -18, -18, -13, 2, 2}); - auto expMus = NDArrayFactory::create('c', {10,1}, {-3, -3.695677, -7.8, -2, -0.265108, 6, 2.69568, 0.138048, 3.6, 39.4039}); - - auto singVals = NDArrayFactory::create('c', {10,1}); - auto shifts = NDArrayFactory::create('c', {10,1}); - auto mus = NDArrayFactory::create('c', {10,1}); - - ops::helpers::SVD svd(matrix3, 4, true, true, true, 't'); - svd.calcSingVals(col0, diag, permut, singVals, shifts, mus); - - ASSERT_TRUE(expSingVals.equalsTo(&singVals)); - ASSERT_TRUE(expShifts.equalsTo(&shifts)); - ASSERT_TRUE(expMus.equalsTo(&mus)); -} - -/////////////////////////////////////////////////////////////////// -TEST_F(HelpersTests1, SVD_test10) { - - #ifdef __CUDABLAS__ - return; - #endif - auto singVals = NDArrayFactory::create('c', {4,1}, {1 ,1 ,1 ,1}); - auto col0 = NDArrayFactory::create('c', {4,1}, {1 ,1 ,1 ,1}); - auto diag = NDArrayFactory::create('c', {4,1}, {5 ,7 ,-13 ,14}); - auto permut = NDArrayFactory::create('c', {1,4}, {0 ,2 ,3 ,1 }); - auto mus = NDArrayFactory::create('c', {4,1}, {4,1,4,6}); - auto shifts = 
NDArrayFactory::create('c', {4,1}, {4,2,5,6}); - auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); - - auto expZhat = NDArrayFactory::create('c', {4,1}, {0, 0.278208, 72.501953, 0}); - - auto zhat = NDArrayFactory::create('c', {4,1}); - - ops::helpers::SVD svd(matrix3, 4, true, true, true, 't'); - svd.perturb(col0, diag, permut, singVals, shifts, mus, zhat); - - ASSERT_NEAR(expZhat.e(1), zhat.e(1), EPS); - ASSERT_NEAR(expZhat.e(2), zhat.e(2), EPS); -} - - -/////////////////////////////////////////////////////////////////// -TEST_F(HelpersTests1, SVD_test11) { - - #ifdef __CUDABLAS__ - return; - #endif - auto singVals = NDArrayFactory::create('c', {4,1}, {1 ,1 ,1 ,1}); - auto zhat = NDArrayFactory::create('c', {4,1}, {2 ,1 ,2 ,1}); - auto diag = NDArrayFactory::create('c', {4,1}, {5 ,7 ,-13 ,14}); - auto permut = NDArrayFactory::create('c', {1,4}, {0 ,2 ,3 ,1 }); - auto mus = NDArrayFactory::create('c', {4,1}, {4,1,4,6}); - auto shifts = NDArrayFactory::create('c', {4,1}, {4,2,5,6}); - auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); - - auto expU = NDArrayFactory::create('c', {5,5}, {-0.662161, 0.980399,-0.791469,-0.748434, 0, -0.744931, 0.183825,-0.593602,-0.392928, 0, 0.0472972, 0.061275,0.0719517, 0.104781, 0, 0.0662161,0.0356509, 0.126635, 0.523904, 0, 0, 0, 0, 0, 1}); - auto expV = NDArrayFactory::create('c', {4,4}, {-0.745259,-0.965209, -0.899497, -0.892319, -0.652102, 0.21114, -0.39353, -0.156156, -0.0768918,-0.130705,-0.0885868,-0.0773343, 0.115929,0.0818966, 0.167906, 0.416415}); - auto U = NDArrayFactory::create('c', {5,5}); - auto V = NDArrayFactory::create('c', {4,4}); - - - ops::helpers::SVD svd(matrix3, 4, true, true, true, 't'); - svd.calcSingVecs(zhat, diag,permut, singVals, shifts, mus, U, V); - - ASSERT_TRUE(expU.equalsTo(&U)); - ASSERT_TRUE(expV.equalsTo(&V)); - -} - -/////////////////////////////////////////////////////////////////// -TEST_F(HelpersTests1, SVD_test12) { - - #ifdef __CUDABLAS__ - return; - #endif - auto matrix1 = NDArrayFactory::create('c', {6,5}, {-2 ,-3 ,2 ,1 ,0 ,0 ,-4 ,5 ,-2 ,-3 ,-4 ,0 ,5 ,-1 ,-5 ,-3 ,-5 ,3 ,3 ,3 ,-5 ,5 ,-5 ,0 ,2 ,-2 ,-3 ,-4 ,-5 ,-3}); - auto matrix2 = NDArrayFactory::create('c', {6,6}, {-10 ,-16 ,-20 ,13 ,20 ,-10 ,-9 ,-1 ,-7 ,-20 ,-4 ,20 ,-11 ,19 ,-5 ,-18 ,12 ,-19 ,18 ,-18 ,17 ,-10 ,-19 ,14 ,-2 ,-7 ,-17 ,-14 ,-4 ,-16 ,18 ,-6 ,-18 ,1 ,-15 ,-12}); - auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); - auto matrix4 = NDArrayFactory::create('c', {5,5}, {3 ,-8 ,5 ,7 ,-8 ,4 ,-19 ,-12 ,-4 ,-5 ,-11 ,19 ,-2 ,-7 ,1 ,16 ,-5 ,10 ,19 ,-19 ,0 ,-20 ,0 ,-8 ,-13}); - - auto expSingVals = NDArrayFactory::create('c', {4,1}, {8.43282, 5, 2.3, 1.10167}); - auto expU = NDArrayFactory::create('c', {5,5}, {0.401972,0, 0.206791, 0.891995,0, 0,1, 0, 0,0, 0.816018,0,-0.522818,-0.246529,0, -0.415371,0,-0.826982, 0.378904,0, 0,0, 0, 0,1}); - auto expV = NDArrayFactory::create('c', {4,4}, {-0.951851,0,-0.133555,-0.275939, 0,1, 0, 0, 0.290301,0,-0.681937,-0.671333, -0.098513,0,-0.719114, 0.687873}); - - ops::helpers::SVD svd(matrix4, 4, true, true, true, 't'); - svd._m = matrix1; - svd._u = matrix2; - svd._v = matrix3; - NDArray U, singVals, V; - svd.calcBlockSVD(1, 4, U, singVals, V); - - ASSERT_TRUE(expSingVals.equalsTo(&singVals)); - ASSERT_TRUE(expU.equalsTo(&U)); - 
ASSERT_TRUE(expV.equalsTo(&V)); - - ASSERT_TRUE(expSingVals.isSameShapeStrict(singVals)); - ASSERT_TRUE(expU.isSameShapeStrict(U)); - ASSERT_TRUE(expV.isSameShapeStrict(V)); -} - -/////////////////////////////////////////////////////////////////// -TEST_F(HelpersTests1, SVD_test13) { - - #ifdef __CUDABLAS__ - return; - #endif NDArray matrix1('c', {6,5}, {12 ,20 ,19 ,-18 ,-6 ,3 ,6 ,2 ,-7 ,-7 ,14 ,8 ,18 ,-17 ,18 ,-14 ,-15 ,1 ,2 ,2 ,-3 ,-18 ,8 ,-17 ,-19 ,12 ,18 ,6 ,-2 ,-17}); auto expQR = NDArrayFactory::create('c', {6,5}, {-37.054 , 0.323852 , 8.04231 , -22.9395 ,-13.089, 0.105164, 32.6021, 6.42277, -0.262898,-1.58766, 0.140218, -0.485058, 29.2073, -9.92301,-23.7111, -0.262909,-0.00866538, 0.103467, 8.55831,-1.86455, -0.315491, 0.539207, 0.40754,-0.0374124,-7.10401, 0.315491, 0.385363,-0.216459, -0.340008,0.628595}); @@ -898,60 +539,10 @@ TEST_F(HelpersTests1, SVD_test13) { } -/////////////////////////////////////////////////////////////////// -TEST_F(HelpersTests1, SVD_test14) { - - #ifdef __CUDABLAS__ - return; - #endif - auto matrix1 = NDArrayFactory::create('c', {5,6}, {12 ,20 ,19 ,-18 ,-6 ,3 ,6 ,2 ,-7 ,-7 ,14 ,8 ,18 ,-17 ,18 ,-14 ,-15 ,1 ,2 ,2 ,-3 ,-18 ,8 ,-17 ,-19 ,12 ,18 ,6 ,-2 ,-17}); - - auto expQR = NDArrayFactory::create('c', {5,6}, {-32.665, -4.95944, -8.26574, 7.22487, 16.5927, 11.7251, -0.135488, -29.0586, 10.9776, -14.6886, 4.18841, 20.7116, 0.348399, 0.323675, 25.5376, 1.64324, 9.63959, -9.0238, -0.0580664,0.0798999,-0.0799029, 19.5281,-4.97736, 16.0969, 0.348399,-0.666783, 0.0252425,0.0159188, 10.6978,-4.69198}); - auto expCoeffs = NDArrayFactory::create('c', {1,5}, {1.58166, 1.28555, 1.98605, 1.99949, 0}); - auto expPermut = NDArrayFactory::create('c', {6,6}, {0,1,0,0,0,0, 0,0,1,0,0,0, 1,0,0,0,0,0, 0,0,0,0,0,1, 0,0,0,0,1,0, 0,0,0,1,0,0}); - - ops::helpers::HHcolPivQR qr(matrix1); - - ASSERT_TRUE(expQR.equalsTo(&qr._qr)); - ASSERT_TRUE(expCoeffs.equalsTo(&qr._coeffs)); - ASSERT_TRUE(expPermut.equalsTo(&qr._permut)); - - ASSERT_TRUE(expQR.isSameShapeStrict(qr._qr)); - ASSERT_TRUE(expCoeffs.isSameShapeStrict(qr._coeffs)); - ASSERT_TRUE(expPermut.isSameShapeStrict(qr._permut)); -} - - -/////////////////////////////////////////////////////////////////// -TEST_F(HelpersTests1, SVD_test15) { - - #ifdef __CUDABLAS__ - return; - #endif - auto matrix1 = NDArrayFactory::create('c', {6,6}, {-10 ,-16 ,-20 ,13 ,20 ,-10 ,-9 ,-1 ,-7 ,-20 ,-4 ,20 ,-11 ,19 ,-5 ,-18 ,12 ,-19 ,18 ,-18 ,17 ,-10 ,-19 ,14 ,-2 ,-7 ,-17 ,-14 ,-4 ,-16 ,18 ,-6 ,-18 ,1 ,-15 ,-12}); - - auto expQR = NDArrayFactory::create('c', {6,6}, {38.1707, -3.03898, 5.16103, 23.0805, -7.57126, -13.885, -0.41519, 34.3623, 3.77403, 2.62327, -8.17784, 9.10312, 0.394431, 0.509952,-30.2179, -6.78341, 12.8421, 28.5491, -0.290633, 0.111912,0.450367, 28.1139, 15.5195, 2.60562, 0.332152, 0.405161,0.308163,0.0468127, 22.294,-2.94931, 0.249114,0.0627956,0.657873, 0.76767,-0.752594,-7.46986}); - auto expCoeffs = NDArrayFactory::create('c', {1,6}, {1.26198, 1.38824, 1.15567, 1.25667, 1.27682, 0}); - auto expPermut = NDArrayFactory::create('c', {6,6}, {0,0,1,0,0,0, 0,0,0,0,1,0, 0,0,0,1,0,0, 0,1,0,0,0,0, 0,0,0,0,0,1, 1,0,0,0,0,0}); - - ops::helpers::HHcolPivQR qr(matrix1); - - ASSERT_TRUE(expQR.equalsTo(&qr._qr)); - ASSERT_TRUE(expCoeffs.equalsTo(&qr._coeffs)); - ASSERT_TRUE(expPermut.equalsTo(&qr._permut)); - - ASSERT_TRUE(expQR.isSameShapeStrict(qr._qr)); - ASSERT_TRUE(expCoeffs.isSameShapeStrict(qr._coeffs)); - ASSERT_TRUE(expPermut.isSameShapeStrict(qr._permut)); -} - - +#ifndef __CUDABLAS__ 
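+// Note: this single compile-time guard stands in for the per-test
+// "#ifdef __CUDABLAS__ return;" early exits that are removed throughout this
+// hunk: the CPU-only helper tests below are now excluded from the CUDA build
+// entirely instead of being compiled and then skipped at runtime.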
/////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, JacobiSVD_test1) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); auto left = NDArrayFactory::create('c', {2,2}); auto right = NDArrayFactory::create('c', {2,2}); @@ -968,9 +559,6 @@ TEST_F(HelpersTests1, JacobiSVD_test1) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, JacobiSVD_test2) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); auto matrix4 = NDArrayFactory::create('c', {5,5}, {12 ,20 ,19 ,-18 ,-6 ,3 ,6 ,2 ,-7 ,-7 ,14 ,8 ,18 ,-17 ,18 ,-14 ,-15 ,1 ,2 ,2 ,-3 ,-18 ,8 ,-17 ,-19}); auto matrix5 = NDArrayFactory::create('c', {5,5}, {3 ,-8 ,5 ,7 ,-8 ,4 ,-19 ,-12 ,-4 ,-5 ,-11 ,19 ,-2 ,-7 ,1 ,16 ,-5 ,10 ,19 ,-19 ,0 ,-20 ,0 ,-8 ,-13}); @@ -998,9 +586,6 @@ TEST_F(HelpersTests1, JacobiSVD_test2) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, JacobiSVD_test3) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); auto rotation = NDArrayFactory::create('c', {2,2}, {0.2, math::nd4j_sqrt(0.6), -math::nd4j_sqrt(0.6), 0.2}); @@ -1014,9 +599,6 @@ TEST_F(HelpersTests1, JacobiSVD_test3) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, JacobiSVD_test4) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); auto rotation = NDArrayFactory::create('c', {2,2}, {0.2, math::nd4j_sqrt(0.6), -math::nd4j_sqrt(0.6), 0.2}); @@ -1030,9 +612,6 @@ TEST_F(HelpersTests1, JacobiSVD_test4) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, JacobiSVD_test5) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); auto rotation = NDArrayFactory::create('c', {2,2}, {0.2, math::nd4j_sqrt(0.6), -math::nd4j_sqrt(0.6), 0.2}); @@ -1046,9 +625,6 @@ TEST_F(HelpersTests1, JacobiSVD_test5) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, JacobiSVD_test6) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); auto rotation = NDArrayFactory::create('c', {2,2}, {0.2, math::nd4j_sqrt(0.6), -math::nd4j_sqrt(0.6), 0.2}); @@ -1062,9 +638,6 @@ TEST_F(HelpersTests1, JacobiSVD_test6) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, JacobiSVD_test7) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); auto rotation = NDArrayFactory::create('c', {2,2}, {0.2, math::nd4j_sqrt(0.6), -math::nd4j_sqrt(0.6), 0.2}); @@ -1078,9 +651,6 @@ TEST_F(HelpersTests1, JacobiSVD_test7) { ////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, 
JacobiSVD_test8) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); auto rotation = NDArrayFactory::create('c', {2,2}, {0.2, math::nd4j_sqrt(0.6), -math::nd4j_sqrt(0.6), 0.2}); @@ -1094,9 +664,6 @@ TEST_F(HelpersTests1, JacobiSVD_test8) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, JacobiSVD_test9) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); auto expS = NDArrayFactory::create('c', {5,1}, {35.7975, 29.1924, 11.1935, 9.2846, 6.77071}); @@ -1113,9 +680,6 @@ TEST_F(HelpersTests1, JacobiSVD_test9) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, JacobiSVD_test10) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); auto expS = NDArrayFactory::create('c', {5,1}, {35.7975, 29.1924, 11.1935, 9.2846, 6.77071}); @@ -1132,9 +696,6 @@ TEST_F(HelpersTests1, JacobiSVD_test10) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, JacobiSVD_test11) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {6,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0, 3, -11, 2, 12, 10}); auto expS = NDArrayFactory::create('c', {5,1}, {36.27, 32.1997, 15.9624, 10.6407, 6.9747}); @@ -1151,9 +712,6 @@ TEST_F(HelpersTests1, JacobiSVD_test11) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, JacobiSVD_test12) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {6,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0, 3, -11, 2, 12, 10}); auto expS = NDArrayFactory::create('c', {5,1}, {36.27, 32.1997, 15.9624, 10.6407, 6.9747}); @@ -1170,9 +728,6 @@ TEST_F(HelpersTests1, JacobiSVD_test12) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, JacobiSVD_test13) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,6}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0, 3, -11, 2, 12, 10}); auto expS = NDArrayFactory::create('c', {5,1}, {40.499, 23.5079, 17.8139, 14.4484, 7.07957}); @@ -1189,9 +744,6 @@ TEST_F(HelpersTests1, JacobiSVD_test13) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, JacobiSVD_test14) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,6}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0, 3, -11, 2, 12, 10}); auto expS = NDArrayFactory::create('c', {5,1}, {40.499, 23.5079, 17.8139, 14.4484, 7.07957}); @@ -1208,9 +760,6 @@ TEST_F(HelpersTests1, JacobiSVD_test14) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, JacobiSVD_test15) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,6}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0, 3, -11, 2, 12, 10}); auto expS = NDArrayFactory::create('c', {5,1}, {40.499, 23.5079, 
17.8139, 14.4484, 7.07957});
@@ -1222,13 +771,314 @@ TEST_F(HelpersTests1, JacobiSVD_test15) {
     ASSERT_TRUE(expS.equalsTo(&jac._s));
 }
 
+///////////////////////////////////////////////////////////////////
+TEST_F(HelpersTests1, JacobiSVD_test16) {
+
+    NDArray rotation('c', {2,2}, sd::DataType::DOUBLE);
+
+    NDArray exp1('c', {2,2}, {1,0,0,1}, sd::DataType::DOUBLE);
+    NDArray exp2('c', {2,2}, {0,1,-1,0}, sd::DataType::DOUBLE);
+    NDArray exp3('c', {2,2}, {-1,0,0,-1}, sd::DataType::DOUBLE);
+    NDArray exp4('c', {2,2}, {0.983282, 0.182089, -0.182089, 0.983282}, sd::DataType::DOUBLE);
+    NDArray exp5('c', {2,2}, {0.249041, 0.968493, -0.968493, 0.249041}, sd::DataType::DOUBLE);
+
+    ops::helpers::JacobiSVD<double>::createJacobiRotationGivens(0, 0, rotation);
+    ASSERT_TRUE(rotation.equalsTo(exp1));
+    ASSERT_TRUE(rotation.isSameShapeStrict(exp1));
+
+    ops::helpers::JacobiSVD<double>::createJacobiRotationGivens(0, -0.5, rotation);
+    ASSERT_TRUE(rotation.equalsTo(exp2));
+    ASSERT_TRUE(rotation.isSameShapeStrict(exp2));
+
+    ops::helpers::JacobiSVD<double>::createJacobiRotationGivens(-0.5, 0, rotation);
+    ASSERT_TRUE(rotation.equalsTo(exp3));
+    ASSERT_TRUE(rotation.isSameShapeStrict(exp3));
+
+    ops::helpers::JacobiSVD<double>::createJacobiRotationGivens(2.7, -0.5, rotation);
+    ASSERT_TRUE(rotation.equalsTo(exp4));
+    ASSERT_TRUE(rotation.isSameShapeStrict(exp4));
+
+    ops::helpers::JacobiSVD<double>::createJacobiRotationGivens(2.7, -10.5, rotation);
+    ASSERT_TRUE(rotation.equalsTo(exp5));
+    ASSERT_TRUE(rotation.isSameShapeStrict(exp5));
+}
+
+TEST_F(HelpersTests1, test_binary_search_1) {
+    std::array<int, 10> array = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+
+    auto idx = sd::ops::helpers::binarySearch(array.data(), 2, 10);
+    ASSERT_EQ(2, idx);
+}
+
+TEST_F(HelpersTests1, test_binary_search_2) {
+    std::array<int, 10> array = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+
+    auto idx = sd::ops::helpers::binarySearch(array.data(), 18, 10);
+    ASSERT_EQ(-1, idx);
+}
+
+///////////////////////////////////////////////////////////////////
+TEST_F(HelpersTests1, SVD_test1) {
+
+    auto matrix  = NDArrayFactory::create<double>('c', {5,5}, {-17 ,14 ,9 ,-12 ,-12 ,5 ,-4 ,-19 ,-7 ,-12 ,15 ,16 ,17 ,-6 ,8 ,-10 ,14 ,-15 ,6 ,-10 ,-14 ,12 ,-1 ,-16 ,3});
+    auto matrix2 = NDArrayFactory::create<double>('c', {5,5}, {18 ,3 ,2 ,7 ,-11 ,7 ,7 ,10 ,-13 ,-8 ,13 ,20 ,-4 ,-16 ,-9 ,-17 ,-5 ,-7 ,-19 ,-8 ,-9 ,9 ,6 ,14 ,-11});
+    auto expM = NDArrayFactory::create<double>('c', {5,5}, {-17,14,9,-12,-12, 5,-4, -19, -7,-12, 15,16,17.0294, -6, 8, -10,14, -15, 6,-10, -14,12, 0,-16, 0});
+    auto expU = NDArrayFactory::create<double>('c', {5,5}, {18,3, 2,7,-11, 7, 7.75131,10,-12.5665, -8, 13, 20.905,-4,-14.7979, -9, -17,-3.87565,-7,-19.2608, -8, -9, 9, 6, 14,-11});
+
+    ops::helpers::SVD<double> svd(matrix, 4, true, true, true, 't');
+    svd._m = matrix;
+    svd._u = matrix2;
+    svd.deflation1(1,1,2,2);
+
+    ASSERT_TRUE(expM.equalsTo(&svd._m));
+    ASSERT_TRUE(expU.equalsTo(&svd._u));
+}
+
+///////////////////////////////////////////////////////////////////
+TEST_F(HelpersTests1, SVD_test2) {
+
+    auto matrix  = NDArrayFactory::create<double>('c', {5,5}, {-17 ,14 ,9 ,-12 ,-12 ,5 ,-4 ,-19 ,-7 ,-12 ,15 ,16 ,17 ,-6 ,8 ,-10 ,14 ,-15 ,6 ,-10 ,-14 ,12 ,-1 ,-16 ,3});
+    auto matrix2 = NDArrayFactory::create<double>('c', {5,5}, {18 ,3 ,2 ,7 ,-11 ,7 ,7 ,10 ,-13 ,-8 ,13 ,20 ,-4 ,-16 ,-9 ,-17 ,-5 ,-7 ,-19 ,-8 ,-9 ,9 ,6 ,14 ,-11});
+    auto expM = NDArrayFactory::create<double>('c', {5,5}, {22.6716,14, 9,-12,-12, 5,-4,-19, -7,-12, 0,16, 0, -6, 8, -10,14,-15, 6,-10, -14,12, -1,-16, 3});
+    auto expU = NDArrayFactory::create<double>('c', {5,5}, {-12.1738, 3, -13.4089, 7,-11, 1.36735, 7, -12.1297,-13, -8, -12.3944,20,
-5.60173,-16, -9, -17,-5,-7,-19, -8, -9, 9, 6, 14,-11}); + + ops::helpers::SVD svd(matrix, 4, true, true, true); + svd._m = matrix; + svd._u = matrix2; + svd.deflation1(0,0,2,2); + + ASSERT_TRUE(expM.equalsTo(&svd._m)); + ASSERT_TRUE(expU.equalsTo(&svd._u)); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests1, SVD_test3) { + + auto matrix= NDArrayFactory::create('c', {5,5}, {-17 ,14 ,9 ,-12 ,-12 ,5 ,-4 ,-19 ,-7 ,-12 ,15 ,16 ,17 ,-6 ,8 ,-10 ,14 ,-15 ,6 ,-10 ,-14 ,12 ,-1 ,-16 ,3}); + auto matrix2 = NDArrayFactory::create('c', {2,6}, {18 ,3 ,2 ,7 ,-11 ,7 ,7 ,10 ,-13 ,-8 ,13 ,20}); + auto expM = NDArrayFactory::create('c', {5,5}, {-17,14,9,-12,-12, 5,-4, -19, -7,-12, 15,16,17.0294, -6, 8, -10,14, -15, 6,-10, -14,12, 0,-16, 0}); + auto expU = NDArrayFactory::create('c', {2,6}, {18, 2.58377, 2, 7.16409,-11, 7, 7 ,10.4525 ,-13, -7.39897 ,13 ,20}); + + ops::helpers::SVD svd(matrix, 4, false, true, true, 't'); + svd._m = matrix; + svd._u = matrix2; + svd.deflation1(1,1,2,2); + + ASSERT_TRUE(expM.equalsTo(&svd._m)); + ASSERT_TRUE(expU.equalsTo(&svd._u)); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests1, SVD_test4) { + + auto matrix1 = NDArrayFactory::create('c', {6,5}, {12 ,20 ,19 ,-18 ,-6 ,3 ,6 ,2 ,-7 ,-7 ,14 ,8 ,18 ,-17 ,18 ,-14 ,-15 ,1 ,2 ,2 ,-3 ,-18 ,8 ,-17 ,-19 ,12 ,18 ,6 ,-2 ,-17}); + auto matrix2 = NDArrayFactory::create('c', {6,6}, {-10 ,-16 ,-20 ,13 ,20 ,-10 ,-9 ,-1 ,-7 ,-20 ,-4 ,20 ,-11 ,19 ,-5 ,-18 ,12 ,-19 ,18 ,-18 ,17 ,-10 ,-19 ,14 ,-2 ,-7 ,-17 ,-14 ,-4 ,-16 ,18 ,-6 ,-18 ,1 ,-15 ,-12}); + auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); + auto expM = NDArrayFactory::create('c', {6,5}, {12, 20, 19,-18, -6, 3, 6, 2, -7, -7, 14, 8, 18,-17, 18, -14,-15,8.06226, 2, 2, -3,-18, 0,-17, 2, 12, 18, 6, -2,-17}); + auto expU = NDArrayFactory::create('c', {6,6}, {-10,-16, -20, 13, 20,-10, -9, -1,-20.7138,4.46525, -4, 20, -11, 19,-18.4812,2.72876, 12,-19, 18,-18, 17, -10,-19, 14, -2, -7, -17, -14, -4,-16, 18, -6, -18, 1,-15,-12}); + auto expV = NDArrayFactory::create('c', {5,5}, {-18, 1, 19, -7, 1, 2,-18, -13, 14, 2, -2,-11,2.97683,-7.69015,-6, -3, -8, 8, -2, 7, 16, 15, -3, 7, 0}); + + ops::helpers::SVD svd(matrix3, 4, true, true, true, 't'); + svd._m = matrix1; + svd._u = matrix2; + svd._v = matrix3; + svd.deflation2(1, 2, 2, 1, 1, 2, 1); + + ASSERT_TRUE(expM.equalsTo(&svd._m)); + ASSERT_TRUE(expU.equalsTo(&svd._u)); + ASSERT_TRUE(expV.equalsTo(&svd._v)); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests1, SVD_test5) { + + auto matrix1 = NDArrayFactory::create('c', {6,5}, {12 ,20 ,19 ,-18 ,-6 ,3 ,6 ,2 ,-7 ,-7 ,14 ,8 ,18 ,-17 ,18 ,-14 ,-15 ,1 ,2 ,2 ,-3 ,-18 ,8 ,-17 ,-19 ,12 ,18 ,6 ,-2 ,-17}); + auto matrix2 = NDArrayFactory::create('c', {6,6}, {-10 ,-16 ,-20 ,13 ,20 ,-10 ,-9 ,-1 ,-7 ,-20 ,-4 ,20 ,-11 ,19 ,-5 ,-18 ,12 ,-19 ,18 ,-18 ,17 ,-10 ,-19 ,14 ,-2 ,-7 ,-17 ,-14 ,-4 ,-16 ,18 ,-6 ,-18 ,1 ,-15 ,-12}); + auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); + auto expM = NDArrayFactory::create('c', {6,5}, {18.4391, 20, 19,-18, -6, 3, 6, 2, -7, -7, 0, 8,18.4391,-17, 18, -14,-15, 1, 2, 2, -3,-18, 8,-17,-19, 12, 18, 6, -2,-17}); + auto expU = NDArrayFactory::create('c', {6,6}, {-10,-16,-20,13, 20,-10, -9,-15.8359, -7,-12.2566, -4, 20, -11,-1.30158, -5,-26.1401, 12,-19, 18,-19.3068, 
17, 7.15871,-19, 14, -2, -7,-17, -14, -4,-16, 18, -6,-18, 1,-15,-12}); + auto expV = NDArrayFactory::create('c', {5,5}, {-18, 1, 19, -7, 1, 2,-1.08465,-13,22.7777, 2, -2,-5.64019, 8,9.65341,-6, -3, -8, 8, -2, 7, 16, 15, -3, 7, 0}); + + ops::helpers::SVD svd(matrix3, 4, true, true, true, 't'); + svd._m = matrix1; + svd._u = matrix2; + svd._v = matrix3; + svd.deflation2(1, 0, 1, 1, 0, 2, 2); + + ASSERT_TRUE(expM.equalsTo(&svd._m)); + ASSERT_TRUE(expU.equalsTo(&svd._u)); + ASSERT_TRUE(expV.equalsTo(&svd._v)); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests1, SVD_test6) { + + auto matrix1 = NDArrayFactory::create('c', {6,5}, {12 ,20 ,19 ,-18 ,-6 ,3 ,6 ,2 ,-7 ,-7 ,14 ,8 ,18 ,-17 ,18 ,-14 ,-15 ,1 ,2 ,2 ,-3 ,-18 ,8 ,-17 ,-19 ,12 ,18 ,6 ,-2 ,-17}); + auto matrix2 = NDArrayFactory::create('c', {2,6}, {-10 ,-16 ,-20 ,13 ,20 ,-10 ,-9 ,-1 ,-7 ,-20 ,-4 ,20}); + auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); + auto expM = NDArrayFactory::create('c', {6,5}, {18.4391, 20, 19,-18, -6, 3, 6, 2, -7, -7, 0, 8,18.4391,-17, 18, -14,-15, 1, 2, 2, -3,-18, 8,-17,-19, 12, 18, 6, -2,-17}); + auto expU = NDArrayFactory::create('c', {2,6}, {-10, -0.542326,-20, 20.6084,20,-10, -9, -15.8359, -7,-12.2566,-4, 20}); + auto expV = NDArrayFactory::create('c', {5,5}, {-18, 1, 19, -7, 1, 2,-1.08465,-13,22.7777, 2, -2,-5.64019, 8,9.65341,-6, -3, -8, 8, -2, 7, 16, 15, -3, 7, 0}); + + ops::helpers::SVD svd(matrix3, 4, false, true, true, 't'); + svd._m = matrix1; + svd._u = matrix2; + svd._v = matrix3; + svd.deflation2(1, 0, 1, 1, 0, 2, 2); + + ASSERT_TRUE(expM.equalsTo(&svd._m)); + ASSERT_TRUE(expU.equalsTo(&svd._u)); + ASSERT_TRUE(expV.equalsTo(&svd._v)); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests1, SVD_test7) { + + auto matrix1 = NDArrayFactory::create('c', {6,5}, {12 ,20 ,19 ,-18 ,-6 ,3 ,6 ,2 ,-7 ,-7 ,14 ,8 ,18 ,-17 ,18 ,-14 ,-15 ,1 ,2 ,2 ,-3 ,-18 ,8 ,-17 ,-19 ,12 ,18 ,6 ,-2 ,-17}); + auto matrix2 = NDArrayFactory::create('c', {6,6}, {-10 ,-16 ,-20 ,13 ,20 ,-10 ,-9 ,-1 ,-7 ,-20 ,-4 ,20 ,-11 ,19 ,-5 ,-18 ,12 ,-19 ,18 ,-18 ,17 ,-10 ,-19 ,14 ,-2 ,-7 ,-17 ,-14 ,-4 ,-16 ,18 ,-6 ,-18 ,1 ,-15 ,-12}); + auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); + + auto expM = NDArrayFactory::create('c', {6,5}, {12, 20, 19,-18, -6, 3, 6, 2, -7, -7, 14, 8,19.6977,-17, 18, -14,-15, 1, 2, 2, -3,-18, 0,-17, 0, 12, 18, 6, -2,-17}); + auto expU = NDArrayFactory::create('c', {6,6}, {-10, -16,-20, 13, 20,-10, -9,-9.03658, -7,-17.8701, -4, 20, -11, 10.0519, -5,-24.1652, 12,-19, 18, -20.51, 17,-1.82762,-19, 14, -2,-12.0826,-17,-9.95039, -4,-16, 18, -6,-18, 1,-15,-12}); + auto expV = NDArrayFactory::create('c', {5,5}, {-18, 1, 19,-7, 1, 2,-18,-13,14, 2, -2,-11, 8, 2,-6, -3, -8, 8,-2, 7, 16, 15, -3, 7, 0}); + + ops::helpers::SVD svd(matrix3, 4, true, true, true, 't'); + svd._m = matrix1; + svd._u = matrix2; + svd._v = matrix3; + svd.deflation(1, 3, 1, 1, 2, 1); + + ASSERT_TRUE(expM.equalsTo(&svd._m)); + ASSERT_TRUE(expU.equalsTo(&svd._u)); + ASSERT_TRUE(expV.equalsTo(&svd._v)); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests1, SVD_test8) { + + auto matrix1 = NDArrayFactory::create('c', {6,5}, {12 ,20 ,19 ,-18 ,-6 ,3 ,6 ,2 ,-7 ,-7 ,14 ,8 ,18 ,-17 ,18 ,-14 ,-15 ,1 ,2 ,2 ,-3 ,-18 ,8 ,-17 ,-19 ,12 ,18 ,6 ,-2 ,-17}); + auto 
matrix2 = NDArrayFactory::create('c', {6,6}, {-10 ,-16 ,-20 ,13 ,20 ,-10 ,-9 ,-1 ,-7 ,-20 ,-4 ,20 ,-11 ,19 ,-5 ,-18 ,12 ,-19 ,18 ,-18 ,17 ,-10 ,-19 ,14 ,-2 ,-7 ,-17 ,-14 ,-4 ,-16 ,18 ,-6 ,-18 ,1 ,-15 ,-12}); + auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); + + auto expM = NDArrayFactory::create('c', {6,5}, {12, 20,19,-18, -6, 3, 6, 2, -7, -7, 14,-15, 2,-17, 18, -14, 8, 1, 18, 2, -3,-18, 8,-17,-19, 12, 18, 6, -2,-17}); + auto expU = NDArrayFactory::create('c', {6,6}, {-10,-20,-16, 13, 20,-10, -9, -7, -1,-20, -4, 20, -11, -5, 19,-18, 12,-19, 18, 17,-18,-10,-19, 14, -2, -7,-17,-14, -4,-16, 18, -6,-18, 1,-15,-12}); + auto expV = NDArrayFactory::create('c', {5,5}, {-18, 1, 19,-7, 1, 2,-18,-13, 2,14, -2,-11, 8,-6, 2, -3, -8, 8, 7,-2, 16, 15, -3, 7, 0}); + + ops::helpers::SVD svd(matrix3, 4, true, true, true, 't'); + svd._m = matrix1; + svd._u = matrix2; + svd._v = matrix3; + svd.deflation(0, 2, 2, 1, 2, 1); + + ASSERT_TRUE(expM.equalsTo(&svd._m)); + ASSERT_TRUE(expU.equalsTo(&svd._u)); + ASSERT_TRUE(expV.equalsTo(&svd._v)); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests1, SVD_test9) { + + auto col0 = NDArrayFactory::create('c', {10,1}, {12 ,20 ,19 ,-18 ,-6 ,3 ,6 ,2 ,-7 ,14}); + auto diag = NDArrayFactory::create('c', {10,1}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2}); + auto permut = NDArrayFactory::create('c', {1,10}, {8 ,1 ,4 ,0, 5 ,2 ,9 ,3 ,7 ,6}); + auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); + + auto expSingVals = NDArrayFactory::create('c', {10,1}, {-2, 15.304323, 11.2, -1, 1.73489, -12, -15.3043, -12.862, 5.6, 41.4039}); + auto expShifts = NDArrayFactory::create('c', {10,1}, {1, 19, 19, 1, 2, -18, -18, -13, 2, 2}); + auto expMus = NDArrayFactory::create('c', {10,1}, {-3, -3.695677, -7.8, -2, -0.265108, 6, 2.69568, 0.138048, 3.6, 39.4039}); + + auto singVals = NDArrayFactory::create('c', {10,1}); + auto shifts = NDArrayFactory::create('c', {10,1}); + auto mus = NDArrayFactory::create('c', {10,1}); + + ops::helpers::SVD svd(matrix3, 4, true, true, true, 't'); + svd.calcSingVals(col0, diag, permut, singVals, shifts, mus); + + ASSERT_TRUE(expSingVals.equalsTo(&singVals)); + ASSERT_TRUE(expShifts.equalsTo(&shifts)); + ASSERT_TRUE(expMus.equalsTo(&mus)); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests1, SVD_test10) { + + auto singVals = NDArrayFactory::create('c', {4,1}, {1 ,1 ,1 ,1}); + auto col0 = NDArrayFactory::create('c', {4,1}, {1 ,1 ,1 ,1}); + auto diag = NDArrayFactory::create('c', {4,1}, {5 ,7 ,-13 ,14}); + auto permut = NDArrayFactory::create('c', {1,4}, {0 ,2 ,3 ,1 }); + auto mus = NDArrayFactory::create('c', {4,1}, {4,1,4,6}); + auto shifts = NDArrayFactory::create('c', {4,1}, {4,2,5,6}); + auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); + + auto expZhat = NDArrayFactory::create('c', {4,1}, {0, 0.278208, 72.501953, 0}); + + auto zhat = NDArrayFactory::create('c', {4,1}); + + ops::helpers::SVD svd(matrix3, 4, true, true, true, 't'); + svd.perturb(col0, diag, permut, singVals, shifts, mus, zhat); + + ASSERT_NEAR(expZhat.e(1), zhat.e(1), EPS); + ASSERT_NEAR(expZhat.e(2), zhat.e(2), EPS); +} + + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests1, SVD_test11) { 
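+    // calcSingVecs is the singular-vector assembly step of this divide-and-conquer
+    // SVD helper: from the perturbed column vector zhat, the diagonal entries, the
+    // permutation and the shift/mu split of the singular values (produced by
+    // calcSingVals and perturb in the preceding tests) it fills the U and V factors;
+    // the expected U and V below appear to be precomputed reference outputs.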
+ + auto singVals = NDArrayFactory::create('c', {4,1}, {1 ,1 ,1 ,1}); + auto zhat = NDArrayFactory::create('c', {4,1}, {2 ,1 ,2 ,1}); + auto diag = NDArrayFactory::create('c', {4,1}, {5 ,7 ,-13 ,14}); + auto permut = NDArrayFactory::create('c', {1,4}, {0 ,2 ,3 ,1 }); + auto mus = NDArrayFactory::create('c', {4,1}, {4,1,4,6}); + auto shifts = NDArrayFactory::create('c', {4,1}, {4,2,5,6}); + auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); + + auto expU = NDArrayFactory::create('c', {5,5}, {-0.662161, 0.980399,-0.791469,-0.748434, 0, -0.744931, 0.183825,-0.593602,-0.392928, 0, 0.0472972, 0.061275,0.0719517, 0.104781, 0, 0.0662161,0.0356509, 0.126635, 0.523904, 0, 0, 0, 0, 0, 1}); + auto expV = NDArrayFactory::create('c', {4,4}, {-0.745259,-0.965209, -0.899497, -0.892319, -0.652102, 0.21114, -0.39353, -0.156156, -0.0768918,-0.130705,-0.0885868,-0.0773343, 0.115929,0.0818966, 0.167906, 0.416415}); + auto U = NDArrayFactory::create('c', {5,5}); + auto V = NDArrayFactory::create('c', {4,4}); + + + ops::helpers::SVD svd(matrix3, 4, true, true, true, 't'); + svd.calcSingVecs(zhat, diag,permut, singVals, shifts, mus, U, V); + + ASSERT_TRUE(expU.equalsTo(&U)); + ASSERT_TRUE(expV.equalsTo(&V)); + +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests1, SVD_test12) { + + auto matrix1 = NDArrayFactory::create('c', {6,5}, {-2 ,-3 ,2 ,1 ,0 ,0 ,-4 ,5 ,-2 ,-3 ,-4 ,0 ,5 ,-1 ,-5 ,-3 ,-5 ,3 ,3 ,3 ,-5 ,5 ,-5 ,0 ,2 ,-2 ,-3 ,-4 ,-5 ,-3}); + auto matrix2 = NDArrayFactory::create('c', {6,6}, {-10 ,-16 ,-20 ,13 ,20 ,-10 ,-9 ,-1 ,-7 ,-20 ,-4 ,20 ,-11 ,19 ,-5 ,-18 ,12 ,-19 ,18 ,-18 ,17 ,-10 ,-19 ,14 ,-2 ,-7 ,-17 ,-14 ,-4 ,-16 ,18 ,-6 ,-18 ,1 ,-15 ,-12}); + auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); + auto matrix4 = NDArrayFactory::create('c', {5,5}, {3 ,-8 ,5 ,7 ,-8 ,4 ,-19 ,-12 ,-4 ,-5 ,-11 ,19 ,-2 ,-7 ,1 ,16 ,-5 ,10 ,19 ,-19 ,0 ,-20 ,0 ,-8 ,-13}); + + auto expSingVals = NDArrayFactory::create('c', {4,1}, {8.43282, 5, 2.3, 1.10167}); + auto expU = NDArrayFactory::create('c', {5,5}, {0.401972,0, 0.206791, 0.891995,0, 0,1, 0, 0,0, 0.816018,0,-0.522818,-0.246529,0, -0.415371,0,-0.826982, 0.378904,0, 0,0, 0, 0,1}); + auto expV = NDArrayFactory::create('c', {4,4}, {-0.951851,0,-0.133555,-0.275939, 0,1, 0, 0, 0.290301,0,-0.681937,-0.671333, -0.098513,0,-0.719114, 0.687873}); + + ops::helpers::SVD svd(matrix4, 4, true, true, true, 't'); + svd._m = matrix1; + svd._u = matrix2; + svd._v = matrix3; + NDArray U, singVals, V; + svd.calcBlockSVD(1, 4, U, singVals, V); + + ASSERT_TRUE(expSingVals.equalsTo(&singVals)); + ASSERT_TRUE(expU.equalsTo(&U)); + ASSERT_TRUE(expV.equalsTo(&V)); + + ASSERT_TRUE(expSingVals.isSameShapeStrict(singVals)); + ASSERT_TRUE(expU.isSameShapeStrict(U)); + ASSERT_TRUE(expV.isSameShapeStrict(V)); +} /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, SVD_test16) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix1 = NDArrayFactory::create('c', {6,5}, {-2 ,-3 ,2 ,1 ,0 ,0 ,-4 ,5 ,-2 ,-3 ,-4 ,0 ,5 ,-1 ,-5 ,-3 ,-5 ,3 ,3 ,3 ,-5 ,5 ,-5 ,0 ,2 ,-2 ,-3 ,-4 ,-5 ,-3}); auto matrix2 = NDArrayFactory::create('c', {6,6}, {-10 ,-16 ,-20 ,13 ,20 ,-10 ,-9 ,-1 ,-7 ,-20 ,-4 ,20 ,-11 ,19 ,-5 ,-18 ,12 ,-19 ,18 ,-18 ,17 ,-10 ,-19 ,14 ,-2 ,-7 ,-17 ,-14 ,-4 ,-16 ,18 ,-6 ,-18 ,1 ,-15 ,-12}); auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 
,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); @@ -1257,9 +1107,6 @@ TEST_F(HelpersTests1, SVD_test16) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, SVD_test17) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix1 = NDArrayFactory::create('c', {6,5}, {-2 ,-3 ,2 ,1 ,0 ,0 ,-4 ,5 ,-2 ,-3 ,-4 ,0 ,5 ,-1 ,-5 ,-3 ,-5 ,3 ,3 ,3 ,-5 ,5 ,-5 ,0 ,2 ,-2 ,-3 ,-4 ,-5 ,-3}); auto matrix2 = NDArrayFactory::create('c', {6,6}, {-10 ,-16 ,-20 ,13 ,20 ,-10 ,-9 ,-1 ,-7 ,-20 ,-4 ,20 ,-11 ,19 ,-5 ,-18 ,12 ,-19 ,18 ,-18 ,17 ,-10 ,-19 ,14 ,-2 ,-7 ,-17 ,-14 ,-4 ,-16 ,18 ,-6 ,-18 ,1 ,-15 ,-12}); auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); @@ -1893,7 +1740,7 @@ TEST_F(HelpersTests1, OpArgsHolder_test3) { ASSERT_EQ(Status::OK(), results.status()); ASSERT_TRUE(exp.isSameShape(tiled)); ASSERT_TRUE(exp.equalsTo(tiled)); - + OpArgsHolder holderBP = holderFF.createArgsHolderForBP({&gradO}, true); sd::ops::tile_bp opBP; results = opBP.execute(holderBP); @@ -2495,4 +2342,3 @@ TEST_F(HelpersTests1, lstmLayerCell_3) { ASSERT_TRUE(expC.isSameShape(c)); ASSERT_TRUE(expC.equalsTo(c)); } - diff --git a/libnd4j/tests_cpu/layers_tests/HelpersTests2.cpp b/libnd4j/tests_cpu/layers_tests/HelpersTests2.cpp new file mode 100644 index 000000000..8a0cc28bf --- /dev/null +++ b/libnd4j/tests_cpu/layers_tests/HelpersTests2.cpp @@ -0,0 +1,426 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +#include "testlayers.h" +#include +#include +#include +#include +#include + +using namespace sd; + +class HelpersTests2 : public testing::Test { +public: + + HelpersTests2() { + + std::cout< hess1(x1); + ASSERT_TRUE(hess1._H.isSameShape(&x1)); + ASSERT_TRUE(hess1._H.equalsTo(&x1)); + ASSERT_TRUE(hess1._Q.isSameShape(&expQ)); + ASSERT_TRUE(hess1._Q.equalsTo(&expQ)); + + ops::helpers::Hessenberg hess2(x2); + ASSERT_TRUE(hess2._H.isSameShape(&x2)); + ASSERT_TRUE(hess2._H.equalsTo(&x2)); + ASSERT_TRUE(hess2._Q.isSameShape(&expQ)); + ASSERT_TRUE(hess2._Q.equalsTo(&expQ)); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests2, Hessenberg_2) { + + NDArray x('c', {2,2}, {1.5,-2,17,5}, sd::DataType::DOUBLE); + NDArray expQ('c', {2,2}, {1,0,0,1}, sd::DataType::DOUBLE); + + ops::helpers::Hessenberg hess(x); + + // hess._H.printBuffer(); + + ASSERT_TRUE(hess._H.isSameShape(&x)); + ASSERT_TRUE(hess._H.equalsTo(&x)); + + ASSERT_TRUE(hess._Q.isSameShape(&expQ)); + ASSERT_TRUE(hess._Q.equalsTo(&expQ)); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests2, Hessenberg_3) { + + NDArray x('c', {3,3}, {33,24,-48,57,12.5,-3,1.1,10,-5.2}, sd::DataType::DOUBLE); + NDArray expH('c', {3,3}, {33, -23.06939, -48.45414, -57.01061, 12.62845, 3.344058, 0, -9.655942, -5.328448}, sd::DataType::DOUBLE); + NDArray expQ('c', {3,3}, {1,0,0,0, -0.99981, -0.019295, 0, -0.019295,0.99981}, sd::DataType::DOUBLE); + + ops::helpers::Hessenberg hess(x); + + ASSERT_TRUE(hess._H.isSameShape(&expH)); + ASSERT_TRUE(hess._H.equalsTo(&expH)); + + ASSERT_TRUE(hess._Q.isSameShape(&expQ)); + ASSERT_TRUE(hess._Q.equalsTo(&expQ)); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests2, Hessenberg_4) { + + NDArray x('c', {4,4}, {0.33 ,-7.25 ,1.71 ,6.20 ,1.34 ,5.38 ,-2.76 ,-8.51 ,7.59 ,3.44 ,2.24 ,-6.82 ,-1.15 ,4.80 ,-4.67 ,2.14}, sd::DataType::DOUBLE); + NDArray expH('c', {4,4}, {0.33, 0.4961181, 3.51599, 9.017665, -7.792702, 4.190221, 6.500328, 5.438888, 0, 3.646734, 0.4641911, -7.635502, 0,0, 5.873535, 5.105588}, sd::DataType::DOUBLE); + NDArray expQ('c', {4,4}, {1,0,0,0, 0,-0.171956, 0.336675, -0.925787, 0,-0.973988,0.0826795, 0.210976, 0, 0.147574, 0.937984,0.3137}, sd::DataType::DOUBLE); + + ops::helpers::Hessenberg hess(x); + + ASSERT_TRUE(hess._H.isSameShape(&expH)); + ASSERT_TRUE(hess._H.equalsTo(&expH)); + + ASSERT_TRUE(hess._Q.isSameShape(&expQ)); + ASSERT_TRUE(hess._Q.equalsTo(&expQ)); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests2, Hessenberg_5) { + + NDArray x('c', {10,10}, {6.9 ,4.8 ,9.5 ,3.1 ,6.5 ,5.8 ,-0.9 ,-7.3 ,-8.1 ,3.0 ,0.1 ,9.9 ,-3.2 ,6.4 ,6.2 ,-7.0 ,5.5 ,-2.2 ,-4.0 ,3.7 ,-3.6 ,9.0 ,-1.4 ,-2.4 ,1.7 , + -6.1 ,-4.2 ,-2.5 ,-5.6 ,-0.4 ,0.4 ,9.1 ,-2.1 ,-5.4 ,7.3 ,3.6 ,-1.7 ,-5.7 ,-8.0 ,8.8 ,-3.0 ,-0.5 ,1.1 ,10.0 ,8.0 ,0.8 ,1.0 ,7.5 ,3.5 ,-1.8 , + 0.3 ,-0.6 ,-6.3 ,-4.5 ,-1.1 ,1.8 ,0.6 ,9.6 ,9.2 ,9.7 ,-2.6 ,4.3 ,-3.4 ,0.0 ,-6.7 ,5.0 ,10.5 ,1.5 ,-7.8 ,-4.1 ,-5.3 ,-5.0 ,2.0 ,-4.4 ,-8.4 , + 6.0 ,-9.4 ,-4.8 ,8.2 ,7.8 ,5.2 ,-9.5 ,-3.9 ,0.2 ,6.8 ,5.7 ,-8.5 ,-1.9 ,-0.3 ,7.4 ,-8.7 ,7.2 ,1.3 ,6.3 ,-3.7 ,3.9 ,3.3 ,-6.0 ,-9.1 ,5.9}, sd::DataType::DOUBLE); + NDArray expH('c', {10,10}, {6.9, 6.125208, -8.070945, 7.219828, -9.363308, 2.181236, 5.995414, 3.892612, 4.982657, -2.088574,-12.6412, 1.212547, -6.449684, 5.162879, 0.4341714, -5.278079, 
-2.624011, -2.03615, 11.39619, -3.034842, + 0, -12.71931, 10.1146, 6.494434, -1.062934, 5.668906, -4.672953, -9.319893, -2.023392, 6.090341,0,0, 7.800521, -1.46286, 1.484626, -10.58252, -3.492978, 2.42187, 5.470045, 1.877265, + 0,0,0, 14.78259,-0.3147726, -5.74874, -0.377823, 3.310056, 2.242614, -5.111574,0,0,0,0, -9.709131, 3.885072, 6.762626, 4.509144, 2.390195, -4.991013, + 0,0,0,0,0, 8.126269, -12.32529, 9.030151, 1.390931, 0.8634045,0,0,0,0,0,0, -12.99477, 9.574299,-0.3098022, 4.910835,0,0,0,0,0,0,0, 14.75256, 18.95723, -5.054717,0,0,0,0,0,0,0,0, -4.577715, -5.440827,}, sd::DataType::DOUBLE); + NDArray expQ('c', {10,10}, {1,0,0,0,0,0,0,0,0,0,0,-0.0079106,-0.38175,-0.39287,-0.26002,-0.44102,-0.071516,0.12118,0.64392,0.057562, + 0,0.28478,0.0058784,0.3837,-0.47888,0.39477,0.0036847,-0.24678,0.3229,0.47042,0,-0.031643,-0.61277,0.087648,0.12014,0.47648,-0.5288,0.060599,0.021434,-0.30102, + 0,0.23732,-0.17801,-0.31809,-0.31267,0.27595,0.30134,0.64555,-0.33392,0.13363,0,-0.023732,-0.40236,0.43089,-0.38692,-0.5178,-0.03957,-0.081667,-0.47515,-0.0077949, + 0,0.20568,-0.0169,0.36962,0.49669,-0.22475,-0.22199,0.50075,0.10454,0.46112,0,0.41926,0.30243,-0.3714,-0.16795,-0.12969,-0.67572,-0.1205,-0.26047,0.10407, + 0,-0.41135,-0.28357,-0.33858,0.18836,0.083822,-0.0068213,-0.30161,-0.24956,0.66327,0,0.68823,-0.33616,-0.12129,0.36163,-0.063256,0.34198,-0.37564,-0.048196,-0.058948}, sd::DataType::DOUBLE); + + ops::helpers::Hessenberg hess(x); + + ASSERT_TRUE(hess._H.isSameShape(&expH)); + ASSERT_TRUE(hess._H.equalsTo(&expH)); + + ASSERT_TRUE(hess._Q.isSameShape(&expQ)); + ASSERT_TRUE(hess._Q.equalsTo(&expQ)); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests2, Schur_1) { + + NDArray x('c', {3,3}, sd::DataType::DOUBLE); + + NDArray expT('c', {3,3}, {-2.5, -2, 1, 0, 1.5, -2, 3, 4, 5}, sd::DataType::DOUBLE); + NDArray expU('c', {3,3}, {0.3, 0.2,-0.1, 0,-0.1, 0.2, -0.3,-0.4, 0.5}, sd::DataType::DOUBLE); + + ops::helpers::Schur schur(x); + schur._T.linspace(-3, 1); + schur._U.linspace(-0.3, 0.1); + + schur.splitTwoRows(1, 0.5); + + ASSERT_TRUE(schur._T.isSameShape(&expT)); + ASSERT_TRUE(schur._T.equalsTo(&expT)); + + ASSERT_TRUE(schur._U.isSameShape(&expU)); + ASSERT_TRUE(schur._U.equalsTo(&expU)); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests2, Schur_2) { + + NDArray x('c', {3,3}, sd::DataType::DOUBLE); + + NDArray shift('c', {3}, sd::DataType::DOUBLE); + NDArray exp1('c', {3}, {1,-3,0}, sd::DataType::DOUBLE); + NDArray exp2('c', {3}, {3, 3,-7}, sd::DataType::DOUBLE); + NDArray exp3('c', {3}, {0.964,0.964,0.964}, sd::DataType::DOUBLE); + NDArray exp1T('c', {3,3}, {-3,-2,-1,0,1,2,3,4,5}, sd::DataType::DOUBLE); + NDArray exp2T('c', {3,3}, {-8,-2,-1,0,-4,2,3,4,0}, sd::DataType::DOUBLE); + NDArray exp3T('c', {3,3}, {-9.464102,-2,-1,0,-5.464102,2,3,4,-1.464102,}, sd::DataType::DOUBLE); + + ops::helpers::Schur schur(x); + // schur._U.linspace(-0.3, 0.1); // doesn't matter + + schur._T.linspace(-3, 1); + double expShift =0; + schur.calcShift(1, 5, expShift, shift); + ASSERT_TRUE(schur._T.equalsTo(&exp1T)); + ASSERT_TRUE(shift.isSameShape(&exp1)); + ASSERT_TRUE(shift.equalsTo(&exp1)); + ASSERT_TRUE(expShift == 0); + + schur._T.linspace(-3, 1); + expShift = 0; + schur.calcShift(2, 10, expShift, shift); + ASSERT_TRUE(schur._T.equalsTo(&exp2T)); + ASSERT_TRUE(shift.isSameShape(&exp2)); + ASSERT_TRUE(shift.equalsTo(&exp2)); + ASSERT_TRUE(expShift == 5); + + schur._T.linspace(-3, 1); + expShift = 0; + schur.calcShift(2, 30, 
expShift, shift); + ASSERT_TRUE(schur._T.equalsTo(&exp3T)); + ASSERT_TRUE(shift.isSameShape(&exp3)); + ASSERT_TRUE(shift.equalsTo(&exp3)); + ASSERT_TRUE((6.4641-0.00001) < expShift && expShift < (6.4641+0.00001)); +} + + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests2, Schur_3) { + + NDArray x('c', {2,2}, {1.5,-2,17,5}, sd::DataType::DOUBLE); + NDArray expU('c', {2,2}, {1,0,0,1}, sd::DataType::DOUBLE); + + ops::helpers::Schur schur(x); + + ASSERT_TRUE(schur._T.isSameShape(&x)); + ASSERT_TRUE(schur._T.equalsTo(&x)); + + ASSERT_TRUE(schur._U.isSameShape(&expU)); + ASSERT_TRUE(schur._U.equalsTo(&expU)); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests2, Schur_4) { + + NDArray x('c', {3,3}, {33,24,-48,57,12.5,-3,1.1,10,-5.2}, sd::DataType::DOUBLE); + NDArray expT('c', {3,3}, {53.73337,-20.21406,-50.44809,0,-27.51557, 26.74307,0,0,14.0822}, sd::DataType::DOUBLE); + NDArray expU('c', {3,3}, {-0.5848506, 0.7185352, 0.3763734,-0.7978391,-0.5932709,-0.1071558,-0.1462962, 0.3629555,-0.9202504}, sd::DataType::DOUBLE); + + ops::helpers::Schur schur(x); + + ASSERT_TRUE(schur._T.isSameShape(&expT)); + ASSERT_TRUE(schur._T.equalsTo(&expT)); + + ASSERT_TRUE(schur._U.isSameShape(&expU)); + ASSERT_TRUE(schur._U.equalsTo(&expU)); +} + +/* +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests2, Schur_5) { + + NDArray x('c', {4,4}, {0.33 ,-7.25 ,1.71 ,6.20 ,1.34 ,5.38 ,-2.76 ,-8.51 ,7.59 ,3.44 ,2.24 ,-6.82 ,-1.15 ,4.80 ,-4.67 ,2.14}, sd::DataType::DOUBLE); + NDArray expT('c', {4,4}, {6.940177,7.201107,2.523849,-8.534745,-3.109643,5.289615,-2.940507,9.330303, 0,0,-0.1740346, 7.19851,0,0, -2.870214, -1.965758}, sd::DataType::DOUBLE); + NDArray expU('c', {4,4}, {-0.2602141, 0.8077556,-0.3352316,-0.4091935,0.3285353,-0.4395489,-0.4714875,-0.6903338,0.7536921, 0.3005626,-0.3910435, 0.4343908,-0.5062621, -0.252962,-0.7158242, 0.4090287}, sd::DataType::DOUBLE); + + ops::helpers::Schur schur(x); + + ASSERT_TRUE(schur._T.isSameShape(&expT)); + ASSERT_TRUE(schur._T.equalsTo(&expT)); + + ASSERT_TRUE(schur._U.isSameShape(&expU)); + ASSERT_TRUE(schur._U.equalsTo(&expU)); +} +*/ +/* +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests2, Schur_6) { + + NDArray x('c', {10,10}, {6.9 ,4.8 ,9.5 ,3.1 ,6.5 ,5.8 ,-0.9 ,-7.3 ,-8.1 ,3.0 ,0.1 ,9.9 ,-3.2 ,6.4 ,6.2 ,-7.0 ,5.5 ,-2.2 ,-4.0 ,3.7 ,-3.6 ,9.0 ,-1.4 ,-2.4 ,1.7 , + -6.1 ,-4.2 ,-2.5 ,-5.6 ,-0.4 ,0.4 ,9.1 ,-2.1 ,-5.4 ,7.3 ,3.6 ,-1.7 ,-5.7 ,-8.0 ,8.8 ,-3.0 ,-0.5 ,1.1 ,10.0 ,8.0 ,0.8 ,1.0 ,7.5 ,3.5 ,-1.8 , + 0.3 ,-0.6 ,-6.3 ,-4.5 ,-1.1 ,1.8 ,0.6 ,9.6 ,9.2 ,9.7 ,-2.6 ,4.3 ,-3.4 ,0.0 ,-6.7 ,5.0 ,10.5 ,1.5 ,-7.8 ,-4.1 ,-5.3 ,-5.0 ,2.0 ,-4.4 ,-8.4 , + 6.0 ,-9.4 ,-4.8 ,8.2 ,7.8 ,5.2 ,-9.5 ,-3.9 ,0.2 ,6.8 ,5.7 ,-8.5 ,-1.9 ,-0.3 ,7.4 ,-8.7 ,7.2 ,1.3 ,6.3 ,-3.7 ,3.9 ,3.3 ,-6.0 ,-9.1 ,5.9}, sd::DataType::DOUBLE); + NDArray expT('c', {10,10}, {-13.78982, 6.072464, 0.3021194, -8.455495,-0.3047058, 4.033153, 2.610364, 2.80607, -2.735616, 0.3040549,-2.188506, -12.38324, -1.167179, -4.539672, -19.08546, 1.752401,-0.1354974,-0.2747422,-0.3270464, -5.070936, + 0,0,0.5067366, 7.930223,-0.6465996, 8.659522, 1.283713, 4.551415, 12.7736, 3.4812,0,0,-9.858142, -2.905068, -6.474159, -6.247967, 0.4720073, -10.49523, 3.617189, -4.941627, + 0,0,0,0,9.461626, -4.896166, 9.339704, 4.640336, 16.8626, 2.056027,0,0,0,0,6.479812, 8.462862, 7.386285, -4.123457, -5.817095, -2.633641,0,0,0,0,0,0,13.46667, -4.907281, 4.602204, 5.198035, + 0,0,0,0,0,0, 
7.176822, 16.93311, 2.195036, 1.346086,0,0,0,0,0,0,0,0, 16.86979, -3.052473,0,0,0,0,0,0,0,0,0, -5.52268}, sd::DataType::DOUBLE); + + // NDArray expT('c', {10,10}, {-13.78982, 6.072464, 0.1926198, -8.458698,-0.3047363, 4.033151, 2.610336, 2.806096, -2.735616, 0.3040549,-2.188506, -12.38324, -1.225857, -4.52418, -19.08548, 1.752257,-0.1354946,-0.2747435,-0.3270464, -5.070936, + // 0,0, 0.4812058, 7.886377,-0.7304318, 8.577898, 1.289673, 4.415163, 12.81936, 3.416929,0,0, -9.901988, -2.879537, -6.465196, -6.359608, 0.455452, -10.55328, 3.451505, -4.986284, + // 0,0,0,0, 9.461614, -4.896159, 9.339602, 4.64046, 16.86265, 2.056047,0,0,0,0, 6.47982, 8.462874, 7.386396, -4.123349, -5.816967, -2.633626, + // 0,0,0,0,0,0, 13.46665, -4.907315, 4.602182, 5.198022,0,0,0,0,0,0, 7.176788, 16.93313, 2.195081, 1.346137,0,0,0,0,0,0,0,0, 16.86979, -3.052473,0,0,0,0,0,0,0,0,0, -5.52268}, sd::DataType::DOUBLE); + + NDArray expU('c', {10,10}, {0.1964177, 0.2165192, -0.2138164, 0.4083154, -0.1872303, -0.5087223, 0.5529025, -0.2996174,-0.08772947, 0.07126534,-0.1906247, -0.223588, 0.3574755, 0.4245914, -0.3885589,-0.07328949, -0.4176507, -0.1885168, -0.4476957, 0.1971104, + -0.2219015, 0.3084187, 0.1069209, -0.4905009, -0.3517786, 0.1446875, 0.121738, -0.3772941, 0.1232591, 0.5353205,-0.4766346, 0.6158252, -0.1529085, 0.04780914, 0.1274182, -0.1219211, -0.3123289, -0.2219282,-0.07613826, -0.429201, + 0.2577533, -0.3356205, -0.225358, -0.1540796, 0.3155174, -0.1904664, -0.3567101, -0.6831458, 0.1244646, 0.03383783, -0.45597, -0.3350697, 0.06824276, -0.2861978,-0.06724917, -0.7046481, 0.01664764, 0.2270567, 0.2003283,-0.01544937, + 0.122865, 0.1516775, -0.4446453, -0.2338583, 0.1633447, -0.193498, -0.198088, 0.3170272, -0.5869794, 0.4013553, 0.347383, 0.3666581, 0.6890763,-0.05797414, 0.3630058, -0.319958, -0.1071812, 0.06162044, 0.03171228, 0.1275262, + -0.2986812, 0.05382598, -0.1484276, 0.4936468, 0.362756, 0.05858297, -0.1055183, 0.1090384, 0.4217073, 0.5534347, 0.3864388, 0.2085926, -0.204135, 0.05230855, -0.5290207, -0.1548485, -0.4670302, 0.2205726, 0.4380318,-0.01626632}, sd::DataType::DOUBLE); + + ops::helpers::Schur schur(x); + + ASSERT_TRUE(schur._T.isSameShape(&expT)); + ASSERT_TRUE(schur._T.equalsTo(&expT, 1e-3)); + + ASSERT_TRUE(schur._U.isSameShape(&expU)); + ASSERT_TRUE(schur._U.equalsTo(&expU)); +} +*/ + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests2, EigenValsAndVecs_1) { + + NDArray x('c', {2,2}, {1.5,-2,17,5}, sd::DataType::DOUBLE); + NDArray expVals('c', {2,2}, {3.25,5.562149, 3.25,-5.562149}, sd::DataType::DOUBLE); + NDArray expVecs('c', {2,2,2}, {-0.3094862,-0.0973726, -0.3094862,0.0973726,0,0.9459053, 0,-0.9459053}, sd::DataType::DOUBLE); + + ops::helpers::EigenValsAndVecs eig(x); + + ASSERT_TRUE(eig._Vals.isSameShape(&expVals)); + ASSERT_TRUE(eig._Vals.equalsTo(&expVals)); + + ASSERT_TRUE(eig._Vecs.isSameShape(&expVecs)); + ASSERT_TRUE(eig._Vecs.equalsTo(&expVecs)); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests2, EigenValsAndVecs_2) { + + NDArray x('c', {3,3}, {33,24,-48,57,12.5,-3,1.1,10,-5.2}, sd::DataType::DOUBLE); + NDArray expVals('c', {3,2}, {53.73337,0, -27.51557,0, 14.0822,0}, sd::DataType::DOUBLE); + NDArray expVecs('c', {3,3,2}, {-0.5848506,0,0.5560778,0,-0.04889745,0,-0.7978391,0,-0.7683444,0,-0.8855156,0,-0.1462962,0,0.3168979,0,-0.4620293,0}, sd::DataType::DOUBLE); + + ops::helpers::EigenValsAndVecs eig(x); + + ASSERT_TRUE(eig._Vals.isSameShape(&expVals)); + 
ASSERT_TRUE(eig._Vals.equalsTo(&expVals)); + + ASSERT_TRUE(eig._Vecs.isSameShape(&expVecs)); + ASSERT_TRUE(eig._Vecs.equalsTo(&expVecs)); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests2, EigenValsAndVecs_3) { + + NDArray x('c', {4,4}, {0.33 ,-7.25 ,1.71 ,6.20 ,1.34 ,5.38 ,-2.76 ,-8.51 ,7.59 ,3.44 ,2.24 ,-6.82 ,-1.15 ,4.80 ,-4.67 ,2.14}, sd::DataType::DOUBLE); + NDArray expVals('c', {4,2}, {6.114896,4.659591,6.114896,-4.659591, -1.069896,4.45631,-1.069896,-4.45631}, sd::DataType::DOUBLE); + NDArray expVecs('c', {4,4,2}, {-0.2141303,0.4815241,-0.2141303,-0.4815241, 0.1035092,-0.4270603, 0.1035092,0.4270603, 0.2703519,-0.2892722, 0.2703519,0.2892722, -0.5256817,0.044061, -0.5256817,-0.044061, + 0.6202137,0.05521234,0.6202137,-0.05521234, -0.5756007,0.3932209,-0.5756007,-0.3932209,-0.4166034,-0.0651337, -0.4166034,0.0651337, -0.1723716,0.1138941,-0.1723716,-0.1138941}, sd::DataType::DOUBLE); + + ops::helpers::EigenValsAndVecs eig(x); + + ASSERT_TRUE(eig._Vals.isSameShape(&expVals)); + ASSERT_TRUE(eig._Vals.equalsTo(&expVals)); + + ASSERT_TRUE(eig._Vecs.isSameShape(&expVecs)); + ASSERT_TRUE(eig._Vecs.equalsTo(&expVecs)); +} + +/* +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests2, EigenValsAndVecs_4) { + + NDArray x('c', {10,10}, {6.9 ,4.8 ,9.5 ,3.1 ,6.5 ,5.8 ,-0.9 ,-7.3 ,-8.1 ,3.0 ,0.1 ,9.9 ,-3.2 ,6.4 ,6.2 ,-7.0 ,5.5 ,-2.2 ,-4.0 ,3.7 ,-3.6 ,9.0 ,-1.4 ,-2.4 ,1.7 , + -6.1 ,-4.2 ,-2.5 ,-5.6 ,-0.4 ,0.4 ,9.1 ,-2.1 ,-5.4 ,7.3 ,3.6 ,-1.7 ,-5.7 ,-8.0 ,8.8 ,-3.0 ,-0.5 ,1.1 ,10.0 ,8.0 ,0.8 ,1.0 ,7.5 ,3.5 ,-1.8 , + 0.3 ,-0.6 ,-6.3 ,-4.5 ,-1.1 ,1.8 ,0.6 ,9.6 ,9.2 ,9.7 ,-2.6 ,4.3 ,-3.4 ,0.0 ,-6.7 ,5.0 ,10.5 ,1.5 ,-7.8 ,-4.1 ,-5.3 ,-5.0 ,2.0 ,-4.4 ,-8.4 , + 6.0 ,-9.4 ,-4.8 ,8.2 ,7.8 ,5.2 ,-9.5 ,-3.9 ,0.2 ,6.8 ,5.7 ,-8.5 ,-1.9 ,-0.3 ,7.4 ,-8.7 ,7.2 ,1.3 ,6.3 ,-3.7 ,3.9 ,3.3 ,-6.0 ,-9.1 ,5.9}, sd::DataType::DOUBLE); + NDArray expVals('c', {10,2}, { -13.08653,3.577011,-13.08653,-3.577011, -1.199166,8.675665,-1.199166,-8.675665,8.962244, + 5.610424, 8.962244,-5.610424, 15.19989,5.675794, 15.19989,-5.675794,16.86979,0,-5.52268,0}, sd::DataType::DOUBLE); + NDArray expVecs('c', {10,10,2}, {0.1652385,0.1439317, 0.1652385,-0.1439317, -0.198272,0.207306, -0.198272,-0.207306, 0.1861466,-0.4599919, 0.1861466,0.4599919, 0.09384053,-0.4889922, 0.09384053,0.4889922, -0.6153314,0, -0.2180209,0, + -0.1603652,-0.1466119, -0.1603652,0.1466119, 0.2817409,0.3301842, 0.2817409,-0.3301842, 0.09747303,-0.2218182, 0.09747303,0.2218182, 0.2318273,-0.3355113, 0.2318273,0.3355113, -0.4828878,0, -0.1451126,0, + -0.1866771,0.1220412, -0.1866771,-0.1220412, 0.08937842,-0.3025104, 0.08937842,0.3025104, 0.2783766,0.2258364, 0.2783766,-0.2258364, -0.1413997,-0.09596012, -0.1413997,0.09596012, -0.2286925,0, 0.3290011,0, + -0.4009741,0.238131, -0.4009741,-0.238131, -0.02772353,0.1338458, -0.02772353,-0.1338458, 0.09030543,-0.2222453, 0.09030543,0.2222453, 0.2565825,-0.2275446, 0.2565825,0.2275446, -0.2855937,0, -0.3950544,0, + 0.2168379,-0.1301121, 0.2168379,0.1301121, -0.165433,-0.1220125, -0.165433,0.1220125, -0.2685605,0.008133055,-0.2685605,-0.008133055, 0.1929395,-0.1194659, 0.1929395,0.1194659, 0.2206467,0, 0.3289105,0, + -0.3835898,-0.2478813, -0.3835898,0.2478813, 0.1923005,-0.01036433, 0.1923005,0.01036433, -0.1711637,-0.3548358, -0.1711637,0.3548358, 0.2888441,0.09625169, 0.2888441,-0.09625169, 0.2595426,0, -0.1288072,0, + 0.1033616,0.09839151, 0.1033616,-0.09839151, -0.3080167,-0.1624564, -0.3080167,0.1624564,-0.03972293,-0.03967309, 
-0.03972293,0.03967309, 0.1965443,0.3025898, 0.1965443,-0.3025898, 0.04587166,0, 0.499261,0, + 0.2922398,0.2461792, 0.2922398,-0.2461792, 0.2769633,-0.2745029, 0.2769633,0.2745029, 0.1034687,-0.002947149, 0.1034687,0.002947149, -0.02611308,0.1658046, -0.02611308,-0.1658046, 0.2351063,0, -0.3787892,0, + -0.2512689,-0.02169855, -0.2512689,0.02169855, -0.01481625,0.4376404, -0.01481625,-0.4376404, -0.2298635,-0.2360671, -0.2298635,0.2360671, 0.11004,-0.1467444, 0.11004,0.1467444, 0.1501568,0, 0.340117,0, + 0.325096,0.1712822, 0.325096,-0.1712822, -0.2412035,-0.09236849, -0.2412035,0.09236849, 0.3894343,-0.08673087, 0.3894343,0.08673087, 0.3125305,0.07128152, 0.3125305,-0.07128152, -0.2415555,0, 0.1841298,0,}, sd::DataType::DOUBLE); + + ops::helpers::EigenValsAndVecs eig(x); + + ASSERT_TRUE(eig._Vals.isSameShape(&expVals)); + ASSERT_TRUE(eig._Vals.equalsTo(&expVals)); + + ASSERT_TRUE(eig._Vecs.isSameShape(&expVecs)); + ASSERT_TRUE(eig._Vecs.equalsTo(&expVecs)); +} +*/ + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests2, fullPivLU_1) { + + NDArray a('c', {4,4}, {0.33 ,-7.25 ,1.71 ,6.20 ,1.34 ,5.38 ,-2.76 ,-8.51 ,7.59 ,3.44 ,2.24 ,-6.82 ,-1.15 ,4.80 ,-4.67 ,2.14}, sd::DataType::DOUBLE); + NDArray b('c', {4,1}, {-5.,10,9,1}, sd::DataType::DOUBLE); + + NDArray x = b.ulike(); + + NDArray expX('c', {4,1}, {0.8527251, -0.2545784, -1.076495, -0.8526268}, sd::DataType::DOUBLE); + + ops::helpers::FullPivLU::solve(a,b,x); + + ASSERT_TRUE(x.equalsTo(&expX)); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests2, fullPivLU_2) { + + NDArray a('c', {4,4}, {0.33 ,-7.25 ,1.71 ,6.20 ,1.34 ,5.38 ,-2.76 ,-8.51 ,7.59 ,3.44 ,2.24 ,-6.82 ,-1.15 ,4.80 ,-4.67 ,2.14}, sd::DataType::DOUBLE); + NDArray b('c', {4,2}, {-5.,10,9,1,1.5,-2,17,5}, sd::DataType::DOUBLE); + + NDArray x = b.ulike(); + + NDArray expX('c', {4,2}, {1.462913, 1.835338, 0.4083664, -2.163816, -3.344481, -3.739225, 0.5156383,0.01624954}, sd::DataType::DOUBLE); + + ops::helpers::FullPivLU::solve(a,b,x); + + ASSERT_TRUE(x.equalsTo(&expX)); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests2, fullPivLU_3) { + + NDArray a1('c', {4,3}, {0.33 ,1.71 ,6.20 ,1.34 ,5.38 ,-2.76 ,-8.51 ,2.24 ,-6.82 ,4.80 ,-4.67 ,2.14}, sd::DataType::DOUBLE); + NDArray a2('c', {3,4}, {0.33 ,1.71 ,6.20 ,1.34 ,5.38 ,-2.76 ,-8.51 ,2.24 ,-6.82 ,4.80 ,-4.67 ,2.14}, sd::DataType::DOUBLE); + NDArray b1('c', {4,2}, {-5.,10,9,1,1.5,-2,17,5}, sd::DataType::DOUBLE); + NDArray b2('c', {3,2}, {-5.,10,9,1,1.5,-2}, sd::DataType::DOUBLE); + + NDArray expX1('c', {3,2}, {0.9344955,-0.5841325, 0.8768102, 1.029137, -1.098021, 1.360152}, sd::DataType::DOUBLE); + NDArray expX2('c', {4,2}, {0.3536033,0.5270184,0,0,-0.8292221,0.967515,0.01827441,2.856337}, sd::DataType::DOUBLE); + + NDArray x1 = expX1.ulike(); + ops::helpers::FullPivLU::solve(a1,b1,x1); + ASSERT_TRUE(x1.equalsTo(&expX1)); + + NDArray x2 = expX2.ulike(); + ops::helpers::FullPivLU::solve(a2,b2,x2); + ASSERT_TRUE(x2.equalsTo(&expX2)); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests2, fullPivLU_4) { + + NDArray a('c', {10,10}, {6.9 ,4.8 ,9.5 ,3.1 ,6.5 ,5.8 ,-0.9 ,-7.3 ,-8.1 ,3.0 ,0.1 ,9.9 ,-3.2 ,6.4 ,6.2 ,-7.0 ,5.5 ,-2.2 ,-4.0 ,3.7 ,-3.6 ,9.0 ,-1.4 ,-2.4 ,1.7 , + -6.1 ,-4.2 ,-2.5 ,-5.6 ,-0.4 ,0.4 ,9.1 ,-2.1 ,-5.4 ,7.3 ,3.6 ,-1.7 ,-5.7 ,-8.0 ,8.8 ,-3.0 ,-0.5 ,1.1 ,10.0 ,8.0 ,0.8 ,1.0 ,7.5 ,3.5 ,-1.8 , + 0.3 ,-0.6 ,-6.3 ,-4.5 ,-1.1 ,1.8 ,0.6 ,9.6 ,9.2 ,9.7 ,-2.6 ,4.3 ,-3.4 
,0.0 ,-6.7 ,5.0 ,10.5 ,1.5 ,-7.8 ,-4.1 ,-5.3 ,-5.0 ,2.0 ,-4.4 ,-8.4 ,
+                  6.0 ,-9.4 ,-4.8 ,8.2 ,7.8 ,5.2 ,-9.5 ,-3.9 ,0.2 ,6.8 ,5.7 ,-8.5 ,-1.9 ,-0.3 ,7.4 ,-8.7 ,7.2 ,1.3 ,6.3 ,-3.7 ,3.9 ,3.3 ,-6.0 ,-9.1 ,5.9}, sd::DataType::DOUBLE);
+    NDArray b('c', {10,2}, {-5.,10,9,1,1.5,-2,17,5,3.6,0.12, -3.1,2.27,-0.5,27.3,8.9,5,-7,8,-9,10}, sd::DataType::DOUBLE);
+
+    NDArray x = b.ulike();
+
+    NDArray expX('c', {10,2}, {-0.697127, 2.58257, 2.109721,3.160622,-2.217796, -3.275736,-0.5752479, 2.475356,1.996841, -1.928947,
+                  2.213154,3.541014, 0.7104885, -1.981451,-3.297972,-0.4720612, 3.672657, 0.9161028, -2.322383, -1.784493}, sd::DataType::DOUBLE);
+
+    ops::helpers::FullPivLU<double>::solve(a,b,x);
+
+    ASSERT_TRUE(x.equalsTo(&expX));
+}
diff --git a/libnd4j/tests_cpu/layers_tests/NDArrayTests.cpp b/libnd4j/tests_cpu/layers_tests/NDArrayTests.cpp
index 669574fa7..8150976e1 100644
--- a/libnd4j/tests_cpu/layers_tests/NDArrayTests.cpp
+++ b/libnd4j/tests_cpu/layers_tests/NDArrayTests.cpp
@@ -90,6 +90,9 @@ TEST_F(NDArrayTest, NDArrayOrder1) {
     auto arrayF = new NDArray(arrayC->dup('f'));
     auto arrayC2 = new NDArray(arrayF->dup('c'));
 
+    arrayF->syncToHost();
+    arrayC2->syncToHost();
+
     ASSERT_EQ('c', arrayC->ordering());
     ASSERT_EQ('f', arrayF->ordering());
     ASSERT_EQ('c', arrayC2->ordering());
diff --git a/libnd4j/tests_cpu/layers_tests/NativeOpsTests.cpp b/libnd4j/tests_cpu/layers_tests/NativeOpsTests.cpp
index 3421edf95..3d0df208f 100644
--- a/libnd4j/tests_cpu/layers_tests/NativeOpsTests.cpp
+++ b/libnd4j/tests_cpu/layers_tests/NativeOpsTests.cpp
@@ -251,7 +251,7 @@ TEST_F(NativeOpsTests, ExecPairwise_2) {
     auto exp = NDArrayFactory::create('c', {5, 5});
     x.assign(true);
     y.assign(false);
-    y.t(5) = true;
+    y.r(5) = true;
 #ifdef __CUDABLAS__
     printf("Unsupported for cuda now.\n");
 #else
diff --git a/libnd4j/tests_cpu/layers_tests/PlaygroundTests.cpp b/libnd4j/tests_cpu/layers_tests/PlaygroundTests.cpp
index f8086c9fe..f4c8bd2fa 100644
--- a/libnd4j/tests_cpu/layers_tests/PlaygroundTests.cpp
+++ b/libnd4j/tests_cpu/layers_tests/PlaygroundTests.cpp
@@ -1168,6 +1168,529 @@ TEST_F(PlaygroundTests, lstmLayerCellBp_1) {
 }
 
+
+///////////////////////////////////////////////////////////////////
+TEST_F(DeclarableOpsTests13, lstmLayer_bp_1) {
+
+    const int sL   = 3;
+    const int bS   = 2;
+    const int nIn  = 2;
+    const int nOut = 3;
+
+    const int dataFormat    = 0;    // [sL,bS,nIn]
+    const int directionMode = 0;    // forward
+    const int gateAct       = 2;    // sigmoid activation for input (i), forget (f) and output (o) gates
+    const int cellAct       = 0;    // tanh activation for cell state
+    const int outAct        = 0;    // tanh activation for output
+
+    const bool hasBiases  = true;   // biases array is provided
+    const bool hasSeqLen  = false;  // seqLen array is not provided
+    const auto hasInitH   = true;   // initial output is provided
+    const auto hasInitC   = true;   // initial cell state is provided
+    const auto hasPH      = true;   // peephole connections are provided
+    const auto retFullSeq = true;   // dLdh per each time step
+    const auto retLastH   = true;   // output at last time step
+    const auto retLastC   = true;   // cells state at last time step
+
+    const double cellClip = 0.5;    // clipping
+
+    NDArray x('c', {sL, bS, nIn}, sd::DataType::DOUBLE);
+    NDArray Wx('c', {nIn, 4*nOut}, sd::DataType::DOUBLE);
+    NDArray Wr('c', {nOut, 4*nOut}, sd::DataType::DOUBLE);
+    NDArray b('c', {4*nOut}, sd::DataType::DOUBLE);
+    NDArray hI('c', {bS, nOut}, sd::DataType::DOUBLE);
+    NDArray cI('c', {bS, nOut}, sd::DataType::DOUBLE);
+    NDArray Wp('c', {3*nOut}, sd::DataType::DOUBLE);
+    NDArray dLdh('c', {sL, bS, nOut}, sd::DataType::DOUBLE);
+    NDArray dLdhL('c', {bS, nOut}, sd::DataType::DOUBLE);
+    NDArray dLdcL('c', {bS, nOut}, sd::DataType::DOUBLE);
+
+    x.linspace(-2,0.1);
+    hI.linspace(-1.5,0.1);
+    cI.linspace(0.7,-0.1);
+    Wx.linspace(1,-0.1);
+    Wr.linspace(-1,0.1);
+    Wp.linspace(0.2,0.2);
+    b.linspace(1,-0.15);
+
+    std::vector<double> tArgs = {cellClip};
+    std::vector<Nd4jLong> iArgs = {dataFormat, directionMode, gateAct, cellAct, outAct};
+    std::vector<bool> bArgs = {hasBiases, hasSeqLen, hasInitH, hasInitC, hasPH, retFullSeq, retLastH, retLastC};
+
+    const OpArgsHolder argsHolderFF({&x, &Wx, &Wr, &b, &hI, &cI, &Wp}, tArgs, iArgs, bArgs);
+    const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &hI, &cI, &Wp, &dLdh, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs);
+
+    sd::ops::lstmLayer opFF;
+    sd::ops::lstmLayer_bp opBP;
+
+    const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP);
+
+    ASSERT_TRUE(isGradCorrect);
+}
+
+///////////////////////////////////////////////////////////////////
+TEST_F(DeclarableOpsTests13, lstmLayer_bp_2) {
+
+    const int sL   = 3;
+    const int bS   = 2;
+    const int nIn  = 2;
+    const int nOut = 3;
+
+    const int dataFormat    = 1;    // [bS,sL,nIn]
+    const int directionMode = 0;    // forward
+    const int gateAct       = 2;    // sigmoid activation for input (i), forget (f) and output (o) gates
+    const int cellAct       = 0;    // tanh activation for cell state
+    const int outAct        = 0;    // tanh activation for output
+
+    const bool hasBiases  = true;   // biases array is provided
+    const bool hasSeqLen  = false;  // seqLen array is not provided
+    const auto hasInitH   = true;   // initial output is provided
+    const auto hasInitC   = true;   // initial cell state is provided
+    const auto hasPH      = true;   // peephole connections are provided
+    const auto retFullSeq = true;   // return whole h {h_0, h_1, ... , h_sL-1}, [sL,bS,nOut]
+    const auto retLastH   = false;  // output at last time step is not returned
+    const auto retLastC   = true;   // cells state at last time step
+
+    const double cellClip = 0.5;    // clipping
+
+    NDArray x('c', {bS, sL, nIn}, sd::DataType::DOUBLE);
+    NDArray Wx('c', {nIn, 4*nOut}, sd::DataType::DOUBLE);
+    NDArray Wr('c', {nOut, 4*nOut}, sd::DataType::DOUBLE);
+    NDArray b('c', {4*nOut}, sd::DataType::DOUBLE);
+    NDArray hI('c', {bS, nOut}, sd::DataType::DOUBLE);
+    NDArray cI('c', {bS, nOut}, sd::DataType::DOUBLE);
+    NDArray Wp('c', {3*nOut}, sd::DataType::DOUBLE);
+    NDArray dLdh('c', {bS, sL, nOut}, sd::DataType::DOUBLE);
+    NDArray dLdcL('c', {bS, nOut}, sd::DataType::DOUBLE);
+
+    x.linspace(-2,0.1);
+    hI.linspace(-1.5,0.1);
+    cI.linspace(0.7,-0.1);
+    Wx.linspace(1,-0.1);
+    Wr.linspace(-1,0.1);
+    Wp.linspace(0.2,0.2);
+    b.linspace(1,-0.15);
+
+    std::vector<double> tArgs = {cellClip};
+    std::vector<Nd4jLong> iArgs = {dataFormat, directionMode, gateAct, cellAct, outAct};
+    std::vector<bool> bArgs = {hasBiases, hasSeqLen, hasInitH, hasInitC, hasPH, retFullSeq, retLastH, retLastC};
+
+    const OpArgsHolder argsHolderFF({&x, &Wx, &Wr, &b, &hI, &cI, &Wp}, tArgs, iArgs, bArgs);
+    const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &hI, &cI, &Wp, &dLdh, &dLdcL}, tArgs, iArgs, bArgs);
+
+    sd::ops::lstmLayer opFF;
+    sd::ops::lstmLayer_bp opBP;
+
+    const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP, std::vector<bool>(), {0., 1.}, GradCheck::LossFunc::MEAN);
+
+    ASSERT_TRUE(isGradCorrect);
+}
+
+///////////////////////////////////////////////////////////////////
+TEST_F(DeclarableOpsTests13, lstmLayer_bp_3) {
+
+    const int sL   = 4;
+    const int bS   = 3;
+    const int nIn  = 3;
+    const int nOut = 2;
+
+    const int dataFormat    = 2;    // [bS, nIn, sL]
+    const int directionMode = 0;    // forward
+    const int gateAct       = 2;    // sigmoid activation for input (i), forget (f) and output (o) gates
+    const int cellAct       = 0;    // tanh activation for cell state
+    const int outAct        = 0;    // tanh activation for output
+
+    const bool hasBiases  = true;   // biases array is provided
+    const bool hasSeqLen  = true;   // seqLen array is provided
+    const auto hasInitH   = true;   // initial output is provided
+    const auto hasInitC   = true;   // initial cell state is provided
+    const auto hasPH      = true;   // peephole connections are provided
+    const auto retFullSeq = true;   // dLdh per each time step
+    const auto retLastH   = true;   // output at last time step
+    const auto retLastC   = true;   // cells state at last time step
+
+    const double cellClip = 0.5;    // clipping
+
+    NDArray x('c', {bS, nIn, sL}, sd::DataType::DOUBLE);
+    NDArray Wx('c', {nIn, 4*nOut}, sd::DataType::DOUBLE);
+    NDArray Wr('c', {nOut, 4*nOut}, sd::DataType::DOUBLE);
+    NDArray b('c', {4*nOut}, sd::DataType::DOUBLE);
+    NDArray seqLen('c', {bS}, {2,0,4}, sd::DataType::DOUBLE);
+    NDArray hI('c', {bS, nOut}, sd::DataType::DOUBLE);
+    NDArray cI('c', {bS, nOut}, sd::DataType::DOUBLE);
+    NDArray Wp('c', {3*nOut}, sd::DataType::DOUBLE);
+    NDArray dLdh('c', {bS, nOut, sL}, sd::DataType::DOUBLE);
+    NDArray dLdhL('c', {bS, nOut}, sd::DataType::DOUBLE);
+    NDArray dLdcL('c', {bS, nOut}, sd::DataType::DOUBLE);
+
+    x.linspace(-2,0.1);
+    hI.linspace(-1.5,0.1);
+    cI.linspace(0.7,-0.1);
+    Wx.linspace(1,-0.1);
+    Wr.linspace(-1,0.1);
+    Wp.linspace(0.2,0.2);
+    b.linspace(1,-0.15);
+
+    std::vector<double> tArgs = {cellClip};
+    std::vector<Nd4jLong> iArgs = {dataFormat, directionMode, gateAct, cellAct, outAct};
+    std::vector<bool> bArgs = {hasBiases, hasSeqLen, hasInitH, hasInitC, hasPH, retFullSeq, retLastH, retLastC};
+
+    const OpArgsHolder argsHolderFF({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp}, tArgs, iArgs, bArgs);
+    const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs);
+
+    sd::ops::lstmLayer opFF;
+    sd::ops::lstmLayer_bp opBP;
+
+    const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP, {true, true, true, true, false, true, true, true});
+
+    ASSERT_TRUE(isGradCorrect);
+}
+
+///////////////////////////////////////////////////////////////////
+TEST_F(DeclarableOpsTests13, lstmLayer_bp_4) {
+
+    const int sL   = 3;
+    const int bS   = 2;
+    const int nIn  = 2;
+    const int nOut = 3;
+
+    const int dataFormat    = 1;    // [bS,sL,nIn]
+    const int directionMode = 1;    // backward
+    const int gateAct       = 2;    // sigmoid activation for input (i), forget (f) and output (o) gates
+    const int cellAct       = 0;    // tanh activation for cell state
+    const int outAct        = 0;    // tanh activation for output
+
+    const bool hasBiases  = true;   // biases array is provided
+    const bool hasSeqLen  = false;  // seqLen array is not provided
+    const auto hasInitH   = true;   // initial output is provided
+    const auto hasInitC   = true;   // initial cell state is provided
+    const auto hasPH      = true;   // peephole connections are provided
+    const auto retFullSeq = true;   // dLdh per each time step
+    const auto retLastH   = true;   // output at last time step
+    const auto retLastC   = true;   // cells state at last time step
+
+    const double cellClip = 0.5;    // clipping
+
+    NDArray x('c', {bS, sL, nIn}, sd::DataType::DOUBLE);
+    NDArray Wx('c', {nIn, 4*nOut}, sd::DataType::DOUBLE);
+    NDArray Wr('c', {nOut, 4*nOut}, sd::DataType::DOUBLE);
+    NDArray b('c', {4*nOut}, sd::DataType::DOUBLE);
+    NDArray hI('c', {bS, nOut}, sd::DataType::DOUBLE);
+    NDArray cI('c', {bS, nOut}, sd::DataType::DOUBLE);
+    NDArray Wp('c', {3*nOut}, sd::DataType::DOUBLE);
+    NDArray dLdh('c', {bS, sL, nOut}, sd::DataType::DOUBLE);
+    NDArray dLdhL('c', {bS, nOut}, sd::DataType::DOUBLE);
+    NDArray dLdcL('c', {bS, nOut}, sd::DataType::DOUBLE);
+
+    x.linspace(-2,0.1);
+    hI.linspace(-1.5,0.1);
+    cI.linspace(0.7,-0.1);
+    Wx.linspace(1,-0.1);
+    Wr.linspace(-1,0.1);
+    Wp.linspace(0.2,0.2);
+    b.linspace(1,-0.15);
+
+    std::vector<double> tArgs = {cellClip};
+    std::vector<Nd4jLong> iArgs = {dataFormat, directionMode, gateAct, cellAct, outAct};
+    std::vector<bool> bArgs = {hasBiases, hasSeqLen, hasInitH, hasInitC, hasPH, retFullSeq, retLastH, retLastC};
+
+    const OpArgsHolder argsHolderFF({&x, &Wx, &Wr, &b, &hI, &cI, &Wp}, tArgs, iArgs, bArgs);
+    const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &hI, &cI, &Wp, &dLdh, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs);
+
+    sd::ops::lstmLayer opFF;
+    sd::ops::lstmLayer_bp opBP;
+
+    const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP);
+
+    ASSERT_TRUE(isGradCorrect);
+}
+
+///////////////////////////////////////////////////////////////////
+TEST_F(DeclarableOpsTests13, lstmLayer_bp_5) {
+
+    const int sL   = 3;
+    const int bS   = 2;
+    const int nIn  = 2;
+    const int nOut = 2;
+
+    const int dataFormat    = 2;    // [bS, nIn, sL]
+    const int directionMode = 1;    // backward
+    const int gateAct       = 2;    // sigmoid activation for input (i), forget (f) and output (o) gates
+    const int cellAct       = 0;    // tanh activation for cell state
+    const int outAct        = 0;    // tanh activation for output
+
+    const bool hasBiases  = true;   // biases array is provided
+    const bool hasSeqLen  = true;   // seqLen array is provided
+    const auto hasInitH   = true;   // initial output is provided
+    const auto hasInitC   = true;   // initial cell state is
provided + const auto hasPH = true; // peephole connections are provided + const auto retFullSeq = true; // dLdh for each time step + const auto retLastH = true; // output at last time step + const auto retLastC = true; // cell state at last time step + + const double cellClip = 0.5; // clipping + + NDArray x('c', {bS, nIn, sL}, sd::DataType::DOUBLE); + NDArray Wx('c', {nIn, 4*nOut}, sd::DataType::DOUBLE); + NDArray Wr('c', {nOut, 4*nOut}, sd::DataType::DOUBLE); + NDArray b('c', {4*nOut}, sd::DataType::DOUBLE); + NDArray seqLen('c', {bS}, {0,2}, sd::DataType::DOUBLE); + NDArray hI('c', {bS, nOut}, sd::DataType::DOUBLE); + NDArray cI('c', {bS, nOut}, sd::DataType::DOUBLE); + NDArray Wp('c', {3*nOut}, sd::DataType::DOUBLE); + NDArray dLdh('c', {bS, nOut, sL}, sd::DataType::DOUBLE); + NDArray dLdhL('c', {bS, nOut}, sd::DataType::DOUBLE); + NDArray dLdcL('c', {bS, nOut}, sd::DataType::DOUBLE); + + x.linspace(-2,0.1); + hI.linspace(-1.5,0.1); + cI.linspace(0.7,-0.1); + Wx.linspace(1,-0.1); + Wr.linspace(-1,0.1); + Wp.linspace(0.2,0.2); + b.linspace(1,-0.15); + + std::vector tArgs = {cellClip}; + std::vector iArgs = {dataFormat, directionMode, gateAct, cellAct, outAct}; + std::vector bArgs = {hasBiases, hasSeqLen, hasInitH, hasInitC, hasPH, retFullSeq, retLastH, retLastC}; + + const OpArgsHolder argsHolderFF({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp}, tArgs, iArgs, bArgs); + const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); + + sd::ops::lstmLayer opFF; + sd::ops::lstmLayer_bp opBP; + + const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP, {true, true, true, true, false, true, true, true}); + + ASSERT_TRUE(isGradCorrect); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests13, lstmLayer_bp_6) { + + const int sL = 3; + const int bS = 2; + const int nIn = 2; + const int nOut = 2; + + const int dataFormat = 2; // [bS, nIn, sL] + const int directionMode = 2; // bidirectional sum + const int gateAct = 2; // sigmoid activation for input (i), forget (f) and output (o) gates + const int cellAct = 0; // tanh activation for cell state + const int outAct = 0; // tanh activation for output + + const bool hasBiases = true; // biases array is provided + const bool hasSeqLen = true; // seqLen array is provided + const auto hasInitH = true; // initial output is provided + const auto hasInitC = true; // initial cell state is provided + const auto hasPH = true; // peephole connections are provided + const auto retFullSeq = true; // dLdh for each time step + const auto retLastH = true; // output at last time step + const auto retLastC = true; // cell state at last time step + + const double cellClip = 0.5; // clipping + + NDArray x('c', {bS, nIn, sL}, sd::DataType::DOUBLE); + NDArray Wx('c', {2, nIn, 4*nOut}, sd::DataType::DOUBLE); + NDArray Wr('c', {2, nOut, 4*nOut}, sd::DataType::DOUBLE); + NDArray b('c', {2, 4*nOut}, sd::DataType::DOUBLE); + NDArray seqLen('c', {bS}, {0,2}, sd::DataType::DOUBLE); + NDArray hI('c', {2, bS, nOut}, sd::DataType::DOUBLE); + NDArray cI('c', {2, bS, nOut}, sd::DataType::DOUBLE); + NDArray Wp('c', {2, 3*nOut}, sd::DataType::DOUBLE); + NDArray dLdh('c', {bS, nOut, sL}, sd::DataType::DOUBLE); + NDArray dLdhL('c', {2, bS, nOut}, sd::DataType::DOUBLE); + NDArray dLdcL('c', {2, bS, nOut}, sd::DataType::DOUBLE); + + x.linspace(-2,0.1); + hI.linspace(-1.5,0.1); + cI.linspace(0.7,-0.1); + Wx.linspace(1,-0.1); + Wr.linspace(-1,0.1); + 
Wp.linspace(0.2,0.2); + b.linspace(1,-0.15); + + std::vector tArgs = {cellClip}; + std::vector iArgs = {dataFormat, directionMode, gateAct, cellAct, outAct}; + std::vector bArgs = {hasBiases, hasSeqLen, hasInitH, hasInitC, hasPH, retFullSeq, retLastH, retLastC}; + + const OpArgsHolder argsHolderFF({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp}, tArgs, iArgs, bArgs); + const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); + // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdhL}, tArgs, iArgs, bArgs); + // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdcL}, tArgs, iArgs, bArgs); + // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); + // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh}, tArgs, iArgs, bArgs); + // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdhL}, tArgs, iArgs, bArgs); + // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdcL}, tArgs, iArgs, bArgs); + + sd::ops::lstmLayer opFF; + sd::ops::lstmLayer_bp opBP; + + const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP, {true, true, true, true, false, true, true, true}); + + ASSERT_TRUE(isGradCorrect); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests13, lstmLayer_bp_7) { + + const int sL = 3; + const int bS = 2; + const int nIn = 2; + const int nOut = 2; + + const int dataFormat = 1; // [bS,sL,nIn] + const int directionMode = 3; // bidirectional concat + const int gateAct = 2; // sigmoid activation for input (i), forget (f) and output (o) gates + const int cellAct = 0; // tanh activation for cell state + const int outAct = 0; // tanh activation for output + + const bool hasBiases = true; // biases array is provided + const bool hasSeqLen = true; // seqLen array is not provided + const auto hasInitH = true; // initial output is provided + const auto hasInitC = true; // initial cell state is provided + const auto hasPH = true; // peephole connections are absent + const auto retFullSeq = true; // dLdh per each time step + const auto retLastH = true; // output at last time step + const auto retLastC = true; // cells state at last time step + + const double cellClip = 0.5; // clipping + + NDArray x('c', {bS,sL,nIn}, sd::DataType::DOUBLE); + NDArray Wx('c', {2, nIn, 4*nOut}, sd::DataType::DOUBLE); + NDArray Wr('c', {2, nOut, 4*nOut}, sd::DataType::DOUBLE); + NDArray b('c', {2, 4*nOut}, sd::DataType::DOUBLE); + NDArray seqLen('c', {bS}, {0,2}, sd::DataType::DOUBLE); + NDArray hI('c', {2, bS, nOut}, sd::DataType::DOUBLE); + NDArray cI('c', {2, bS, nOut}, sd::DataType::DOUBLE); + NDArray Wp('c', {2, 3*nOut}, sd::DataType::DOUBLE); + NDArray dLdh('c', {bS,sL,2*nOut}, sd::DataType::DOUBLE); + NDArray dLdhL('c', {2, bS, nOut}, sd::DataType::DOUBLE); + NDArray dLdcL('c', {2, bS, nOut}, sd::DataType::DOUBLE); + + x.linspace(-2,0.1); + hI.linspace(-1.5,0.1); + cI.linspace(0.7,-0.1); + Wx.linspace(1,-0.1); + Wr.linspace(-1,0.1); + Wp.linspace(0.2,0.2); + b.linspace(1,-0.15); + + std::vector tArgs = {cellClip}; + std::vector iArgs = {dataFormat, directionMode, gateAct, cellAct, outAct}; + std::vector bArgs = {hasBiases, hasSeqLen, hasInitH, hasInitC, hasPH, retFullSeq, retLastH, retLastC}; + + const OpArgsHolder argsHolderFF({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp}, 
tArgs, iArgs, bArgs); + const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); + // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdhL}, tArgs, iArgs, bArgs); + // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdcL}, tArgs, iArgs, bArgs); + // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); + // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh}, tArgs, iArgs, bArgs); + // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdhL}, tArgs, iArgs, bArgs); + // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdcL}, tArgs, iArgs, bArgs); + + sd::ops::lstmLayer opFF; + sd::ops::lstmLayer_bp opBP; + + const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP, {true, true, true, true, false, true, true, true}); + + ASSERT_TRUE(isGradCorrect); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests13, lstmLayer_bp_8) { + + const int sL = 3; + const int bS = 2; + const int nIn = 2; + const int nOut = 2; + + const int dataFormat = 3; // [sL, bS, nIn] + const int directionMode = 4; // bidirectional extra output dim + const int gateAct = 2; // sigmoid activation for input (i), forget (f) and output (o) gates + const int cellAct = 0; // tanh activation for cell state + const int outAct = 0; // tanh activation for output + + const bool hasBiases = true; // biases array is provided + const bool hasSeqLen = true; // seqLen array is not provided + const auto hasInitH = true; // initial output is provided + const auto hasInitC = true; // initial cell state is provided + const auto hasPH = true; // peephole connections are absent + const auto retFullSeq = true; // dLdh per each time step + const auto retLastH = true; // output at last time step + const auto retLastC = true; // cells state at last time step + + const double cellClip = 0.5; // clipping + + NDArray x('c', {sL, bS, nIn}, sd::DataType::DOUBLE); + NDArray Wx('c', {2, nIn, 4*nOut}, sd::DataType::DOUBLE); + NDArray Wr('c', {2, nOut, 4*nOut}, sd::DataType::DOUBLE); + NDArray b('c', {2, 4*nOut}, sd::DataType::DOUBLE); + NDArray seqLen('c', {bS}, {0,2}, sd::DataType::DOUBLE); + NDArray hI('c', {2, bS, nOut}, sd::DataType::DOUBLE); + NDArray cI('c', {2, bS, nOut}, sd::DataType::DOUBLE); + NDArray Wp('c', {2, 3*nOut}, sd::DataType::DOUBLE); + NDArray dLdh('c', {sL, 2, bS, nOut}, sd::DataType::DOUBLE); + NDArray dLdhL('c', {2, bS, nOut}, sd::DataType::DOUBLE); + NDArray dLdcL('c', {2, bS, nOut}, sd::DataType::DOUBLE); + + x.linspace(-2,0.1); + hI.linspace(-1.5,0.1); + cI.linspace(0.7,-0.1); + Wx.linspace(1,-0.1); + Wr.linspace(-1,0.1); + Wp.linspace(0.2,0.2); + b.linspace(1,-0.15); + + std::vector tArgs = {cellClip}; + std::vector iArgs = {dataFormat, directionMode, gateAct, cellAct, outAct}; + std::vector bArgs = {hasBiases, hasSeqLen, hasInitH, hasInitC, hasPH, retFullSeq, retLastH, retLastC}; + + const OpArgsHolder argsHolderFF({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp}, tArgs, iArgs, bArgs); + const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); + // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdhL}, tArgs, iArgs, bArgs); + // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, 
&hI, &cI, &Wp, &dLdh, &dLdcL}, tArgs, iArgs, bArgs); + // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); + // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh}, tArgs, iArgs, bArgs); + // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdhL}, tArgs, iArgs, bArgs); + // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdcL}, tArgs, iArgs, bArgs); + + sd::ops::lstmLayer opFF; + sd::ops::lstmLayer_bp opBP; + + const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP, {true, true, true, true, false, true, true, true}); + + ASSERT_TRUE(isGradCorrect); +} + +////////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests15, gru_bp_1) { + + const int sL = 3; + const int bS = 2; + const int nIn = 5; + const int nOut = 4; + + + NDArray x('c', {sL, bS, nIn}, {0.5, 1. , 1.5, 2. , 2.5, 3. , 3.5, 4. , 4.5, 5. , 5.5, 6. , 6.5, 7. , 7.5, 8. , 8.5, 9. , 9.5, 10. , 10.5, 11. , 11.5, 12. , 12.5, 13. , 13.5, 14. , 14.5, 15.}, sd::DataType::DOUBLE); + NDArray hI('c', {bS, nOut}, {-3,-2,-1,0,1,2,3,4}, sd::DataType::DOUBLE); + NDArray Wx('c', {nIn, 3*nOut}, sd::DataType::DOUBLE); + NDArray Wh('c', {nOut, 3*nOut}, sd::DataType::DOUBLE); + NDArray b('c', {3*nOut}, sd::DataType::DOUBLE); + + NDArray dLdh('c', {sL, bS, nOut}, sd::DataType::DOUBLE); + + Wx.linspace(1,-0.1); + Wh.linspace(0.2,0.2); + b.linspace(1,-0.15); + + const OpArgsHolder argsHolderFF({&x, &hI, &Wx, &Wh, &b}, {}, {}); + const OpArgsHolder argsHolderBP({&x, &hI, &Wx, &Wh, &b, &dLdh}, {}, {}); + + sd::ops::gru opFF; + sd::ops::gru_bp opBP; + + const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP); +} */ diff --git a/libnd4j/tests_cpu/layers_tests/RNGTests.cpp b/libnd4j/tests_cpu/layers_tests/RNGTests.cpp index c4c1806bd..37facc43c 100644 --- a/libnd4j/tests_cpu/layers_tests/RNGTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/RNGTests.cpp @@ -100,7 +100,7 @@ TEST_F(RNGTests, TestGenerator_SGA_1) { for (auto idx = 0; idx < array.lengthOf(); idx++) { float x = generator.relativeT(idx, -sd::DataTypeUtils::template max() / 10, sd::DataTypeUtils::template max() / 10); - array.t(idx) = x; + array.r(idx) = x; } auto minimum = array.reduceNumber(reduce::AMin); minimum.printBuffer("Randomly float min on 1M array"); @@ -285,7 +285,7 @@ TEST_F(RNGTests, Test_Gaussian_21) { ASSERT_NEAR(sd::math::nd4j_abs(mean->e(0)), 0.f, 0.2f); ASSERT_NEAR(variance->e(0), 1.0f, 0.2f); - + } #ifdef DEBUG_BUILD @@ -315,7 +315,7 @@ TEST_F(RNGTests, Test_Gaussian_22) { //variance0->printIndexedBuffer("Variance"); ASSERT_NEAR(sd::math::nd4j_abs(mean0->e(0)), 0.f, 1.0e-3f); ASSERT_NEAR(variance0->e(0), 1.0f, 1.e-3f); - + } TEST_F(RNGTests, Test_Gaussian_3) { @@ -431,7 +431,7 @@ TEST_F(RNGTests, Test_Truncated_21) { // result.at(0)->printBuffer("MEAN"); // result.at(1)->printBuffer("VARIANCE"); - + sd::ops::reduce_min minOp; sd::ops::reduce_max maxOp; @@ -585,7 +585,7 @@ TEST_F(RNGTests, Test_Uniform_2) { ASSERT_TRUE(x1.equalsTo(z)); delete op; - + } TEST_F(RNGTests, Test_Uniform_SGA_3) { @@ -614,7 +614,7 @@ TEST_F(RNGTests, Test_Gaussian_2) { ASSERT_TRUE(x1.equalsTo(z)); delete op; - + } TEST_F(RNGTests, Test_LogNorm_2) { @@ -634,7 +634,7 @@ TEST_F(RNGTests, Test_LogNorm_2) { ASSERT_TRUE(x1.equalsTo(z)); delete op; - + } TEST_F(RNGTests, Test_TruncatedNorm_2) { @@ -653,7 +653,7 @@ TEST_F(RNGTests, Test_TruncatedNorm_2) { 
ASSERT_TRUE(x1.isSameShape(z)); ASSERT_TRUE(x1.equalsTo(z)); delete op; - + } @@ -674,7 +674,7 @@ TEST_F(RNGTests, Test_Binomial_2) { ASSERT_TRUE(x1.equalsTo(z)); delete op; - + } @@ -695,7 +695,7 @@ TEST_F(RNGTests, Test_Bernoulli_2) { ASSERT_TRUE(x1.equalsTo(z)); delete op; - + } TEST_F(RNGTests, Test_GaussianDistribution_1) { @@ -716,7 +716,7 @@ TEST_F(RNGTests, Test_GaussianDistribution_1) { ASSERT_FALSE(nexp1->equalsTo(z)); ASSERT_FALSE(nexp2->equalsTo(z)); - + } TEST_F(RNGTests, Test_BernoulliDistribution_1) { @@ -736,7 +736,7 @@ TEST_F(RNGTests, Test_BernoulliDistribution_1) { ASSERT_FALSE(nexp1->equalsTo(z)); ASSERT_FALSE(nexp2->equalsTo(z)); - + } @@ -787,7 +787,7 @@ TEST_F(RNGTests, Test_ExponentialDistribution_1_SGA) { ASSERT_FALSE(nexp1->equalsTo(z)); ASSERT_FALSE(nexp2->equalsTo(z)); - + } TEST_F(RNGTests, Test_ExponentialDistribution_2_SGA) { @@ -880,7 +880,7 @@ TEST_F(RNGTests, Test_ExponentialDistribution_2) { ASSERT_FALSE(nexp1->equalsTo(z)); ASSERT_FALSE(nexp2->equalsTo(z)); - + } TEST_F(RNGTests, Test_PoissonDistribution_1) { @@ -900,7 +900,7 @@ TEST_F(RNGTests, Test_PoissonDistribution_1) { ASSERT_TRUE(exp0.isSameShape(z)); ASSERT_FALSE(exp0.equalsTo(z)); - + } TEST_F(RNGTests, Test_GammaDistribution_1) { @@ -920,7 +920,7 @@ TEST_F(RNGTests, Test_GammaDistribution_1) { ASSERT_TRUE(exp0.isSameShape(z)); ASSERT_FALSE(exp0.equalsTo(z)); - + } TEST_F(RNGTests, Test_GammaDistribution_2) { @@ -941,7 +941,7 @@ TEST_F(RNGTests, Test_GammaDistribution_2) { ASSERT_TRUE(exp0.isSameShape(z)); ASSERT_FALSE(exp0.equalsTo(z)); - + } TEST_F(RNGTests, Test_GammaDistribution_3) { @@ -962,7 +962,7 @@ TEST_F(RNGTests, Test_GammaDistribution_3) { ASSERT_TRUE(exp0.isSameShape(z)); ASSERT_FALSE(exp0.equalsTo(z)); - + } TEST_F(RNGTests, Test_UniformDistribution_04) { @@ -980,7 +980,7 @@ TEST_F(RNGTests, Test_UniformDistribution_04) { ASSERT_TRUE(exp0.isSameShape(z)); ASSERT_FALSE(exp0.equalsTo(z)); - + } namespace sd { @@ -1142,7 +1142,7 @@ TEST_F(RNGTests, test_multinomial_1) { ASSERT_EQ(Status::OK(), result.status()); ASSERT_TRUE(expectedZ.isSameShape(outputZ)); ASSERT_TRUE(expectedZ.equalsTo(outputZ)); - + } TEST_F(RNGTests, test_multinomial_2) { @@ -1219,7 +1219,7 @@ TEST_F(RNGTests, test_multinomial_5) { RandomGenerator rng(1234, 1234); ASSERT_EQ(Status::OK(), op.execute(rng, { &probs, &samples }, { &output }, {}, { 1 }, {}, {}, false)); - + auto deviation = output.varianceNumber(variance::SummaryStatsStandardDeviation, false); auto mean = output.meanNumber(); // printf("Var: %f Mean: %f \n", deviation.e(0), mean.e(0)); @@ -1290,7 +1290,7 @@ TEST_F(RNGTests, test_multinomial_6) { ASSERT_NEAR(1.2175, deviation.e(0), 45e-3); // 1000000 35e-3); ASSERT_NEAR(2.906, mean.e(0), 45e-3); // 1000000 35e-3); - + RandomGenerator rng(1234, 1234); NDArray probs('c', { batchValue, ClassValue }, { 1., 1.5, 2., 2.5, 3. 
}, sd::DataType::FLOAT32); diff --git a/libnd4j/tests_cpu/libnd4j_tests/CMakeLists.txt b/libnd4j/tests_cpu/libnd4j_tests/CMakeLists.txt index 7d3073b58..92084ef74 100644 --- a/libnd4j/tests_cpu/libnd4j_tests/CMakeLists.txt +++ b/libnd4j/tests_cpu/libnd4j_tests/CMakeLists.txt @@ -115,7 +115,7 @@ elseif(WIN32) set(CMAKE_CXX_FLAGS " -g -fPIC -std=c++11 -Wa,-mbig-obj") endif() else() - set(CMAKE_CXX_FLAGS " ${CMAKE_CXX_FLAGS} -DLINUX_BUILD=true") + set(CMAKE_CXX_FLAGS " ${CMAKE_CXX_FLAGS} -ffast-math -DFFAST_MATH=true -DLINUX_BUILD=true") if ("${_RELEASE}" OR CMAKE_BUILD_TYPE STREQUAL "Release") message("Release build for tests") @@ -225,6 +225,17 @@ if (CMAKE_BUILD_TYPE STREQUAL "Debug" AND NOT(MINGW) AND NOT(APPLE)) SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -export-dynamic") endif() +file(GLOB_RECURSE COMPILATION_UNITS false ../../include/ops/declarable/helpers/cpu/compilation_units/*.cpp.in) +foreach(FL_ITEM ${COMPILATION_UNITS}) + string(REGEX MATCH "^(.*)\\.cpp\.in$" dummy ${FL_ITEM}) + set(FL_ITEM_WLE ${CMAKE_MATCH_1}) + foreach(FL_TYPE_INDEX RANGE 0 9) + #message( "${FL_ITEM_WLE}_${FL_TYPE_INDEX}.cpp") + configure_file( "${FL_ITEM}" "${FL_ITEM_WLE}_${FL_TYPE_INDEX}.cpp" @ONLY) + LIST(APPEND CUSTOMOPS_GENERIC_SOURCES ${FL_ITEM_WLE}_${FL_TYPE_INDEX}.cpp ) + endforeach() +endforeach() + # this function strips path from file name, basically making up short file name, i.e. file.cpp function(SHORTNAME LONG_NAME OUTPUT) From deb87b04f7aeaba748d57fc6182e8f1cb9507e20 Mon Sep 17 00:00:00 2001 From: Alex Black Date: Fri, 15 May 2020 15:34:08 +1000 Subject: [PATCH 04/21] Assorted fixes (#466) * Timeouts and fixes Signed-off-by: Alex Black * Increase default timeout to 90s due to slow PPC CI machines Signed-off-by: Alex Black * Another timeout tweak Signed-off-by: Alex Black * Svhn Signed-off-by: Alex Black --- .../main/java/org/deeplearning4j/BaseDL4JTest.java | 2 +- .../datasets/fetchers/SvhnDataFetcherTest.java | 9 +++++++-- .../optimizer/listener/TestCheckpointListener.java | 11 ++++++----- .../optimizer/listener/TestListeners.java | 5 +++++ .../nn/multilayer/MultiLayerNetwork.java | 7 +++++++ .../train/GradientSharingTrainingTest.java | 2 +- .../main/java/org/nd4j/common/tests/BaseND4JTest.java | 2 +- 7 files changed, 28 insertions(+), 10 deletions(-) diff --git a/deeplearning4j/deeplearning4j-common-tests/src/main/java/org/deeplearning4j/BaseDL4JTest.java b/deeplearning4j/deeplearning4j-common-tests/src/main/java/org/deeplearning4j/BaseDL4JTest.java index 46daaa5f5..b74df2d2c 100644 --- a/deeplearning4j/deeplearning4j-common-tests/src/main/java/org/deeplearning4j/BaseDL4JTest.java +++ b/deeplearning4j/deeplearning4j-common-tests/src/main/java/org/deeplearning4j/BaseDL4JTest.java @@ -68,7 +68,7 @@ public abstract class BaseDL4JTest { * Override this method to set the default timeout for methods in the test class */ public long getTimeoutMilliseconds(){ - return 60_000; + return 90_000; } /** diff --git a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/datasets/fetchers/SvhnDataFetcherTest.java b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/datasets/fetchers/SvhnDataFetcherTest.java index 1815dff73..58587615d 100644 --- a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/datasets/fetchers/SvhnDataFetcherTest.java +++ b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/datasets/fetchers/SvhnDataFetcherTest.java @@ -24,17 +24,22 @@ import org.junit.rules.Timeout; import java.io.File; import static 
org.junit.Assert.assertTrue; +import static org.junit.Assume.assumeTrue; /** * @author saudet */ public class SvhnDataFetcherTest extends BaseDL4JTest { - @Rule - public Timeout timeout = Timeout.seconds(600); + @Override + public long getTimeoutMilliseconds() { + return 480_000L; //Shouldn't take this long but slow download or drive access on CI machines may need extra time. + } @Test public void testSvhnDataFetcher() throws Exception { + assumeTrue(isIntegrationTests()); //Ignore unless integration tests - CI can get caught up on slow disk access + SvhnDataFetcher fetch = new SvhnDataFetcher(); File path = fetch.getDataSetPath(DataSetType.TRAIN); File path2 = fetch.getDataSetPath(DataSetType.TEST); diff --git a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/optimizer/listener/TestCheckpointListener.java b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/optimizer/listener/TestCheckpointListener.java index 5c5f9e385..131930623 100644 --- a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/optimizer/listener/TestCheckpointListener.java +++ b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/optimizer/listener/TestCheckpointListener.java @@ -183,11 +183,11 @@ public class TestCheckpointListener extends BaseDL4JTest { CheckpointListener l = new CheckpointListener.Builder(f) .keepLast(3) - .saveEvery(4, TimeUnit.SECONDS) + .saveEvery(4900, TimeUnit.MILLISECONDS) .build(); net.setListeners(l); - for(int i=0; i<5; i++ ){ //10 iterations total + for(int i=0; i<3; i++ ){ //6 iterations total (2 iterations per fit call) net.fit(iter); Thread.sleep(5000); } @@ -211,9 +211,10 @@ public class TestCheckpointListener extends BaseDL4JTest { ns.add(n.getIterationCount()); } - assertEquals(3, l.availableCheckpoints().size()); - assertEquals(ns.toString(), 3, ns.size()); - assertTrue(ns.containsAll(Arrays.asList(4,6,8))); + assertEquals(2, l.availableCheckpoints().size()); + assertEquals(ns.toString(), 2, ns.size()); + System.out.println(ns); + assertTrue(ns.containsAll(Arrays.asList(2,4))); } @Test diff --git a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/optimizer/listener/TestListeners.java b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/optimizer/listener/TestListeners.java index 8cd72e770..cac30a7e4 100644 --- a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/optimizer/listener/TestListeners.java +++ b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/optimizer/listener/TestListeners.java @@ -70,6 +70,11 @@ public class TestListeners extends BaseDL4JTest { @Rule public TemporaryFolder tempDir = new TemporaryFolder(); + @Override + public long getTimeoutMilliseconds() { + return 90000L; + } + @Test public void testSettingListenersUnsupervised() { //Pretrain layers should get copies of the listeners, in addition to the diff --git a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java index 54acb31d7..2091babb0 100755 --- a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java +++ b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java @@ -767,6 +767,13 @@ public class MultiLayerNetwork implements Serializable, Classifier, Layer, Neura if (!isInitCalled()) init(); + if (solver == null) { + try (MemoryWorkspace wsO = 
Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { + solver = new Solver.Builder().configure(conf()).listeners(getListeners()).model(this) + .build(); + } + } + solver.getOptimizer().setGradientsAccumulator(accumulator); } diff --git a/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark-parameterserver/src/test/java/org/deeplearning4j/spark/parameterserver/train/GradientSharingTrainingTest.java b/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark-parameterserver/src/test/java/org/deeplearning4j/spark/parameterserver/train/GradientSharingTrainingTest.java index ab034604e..68a012b72 100644 --- a/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark-parameterserver/src/test/java/org/deeplearning4j/spark/parameterserver/train/GradientSharingTrainingTest.java +++ b/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark-parameterserver/src/test/java/org/deeplearning4j/spark/parameterserver/train/GradientSharingTrainingTest.java @@ -75,7 +75,7 @@ public class GradientSharingTrainingTest extends BaseSparkTest { @Override public long getTimeoutMilliseconds() { - return 90000L; + return 180000L; } @Test diff --git a/nd4j/nd4j-common-tests/src/main/java/org/nd4j/common/tests/BaseND4JTest.java b/nd4j/nd4j-common-tests/src/main/java/org/nd4j/common/tests/BaseND4JTest.java index 54bad9876..eceec6216 100644 --- a/nd4j/nd4j-common-tests/src/main/java/org/nd4j/common/tests/BaseND4JTest.java +++ b/nd4j/nd4j-common-tests/src/main/java/org/nd4j/common/tests/BaseND4JTest.java @@ -55,7 +55,7 @@ public abstract class BaseND4JTest { * Override this method to set the default timeout for methods in the test class */ public long getTimeoutMilliseconds(){ - return 60_000; + return 90_000; } /** From 51ce6927fd03f4fc42f498b02dd28475ecab1df5 Mon Sep 17 00:00:00 2001 From: raver119 Date: Sat, 16 May 2020 10:44:58 +0300 Subject: [PATCH 05/21] FP Mod (#468) * mod Signed-off-by: raver119@gmail.com * couple of tests for updated mod Signed-off-by: raver119@gmail.com --- libnd4j/include/ops/ops.h | 13 +-- .../layers_tests/PlaygroundTests.cpp | 2 - .../layers_tests/PrimitivesTests.cpp | 92 +++++++++++++++++++ 3 files changed, 96 insertions(+), 11 deletions(-) create mode 100644 libnd4j/tests_cpu/layers_tests/PrimitivesTests.cpp diff --git a/libnd4j/include/ops/ops.h b/libnd4j/include/ops/ops.h index 21cd07c40..ea52e9ba0 100644 --- a/libnd4j/include/ops/ops.h +++ b/libnd4j/include/ops/ops.h @@ -919,17 +919,12 @@ namespace simdOps { template class Mod { public: - /* - - // just a optional note, feel free to remove later - - op_def static half op(half d1, half d2, half *params) { - return __float2half(simdOps::Mod::op(__half2float(d1), __half2float(d2), nullptr)); - } - */ op_def static Z op(X d1, Y d2) { - return static_cast(d1) % static_cast(d2); + auto dx = static_cast(d2); + auto f = sd::math::nd4j_floor(d1 / dx); + auto r = f * dx; + return d1 - r; } op_def static Z op(X d1, Y d2, Z *params) { diff --git a/libnd4j/tests_cpu/layers_tests/PlaygroundTests.cpp b/libnd4j/tests_cpu/layers_tests/PlaygroundTests.cpp index f4c8bd2fa..91ddcbd30 100644 --- a/libnd4j/tests_cpu/layers_tests/PlaygroundTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/PlaygroundTests.cpp @@ -59,8 +59,6 @@ public: int poolSize = 10; PlaygroundTests() { - printf("\n"); - fflush(stdout); } }; diff --git a/libnd4j/tests_cpu/layers_tests/PrimitivesTests.cpp b/libnd4j/tests_cpu/layers_tests/PrimitivesTests.cpp new file mode 100644 index 000000000..f131a1520 --- /dev/null +++ b/libnd4j/tests_cpu/layers_tests/PrimitivesTests.cpp @@ -0,0 +1,92 @@ 
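For reference, the `Mod` change in ops.h above replaces the old integer `%` with a floored-division remainder: `r = d1 - floor(d1 / d2) * d2`. A minimal Java sketch of the same arithmetic (illustrative only, not part of the patch) shows how it differs from the truncated remainder that `%`/`fmod` compute, and why a zero divisor no longer risks a SIGFPE:

```
public class FlooredModDemo {
    // Floored-division remainder, mirroring the updated simdOps::Mod:
    // the result takes the sign of the divisor, not the dividend.
    static double flooredMod(double d1, double d2) {
        return d1 - Math.floor(d1 / d2) * d2;
    }

    public static void main(String[] args) {
        System.out.println(flooredMod(7.0, 3.0));   // 1.0  -- same as 7 % 3 and fmod(7, 3)
        System.out.println(flooredMod(-7.0, 3.0));  // 2.0  -- floored: sign follows the divisor
        System.out.println(-7.0 % 3.0);             // -1.0 -- truncated: sign follows the dividend
        System.out.println(flooredMod(7.0, 0.0));   // NaN  -- float division by zero, no trap
    }
}
```

For the positive operands used in `test_mod_1` and `test_mod_2` below, the floored and truncated results coincide, which is why those tests can compare against `7 % 3` and `nd4j_fmod`; `test_mod_3` only relies on the zero-divisor path not raising SIGFPE.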
+/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "testlayers.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace sd; +using namespace sd::graph; + +class PrimitivesTests : public testing::Test { + public: + + PrimitivesTests() { + } +}; + +TEST_F(PrimitivesTests, test_mod_1) { + int ix = 7; + int iy = 3; + + + auto v = simdOps::Mod::op(ix, iy); + + ASSERT_EQ(7 % 3, v); +} + +TEST_F(PrimitivesTests, test_mod_2) { + float ix = 7.f; + float iy = 3.f; + + + auto e = sd::math::nd4j_fmod(ix, iy); + auto v = simdOps::Mod::op(ix, iy); + + ASSERT_NEAR(e, v, 1e-5f); +} + +TEST_F(PrimitivesTests, test_mod_3) { + float ix = 7.f; + float iy = 0.f; + + + auto e = sd::math::nd4j_fmod(ix, iy); + auto v = simdOps::Mod::op(ix, iy); + + // absence of SIGFPE is a good enough check here +} \ No newline at end of file From 4bdd5cb8ff4b08b80031abebfd88d4da8307f7f9 Mon Sep 17 00:00:00 2001 From: Alex Black Date: Sat, 16 May 2020 22:44:31 +1000 Subject: [PATCH 06/21] Add SameDiff file format ADR [WIP] (#467) * Add SameDiff file format ADR Signed-off-by: Alex Black * Update 0001-SameDiff_File_Format.md * Update Signed-off-by: Alex Black --- nd4j/ADRs/0001-SameDiff_File_Format.md | 100 +++++++++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 nd4j/ADRs/0001-SameDiff_File_Format.md diff --git a/nd4j/ADRs/0001-SameDiff_File_Format.md b/nd4j/ADRs/0001-SameDiff_File_Format.md new file mode 100644 index 000000000..84dcfb6c9 --- /dev/null +++ b/nd4j/ADRs/0001-SameDiff_File_Format.md @@ -0,0 +1,100 @@ +# SameDiff file format proposal + +## Status +Accepted + +Proposed by: Alex Black (15-05-2020) + +Discussed with: raver119 + +## Context + +SameDiff models need to be serializable - i.e., something we can save to disk or send over the network. +Additionally, we need to be able to save and load model files in C++, and have those be readable in other languages (mainly Java). + +Currently, we have a FlatBuffers-based format for SameDiff graph serialization, but it has a number of problems, as discussed in this issue: https://github.com/eclipse/deeplearning4j/issues/8312 + + +## Decision + +We will transition from a pure FlatBuffers format to a Zip + FlatBuffers model format. + +FlatBuffers will be used for the graph structure only. Parameters will be stored separately from the graph structure, also within the zip. + +We will introduce the ability to support multiple versions of a graph in the model files. 
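As a sketch of the tag-selection rule described below (tags listed one per line in `tags.txt`, oldest first and newest last, with the newest loaded by default and matching done case-insensitively) — the class and method names here are hypothetical, not the actual SameDiff API:

```
import java.util.Arrays;
import java.util.List;

public class TagResolutionSketch {
    // Hypothetical helper: pick the tag to load from the contents of tags.txt.
    static String resolveTag(String tagsTxt, String requested) {
        List<String> tags = Arrays.asList(tagsTxt.trim().split("\\R"));
        if (requested == null)
            return tags.get(tags.size() - 1);   // default: newest tag (last line)
        for (String t : tags)
            if (t.equalsIgnoreCase(requested))  // tags are not case sensitive
                return t;
        throw new IllegalArgumentException("Unknown tag: " + requested);
    }

    public static void main(String[] args) {
        String tagsTxt = "fp32_checkpoint1000\nfp32_checkpoint5000\nint8_quantized";
        System.out.println(resolveTag(tagsTxt, null));                  // int8_quantized
        System.out.println(resolveTag(tagsTxt, "FP32_CHECKPOINT5000")); // case-insensitive match
    }
}
```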
+This will enable the model file to support storing +* Multiple data types (for example, an FP32 version and a quantized INT8 version) +* Multiple different checkpoints (parameters after 1000 iterations, after 5000, and so on) +* Multiple versions of a given model (English vs. Chinese, or cased/uncased, etc.) + +By default when loading a graph (unless it is otherwise specified) we will load the most recent model tag. +Tags must be valid file/folder identifiers, and are not case sensitive. + + +The structure of the zip file will be as follows: +``` +tags.txt //List of graph tags, one per line, in UTF8 format, no duplicates. Oldest first, newest last +/graph.fb //The graph structure, in FlatBuffers format +/params.txt //The mapping between variable names and parameter file names +/params/*.fb //The set of NDArrays that are the parameters, in FlatBuffers format +/trainingConfig.fb //The training configuration - updater, learning rate, etc +/updater.txt //The mapping between variable names and the updater state file names +/updater/*.fb //The set of NDArrays that are the updater state +``` + +Note that params.txt will allow for parameter sharing via references to other parameters: +``` +my_normal_param 0 +shared_param /7 +``` +This means the parameter values for parameter "my_normal_param" are present at `/params/0.fb` within the zip file, and the parameter values for "shared_param" are available at `/params/7.fb` + +Note also that the motivation for using the params.txt file (instead of the raw parameter name as the file name) is that some parameters will have invalid or ambiguous file names - "my/param/name", "&MyParam*" etc. + +In terms of updater state, it will be stored in a similar format. For example, for the Adam updater with the M and V state arrays (each of the same shape as the parameter): +``` +my_param 0 1 +other_param 2 3 +``` +That means my_param(M) is at `/updater/0.fb` and my_param(V) is at `/updater/1.fb` +This format also allows for updater state sharing, if we need it. + + +**Graph Structure** + +The graph structure will be encoded in FlatBuffers format using a schema with 2 parts: +1. A list of variables - each with name, datatype, and (for placeholders, constants and parameters) a shape +2. A list of operations - each with a name, op name/type, input variable names, output variable names, and arguments + +Note that both legacy and custom ops will be encoded in the same way. For legacy ops, we simply need the operation type, and the operation number. + +Operation argument encoding will be done using named arguments: essentially, a `Map<String, T>` structure, where T is one of `{long, double, boolean, datatype}`. +This allows for improved backward compatibility (no ambiguity as ops are modified after a graph file was written) and improved interpretability compared to using simple arrays of iargs, bargs, targs and dargs. +One consequence/downside of this is that we need to define a mapping between our named arguments and iargs/bargs/targs/dargs. In Java we have essentially done this manually, though we clearly don't want to replicate this work in C++ (or any future languages). + +To avoid the need to do a significant amount of work (such as moving the name/arg mapping to code generation) the following is proposed: +The `Map<String, T>` is split up in the FlatBuffers schema into 4 pairs of fields. 
+* `String[] iArgNames`, `long[] iArgs` +* `String[] tArgNames`, `double[] tArgs` +* `String[] bArgNames`, `boolean[] bArgs` +* `String[] dArgNames`, `DataType[] dArgs` + +Clearly the name and value arrays (for each pair) would each be the same length, and name/value correspondence is by array index. + +This is essentially equivalent to the `Map<String, T>` representation, but has the benefit of not needing us to define the mapping from named args to array-style args any time soon in C++, while still allowing us to add it in the future (mainly before we can write graphs from C++, or have better/proper backward compatibility after op changes) + + +**Extensibility to Other Types** + +Suppose in the future we want to store other data for a variable, not just an array? +Examples include lists and maps (for example, for NLP applications). + +While we will not implement this right now, there are a number of options for adding this without breaking backward compatibility. + +First: we can enhance the params.txt file format, perhaps using something like the following: +``` +map_param 0 MAP +``` + +Second: We can add a similar text file for other types. For example, a params_maps.txt, same format as params.txt, with content at `/params_maps/*.fb` + From 6e9c849e4a4e6ae87c8876b6b3929d6581c20f39 Mon Sep 17 00:00:00 2001 From: Paul Dubs Date: Mon, 18 May 2020 07:46:46 +0200 Subject: [PATCH 07/21] Fix typo (#469) --- .../org/deeplearning4j/nn/modelimport/keras/KerasModel.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasModel.java b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasModel.java index 8aa38439c..b57171a14 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasModel.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasModel.java @@ -171,7 +171,7 @@ public class KerasModel { importTrainingConfiguration(trainingJson); else log.warn("If enforceTrainingConfig is true, a training " + "configuration object has to be provided. Usually the only practical way to do this is to store" + - " your keras model with `model.save('model_path.h5'. If you store model config and weights" + + " your keras model with `model.save('model_path.h5')`. 
If you store model config and weights" + " separately no training configuration is attached."); } From ec757f654d92e9d4199adf17690420525fd5bd53 Mon Sep 17 00:00:00 2001 From: Andrii T <39699084+atuzhykov@users.noreply.github.com> Date: Tue, 19 May 2020 17:18:52 +0300 Subject: [PATCH 08/21] Tensorflow import tests and fixes (#435) * ignored ops checked Signed-off-by: Andrii Tuzhykov * reconfigured AdjustContrast + commented primitive_gru Signed-off-by: Andrii Tuzhykov * minor changes + exception ops commented Signed-off-by: Andrii Tuzhykov * figured out non existent tf ops and random ops check Signed-off-by: Andrii Tuzhykov * minor changes to tensorflowop and randomness cheks Signed-off-by: Andrii Tuzhykov * deconv2d tensorfloname removed * Fix Flatbuffers ser/de with character fields Signed-off-by: Alex Black * TFGraphTestAllSameDiff tests passed except NonMaxSuppression Signed-off-by: Andrii Tuzhykov * minor changes Signed-off-by: Andrii Tuzhykov * temporary ignored section added Signed-off-by: Andrii Tuzhykov * ignores removed Signed-off-by: Andrii Tuzhykov * org.nd4j.base.Preconditions -> org.nd4j.common.base.Preconditions Signed-off-by: Andrii Tuzhykov * temsorflownames reverts and replace CopyHost * ignored mod op tests due to known issue Signed-off-by: Andrii Tuzhykov * rsestored mod after fixing in cpp level Signed-off-by: Andrii Tuzhykov * ignored random_shuffle op test due to known issue Signed-off-by: Andrii Tuzhykov * increased random_uniform mean/std comparator sensitivity Signed-off-by: Andrii Tuzhykov * igmored random tests due to SameDiff RNG seed is not set. Signed-off-by: Andrii Tuzhykov Co-authored-by: Alex Black --- .../functions/DifferentialFunction.java | 4 + .../samediff/serde/FlatBuffersMapper.java | 2 + .../autodiff/validation/OpValidation.java | 7 +- .../converters/ImportClassMapping.java | 1 - .../linalg/api/ops/custom/AdjustContrast.java | 37 ++++++-- .../api/ops/custom/AdjustContrastV2.java | 44 --------- .../api/ops/custom/BaseAdjustContrast.java | 52 ----------- .../api/ops/custom/CompareAndBitpack.java | 12 +++ .../linalg/api/ops/custom/RgbToGrayscale.java | 4 - .../nd4j/linalg/api/ops/custom/RgbToYiq.java | 5 - .../nd4j/linalg/api/ops/custom/RgbToYuv.java | 5 - .../nd4j/linalg/api/ops/custom/YiqToRgb.java | 5 - .../nd4j/linalg/api/ops/custom/YuvToRgb.java | 4 - .../ops/impl/image/NonMaxSuppressionV3.java | 2 +- .../ops/impl/layers/convolution/DeConv2D.java | 5 - .../transforms/custom/IsNonDecreasing.java | 6 -- .../pairwise/arithmetic/CopyOp.java | 2 +- .../ops/impl/transforms/same/Identity.java | 2 +- .../segment/UnsortedSegmentMean.java | 4 - .../segment/UnsortedSegmentSqrtN.java | 5 - .../api/ops/random/custom/RandomGamma.java | 4 +- .../api/ops/random/impl/DropOutInverted.java | 6 -- .../ops/random/impl/UniformDistribution.java | 6 -- .../TFGraphs/TFGraphTestAllHelper.java | 63 ++++++++++++- .../TFGraphs/TFGraphTestAllSameDiff.java | 93 ++++++++----------- .../nd4j/linalg/custom/CustomOpsTests.java | 16 ---- 26 files changed, 158 insertions(+), 238 deletions(-) delete mode 100644 nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/AdjustContrastV2.java delete mode 100644 nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/BaseAdjustContrast.java diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/functions/DifferentialFunction.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/functions/DifferentialFunction.java 
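The `DifferentialFunction` and `FlatBuffersMapper` hunks below store `char` op properties as integers instead of adding a dedicated field type to the FlatBuffers schema. A standalone Java sketch of that round-trip (illustrative, not the actual mapper code):

```
public class CharFieldRoundTrip {
    public static void main(String[] args) {
        char original = 'c';                 // e.g. an ordering/format property

        // Serialization side: the char is widened and written into the int args.
        int[] iArgs = new int[]{ original };

        // Deserialization side: the value comes back boxed as an Integer, so a
        // char-typed field needs the narrowing cast added in the hunk below.
        Object value = iArgs[0];             // autoboxed to Integer
        char restored = (char) ((Integer) value).intValue();

        System.out.println(original == restored);  // true
    }
}
```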
index 54707887f..f4f2d6c6b 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/functions/DifferentialFunction.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/functions/DifferentialFunction.java @@ -261,6 +261,10 @@ public abstract class DifferentialFunction { if(target.getType() == float.class && value instanceof Double){ value = ((Double) value).floatValue(); } + //Edge case: we store char fields as integers, rather than introduce an extra property + if(target.getType() == char.class && value instanceof Integer){ + value = (char)((Integer)value).intValue(); + } target.set(this,value); } catch (IllegalAccessException e) { diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/samediff/serde/FlatBuffersMapper.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/samediff/serde/FlatBuffersMapper.java index 7f44962f0..6253c700d 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/samediff/serde/FlatBuffersMapper.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/samediff/serde/FlatBuffersMapper.java @@ -483,6 +483,8 @@ public class FlatBuffersMapper { //No op } else if (v instanceof Boolean) { b = new boolean[]{(Boolean) v}; + } else if(v instanceof Character){ + i = new int[]{(Character)v}; } else if (v instanceof Number) { if (v instanceof Double) { d = new double[]{(Double) v}; diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/validation/OpValidation.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/validation/OpValidation.java index 386ead0b3..21154d8ac 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/validation/OpValidation.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/validation/OpValidation.java @@ -1220,7 +1220,12 @@ public class OpValidation { "absargmax", "absargmin", "entropy_shannon", //This is a thing, but quite different from our op: https://www.tensorflow.org/versions/r1.2/api_docs/python/tf/contrib/bayesflow/entropy/entropy_shannon - "count_zero" + "count_zero", + + "SaveV2", + "LoadV2", + "RestoreV2", + "RandomCrop" // NotImplementedError: Op RandomCrop is not available in GraphDef version 134. It has been removed in version 8. Random crop is now pure Python. 
); return new HashSet<>(list); diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/imports/converters/ImportClassMapping.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/imports/converters/ImportClassMapping.java index 63138719c..630b5986d 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/imports/converters/ImportClassMapping.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/imports/converters/ImportClassMapping.java @@ -625,7 +625,6 @@ public class ImportClassMapping { org.nd4j.linalg.api.ops.compat.CompatSparseToDense.class, org.nd4j.linalg.api.ops.compat.CompatStringSplit.class, org.nd4j.linalg.api.ops.custom.AdjustContrast.class, - org.nd4j.linalg.api.ops.custom.AdjustContrastV2.class, org.nd4j.linalg.api.ops.custom.HsvToRgb.class, org.nd4j.linalg.api.ops.custom.RgbToHsv.class, org.nd4j.linalg.api.ops.custom.RgbToYiq.class, diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/AdjustContrast.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/AdjustContrast.java index f842303ca..1dfeca5dc 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/AdjustContrast.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/AdjustContrast.java @@ -1,4 +1,3 @@ - /* ****************************************************************************** * Copyright (c) 2019 Konduit K.K. * @@ -19,14 +18,27 @@ package org.nd4j.linalg.api.ops.custom; import lombok.NonNull; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; +import org.nd4j.common.base.Preconditions; +import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.api.ops.DynamicCustomOp; -public class AdjustContrast extends BaseAdjustContrast { +import java.util.Collections; +import java.util.List; - public AdjustContrast() {super();} +public class AdjustContrast extends DynamicCustomOp { + + public AdjustContrast() { + super(); + } public AdjustContrast(@NonNull INDArray in, double factor, INDArray out) { - super(in, factor, out); + Preconditions.checkArgument(in.rank() >= 3, + "AdjustContrast: op expects rank of input array to be >= 3, but got %s instead", in.rank()); + inputArguments.add(in); + outputArguments.add(out); + + addTArgument(factor); } public AdjustContrast(@NonNull INDArray in, double factor) { @@ -34,21 +46,28 @@ public class AdjustContrast extends BaseAdjustContrast { } public AdjustContrast(@NonNull SameDiff sameDiff, @NonNull SDVariable in, @NonNull SDVariable factor) { - super(sameDiff,new SDVariable[]{in,factor}); + super(sameDiff, new SDVariable[]{in, factor}); } public AdjustContrast(@NonNull SameDiff sameDiff, @NonNull SDVariable in, double factor) { - super(sameDiff,new SDVariable[]{in}); + super(sameDiff, new SDVariable[]{in}); addTArgument(factor); } @Override public String opName() { - return "adjust_contrast"; + return "adjust_contrast_v2"; } @Override - public String tensorflowName() { - return "AdjustContrast"; + public String[] tensorflowNames() { + return new String[]{"AdjustContrast", "AdjustContrastv2"}; + } + + @Override + public List calculateOutputDataTypes(List inputDataTypes) { + int n = args().length; + Preconditions.checkState(inputDataTypes != null && inputDataTypes.size() == n, "Expected %s input data types for %s, got %s", n, 
getClass(), inputDataTypes); + return Collections.singletonList(inputDataTypes.get(0)); } } \ No newline at end of file diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/AdjustContrastV2.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/AdjustContrastV2.java deleted file mode 100644 index 34b495970..000000000 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/AdjustContrastV2.java +++ /dev/null @@ -1,44 +0,0 @@ -/* ****************************************************************************** - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ -package org.nd4j.linalg.api.ops.custom; - -import lombok.NonNull; -import org.nd4j.autodiff.samediff.SDVariable; -import org.nd4j.autodiff.samediff.SameDiff; -import org.nd4j.linalg.api.ndarray.INDArray; - -public class AdjustContrastV2 extends BaseAdjustContrast { - - public AdjustContrastV2() {super();} - - public AdjustContrastV2(@NonNull INDArray in, double factor, INDArray out) { - super(in, factor, out); - } - - public AdjustContrastV2(@NonNull SameDiff sameDiff, @NonNull SDVariable in, @NonNull SDVariable factor) { - super( sameDiff,new SDVariable[]{in,factor}); - } - - @Override - public String opName() { - return "adjust_contrast_v2"; - } - - @Override - public String tensorflowName() { - return "AdjustContrastv2"; - } -} \ No newline at end of file diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/BaseAdjustContrast.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/BaseAdjustContrast.java deleted file mode 100644 index 80c344fe2..000000000 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/BaseAdjustContrast.java +++ /dev/null @@ -1,52 +0,0 @@ -/* ****************************************************************************** - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ -package org.nd4j.linalg.api.ops.custom; - -import lombok.NonNull; -import org.nd4j.autodiff.samediff.SDVariable; -import org.nd4j.autodiff.samediff.SameDiff; -import org.nd4j.common.base.Preconditions; -import org.nd4j.linalg.api.buffer.DataType; -import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.api.ops.DynamicCustomOp; - -import java.util.Collections; -import java.util.List; - -public abstract class BaseAdjustContrast extends DynamicCustomOp { - public BaseAdjustContrast() { - } - - public BaseAdjustContrast(@NonNull INDArray in, double factor, INDArray out) { - Preconditions.checkArgument(in.rank() >= 3, - "AdjustContrast: op expects rank of input array to be >= 3, but got %s instead", in.rank()); - inputArguments.add(in); - outputArguments.add(out); - - addTArgument(factor); - } - - public BaseAdjustContrast(@NonNull SameDiff sameDiff, @NonNull SDVariable[] vars) { - super("", sameDiff, vars); - } - - @Override - public List calculateOutputDataTypes(List inputDataTypes){ - int n = args().length; - Preconditions.checkState(inputDataTypes != null && inputDataTypes.size() == n, "Expected %s input data types for %s, got %s", n, getClass(), inputDataTypes); - return Collections.singletonList(inputDataTypes.get(0)); - } -} \ No newline at end of file diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/CompareAndBitpack.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/CompareAndBitpack.java index e8285fe9b..d30c0fe80 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/CompareAndBitpack.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/CompareAndBitpack.java @@ -17,10 +17,15 @@ package org.nd4j.linalg.api.ops.custom; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; +import org.nd4j.common.base.Preconditions; +import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ops.DynamicCustomOp; import org.nd4j.linalg.factory.Nd4j; +import java.util.Collections; +import java.util.List; + public class CompareAndBitpack extends DynamicCustomOp { public CompareAndBitpack() {} @@ -47,4 +52,11 @@ public class CompareAndBitpack extends DynamicCustomOp { public String tensorflowName() { return "CompareAndBitpack"; } + + @Override + public List calculateOutputDataTypes(List dataTypes){ + Preconditions.checkState(dataTypes != null && dataTypes.size() == 2, "Expected exactly 2 input datatypes for %s, got input %s", getClass(), dataTypes); + Preconditions.checkState(dataTypes.get(0) == dataTypes.get(1), "Input data types must be the same: got %s", dataTypes); + return Collections.singletonList(DataType.UINT8); + } } \ No newline at end of file diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/RgbToGrayscale.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/RgbToGrayscale.java index 6b71ba17f..f0e8c3022 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/RgbToGrayscale.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/RgbToGrayscale.java @@ -37,8 +37,4 @@ public class RgbToGrayscale 
extends DynamicCustomOp { return "rgb_to_grs"; } - @Override - public String tensorflowName() { - return "RgbToGrs"; - } } diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/RgbToYiq.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/RgbToYiq.java index 628e770ee..3a2ca46cf 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/RgbToYiq.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/RgbToYiq.java @@ -42,11 +42,6 @@ public class RgbToYiq extends DynamicCustomOp { return "rgb_to_yiq"; } - @Override - public String tensorflowName() { - return "RgbToYiq"; - } - @Override public List calculateOutputDataTypes(List inputDataTypes){ int n = args().length; diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/RgbToYuv.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/RgbToYuv.java index 8c7ed7353..679e1d3e5 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/RgbToYuv.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/RgbToYuv.java @@ -42,11 +42,6 @@ public class RgbToYuv extends DynamicCustomOp { return "rgb_to_yuv"; } - @Override - public String tensorflowName() { - return "RgbToYuv"; - } - @Override public List calculateOutputDataTypes(List inputDataTypes){ int n = args().length; diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/YiqToRgb.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/YiqToRgb.java index bca9999a8..3f647dfbe 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/YiqToRgb.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/YiqToRgb.java @@ -41,11 +41,6 @@ public class YiqToRgb extends DynamicCustomOp { return "yiq_to_rgb"; } - @Override - public String tensorflowName() { - return "YiqToRgb"; - } - @Override public List calculateOutputDataTypes(List inputDataTypes){ int n = args().length; diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/YuvToRgb.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/YuvToRgb.java index d6e52771c..1776a7b85 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/YuvToRgb.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/YuvToRgb.java @@ -42,10 +42,6 @@ public class YuvToRgb extends DynamicCustomOp { return "yuv_to_rgb"; } - @Override - public String tensorflowName() { - return "YuvToRgb"; - } @Override public List calculateOutputDataTypes(List inputDataTypes){ diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/image/NonMaxSuppressionV3.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/image/NonMaxSuppressionV3.java index d087287cf..77c8642cf 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/image/NonMaxSuppressionV3.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/image/NonMaxSuppressionV3.java @@ 
-53,7 +53,7 @@ public class NonMaxSuppressionV3 extends DynamicCustomOp { @Override public String[] tensorflowNames() { - return new String[]{"NonMaxSuppressionV3","NonMaxSuppressionV4"}; + return new String[]{"NonMaxSuppressionV3","NonMaxSuppressionV4","NonMaxSuppressionV5"}; } @Override diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/convolution/DeConv2D.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/convolution/DeConv2D.java index 1e082f6f3..8e2d82105 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/convolution/DeConv2D.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/convolution/DeConv2D.java @@ -306,11 +306,6 @@ public class DeConv2D extends DynamicCustomOp { return "ConvTranspose"; } - @Override - public String tensorflowName() { - return "Conv2DTranspose"; - } - @Override public List doDiff(List f1) { diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/custom/IsNonDecreasing.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/custom/IsNonDecreasing.java index 0a9360670..b3627f2db 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/custom/IsNonDecreasing.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/custom/IsNonDecreasing.java @@ -62,12 +62,6 @@ public class IsNonDecreasing extends DynamicCustomOp { return "is_non_decreasing"; } - - @Override - public String tensorflowName() { - return "IsNonDecreasing"; - } - @Override public List doDiff(List f1) { return Collections.singletonList(sameDiff.zerosLike(arg())); diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/pairwise/arithmetic/CopyOp.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/pairwise/arithmetic/CopyOp.java index 3ee75d23d..3f3c4754c 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/pairwise/arithmetic/CopyOp.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/pairwise/arithmetic/CopyOp.java @@ -78,7 +78,7 @@ public class CopyOp extends BaseTransformSameOp { @Override public String[] tensorflowNames() { - return new String[]{"Copy","DeepCopy","CopyHost"}; + return new String[]{"Copy"}; } @Override diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/same/Identity.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/same/Identity.java index f9744d8ce..d555e27e7 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/same/Identity.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/same/Identity.java @@ -64,7 +64,7 @@ public class Identity extends BaseDynamicTransformOp { @Override public String[] tensorflowNames() { - return new String[]{"Identity"}; + return new String[]{"Identity", "DeepCopy", "CopyHost"}; } @Override diff --git 
a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/segment/UnsortedSegmentMean.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/segment/UnsortedSegmentMean.java index 637eff3bb..84c7e6ab1 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/segment/UnsortedSegmentMean.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/segment/UnsortedSegmentMean.java @@ -55,10 +55,6 @@ public class UnsortedSegmentMean extends DynamicCustomOp { return "unsorted_segment_mean"; } - @Override - public String tensorflowName() { - return "UnsortedSegmentMean"; - } @Override public List doDiff(List gradients){ diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/segment/UnsortedSegmentSqrtN.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/segment/UnsortedSegmentSqrtN.java index 64b6d2427..9fa88b788 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/segment/UnsortedSegmentSqrtN.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/segment/UnsortedSegmentSqrtN.java @@ -55,11 +55,6 @@ public class UnsortedSegmentSqrtN extends DynamicCustomOp { return "unsorted_segment_sqrt_n"; } - @Override - public String tensorflowName() { - return "UnsortedSegmentSqrtN"; - } - @Override public List doDiff(List gradients){ return new UnsortedSegmentSqrtNBp(sameDiff, arg(0), arg(1), gradients.get(0), numSegments).outputs(); diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/random/custom/RandomGamma.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/random/custom/RandomGamma.java index 7be70e218..bb2676ba9 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/random/custom/RandomGamma.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/random/custom/RandomGamma.java @@ -71,9 +71,7 @@ public class RandomGamma extends DynamicCustomOp { @Override public void initFromTensorFlow(NodeDef nodeDef, SameDiff initWith, Map attributesForNode, GraphDef graph) { - if(attributesForNode.containsKey("alpha")) { - outputDataType = DataTypeAdapter.dtypeConv(attributesForNode.get("alpha").getType()); - } + outputDataType = DataTypeAdapter.dtypeConv(attributesForNode.get("T").getType()); } @Override diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/random/impl/DropOutInverted.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/random/impl/DropOutInverted.java index 6b174ae63..53de3559f 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/random/impl/DropOutInverted.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/random/impl/DropOutInverted.java @@ -84,12 +84,6 @@ public class DropOutInverted extends BaseRandomOp { return "Dropout"; } - @Override - public String tensorflowName() { - return "Dropout"; - } - - @Override public List doDiff(List f1) { return null; diff --git 
a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/random/impl/UniformDistribution.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/random/impl/UniformDistribution.java index 84fade263..bf1863dda 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/random/impl/UniformDistribution.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/random/impl/UniformDistribution.java @@ -100,12 +100,6 @@ public class UniformDistribution extends BaseRandomOp { throw new NoOpNameFoundException("No onnx op opName found for " + opName()); } - @Override - public String tensorflowName() { - return "RandomUniformGG"; - } - - @Override public List doDiff(List f1) { return Collections.emptyList(); diff --git a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/imports/TFGraphs/TFGraphTestAllHelper.java b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/imports/TFGraphs/TFGraphTestAllHelper.java index 1cc3baa13..c93b3deb7 100644 --- a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/imports/TFGraphs/TFGraphTestAllHelper.java +++ b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/imports/TFGraphs/TFGraphTestAllHelper.java @@ -851,7 +851,68 @@ public class TFGraphTestAllHelper { return (t, s) -> Nd4j.sort(t, true).equals(Nd4j.sort(s, true)); } - if(modelName.startsWith("alpha_dropout") || modelName.startsWith("layers_dropout") || modelName.equals("dropout")) + if(modelName.startsWith("empty")){ + return (t, s) -> { + boolean areEqualShapes = t.equalShapes(s); + boolean areEqualDataTypes = t.dataType() == s.dataType(); + return areEqualShapes && areEqualDataTypes; + }; } + + // the elements before and after the shuffle must match up to ordering, so compare sorted copies + if(modelName.startsWith("random_shuffle")){ + return (t, s) -> Nd4j.sort(t, true).equals(Nd4j.sort(s, true)); + } + + if(modelName.startsWith("random_normal")){ + return (t, s) -> { + boolean areEqualShapes = t.equalShapes(s); + double meanS = s.meanNumber().doubleValue(); + double meanT = t.meanNumber().doubleValue(); + double stdS = s.stdNumber().doubleValue(); + double stdT = t.stdNumber().doubleValue(); + double eps = 1; + return areEqualShapes && (Math.abs(meanS-meanT) < eps) && (Math.abs(stdS-stdT) < eps); + }; } + + if(modelName.startsWith("random_gamma")){ + return (t, s) -> { + boolean areEqualShapes = t.equalShapes(s); + boolean nonNegativeValues = (t.minNumber().doubleValue() > 0) && (s.minNumber().doubleValue() > 0); + double meanS = s.meanNumber().doubleValue(); + double meanT = t.meanNumber().doubleValue(); + double stdS = s.stdNumber().doubleValue(); + double stdT = t.stdNumber().doubleValue(); + double eps = 1; + return areEqualShapes && nonNegativeValues && (Math.abs(meanS-meanT) < eps) && (Math.abs(stdS-stdT) < eps); + }; + } + + if(modelName.startsWith("random_poisson") || modelName.startsWith("random_poisson_v2")){ + return (t, s) -> { + boolean areEqualShapes = t.equalShapes(s); + boolean nonNegativeValues = (t.minNumber().doubleValue() >= 0) && (s.minNumber().doubleValue() >= 0); + double meanS = s.meanNumber().doubleValue(); + double meanT = t.meanNumber().doubleValue(); + double stdS = s.stdNumber().doubleValue(); + double stdT = t.stdNumber().doubleValue(); + double eps = 1; + return areEqualShapes && nonNegativeValues && (Math.abs(meanS-meanT) < eps) && (Math.abs(stdS-stdT) < eps); + }; + } + + if(modelName.startsWith("random_uniform")||
modelName.startsWith("random_uniform_int")){ + return (t, s) -> { + boolean areEqualShapes = t.equalShapes(s); + double meanS = s.meanNumber().doubleValue(); + double meanT = t.meanNumber().doubleValue(); + double stdS = s.stdNumber().doubleValue(); + double stdT = t.stdNumber().doubleValue(); + double eps = 1; + return areEqualShapes && (Math.abs(stdS-stdT) < eps) && (Math.abs(meanS-meanT) < eps); + }; + } + + if(modelName.startsWith("alpha_dropout") || modelName.startsWith("layers_dropout") || modelName.startsWith("dropout")) //We can't compare dropout using simple equality due to randomness return (t, s) -> { double[] tfNums = t.ravel().toDoubleVector(); diff --git a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/imports/TFGraphs/TFGraphTestAllSameDiff.java b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/imports/TFGraphs/TFGraphTestAllSameDiff.java index 72c705852..92ba319ed 100644 --- a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/imports/TFGraphs/TFGraphTestAllSameDiff.java +++ b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/imports/TFGraphs/TFGraphTestAllSameDiff.java @@ -66,23 +66,29 @@ public class TFGraphTestAllSameDiff { //Note: Can't extend BaseNd4jTest here a public static final String[] IGNORE_REGEXES = new String[]{ //Failing 2019/07/01 - Issue 10, https://github.com/deeplearning4j/deeplearning4j/issues/6958 //Still failing 2019/09/11 + //Still failing 2020/04/27 + //java.lang.IllegalStateException: Requested output variable LogMatrixDeterminant:1 does not exist in SameDiff instance "slogdet/.*", //Failing 2019/09/11 - https://github.com/eclipse/deeplearning4j/issues/7965 + // Still failing 2020/04/27 java.lang.IllegalStateException: Requested output variable Bincount does not exist in SameDiff instance "bincount/.*", // Failing 2019/11/14 https://github.com/eclipse/deeplearning4j/issues/8393 "is_strictly_increasing/emptyArrayTest/.*", //TODO floormod and truncatemod behave differently - i.e., "c" vs. "python" semantics. 
Need to check implementations too + // Still failing 2020/04/27 java.lang.IllegalStateException: Could not find class for TF Ops: TruncateMod "truncatemod/.*", //Still failing as of 2019/09/11 - https://github.com/deeplearning4j/deeplearning4j/issues/6464 - not sure if related to: https://github.com/deeplearning4j/deeplearning4j/issues/6447 "cnn2d_nn/nhwc_b1_k12_s12_d12_SAME", //2019/09/11 - No tensorflow op found for SparseTensorDenseAdd + // 2020/04/27 java.lang.IllegalStateException: Could not find class for TF Ops: SparseTensorDenseAdd "confusion/.*", //2019/09/11 - Couple of tests failing (InferenceSession issues) + // Still failing 2020/04/27 Requested output variable concat does not exist in SameDiff instance "rnn/bstack/d_.*", //2019/05/21 - Failing on AVX2/512 intermittently (Linux, OSX), passing elsewhere @@ -97,87 +103,68 @@ public class TFGraphTestAllSameDiff { //Note: Can't extend BaseNd4jTest here a "g_11", //2019/07/09 - Need "Multinomial" op - https://github.com/eclipse/deeplearning4j/issues/7913 + // Still failing 2020/04/27 java.lang.IllegalStateException: Could not find class for TF Ops: Multinomial "multinomial/.*", //2019/11/04 AB - disabled, pending libnd4j deconv3d_tf implementation + // Still failing 2020/04/27 java.lang.IllegalStateException: Could not find descriptor for op: deconv3d_tf - class: org.nd4j.linalg.api.ops.impl.layers.convolution.DeConv3DTF "conv3d_transpose.*", //2019/11/15 - mapping is not present yet https://github.com/eclipse/deeplearning4j/issues/8397 + // Still failing 2020/04/27 java.lang.AssertionError: Predictions do not match on ragged/reduce_mean/2d_a1, node RaggedReduceMean/truediv "ragged/reduce_mean/.*", // 2019/11/15 - missing dtype argument in nd4j, tests are useless https://github.com/eclipse/deeplearning4j/issues/8398 - "zeros_like/rank2_float32_dtype_int.*", + // Still failing 2020/04/27 java.lang.IndexOutOfBoundsException: 1 + "zeros_like/rank2_float32_dtype_int.*", // 11.26.2019 failing - https://github.com/eclipse/deeplearning4j/issues/8453 + // Still failing 2020/04/27 java.lang.AssertionError: Predictions do not match on roll/rank2_float32_zeroshift, node Roll "roll/.*", // 11.26.2019 failing https://github.com/eclipse/deeplearning4j/issues/8455 + // still failing 2020/04/27 + // java.lang.IllegalStateException: Failed to calculate output shapes for op matrix_band_part (MatrixBandPart) - no shapes were returned by calculateOutputShape() "matrix_band_part/.*", // 12.20.2019 - https://github.com/eclipse/deeplearning4j/issues/8559 + // Still failing 2020/04/27 java.lang.AssertionError: Predictions do not match on fused_batch_norm/float32_nhcw, node FusedBatchNormV3 "fused_batch_norm/.*", - // AB 2020/01/04 - https://github.com/eclipse/deeplearning4j/issues/8592 - "emptyArrayTests/reshape/rank2_shape2-0_2-0--1", + // 01.05.2020 - https://github.com/eclipse/deeplearning4j/issues/8898 + "primitive_gru", - //AB 2020/01/07 - Known issues - "bitcast/from_float64_to_int64", - "bitcast/from_rank2_float64_to_int64", - "bitcast/from_float64_to_uint64", + // 05.05.2020 - https://github.com/eclipse/deeplearning4j/issues/8921 + "random_poisson/rank1_float16", "random_poisson/rank1_float32", "random_poisson/rank1_half", + "random_poisson_v2/rank1_float64", "random_poisson_v2/rank1_float16", "random_poisson_v2/rank1_half", - - //NEWLY ADDED TESTCASES from 27/04/2020 - "non_max_suppression_v2/.*", "non_max_suppression/.*", + //08.05.2020 - https://github.com/eclipse/deeplearning4j/issues/8927 "random_gamma/.*", -
"non_max_suppression_v5/.*", - "non_max_suppression_v4/.*", - "non_max_suppression_v3/.*", - "dropout/.*", - "max_pool_with_argmax/.*", - "conv2d_transpose/.*", + + //08.05.2020 - https://github.com/eclipse/deeplearning4j/issues/8928 "Conv3DBackpropInputV2/.*", - "Conv3DBackpropInput/.*", - "mod/.*", - "leaky_relu/.*", - "DeepCopy/.*", - "empty/.*", - "ones_like/.*", - "is_non_decreasing/.*", - "div/.*", - "lgamma/.*", + + //12.05.2020 - https://github.com/eclipse/deeplearning4j/issues/8940 + "compare_and_bitpack/.*", + + //12.05.2020 - https://github.com/eclipse/deeplearning4j/issues/8943 + "max_pool_with_argmax/int64_int64_padding_SAME", "max_pool_with_argmax/int32_int64_padding_SAME", + + //12.05.2020 - https://github.com/eclipse/deeplearning4j/issues/8946 + "non_max_suppression_v4/.*","non_max_suppression_v5/.*", + + // 18.05.2020 - https://github.com/eclipse/deeplearning4j/issues/8960 + "random_shuffle/.*", + // 18.05.2020 - https://github.com/eclipse/deeplearning4j/issues/8963 "random_uniform/.*", "random_uniform_int/.*", - "resize_area/.*", - "zeros_like_tf1/.*", - "Conv2DTranspose/.*", - "rgb_to_yuv/.*", - "rgb_to_grayscale/.*", - "rgb_to_yiq/.*", - "losses/.*", - "yiq_to_rgb/.*", - "yuv_to_rgb/.*", - "emptyArrayTests/.*", "random_normal/.*", - "random_shuffle/.*", - "random_poisson_v2/.*", "random_poisson/.*", - "random_crop/.*", - "compare_and_bitpack/.*", - "adjust_contrast/.*", - "confusion/.*", - "bitcast/.*", - "roll/.*", - "matrix_band_part/.*", - "conv3d_transpose_layers/.*", - "multinomial/.*", - "unsorted_segment/.*", - "cnn2d_nn/.*", - "truncatemod/.*", - "bincount/.*", - "slogdet/.*", - "adjust_contrast_v2/.*" + "random_poisson_v2/.*", -}; + }; /* As per TFGraphTestList.printArraysDebugging - this field defines a set of regexes for test cases that should have all arrays printed during execution.
diff --git a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/custom/CustomOpsTests.java b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/custom/CustomOpsTests.java index 12658ede8..534b08e25 100644 --- a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/custom/CustomOpsTests.java +++ b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/custom/CustomOpsTests.java @@ -847,22 +847,6 @@ public class CustomOpsTests extends BaseNd4jTest { assertArrayEquals(new long[]{256, 256, 3}, lsd.get(0).getShape()); } - @Test - public void testAdjustContrastV2() { - INDArray in = Nd4j.linspace(DataType.DOUBLE,1.0,1.0, 4*4*3).reshape(4,4,3); - INDArray out = Nd4j.createUninitialized(4,4,3); - - INDArray expected = Nd4j.createFromArray(new double[]{-21.5, -20.5, -19.5, -15.5, -14.5, -13.5, -9.5, -8.5, -7.5, -3.5, -2.5, -1.5, - 2.5, 3.5, 4.5, 8.5, 9.5, 10.5, 14.5, 15.5, 16.5, 20.5, 21.5, 22.5, - 26.5, 27.5, 28.5, 32.5, 33.5, 34.5, 38.5, 39.5, 40.5, 44.5, 45.5, 46.5, - 50.5, 51.5, 52.5, 56.5, 57.5, 58.5, 62.5, 63.5, 64.5, 68.5, 69.5, 70.5 - }).reshape(4,4,3); - - Nd4j.exec(new AdjustContrastV2(in, 2.0, out)); - - assertArrayEquals(out.shape(), in.shape()); - assertEquals(expected, out); - } @Ignore("AS 11/13/2019 https://github.com/eclipse/deeplearning4j/issues/8374") @Test From 0bc9785508b17e13ca7f58dfe8f3bc061bca89e1 Mon Sep 17 00:00:00 2001 From: Yurii Shyrma Date: Tue, 19 May 2020 21:56:41 +0300 Subject: [PATCH 09/21] mkldnn concat call cases correction (#471) * - disable mkldnn concat when number of input arrays > 3072 Signed-off-by: Yurii * - get rid of the loop when calculating the number of input arrays Signed-off-by: Yurii --- libnd4j/include/ops/declarable/platform/mkldnn/concat.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/libnd4j/include/ops/declarable/platform/mkldnn/concat.cpp b/libnd4j/include/ops/declarable/platform/mkldnn/concat.cpp index 9df63556e..3bf97e586 100644 --- a/libnd4j/include/ops/declarable/platform/mkldnn/concat.cpp +++ b/libnd4j/include/ops/declarable/platform/mkldnn/concat.cpp @@ -178,7 +178,11 @@ PLATFORM_CHECK(concat, ENGINE_CPU) { const auto zType = z->dataType(); - return z->rankOf() < 7 && (zType==DataType::FLOAT32 || zType==DataType::HALF || zType==DataType::BFLOAT16 || zType==DataType::UINT8 || zType==DataType::INT8); + const bool isAxisInLastArr = block.getBArguments()->size() == 0 ? false : B_ARG(0); + const int numOfInArrs = isAxisInLastArr ? block.width() - 1 : block.width(); + + return z->rankOf() < 7 && numOfInArrs <= 3072 + && (zType==DataType::FLOAT32 || zType==DataType::HALF || zType==DataType::BFLOAT16 || zType==DataType::UINT8 || zType==DataType::INT8); } } From bde0a4ec98d9a20cda19b25bb214ae49356647ec Mon Sep 17 00:00:00 2001 From: Fariz Rahman Date: Thu, 21 May 2020 05:47:12 +0400 Subject: [PATCH 10/21] Python4j (#422) * types * pom fix * basic exec + tests * safe exec * exec fixes + tests * prim tests * lists and dicts * collections tests * list test * api * exec and return all vars * context manager + fixes * leak fixes * jobs tests * gc basic working * more gc fixed * copyright headers * try-catch-finally * gc fixes * validate var name (startswith _collapsed..)
* try block refac * pythonexecutioner nits * hashset->set * call() gc fix * gc fixes * type check fix * types fixes * refacs * rem numpyarray * threadsafety check * private->public * threadsafe checks * pythonGC test * threading fixes + tests * threading tests+ * threading test fixes * make PythonException unchecked * nits * docstrings * path fixes --- pom.xml | 1 + python4j/pom.xml | 66 ++ python4j/python4j-core/pom.xml | 44 ++ .../java/org/eclipse/python4j/Python.java | 611 ++++++++++++++++++ .../python4j/PythonContextManager.java | 241 +++++++ .../org/eclipse/python4j/PythonException.java | 52 ++ .../eclipse/python4j/PythonExecutioner.java | 342 ++++++++++ .../java/org/eclipse/python4j/PythonGC.java | 137 ++++ .../java/org/eclipse/python4j/PythonGIL.java | 93 +++ .../java/org/eclipse/python4j/PythonJob.java | 175 +++++ .../org/eclipse/python4j/PythonObject.java | 244 +++++++ .../java/org/eclipse/python4j/PythonType.java | 47 ++ .../org/eclipse/python4j/PythonTypes.java | 344 ++++++++++ .../org/eclipse/python4j/PythonVariable.java | 64 ++ .../eclipse/python4j/pythonexec/pythonexec.py | 36 ++ .../test/java/PythonBasicExecutionTest.java | 108 ++++ .../src/test/java/PythonCollectionsTest.java | 62 ++ .../test/java/PythonContextManagerTest.java | 51 ++ .../src/test/java/PythonGCTest.java | 54 ++ .../src/test/java/PythonJobTest.java | 287 ++++++++ .../src/test/java/PythonMultiThreadTest.java | 169 +++++ .../test/java/PythonPrimitiveTypesTest.java | 82 +++ python4j/python4j-numpy/pom.xml | 42 ++ 23 files changed, 3352 insertions(+) create mode 100644 python4j/pom.xml create mode 100644 python4j/python4j-core/pom.xml create mode 100644 python4j/python4j-core/src/main/java/org/eclipse/python4j/Python.java create mode 100644 python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonContextManager.java create mode 100644 python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonException.java create mode 100644 python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonExecutioner.java create mode 100644 python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonGC.java create mode 100644 python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonGIL.java create mode 100644 python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonJob.java create mode 100644 python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonObject.java create mode 100644 python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonType.java create mode 100644 python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonTypes.java create mode 100644 python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonVariable.java create mode 100644 python4j/python4j-core/src/main/resources/org/eclipse/python4j/pythonexec/pythonexec.py create mode 100644 python4j/python4j-core/src/test/java/PythonBasicExecutionTest.java create mode 100644 python4j/python4j-core/src/test/java/PythonCollectionsTest.java create mode 100644 python4j/python4j-core/src/test/java/PythonContextManagerTest.java create mode 100644 python4j/python4j-core/src/test/java/PythonGCTest.java create mode 100644 python4j/python4j-core/src/test/java/PythonJobTest.java create mode 100644 python4j/python4j-core/src/test/java/PythonMultiThreadTest.java create mode 100644 python4j/python4j-core/src/test/java/PythonPrimitiveTypesTest.java create mode 100644 python4j/python4j-numpy/pom.xml diff --git a/pom.xml b/pom.xml index ab9f80b92..184eeb11f 100644 --- a/pom.xml +++ b/pom.xml @@ -137,6 +137,7 @@ jumpy pydatavec pydl4j + 
python4j diff --git a/python4j/pom.xml b/python4j/pom.xml new file mode 100644 index 000000000..57af8f1bb --- /dev/null +++ b/python4j/pom.xml @@ -0,0 +1,66 @@ + +<project xmlns="http://maven.apache.org/POM/4.0.0"> + + <parent> + <artifactId>deeplearning4j</artifactId> + <groupId>org.deeplearning4j</groupId> + <version>1.0.0-SNAPSHOT</version> + </parent> + <modelVersion>4.0.0</modelVersion> + + <groupId>org.eclipse</groupId> + <artifactId>python4j-parent</artifactId> + <packaging>pom</packaging> + + <modules> + <module>python4j-core</module> + <module>python4j-numpy</module> + </modules> + + <dependencies> + <dependency> + <groupId>org.projectlombok</groupId> + <artifactId>lombok</artifactId> + <version>${lombok.version}</version> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>ch.qos.logback</groupId> + <artifactId>logback-classic</artifactId> + <version>${logback.version}</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId>junit</groupId> + <artifactId>junit</artifactId> + <version>${junit.version}</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId>commons-io</groupId> + <artifactId>commons-io</artifactId> + <version>${commons-io.version}</version> + </dependency> + <dependency> + <groupId>com.google.code.findbugs</groupId> + <artifactId>jsr305</artifactId> + <version>3.0.2</version> + </dependency> + </dependencies> +</project> \ No newline at end of file diff --git a/python4j/python4j-core/pom.xml b/python4j/python4j-core/pom.xml new file mode 100644 index 000000000..b429d8272 --- /dev/null +++ b/python4j/python4j-core/pom.xml @@ -0,0 +1,44 @@ + +<project xmlns="http://maven.apache.org/POM/4.0.0"> + + <parent> + <artifactId>python4j-parent</artifactId> + <groupId>org.eclipse</groupId> + <version>1.0.0-SNAPSHOT</version> + </parent> + <packaging>jar</packaging> + <modelVersion>4.0.0</modelVersion> + + <artifactId>python4j-core</artifactId> + + <dependencies> + <dependency> + <groupId>org.json</groupId> + <artifactId>json</artifactId> + <version>20190722</version> + </dependency> + <dependency> + <groupId>org.bytedeco</groupId> + <artifactId>cpython-platform</artifactId> + <version>${cpython-platform.version}</version> + </dependency> + </dependencies> +</project> \ No newline at end of file diff --git a/python4j/python4j-core/src/main/java/org/eclipse/python4j/Python.java b/python4j/python4j-core/src/main/java/org/eclipse/python4j/Python.java new file mode 100644 index 000000000..fd6fff112 --- /dev/null +++ b/python4j/python4j-core/src/main/java/org/eclipse/python4j/Python.java @@ -0,0 +1,611 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + + +package org.eclipse.python4j; + +import org.bytedeco.cpython.PyObject; + +import java.util.Collections; +import java.util.List; + +import static org.bytedeco.cpython.global.python.*; + + +public class Python { + + static { + new PythonExecutioner(); + } + + /** + * Imports a python module, similar to python import statement. + * + * @param moduleName name of the module to be imported + * @return reference to the module object + */ + public static PythonObject importModule(String moduleName) { + PythonGIL.assertThreadSafe(); + PythonObject module = new PythonObject(PyImport_ImportModule(moduleName)); + if (module.isNone()) { + throw new PythonException("Error importing module: " + moduleName); + } + return module; + } + + /** + * Gets a builtins attribute + * + * @param attrName Attribute name + * @return + */ + public static PythonObject attr(String attrName) { + PythonGIL.assertThreadSafe(); + PyObject builtins = PyImport_ImportModule("builtins"); + try { + return new PythonObject(PyObject_GetAttrString(builtins, attrName)); + } finally { + Py_DecRef(builtins); + } + } + + + /** + * Gets the size of a PythonObject, similar to len() in python.
+ * + * @param pythonObject + * @return + */ + public static PythonObject len(PythonObject pythonObject) { + PythonGIL.assertThreadSafe(); + long n = PyObject_Size(pythonObject.getNativePythonObject()); + if (n < 0) { + throw new PythonException("Object has no length: " + pythonObject); + } + return PythonTypes.INT.toPython(n); + } + + /** + * Gets the string representation of an object. + * + * @param pythonObject + * @return + */ + public static PythonObject str(PythonObject pythonObject) { + PythonGIL.assertThreadSafe(); + try { + return PythonTypes.STR.toPython(pythonObject.toString()); + } catch (Exception e) { + throw new RuntimeException(e); + } + + + } + + /** + * Returns an empty string + * + * @return + */ + public static PythonObject str() { + PythonGIL.assertThreadSafe(); + try { + return PythonTypes.STR.toPython(""); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + /** + * Returns the str type object + * @return + */ + public static PythonObject strType() { + return attr("str"); + } + + /** + * Returns a floating point number from a number or a string. + * @param pythonObject + * @return + */ + public static PythonObject float_(PythonObject pythonObject) { + return PythonTypes.FLOAT.toPython(PythonTypes.FLOAT.toJava(pythonObject)); + } + + /** + * Returns 0. + * @return + */ + public static PythonObject float_() { + try { + return PythonTypes.FLOAT.toPython(0d); + } catch (Exception e) { + throw new RuntimeException(e); + } + + } + + /** + * Returns the float type object + * @return + */ + public static PythonObject floatType() { + return attr("float"); + } + + + /** + * Converts a value to a Boolean value i.e., True or False, using the standard truth testing procedure. + * @param pythonObject + * @return + */ + public static PythonObject bool(PythonObject pythonObject) { + return PythonTypes.BOOL.toPython(PythonTypes.BOOL.toJava(pythonObject)); + + } + + /** + * Returns False. + * @return + */ + public static PythonObject bool() { + return PythonTypes.BOOL.toPython(false); + + } + + /** + * Returns the bool type object + * @return + */ + public static PythonObject boolType() { + return attr("bool"); + } + + /** + * Returns an integer from a number or a string. + * @param pythonObject + * @return + */ + public static PythonObject int_(PythonObject pythonObject) { + return PythonTypes.INT.toPython(PythonTypes.INT.toJava(pythonObject)); + } + + /** + * Returns 0. + * @return + */ + public static PythonObject int_() { + return PythonTypes.INT.toPython(0L); + + } + + /** + * Returns the int type object + * @return + */ + public static PythonObject intType() { + return attr("int"); + } + + /** + * Takes sequence types and converts them to lists. + * @param pythonObject + * @return + */ + public static PythonObject list(PythonObject pythonObject) { + PythonGIL.assertThreadSafe(); + try (PythonGC _ = PythonGC.watch()) { + PythonObject listF = attr("list"); + PythonObject ret = listF.call(pythonObject); + if (ret.isNone()) { + throw new PythonException("Object is not iterable: " + pythonObject.toString()); + } + return ret; + } + } + + /** + * Returns empty list. + * @return + */ + public static PythonObject list() { + return PythonTypes.LIST.toPython(Collections.emptyList()); + } + + /** + * Returns list type object. + * @return + */ + public static PythonObject listType() { + return attr("list"); + } + + /** + * Creates a dictionary.
+ * @param pythonObject + * @return + */ + public static PythonObject dict(PythonObject pythonObject) { + PythonObject dictF = attr("dict"); + PythonObject ret = dictF.call(pythonObject); + if (ret.isNone()) { + throw new PythonException("Cannot build dict from object: " + pythonObject.toString()); + } + dictF.del(); + return ret; + } + + /** + * Returns empty dict + * @return + */ + public static PythonObject dict() { + return PythonTypes.DICT.toPython(Collections.emptyMap()); + } + + /** + * Returns dict type object. + * @return + */ + public static PythonObject dictType() { + return attr("dict"); + } + + /** + * Creates a set. + * @param pythonObject + * @return + */ + public static PythonObject set(PythonObject pythonObject) { + PythonObject setF = attr("set"); + PythonObject ret = setF.call(pythonObject); + if (ret.isNone()) { + throw new PythonException("Cannot build set from object: " + pythonObject.toString()); + } + setF.del(); + return ret; + } + + /** + * Returns empty set. + * @return + */ + public static PythonObject set() { + PythonObject setF = attr("set"); + PythonObject ret; + ret = setF.call(); + setF.del(); + return ret; + } + + /** + * Returns the set type object. + * @return + */ + public static PythonObject setType() { + return attr("set"); + } + + /** + * Creates a bytearray. + * @param pythonObject + * @return + */ + public static PythonObject bytearray(PythonObject pythonObject) { + PythonObject baF = attr("bytearray"); + PythonObject ret = baF.call(pythonObject); + if (ret.isNone()) { + throw new PythonException("Cannot build bytearray from object: " + pythonObject.toString()); + } + baF.del(); + return ret; + } + + /** + * Returns empty bytearray. + * @return + */ + public static PythonObject bytearray() { + PythonObject baF = attr("bytearray"); + PythonObject ret; + ret = baF.call(); + baF.del(); + return ret; + } + + /** + * Returns bytearray type object + * @return + */ + public static PythonObject bytearrayType() { + return attr("bytearray"); + } + + /** + * Creates a memoryview. + * @param pythonObject + * @return + */ + public static PythonObject memoryview(PythonObject pythonObject) { + PythonObject mvF = attr("memoryview"); + PythonObject ret = mvF.call(pythonObject); + if (ret.isNone()) { + throw new PythonException("Cannot build memoryview from object: " + pythonObject.toString()); + } + mvF.del(); + return ret; + } + + /** + * Returns memoryview type object. + * @return + */ + public static PythonObject memoryviewType() { + return attr("memoryview"); + } + + /** + * Creates a byte string. + * @param pythonObject + * @return + */ + public static PythonObject bytes(PythonObject pythonObject) { + PythonObject bytesF = attr("bytes"); + PythonObject ret = bytesF.call(pythonObject); + if (ret.isNone()) { + throw new PythonException("Cannot build bytes from object: " + pythonObject.toString()); + } + bytesF.del(); + return ret; + } + + /** + * Returns empty byte string. + * @return + */ + public static PythonObject bytes() { + PythonObject bytesF = attr("bytes"); + PythonObject ret; + ret = bytesF.call(); + bytesF.del(); + return ret; + } + + /** + * Returns bytes type object + * @return + */ + public static PythonObject bytesType() { + return attr("bytes"); + } + + /** + * Creates a tuple.
+ * @param pythonObject + * @return + */ + public static PythonObject tuple(PythonObject pythonObject) { + PythonObject tupleF = attr("tuple"); + PythonObject ret = tupleF.call(pythonObject); + if (ret.isNone()) { + throw new PythonException("Cannot build tuple from object: " + pythonObject.toString()); + } + tupleF.del(); + return ret; + } + + /** + * Returns empty tuple. + * @return + */ + public static PythonObject tuple() { + PythonObject tupleF = attr("tuple"); + PythonObject ret; + ret = tupleF.call(); + tupleF.del(); + return ret; + } + + /** + * Returns tuple type object + * @return + */ + public static PythonObject tupleType() { + return attr("tuple"); + } + + /** + * Creates an Exception + * @param pythonObject + * @return + */ + public static PythonObject Exception(PythonObject pythonObject) { + PythonObject excF = attr("Exception"); + PythonObject ret = excF.call(pythonObject); + excF.del(); + return ret; + } + + /** + * Creates an Exception + * @return + */ + public static PythonObject Exception() { + PythonObject excF = attr("Exception"); + PythonObject ret; + ret = excF.call(); + excF.del(); + return ret; + } + + /** + * Returns Exception type object + * @return + */ + public static PythonObject ExceptionType() { + return attr("Exception"); + } + + + /** + * Returns the globals dictionary. + * @return + */ + public static PythonObject globals() { + PythonGIL.assertThreadSafe(); + PyObject main = PyImport_ImportModule("__main__"); + PyObject globals = PyModule_GetDict(main); + Py_DecRef(main); + return new PythonObject(globals, false); + } + + /** + * Returns the type of an object. + * @param pythonObject + * @return + */ + public static PythonObject type(PythonObject pythonObject) { + PythonObject typeF = attr("type"); + PythonObject ret = typeF.call(pythonObject); + typeF.del(); + return ret; + } + + /** + * Returns True if the specified object is of the specified type, otherwise False. + * @param obj + * @param type + * @return + */ + public static boolean isinstance(PythonObject obj, PythonObject... type) { + PythonGIL.assertThreadSafe(); + PyObject argsTuple = PyTuple_New(type.length); + try { + for (int i = 0; i < type.length; i++) { + PythonObject x = type[i]; + Py_IncRef(x.getNativePythonObject()); + PyTuple_SetItem(argsTuple, i, x.getNativePythonObject()); + } + return PyObject_IsInstance(obj.getNativePythonObject(), argsTuple) != 0; + } finally { + Py_DecRef(argsTuple); + } + + } + + /** + * Evaluates the specified expression. + * @param expression + * @return + */ + public static PythonObject eval(String expression) { + + PythonGIL.assertThreadSafe(); + PyObject compiledCode = Py_CompileString(expression, "", Py_eval_input); + PyObject main = PyImport_ImportModule("__main__"); + PyObject globals = PyModule_GetDict(main); + PyObject locals = PyDict_New(); + try { + return new PythonObject(PyEval_EvalCode(compiledCode, globals, locals)); + } finally { + Py_DecRef(main); + Py_DecRef(locals); + Py_DecRef(compiledCode); + } + + } + + /** + * Returns the builtins module + * @return + */ + public static PythonObject builtins() { + return importModule("builtins"); + + } + + /** + * Returns None. + * @return + */ + public static PythonObject None() { + return eval("None"); + } + + /** + * Returns True. + * @return + */ + public static PythonObject True() { + return eval("True"); + } + + /** + * Returns False.
+ * @return + */ + public static PythonObject False() { + return eval("False"); + } + + /** + * Returns True if the object passed is callable, otherwise False. + * @param pythonObject + * @return + */ + public static boolean callable(PythonObject pythonObject) { + PythonGIL.assertThreadSafe(); + return PyCallable_Check(pythonObject.getNativePythonObject()) == 1; + } + + + public static void setContext(String context){ + PythonContextManager.setContext(context); + } + + public static String getCurrentContext() { + return PythonContextManager.getCurrentContext(); + } + + public static void deleteContext(String context){ + PythonContextManager.deleteContext(context); + } + public static void resetContext() { + PythonContextManager.reset(); + } + + /** + * Executes a string of code. + * @param code + * @throws PythonException + */ + public static void exec(String code) throws PythonException { + PythonExecutioner.exec(code); + } + + /** + * Executes a string of code. + * @param code + * @param inputs + * @param outputs + */ + public static void exec(String code, List inputs, List outputs){ + PythonExecutioner.exec(code, inputs, outputs); + } + + +} diff --git a/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonContextManager.java b/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonContextManager.java new file mode 100644 index 000000000..a34d8a239 --- /dev/null +++ b/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonContextManager.java @@ -0,0 +1,241 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +package org.eclipse.python4j; + +import javax.lang.model.SourceVersion; + + +import java.util.HashSet; +import java.util.Set; +import java.util.concurrent.atomic.AtomicBoolean; + +/** + * Emulates multiple interpreters in a single interpreter. + * This works by simply obfuscating/de-obfuscating variable names + * such that only the required subset of the global namespace is "visible" + * at any given time. + * By default, there exists a "main" context emulating the default interpreter. + * + * @author Fariz Rahman + */ + + +public class PythonContextManager { + + private static Set contexts = new HashSet<>(); + private static AtomicBoolean init = new AtomicBoolean(false); + private static String currentContext; + private static final String MAIN_CONTEXT = "main"; + private static final String COLLAPSED_KEY = "__collapsed__"; + + static { + init(); + } + + private static void init() { + if (init.get()) return; + new PythonExecutioner(); + init.set(true); + currentContext = MAIN_CONTEXT; + contexts.add(currentContext); + } + + + /** + * Adds a new context.
+ * @param contextName + */ + public static void addContext(String contextName) { + if (!validateContextName(contextName)) { + throw new PythonException("Invalid context name: " + contextName); + } + contexts.add(contextName); + } + + /** + * Returns true if context exists, else false. + * @param contextName + * @return + */ + public static boolean hasContext(String contextName) { + return contexts.contains(contextName); + } + + private static boolean validateContextName(String s) { + return SourceVersion.isIdentifier(s) && !s.startsWith(COLLAPSED_KEY); + } + + private static String getContextPrefix(String contextName) { + return COLLAPSED_KEY + contextName + "__"; + } + + private static String getCollapsedVarNameForContext(String varName, String contextName) { + return getContextPrefix(contextName) + varName; + } + + private static String expandCollapsedVarName(String varName, String contextName) { + String prefix = COLLAPSED_KEY + contextName + "__"; + return varName.substring(prefix.length()); + + } + + private static void collapseContext(String contextName) { + try (PythonGC _ = PythonGC.watch()) { + PythonObject globals = Python.globals(); + PythonObject pop = globals.attr("pop"); + PythonObject keysF = globals.attr("keys"); + PythonObject keys = keysF.call(); + PythonObject keysList = Python.list(keys); + int numKeys = Python.len(keysList).toInt(); + for (int i = 0; i < numKeys; i++) { + PythonObject key = keysList.get(i); + String keyStr = key.toString(); + if (!((keyStr.startsWith("__") && keyStr.endsWith("__")) || keyStr.startsWith("__collapsed_"))) { + String collapsedKey = getCollapsedVarNameForContext(keyStr, contextName); + PythonObject val = pop.call(key); + + PythonObject pyNewKey = new PythonObject(collapsedKey); + globals.set(pyNewKey, val); + } + } + } catch (Exception pe) { + throw new RuntimeException(pe); + } + } + + private static void expandContext(String contextName) { + try (PythonGC _ = PythonGC.watch()) { + String prefix = getContextPrefix(contextName); + PythonObject globals = Python.globals(); + PythonObject pop = globals.attr("pop"); + PythonObject keysF = globals.attr("keys"); + + PythonObject keys = keysF.call(); + + PythonObject keysList = Python.list(keys); + try (PythonGC __ = PythonGC.pause()) { + int numKeys = Python.len(keysList).toInt(); + + for (int i = 0; i < numKeys; i++) { + PythonObject key = keysList.get(i); + String keyStr = key.toString(); + if (keyStr.startsWith(prefix)) { + String expandedKey = expandCollapsedVarName(keyStr, contextName); + PythonObject val = pop.call(key); + PythonObject newKey = new PythonObject(expandedKey); + globals.set(newKey, val); + } + } + } + } + } + + + /** + * Activates the specified context + * @param contextName + */ + public static void setContext(String contextName) { + if (contextName.equals(currentContext)) { + return; + } + if (!hasContext(contextName)) { + addContext(contextName); + } + + + collapseContext(currentContext); + + expandContext(contextName); + currentContext = contextName; + + } + + /** + * Activates the main context + */ + public static void setMainContext() { + setContext(MAIN_CONTEXT); + + } + + /** + * Returns the current context's name. + * @return + */ + public static String getCurrentContext() { + return currentContext; + } + + /** + * Resets the current context. 
+ */ + public static void reset() { + String tempContext = "___temp__context___"; + String currContext = currentContext; + setContext(tempContext); + deleteContext(currContext); + setContext(currContext); + } + + /** + * Deletes the specified context. + * @param contextName + */ + public static void deleteContext(String contextName) { + if (contextName.equals(currentContext)) { + throw new PythonException("Cannot delete current context!"); + } + if (!contexts.contains(contextName)) { + return; + } + String prefix = getContextPrefix(contextName); + PythonObject globals = Python.globals(); + PythonObject keysList = Python.list(globals.attr("keys").call()); + int numKeys = Python.len(keysList).toInt(); + for (int i = 0; i < numKeys; i++) { + PythonObject key = keysList.get(i); + String keyStr = key.toString(); + if (keyStr.startsWith(prefix)) { + globals.attr("__delitem__").call(key); + } + } + contexts.remove(contextName); + } + + /** + * Deletes all contexts except the main context. + */ + public static void deleteNonMainContexts() { + setContext(MAIN_CONTEXT); // will never fail + for (String c : contexts.toArray(new String[0])) { + if (!c.equals(MAIN_CONTEXT)) { + deleteContext(c); // will never fail + } + } + + } + + /** + * Returns the names of all contexts. + * @return + */ + public String[] getContexts() { + return contexts.toArray(new String[0]); + } + +} diff --git a/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonException.java b/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonException.java new file mode 100644 index 000000000..a9bbf596c --- /dev/null +++ b/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonException.java @@ -0,0 +1,52 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +package org.eclipse.python4j; + + +/** + * Thrown when an exception occurs in python land + */ +public class PythonException extends RuntimeException { + public PythonException(String message) { + super(message); + } + + private static String getExceptionString(PythonObject exception) { + try (PythonGC gc = PythonGC.watch()) { + if (Python.isinstance(exception, Python.ExceptionType())) { + String exceptionClass = Python.type(exception).attr("__name__").toString(); + String message = exception.toString(); + return exceptionClass + ": " + message; + } + return exception.toString(); + } catch (Exception e) { + throw new RuntimeException("An error occurred while trying to create a PythonException.", e); + } + } + + public PythonException(PythonObject exception) { + this(getExceptionString(exception)); + } + + public PythonException(String message, Throwable cause) { + super(message, cause); + } + + public PythonException(Throwable cause) { + super(cause); + } +} diff --git a/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonExecutioner.java b/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonExecutioner.java new file mode 100644 index 000000000..57e1a22ae --- /dev/null +++ b/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonExecutioner.java @@ -0,0 +1,342 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + + +package org.eclipse.python4j; + +import org.bytedeco.cpython.PyObject; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.concurrent.atomic.AtomicBoolean; + +import org.apache.commons.io.IOUtils; +import org.bytedeco.cpython.global.python; + +import static org.bytedeco.cpython.global.python.*; +import static org.bytedeco.cpython.global.python.PyImport_ImportModule; +import static org.bytedeco.cpython.helper.python.Py_SetPath; + + +public class PythonExecutioner { + private final static String PYTHON_EXCEPTION_KEY = "__python_exception__"; + private static AtomicBoolean init = new AtomicBoolean(false); + private final static String DEFAULT_PYTHON_PATH_PROPERTY = "org.eclipse.python4j.path"; + private final static String JAVACPP_PYTHON_APPEND_TYPE = "org.eclipse.python4j.path.append"; + private final static String DEFAULT_APPEND_TYPE = "before"; + + static { + init(); + } + + private static synchronized void init() { + if (init.get()) { + return; + } + init.set(true); + initPythonPath(); + PyEval_InitThreads(); + Py_InitializeEx(0); + } + + /** + * Sets a variable. 
+ * + * @param name + * @param value + */ + public static void setVariable(String name, PythonObject value) { + PythonGIL.assertThreadSafe(); + PyObject main = PyImport_ImportModule("__main__"); + PyObject globals = PyModule_GetDict(main); + PyDict_SetItemString(globals, name, value.getNativePythonObject()); + Py_DecRef(main); + + } + + /** + * Sets given list of PythonVariables in the interpreter. + * + * @param pyVars + */ + public static void setVariables(List pyVars) { + for (PythonVariable pyVar : pyVars) + setVariable(pyVar.getName(), pyVar.getPythonObject()); + } + + /** + * Sets given list of PythonVariables in the interpreter. + * + * @param pyVars + */ + public static void setVariables(PythonVariable... pyVars) { + setVariables(Arrays.asList(pyVars)); + } + + /** + * Gets the given list of PythonVariables from the interpreter. + * + * @param pyVars + */ + public static void getVariables(List pyVars) { + for (PythonVariable pyVar : pyVars) + pyVar.setValue(getVariable(pyVar.getName(), pyVar.getType()).getValue()); + } + + /** + * Gets the given list of PythonVariables from the interpreter. + * + * @param pyVars + */ + public static void getVariables(PythonVariable... pyVars) { + getVariables(Arrays.asList(pyVars)); + } + + /** + * Gets the variable with the given name from the interpreter. + * + * @param name + * @return + */ + public static PythonObject getVariable(String name) { + PythonGIL.assertThreadSafe(); + PyObject main = PyImport_ImportModule("__main__"); + PyObject globals = PyModule_GetDict(main); + PyObject pyName = PyUnicode_FromString(name); + try { + if (PyDict_Contains(globals, pyName) == 1) { + return new PythonObject(PyObject_GetItem(globals, pyName), false); + } + } finally { + Py_DecRef(main); + //Py_DecRef(globals); + Py_DecRef(pyName); + } + return new PythonObject(null); + } + + /** + * Gets the variable with the given name from the interpreter. + * + * @param name + * @return + */ + public static PythonVariable getVariable(String name, PythonType type) { + PythonObject val = getVariable(name); + return new PythonVariable<>(name, type, type.toJava(val)); + } + + /** + * Executes a string of code + * + * @param code + */ + public static synchronized void simpleExec(String code) { + PythonGIL.assertThreadSafe(); + int result = PyRun_SimpleStringFlags(code, null); + if (result != 0) { + throw new PythonException("Execution failed, unable to retrieve python exception."); + } + } + + private static void throwIfExecutionFailed() { + PythonObject ex = getVariable(PYTHON_EXCEPTION_KEY); + if (ex != null && !ex.isNone() && !ex.toString().isEmpty()) { + setVariable(PYTHON_EXCEPTION_KEY, PythonTypes.STR.toPython("")); + throw new PythonException(ex); + } + } + + + private static String getWrappedCode(String code) { + + try (InputStream is = PythonExecutioner.class + .getResourceAsStream("pythonexec/pythonexec.py")) { + String base = IOUtils.toString(is, StandardCharsets.UTF_8); + String indentedCode = " " + code.replace("\n", "\n "); + String out = base.replace(" pass", indentedCode); + return out; + } catch (IOException e) { + throw new IllegalStateException("Unable to read python code!", e); + } + + } + + /** + * Executes a string of code. Throws PythonException if execution fails. 
+     *
+     * @param code
+     */
+    public static void exec(String code) {
+        simpleExec(getWrappedCode(code));
+        throwIfExecutionFailed();
+    }
+
+    public static void exec(String code, List<PythonVariable> inputs, List<PythonVariable> outputs) {
+        if (inputs != null) {
+            setVariables(inputs.toArray(new PythonVariable[0]));
+        }
+        exec(code);
+        if (outputs != null) {
+            getVariables(outputs.toArray(new PythonVariable[0]));
+        }
+    }
+
+    /**
+     * Returns a list of all supported variables in the interpreter.
+     *
+     * @return
+     */
+    public static List<PythonVariable> getAllVariables() {
+        PythonGIL.assertThreadSafe();
+        List<PythonVariable> ret = new ArrayList<>();
+        PyObject main = PyImport_ImportModule("__main__");
+        PyObject globals = PyModule_GetDict(main);
+        PyObject keys = PyDict_Keys(globals);
+        PyObject keysIter = PyObject_GetIter(keys);
+        try {
+            long n = PyObject_Size(globals);
+            for (int i = 0; i < n; i++) {
+                PyObject pyKey = PyIter_Next(keysIter);
+                try {
+                    // Skip private/internal names such as __builtins__.
+                    if (!new PythonObject(pyKey, false).toString().startsWith("_")) {
+                        PyObject pyVal = PyObject_GetItem(globals, pyKey); // TODO check ref count
+                        PythonType pt;
+                        try {
+                            pt = PythonTypes.getPythonTypeForPythonObject(new PythonObject(pyVal, false));
+                        } catch (PythonException pe) {
+                            pt = null; // unsupported type: skip this variable
+                        }
+                        if (pt != null) {
+                            ret.add(
+                                    new PythonVariable<>(
+                                            new PythonObject(pyKey, false).toString(),
+                                            pt,
+                                            pt.toJava(new PythonObject(pyVal, false))
+                                    )
+                            );
+                        }
+                    }
+                } finally {
+                    Py_DecRef(pyKey);
+                }
+            }
+        } finally {
+            Py_DecRef(keysIter);
+            Py_DecRef(keys);
+            Py_DecRef(main);
+        }
+        // Return outside the finally block: returning from finally would
+        // silently discard any exception thrown in the try block above.
+        return ret;
+    }
+
+
+    /**
+     * Executes a string of code and returns a list of all supported variables.
+     *
+     * @param code
+     * @param inputs
+     * @return
+     */
+    public static List<PythonVariable> execAndReturnAllVariables(String code, List<PythonVariable> inputs) {
+        setVariables(inputs);
+        simpleExec(getWrappedCode(code));
+        return getAllVariables();
+    }
+
+    /**
+     * Executes a string of code and returns a list of all supported variables.
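+     *
+     * <p>A sketch of the expected behavior (names and values illustrative;
+     * ordering follows the interpreter's globals dict, which preserves
+     * insertion order on CPython 3.7+):</p>
+     * <pre>{@code
+     * List<PythonVariable> vars = PythonExecutioner.execAndReturnAllVariables("a = 1\nb = 'x'");
+     * // expected: ("a", int, 1L) and ("b", str, "x")
+     * }</pre>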
+     *
+     * @param code
+     * @return
+     */
+    public static List<PythonVariable> execAndReturnAllVariables(String code) {
+        simpleExec(getWrappedCode(code));
+        return getAllVariables();
+    }
+
+    private static synchronized void initPythonPath() {
+        try {
+            String path = System.getProperty(DEFAULT_PYTHON_PATH_PROPERTY);
+            if (path == null) {
+                File[] packages = cachePackages();
+
+                //// TODO: fix in javacpp
+                File sitePackagesWindows = new File(python.cachePackage(), "site-packages");
+                File[] packages2 = new File[packages.length + 1];
+                for (int i = 0; i < packages.length; i++) {
+                    //System.out.println(packages[i].getAbsolutePath());
+                    packages2[i] = packages[i];
+                }
+                packages2[packages.length] = sitePackagesWindows;
+                //System.out.println(sitePackagesWindows.getAbsolutePath());
+                packages = packages2;
+                //////////
+
+                Py_SetPath(packages);
+            } else {
+                StringBuilder sb = new StringBuilder();
+                File[] packages = cachePackages();
+                JavaCppPathType pathAppendValue = JavaCppPathType.valueOf(
+                        System.getProperty(JAVACPP_PYTHON_APPEND_TYPE, DEFAULT_APPEND_TYPE).toUpperCase());
+                switch (pathAppendValue) {
+                    case BEFORE:
+                        for (File cacheDir : packages) {
+                            sb.append(cacheDir);
+                            sb.append(java.io.File.pathSeparator);
+                        }
+                        sb.append(path);
+                        break;
+                    case AFTER:
+                        sb.append(path);
+                        for (File cacheDir : packages) {
+                            sb.append(cacheDir);
+                            sb.append(java.io.File.pathSeparator);
+                        }
+                        break;
+                    case NONE:
+                        sb.append(path);
+                        break;
+                }
+                Py_SetPath(sb.toString());
+            }
+        } catch (IOException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    private enum JavaCppPathType {
+        BEFORE, AFTER, NONE
+    }
+
+    private static File[] cachePackages() throws IOException {
+        File[] path = org.bytedeco.cpython.global.python.cachePackages();
+        path = Arrays.copyOf(path, path.length + 1);
+        path[path.length - 1] = cachePackage();
+        return path;
+    }
+
+}
diff --git a/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonGC.java b/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonGC.java
new file mode 100644
index 000000000..5531b67d3
--- /dev/null
+++ b/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonGC.java
@@ -0,0 +1,137 @@
+/*******************************************************************************
+ * Copyright (c) 2020 Konduit K.K.
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Apache License, Version 2.0 which is available at
+ * https://www.apache.org/licenses/LICENSE-2.0.
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ ******************************************************************************/
+
+
+package org.eclipse.python4j;
+
+import org.bytedeco.cpython.PyObject;
+import org.bytedeco.javacpp.Pointer;
+
+import java.io.Closeable;
+import java.util.HashSet;
+import java.util.Set;
+
+import static org.bytedeco.cpython.global.python.*;
+
+/**
+ * Wrap your code in a try-with-PythonGC block for automatic GC:
+ * ```
+ * try(PythonGC gc = PythonGC.watch()){
+ *     // your code here
+ * }
+ * ```
+ *
+ * If a PythonObject created inside such a block has to be used outside
+ * the block, use PythonGC.keep() to exclude that object from GC.
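+ * Collection can also be suspended temporarily with PythonGC.pause() and
+ * re-enabled with PythonGC.resume().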
+ *
+ * ```
+ * PythonObject pyObj;
+ *
+ * try(PythonGC gc = PythonGC.watch()){
+ *     // do stuff
+ *     pyObj = someFunction();
+ *     PythonGC.keep(pyObj);
+ * }
+ * ```
+ */
+public class PythonGC implements Closeable {
+
+    private PythonGC previousFrame = null;
+    private boolean active = true;
+    private static PythonGC currentFrame = new PythonGC();
+
+    private Set<PyObject> objects = new HashSet<>();
+
+    private boolean alreadyRegistered(PyObject pyObject) {
+        if (objects.contains(pyObject)) {
+            return true;
+        }
+        if (previousFrame == null) {
+            return false;
+        }
+        return previousFrame.alreadyRegistered(pyObject);
+    }
+
+    private void addObject(PythonObject pythonObject) {
+        if (!active) return;
+        if (Pointer.isNull(pythonObject.getNativePythonObject())) return;
+        if (alreadyRegistered(pythonObject.getNativePythonObject())) {
+            return;
+        }
+        objects.add(pythonObject.getNativePythonObject());
+    }
+
+    public static void register(PythonObject pythonObject) {
+        currentFrame.addObject(pythonObject);
+    }
+
+    public static void keep(PythonObject pythonObject) {
+        currentFrame.objects.remove(pythonObject.getNativePythonObject());
+        if (currentFrame.previousFrame != null) {
+            currentFrame.previousFrame.addObject(pythonObject);
+        }
+    }
+
+    private PythonGC() {
+    }
+
+    public static PythonGC watch() {
+        PythonGC ret = new PythonGC();
+        ret.previousFrame = currentFrame;
+        ret.active = currentFrame.active;
+        currentFrame = ret;
+        return ret;
+    }
+
+    private void collect() {
+        for (PyObject pyObject : objects) {
+            // TODO find out how globals gets collected here
+            if (pyObject.equals(Python.globals().getNativePythonObject())) continue;
+//            try{
+//                System.out.println(PythonTypes.STR.toJava(new PythonObject(pyObject, false)));
+//            }catch (Exception e){}
+            Py_DecRef(pyObject);
+        }
+        this.objects = new HashSet<>();
+    }
+
+    @Override
+    public void close() {
+        if (active) collect();
+        currentFrame = previousFrame;
+    }
+
+    public static boolean isWatching() {
+        if (!currentFrame.active) return false;
+        return currentFrame.previousFrame != null;
+    }
+
+    public static PythonGC pause() {
+        PythonGC pausedFrame = new PythonGC();
+        pausedFrame.active = false;
+        pausedFrame.previousFrame = currentFrame;
+        currentFrame = pausedFrame;
+        return pausedFrame;
+    }
+
+    public static void resume() {
+        if (currentFrame.active) {
+            throw new RuntimeException("GC not paused!");
+        }
+        currentFrame = currentFrame.previousFrame;
+    }
+}
diff --git a/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonGIL.java b/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonGIL.java
new file mode 100644
index 000000000..46b3db431
--- /dev/null
+++ b/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonGIL.java
@@ -0,0 +1,93 @@
+/*******************************************************************************
+ * Copyright (c) 2020 Konduit K.K.
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Apache License, Version 2.0 which is available at
+ * https://www.apache.org/licenses/LICENSE-2.0.
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ ******************************************************************************/
+
+package org.eclipse.python4j;
+
+
+import org.bytedeco.cpython.PyThreadState;
+
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import static org.bytedeco.cpython.global.python.*;
+
+
+public class PythonGIL implements AutoCloseable {
+    private static PyThreadState mainThreadState;
+    private static final AtomicBoolean acquired = new AtomicBoolean();
+    private boolean acquiredByMe = false;
+    private static long defaultThreadId = -1;
+
+    public static void assertThreadSafe() {
+        if (acquired.get()) {
+            return;
+        }
+        if (defaultThreadId == -1) {
+            defaultThreadId = Thread.currentThread().getId();
+        } else if (defaultThreadId != Thread.currentThread().getId()) {
+            throw new RuntimeException("Attempt to use Python4j from multiple threads without " +
+                    "acquiring GIL. Enclose your code in a try(PythonGIL gil = PythonGIL.lock()){...}" +
+                    " block to ensure that GIL is acquired in multi-threaded environments.");
+        }
+    }
+
+    static {
+        new PythonExecutioner();
+    }
+
+    private PythonGIL() {
+        while (acquired.get()) {
+            try {
+                Thread.sleep(10);
+            } catch (Exception e) {
+                throw new RuntimeException(e);
+            }
+        }
+        acquire();
+        acquired.set(true);
+        acquiredByMe = true;
+    }
+
+    @Override
+    public void close() {
+        if (acquiredByMe) {
+            release();
+            acquired.set(false);
+            acquiredByMe = false;
+        }
+    }
+
+    public static synchronized PythonGIL lock() {
+        return new PythonGIL();
+    }
+
+    private static synchronized void acquire() {
+        // Detach the current thread state and attach a fresh thread state
+        // for the calling thread.
+        mainThreadState = PyEval_SaveThread();
+        PyThreadState ts = PyThreadState_New(mainThreadState.interp());
+        PyEval_RestoreThread(ts);
+        PyThreadState_Swap(ts);
+    }
+
+    private static void release() { // do not synchronize!
+        PyEval_SaveThread();
+        PyEval_RestoreThread(mainThreadState);
+    }
+}
diff --git a/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonJob.java b/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonJob.java
new file mode 100644
index 000000000..cdbb1b81d
--- /dev/null
+++ b/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonJob.java
@@ -0,0 +1,175 @@
+/*******************************************************************************
+ * Copyright (c) 2020 Konduit K.K.
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Apache License, Version 2.0 which is available at
+ * https://www.apache.org/licenses/LICENSE-2.0.
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ ******************************************************************************/
+
+package org.eclipse.python4j;
+
+
+import lombok.Builder;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+
+import javax.annotation.Nonnull;
+import java.util.List;
+
+
+@Data
+@NoArgsConstructor
+/**
+ * PythonJob is an abstraction for executing multiple Python scripts in a
+ * multi-threaded, stateful environment. The setup-and-run mode allows your
+ * "setup" code (imports, model loading etc.) to be executed only once.
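+ *
+ * <p>A usage sketch in setup-and-run mode (script and variable names are
+ * illustrative only):</p>
+ * <pre>{@code
+ * String code = "def setup():\n    pass\n\ndef run(a):\n    return {'b': a + 1}";
+ * PythonJob job = new PythonJob("myJob", code, true);
+ *
+ * List<PythonVariable> inputs = new ArrayList<>();
+ * inputs.add(new PythonVariable<>("a", PythonTypes.INT, 1));
+ * List<PythonVariable> outputs = new ArrayList<>();
+ * outputs.add(new PythonVariable<>("b", PythonTypes.INT));
+ * job.exec(inputs, outputs); // outputs.get(0).getValue() is expected to be 2L
+ * }</pre>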
+ */ +public class PythonJob { + + private String code; + private String name; + private String context; + private boolean setupRunMode; + private PythonObject runF; + + static { + new PythonExecutioner(); + } + + @Builder + /** + * @param name Name for the python job. + * @param code Python code. + * @param setupRunMode If true, the python code is expected to have two methods: setup(), which takes no arguments, + * and run() which takes some or no arguments. setup() method is executed once, + * and the run() method is called with the inputs(if any) per transaction, and is expected to return a dictionary + * mapping from output variable names (str) to output values. + * If false, the full script is run on each transaction and the output variables are obtained from the global namespace + * after execution. + */ + public PythonJob(@Nonnull String name, @Nonnull String code, boolean setupRunMode){ + this.name = name; + this.code = code; + this.setupRunMode = setupRunMode; + context = "__job_" + name; + if (PythonContextManager.hasContext(context)) { + throw new PythonException("Unable to create python job " + name + ". Context " + context + " already exists!"); + } + if (setupRunMode) setup(); + } + + + /** + * Clears all variables in current context and calls setup() + */ + public void clearState(){ + String context = this.context; + PythonContextManager.setContext("main"); + PythonContextManager.deleteContext(context); + this.context = context; + setup(); + } + + public void setup(){ + try (PythonGIL gil = PythonGIL.lock()) { + PythonContextManager.setContext(context); + PythonObject runF = PythonExecutioner.getVariable("run"); + if (runF == null || runF.isNone() || !Python.callable(runF)) { + PythonExecutioner.exec(code); + runF = PythonExecutioner.getVariable("run"); + } + if (runF.isNone() || !Python.callable(runF)) { + throw new PythonException("run() method not found! 
" + + "If a PythonJob is created with 'setup and run' " + + "mode enabled, the associated python code is " + + "expected to contain a run() method " + + "(with or without arguments)."); + } + this.runF = runF; + PythonObject setupF = PythonExecutioner.getVariable("setup"); + if (!setupF.isNone()) { + setupF.call(); + } + } + } + + public void exec(List inputs, List outputs) { + try (PythonGIL gil = PythonGIL.lock()) { + try (PythonGC _ = PythonGC.watch()) { + PythonContextManager.setContext(context); + + if (!setupRunMode) { + + PythonExecutioner.exec(code, inputs, outputs); + + return; + } + PythonExecutioner.setVariables(inputs); + + PythonObject inspect = Python.importModule("inspect"); + PythonObject getfullargspec = inspect.attr("getfullargspec"); + PythonObject argspec = getfullargspec.call(runF); + PythonObject argsList = argspec.attr("args"); + PythonObject runargs = Python.dict(); + int argsCount = Python.len(argsList).toInt(); + for (int i = 0; i < argsCount; i++) { + PythonObject arg = argsList.get(i); + PythonObject val = Python.globals().get(arg); + if (val.isNone()) { + throw new PythonException("Input value not received for run() argument: " + arg.toString()); + } + runargs.set(arg, val); + } + PythonObject outDict = runF.callWithKwargs(runargs); + PythonObject globals = Python.globals(); + PythonObject updateF = globals.attr("update"); + updateF.call(outDict); + PythonExecutioner.getVariables(outputs); + } + } + + } + + public List execAndReturnAllVariables(List inputs){ + try (PythonGIL gil = PythonGIL.lock()) { + try (PythonGC _ = PythonGC.watch()) { + PythonContextManager.setContext(context); + if (!setupRunMode) { + return PythonExecutioner.execAndReturnAllVariables(code, inputs); + } + PythonExecutioner.setVariables(inputs); + PythonObject inspect = Python.importModule("inspect"); + PythonObject getfullargspec = inspect.attr("getfullargspec"); + PythonObject argspec = getfullargspec.call(runF); + PythonObject argsList = argspec.attr("args"); + PythonObject runargs = Python.dict(); + int argsCount = Python.len(argsList).toInt(); + for (int i = 0; i < argsCount; i++) { + PythonObject arg = argsList.get(i); + PythonObject val = Python.globals().get(arg); + if (val.isNone()) { + throw new PythonException("Input value not received for run() argument: " + arg.toString()); + } + runargs.set(arg, val); + } + + PythonObject outDict = runF.callWithKwargs(runargs); + PythonObject globals = Python.globals(); + PythonObject updateF = globals.attr("update"); + updateF.call(outDict); + return PythonExecutioner.getAllVariables(); + } + + } + } + + +} diff --git a/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonObject.java b/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonObject.java new file mode 100644 index 000000000..f8ec17ed9 --- /dev/null +++ b/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonObject.java @@ -0,0 +1,244 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ ******************************************************************************/
+
+package org.eclipse.python4j;
+
+
+import org.bytedeco.cpython.PyObject;
+import org.bytedeco.javacpp.Pointer;
+
+import java.util.*;
+
+import static org.bytedeco.cpython.global.python.*;
+
+public class PythonObject {
+
+    static {
+        new PythonExecutioner();
+    }
+
+    private boolean owned = true;
+    private PyObject nativePythonObject;
+
+
+    public PythonObject(PyObject nativePythonObject, boolean owned) {
+        PythonGIL.assertThreadSafe();
+        this.nativePythonObject = nativePythonObject;
+        this.owned = owned;
+        if (owned && nativePythonObject != null) {
+            PythonGC.register(this);
+        }
+    }
+
+    public PythonObject(PyObject nativePythonObject) {
+        PythonGIL.assertThreadSafe();
+        this.nativePythonObject = nativePythonObject;
+        if (nativePythonObject != null) {
+            PythonGC.register(this);
+        }
+    }
+
+    public PyObject getNativePythonObject() {
+        return nativePythonObject;
+    }
+
+    public String toString() {
+        return PythonTypes.STR.toJava(this);
+    }
+
+    public boolean isNone() {
+        if (nativePythonObject == null || Pointer.isNull(nativePythonObject)) {
+            return true;
+        }
+        try (PythonGC _ = PythonGC.pause()) {
+            PythonObject type = Python.type(this);
+            // Compare against the string form of NoneType; reuse the already
+            // fetched type object so only one reference has to be released.
+            boolean ret = type.toString().equals("<class 'NoneType'>") && toString().equals("None");
+            Py_DecRef(type.nativePythonObject);
+            return ret;
+        }
+    }
+
+    public void del() {
+        PythonGIL.assertThreadSafe();
+        if (owned && nativePythonObject != null && !PythonGC.isWatching()) {
+            Py_DecRef(nativePythonObject);
+            nativePythonObject = null;
+        }
+    }
+
+    public PythonObject callWithArgs(PythonObject args) {
+        return callWithArgsAndKwargs(args, null);
+    }
+
+    public PythonObject callWithKwargs(PythonObject kwargs) {
+        if (!Python.callable(this)) {
+            throw new PythonException("Object is not callable: " + toString());
+        }
+        PyObject tuple = PyTuple_New(0);
+        PyObject dict = kwargs.nativePythonObject;
+        if (PyObject_IsInstance(dict, new PyObject(PyDict_Type())) != 1) {
+            throw new PythonException("Expected kwargs to be dict. Received: " + kwargs.toString());
+        }
+        PythonObject ret = new PythonObject(PyObject_Call(nativePythonObject, tuple, dict));
+        Py_DecRef(tuple);
+        return ret;
+    }
+
+    public PythonObject callWithArgsAndKwargs(PythonObject args, PythonObject kwargs) {
+        PythonGIL.assertThreadSafe();
+        PyObject tuple = null;
+        boolean ownsTuple = false;
+        try {
+            if (!Python.callable(this)) {
+                throw new PythonException("Object is not callable: " + toString());
+            }
+            if (PyObject_IsInstance(args.nativePythonObject, new PyObject(PyTuple_Type())) == 1) {
+                tuple = args.nativePythonObject;
+            } else if (PyObject_IsInstance(args.nativePythonObject, new PyObject(PyList_Type())) == 1) {
+                tuple = PyList_AsTuple(args.nativePythonObject);
+                ownsTuple = true;
+            } else {
+                throw new PythonException("Expected args to be tuple or list. Received: " + args.toString());
+            }
+            if (kwargs != null && PyObject_IsInstance(kwargs.nativePythonObject, new PyObject(PyDict_Type())) != 1) {
+                throw new PythonException("Expected kwargs to be dict. Received: " + kwargs.toString());
+            }
+            return new PythonObject(PyObject_Call(nativePythonObject, tuple, kwargs == null ? null : kwargs.nativePythonObject));
+        } finally {
+            if (ownsTuple) Py_DecRef(tuple);
+        }
+    }
+
+
+    public PythonObject call(Object...
args) {
+        return callWithArgsAndKwargs(Arrays.asList(args), null);
+    }
+
+    public PythonObject callWithArgs(List args) {
+        // Delegate to the List/Map overload directly; routing through the
+        // varargs call(...) would wrap the list itself as a single argument.
+        return callWithArgsAndKwargs(args, null);
+    }
+
+    public PythonObject callWithKwargs(Map kwargs) {
+        // Same as above: delegate directly to the List/Map overload.
+        return callWithArgsAndKwargs(null, kwargs);
+    }
+
+    public PythonObject callWithArgsAndKwargs(List args, Map kwargs) {
+        PythonGIL.assertThreadSafe();
+        try (PythonGC _ = PythonGC.watch()) {
+            if (!Python.callable(this)) {
+                throw new PythonException("Object is not callable: " + toString());
+            }
+            PythonObject pyArgs;
+            PythonObject pyKwargs;
+            if (args == null) {
+                pyArgs = new PythonObject(PyTuple_New(0));
+            } else {
+                PythonObject argsList = PythonTypes.convert(args);
+                pyArgs = new PythonObject(PyList_AsTuple(argsList.getNativePythonObject()));
+            }
+            if (kwargs == null) {
+                pyKwargs = null;
+            } else {
+                pyKwargs = PythonTypes.convert(kwargs);
+            }
+            PythonObject ret = new PythonObject(
+                    PyObject_Call(
+                            nativePythonObject,
+                            pyArgs.nativePythonObject,
+                            pyKwargs == null ? null : pyKwargs.nativePythonObject
+                    )
+            );
+            PythonGC.keep(ret);
+            return ret;
+        }
+    }
+
+
+    public PythonObject attr(String attrName) {
+        PythonGIL.assertThreadSafe();
+        return new PythonObject(PyObject_GetAttrString(nativePythonObject, attrName));
+    }
+
+
+    public PythonObject(Object javaObject) {
+        PythonGIL.assertThreadSafe();
+        if (javaObject instanceof PythonObject) {
+            owned = false;
+            nativePythonObject = ((PythonObject) javaObject).nativePythonObject;
+        } else {
+            try (PythonGC _ = PythonGC.pause()) {
+                nativePythonObject = PythonTypes.convert(javaObject).getNativePythonObject();
+            }
+            PythonGC.register(this);
+        }
+    }
+
+    public int toInt() {
+        return PythonTypes.INT.toJava(this).intValue();
+    }
+
+    public long toLong() {
+        return PythonTypes.INT.toJava(this);
+    }
+
+    public float toFloat() {
+        return PythonTypes.FLOAT.toJava(this).floatValue();
+    }
+
+    public double toDouble() {
+        return PythonTypes.FLOAT.toJava(this);
+    }
+
+    public boolean toBoolean() {
+        return PythonTypes.BOOL.toJava(this);
+    }
+
+    public List toList() {
+        return PythonTypes.LIST.toJava(this);
+    }
+
+    public Map toMap() {
+        return PythonTypes.DICT.toJava(this);
+    }
+
+    public PythonObject get(int key) {
+        PythonGIL.assertThreadSafe();
+        return new PythonObject(PyObject_GetItem(nativePythonObject, PyLong_FromLong(key)));
+    }
+
+    public PythonObject get(String key) {
+        PythonGIL.assertThreadSafe();
+        return new PythonObject(PyObject_GetItem(nativePythonObject, PyUnicode_FromString(key)));
+    }
+
+    public PythonObject get(PythonObject key) {
+        PythonGIL.assertThreadSafe();
+        return new PythonObject(PyObject_GetItem(nativePythonObject, key.nativePythonObject));
+    }
+
+    public void set(PythonObject key, PythonObject value) {
+        PythonGIL.assertThreadSafe();
+        PyObject_SetItem(nativePythonObject, key.nativePythonObject, value.nativePythonObject);
+    }
+
+}
diff --git a/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonType.java b/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonType.java
new file mode 100644
index 000000000..b4806aa37
--- /dev/null
+++ b/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonType.java
@@ -0,0 +1,47 @@
+/*******************************************************************************
+ * Copyright (c) 2020 Konduit K.K.
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Apache License, Version 2.0 which is available at
+ * https://www.apache.org/licenses/LICENSE-2.0.
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ ******************************************************************************/
+
+package org.eclipse.python4j;
+
+
+public abstract class PythonType<T> {
+
+    private final String name;
+    private final Class<T> javaType;
+
+    public PythonType(String name, Class<T> javaType) {
+        this.name = name;
+        this.javaType = javaType;
+    }
+
+    public T adapt(Object javaObject) throws PythonException {
+        return (T) javaObject;
+    }
+
+    public abstract T toJava(PythonObject pythonObject);
+
+    public abstract PythonObject toPython(T javaObject);
+
+    public boolean accepts(Object javaObject) {
+        return javaType.isAssignableFrom(javaObject.getClass());
+    }
+
+    public String getName() {
+        return name;
+    }
+
+}
diff --git a/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonTypes.java b/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonTypes.java
new file mode 100644
index 000000000..0dc20f712
--- /dev/null
+++ b/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonTypes.java
@@ -0,0 +1,344 @@
+/*******************************************************************************
+ * Copyright (c) 2020 Konduit K.K.
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Apache License, Version 2.0 which is available at
+ * https://www.apache.org/licenses/LICENSE-2.0.
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ ******************************************************************************/
+
+package org.eclipse.python4j;
+
+
+import org.bytedeco.cpython.PyObject;
+
+import java.util.*;
+
+import static org.bytedeco.cpython.global.python.*;
+
+public class PythonTypes {
+
+
+    private static List<PythonType> getPrimitiveTypes() {
+        return Arrays.<PythonType>asList(STR, INT, FLOAT, BOOL);
+    }
+
+    private static List<PythonType> getCollectionTypes() {
+        return Arrays.<PythonType>asList(LIST, DICT);
+    }
+
+    private static List<PythonType> getExternalTypes() {
+        //TODO service loader
+        return new ArrayList<>();
+    }
+
+    public static List<PythonType> get() {
+        List<PythonType> ret = new ArrayList<>();
+        ret.addAll(getPrimitiveTypes());
+        ret.addAll(getCollectionTypes());
+        ret.addAll(getExternalTypes());
+        return ret;
+    }
+
+    public static PythonType get(String name) {
+        for (PythonType pt : get()) {
+            if (pt.getName().equals(name)) { // TODO use map instead?
+                return pt;
+            }
+        }
+        throw new PythonException("Unknown python type: " + name);
+    }
+
+    public static PythonType getPythonTypeForJavaObject(Object javaObject) {
+        for (PythonType pt : get()) {
+            if (pt.accepts(javaObject)) {
+                return pt;
+            }
+        }
+        throw new PythonException("Unable to find python type for java type: " + javaObject.getClass());
+    }
+
+    public static PythonType getPythonTypeForPythonObject(PythonObject pythonObject) {
+        PyObject pyType = PyObject_Type(pythonObject.getNativePythonObject());
+        try {
+            String pyTypeStr = PythonTypes.STR.toJava(new PythonObject(pyType, false));
+
+            for (PythonType pt : get()) {
+                // str(type(x)) has the form "<class 'name'>"
+                String pyTypeStr2 = "<class '" + pt.getName() + "'>";
+                if (pyTypeStr.equals(pyTypeStr2)) {
+                    return pt;
+                }
+            }
+            throw new PythonException("Unable to find converter for python object of type " + pyTypeStr);
+        } finally {
+            Py_DecRef(pyType);
+        }
+    }
+
+    public static PythonObject convert(Object javaObject) {
+        PythonType pt = getPythonTypeForJavaObject(javaObject);
+        return pt.toPython(pt.adapt(javaObject));
+    }
+
+    public static final PythonType<String> STR = new PythonType<String>("str", String.class) {
+
+        @Override
+        public String adapt(Object javaObject) {
+            if (javaObject instanceof String) {
+                return (String) javaObject;
+            }
+            throw new PythonException("Cannot cast object of type " + javaObject.getClass().getName() + " to String");
+        }
+
+        @Override
+        public String toJava(PythonObject pythonObject) {
+            PythonGIL.assertThreadSafe();
+            PyObject repr = PyObject_Str(pythonObject.getNativePythonObject());
+            PyObject str = PyUnicode_AsEncodedString(repr, "utf-8", "~E~");
+            String jstr = PyBytes_AsString(str).getString();
+            Py_DecRef(repr);
+            Py_DecRef(str);
+            return jstr;
+        }
+
+        @Override
+        public PythonObject toPython(String javaObject) {
+            return new PythonObject(PyUnicode_FromString(javaObject));
+        }
+    };
+
+    public static final PythonType<Long> INT = new PythonType<Long>("int", Long.class) {
+        @Override
+        public Long adapt(Object javaObject) {
+            if (javaObject instanceof Number) {
+                return ((Number) javaObject).longValue();
+            }
+            throw new PythonException("Cannot cast object of type " + javaObject.getClass().getName() + " to Long");
+        }
+
+        @Override
+        public Long toJava(PythonObject pythonObject) {
+            PythonGIL.assertThreadSafe();
+            long val = PyLong_AsLong(pythonObject.getNativePythonObject());
+            if (val == -1 && PyErr_Occurred() != null) {
+                throw new PythonException("Could not convert value to int: " + pythonObject.toString());
+            }
+            return val;
+        }
+
+        @Override
+        public boolean accepts(Object javaObject) {
+            return (javaObject instanceof Integer) || (javaObject instanceof Long);
+        }
+
+        @Override
+        public PythonObject toPython(Long javaObject) {
+            return new PythonObject(PyLong_FromLong(javaObject));
+        }
+    };
+
+    public static final PythonType<Double> FLOAT = new PythonType<Double>("float", Double.class) {
+
+        @Override
+        public Double adapt(Object javaObject) {
+            if (javaObject instanceof Number) {
+                return ((Number) javaObject).doubleValue();
+            }
+            throw new PythonException("Cannot cast object of type " + javaObject.getClass().getName() + " to Double");
+        }
+
+        @Override
+        public Double toJava(PythonObject pythonObject) {
+            PythonGIL.assertThreadSafe();
+            double val = PyFloat_AsDouble(pythonObject.getNativePythonObject());
+            if (val == -1 && PyErr_Occurred() != null) {
+                throw new PythonException("Could not convert value to float: " + pythonObject.toString());
+            }
+            return val;
+        }
+
+        @Override
+        public boolean accepts(Object javaObject) {
+            return (javaObject instanceof Float) || (javaObject instanceof Double);
+        }
+
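+        // PyFloat_FromDouble returns a new reference; the PythonObject
+        // constructor registers it with PythonGC for collection.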
@Override + public PythonObject toPython(Double javaObject) { + return new PythonObject(PyFloat_FromDouble(javaObject)); + } + }; + + + public static final PythonType BOOL = new PythonType("bool", Boolean.class) { + + @Override + public Boolean adapt(Object javaObject) { + if (javaObject instanceof Boolean) { + return (Boolean) javaObject; + } + throw new PythonException("Cannot cast object of type " + javaObject.getClass().getName() + " to Boolean"); + } + + @Override + public Boolean toJava(PythonObject pythonObject) { + PythonGIL.assertThreadSafe(); + PyObject builtins = PyImport_ImportModule("builtins"); + PyObject boolF = PyObject_GetAttrString(builtins, "bool"); + + PythonObject bool = new PythonObject(boolF, false).call(pythonObject); + boolean ret = PyLong_AsLong(bool.getNativePythonObject()) > 0; + bool.del(); + Py_DecRef(boolF); + Py_DecRef(builtins); + return ret; + } + + @Override + public PythonObject toPython(Boolean javaObject) { + return new PythonObject(PyBool_FromLong(javaObject ? 1 : 0)); + } + }; + + + public static final PythonType LIST = new PythonType("list", List.class) { + + @Override + public List adapt(Object javaObject) { + if (javaObject instanceof List) { + return (List) javaObject; + } else if (javaObject instanceof Object[]) { + return Arrays.asList((Object[]) javaObject); + } else { + throw new PythonException("Cannot cast object of type " + javaObject.getClass().getName() + " to List"); + } + } + + @Override + public List toJava(PythonObject pythonObject) { + PythonGIL.assertThreadSafe(); + List ret = new ArrayList(); + long n = PyObject_Size(pythonObject.getNativePythonObject()); + if (n < 0) { + throw new PythonException("Object cannot be interpreted as a List"); + } + for (long i = 0; i < n; i++) { + PyObject pyIndex = PyLong_FromLong(i); + PyObject pyItem = PyObject_GetItem(pythonObject.getNativePythonObject(), + pyIndex); + Py_DecRef(pyIndex); + PythonType pyItemType = getPythonTypeForPythonObject(new PythonObject(pyItem, false)); + ret.add(pyItemType.toJava(new PythonObject(pyItem, false))); + Py_DecRef(pyItem); + } + return ret; + } + + @Override + public PythonObject toPython(List javaObject) { + PythonGIL.assertThreadSafe(); + PyObject pyList = PyList_New(javaObject.size()); + for (int i = 0; i < javaObject.size(); i++) { + Object item = javaObject.get(i); + PythonObject pyItem; + boolean owned; + if (item instanceof PythonObject) { + pyItem = (PythonObject) item; + owned = false; + } else if (item instanceof PyObject) { + pyItem = new PythonObject((PyObject) item, false); + owned = false; + } else { + pyItem = PythonTypes.convert(item); + owned = true; + } + Py_IncRef(pyItem.getNativePythonObject()); // reference will be stolen by PyList_SetItem() + PyList_SetItem(pyList, i, pyItem.getNativePythonObject()); + if (owned) pyItem.del(); + } + return new PythonObject(pyList); + } + }; + + public static final PythonType DICT = new PythonType("dict", Map.class) { + + @Override + public Map adapt(Object javaObject) { + if (javaObject instanceof Map) { + return (Map) javaObject; + } + throw new PythonException("Cannot cast object of type " + javaObject.getClass().getName() + " to Map"); + } + + @Override + public Map toJava(PythonObject pythonObject) { + PythonGIL.assertThreadSafe(); + HashMap ret = new HashMap(); + PyObject dictType = new PyObject(PyDict_Type()); + if (PyObject_IsInstance(pythonObject.getNativePythonObject(), dictType) != 1) { + throw new PythonException("Expected dict, received: " + pythonObject.toString()); + } + + PyObject keys = 
PyDict_Keys(pythonObject.getNativePythonObject()); + PyObject keysIter = PyObject_GetIter(keys); + PyObject vals = PyDict_Values(pythonObject.getNativePythonObject()); + PyObject valsIter = PyObject_GetIter(vals); + try { + long n = PyObject_Size(pythonObject.getNativePythonObject()); + for (long i = 0; i < n; i++) { + PythonObject pyKey = new PythonObject(PyIter_Next(keysIter), false); + PythonObject pyVal = new PythonObject(PyIter_Next(valsIter), false); + PythonType pyKeyType = getPythonTypeForPythonObject(pyKey); + PythonType pyValType = getPythonTypeForPythonObject(pyVal); + ret.put(pyKeyType.toJava(pyKey), pyValType.toJava(pyVal)); + Py_DecRef(pyKey.getNativePythonObject()); + Py_DecRef(pyVal.getNativePythonObject()); + } + } finally { + Py_DecRef(keysIter); + Py_DecRef(valsIter); + Py_DecRef(keys); + Py_DecRef(vals); + } + return ret; + } + + @Override + public PythonObject toPython(Map javaObject) { + PythonGIL.assertThreadSafe(); + PyObject pyDict = PyDict_New(); + for (Object k : javaObject.keySet()) { + PythonObject pyKey; + if (k instanceof PythonObject) { + pyKey = (PythonObject) k; + } else if (k instanceof PyObject) { + pyKey = new PythonObject((PyObject) k); + } else { + pyKey = PythonTypes.convert(k); + } + Object v = javaObject.get(k); + PythonObject pyVal; + pyVal = PythonTypes.convert(v); + int errCode = PyDict_SetItem(pyDict, pyKey.getNativePythonObject(), pyVal.getNativePythonObject()); + if (errCode != 0) { + String keyStr = pyKey.toString(); + pyKey.del(); + pyVal.del(); + throw new PythonException("Unable to create python dictionary. Unhashable key: " + keyStr); + } + pyKey.del(); + pyVal.del(); + } + return new PythonObject(pyDict); + } + }; +} diff --git a/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonVariable.java b/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonVariable.java new file mode 100644 index 000000000..3deb4d2e7 --- /dev/null +++ b/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonVariable.java @@ -0,0 +1,64 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +package org.eclipse.python4j; + +@lombok.Data +public class PythonVariable { + + private String name; + private String type; + private T value; + + private static boolean validateVariableName(String s) { + if (s.isEmpty()) return false; + if (!Character.isJavaIdentifierStart(s.charAt(0))) return false; + for (int i = 1; i < s.length(); i++) + if (!Character.isJavaIdentifierPart(s.charAt(i))) + return false; + return true; + } + + public PythonVariable(String name, PythonType type, Object value) { + if (!validateVariableName(name)) { + throw new PythonException("Invalid identifier: " + name); + } + this.name = name; + this.type = type.getName(); + setValue(value); + } + + public PythonVariable(String name, PythonType type) { + this(name, type, null); + } + + public PythonType getType() { + return PythonTypes.get(this.type); + } + + public T getValue() { + return this.value; + } + + public void setValue(Object value) { + this.value = value == null ? null : getType().adapt(value); + } + + public PythonObject getPythonObject() { + return getType().toPython(value); + } + +} diff --git a/python4j/python4j-core/src/main/resources/org/eclipse/python4j/pythonexec/pythonexec.py b/python4j/python4j-core/src/main/resources/org/eclipse/python4j/pythonexec/pythonexec.py new file mode 100644 index 000000000..7ae8f6734 --- /dev/null +++ b/python4j/python4j-core/src/main/resources/org/eclipse/python4j/pythonexec/pythonexec.py @@ -0,0 +1,36 @@ +# /******************************************************************************* +# * Copyright (c) 2019 Konduit K.K. +# * +# * This program and the accompanying materials are made available under the +# * terms of the Apache License, Version 2.0 which is available at +# * https://www.apache.org/licenses/LICENSE-2.0. +# * +# * Unless required by applicable law or agreed to in writing, software +# * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# * License for the specific language governing permissions and limitations +# * under the License. +# * +# * SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************/ + +import sys +import traceback +import json +import inspect + +__python_exception__ = "" +try: + pass + sys.stdout.flush() + sys.stderr.flush() +except Exception as ex: + __python_exception__ = ex + try: + exc_info = sys.exc_info() + finally: + print(ex) + traceback.print_exception(*exc_info) + sys.stdout.flush() + sys.stderr.flush() + diff --git a/python4j/python4j-core/src/test/java/PythonBasicExecutionTest.java b/python4j/python4j-core/src/test/java/PythonBasicExecutionTest.java new file mode 100644 index 000000000..9f5b43dba --- /dev/null +++ b/python4j/python4j-core/src/test/java/PythonBasicExecutionTest.java @@ -0,0 +1,108 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + + +import org.eclipse.python4j.*; +import org.junit.Assert; +import org.junit.Test; + +import javax.annotation.concurrent.NotThreadSafe; +import java.util.*; + +@NotThreadSafe +public class PythonBasicExecutionTest { + + @Test + public void testSimpleExec() { + String code = "print('Hello World')"; + PythonExecutioner.exec(code); + } + + @Test + public void testBadCode() throws Exception { + try { + String code = "printx('Hello world')"; + PythonExecutioner.exec(code); + } catch (Exception e) { + Assert.assertEquals("NameError: name 'printx' is not defined", e.getMessage()); + return; + } + throw new Exception("Bad code did not throw!"); + } + + @Test + public void testExecWithInputs() { + List inputs = new ArrayList<>(); + inputs.add(new PythonVariable<>("x", PythonTypes.STR, "Hello ")); + inputs.add(new PythonVariable<>("y", PythonTypes.STR, "World")); + String code = "print(x + y)"; + PythonExecutioner.exec(code, inputs, null); + + } + + @Test + public void testExecWithInputsAndOutputs() { + List inputs = new ArrayList<>(); + inputs.add(new PythonVariable<>("x", PythonTypes.STR, "Hello ")); + inputs.add(new PythonVariable<>("y", PythonTypes.STR, "World")); + PythonVariable out = new PythonVariable<>("z", PythonTypes.STR); + String code = "z = x + y"; + PythonExecutioner.exec(code, inputs, Collections.singletonList(out)); + Assert.assertEquals("Hello World", out.getValue()); + + } + + @Test + public void testExecAndReturnAllVariables() { + PythonContextManager.reset(); + String code = "a = 5\nb = '10'\nc = 20.0"; + List vars = PythonExecutioner.execAndReturnAllVariables(code); + + Assert.assertEquals("a", vars.get(0).getName()); + Assert.assertEquals(PythonTypes.INT, vars.get(0).getType()); + Assert.assertEquals(5L, (long) vars.get(0).getValue()); + + Assert.assertEquals("b", vars.get(1).getName()); + Assert.assertEquals(PythonTypes.STR, vars.get(1).getType()); + Assert.assertEquals("10", vars.get(1).getValue().toString()); + + Assert.assertEquals("c", vars.get(2).getName()); + Assert.assertEquals(PythonTypes.FLOAT, vars.get(2).getType()); + Assert.assertEquals(20.0, (double) vars.get(2).getValue(), 1e-5); + } + + @Test + public void testExecWithInputsAndReturnAllVariables() { + PythonContextManager.reset(); + List inputs = new ArrayList<>(); + inputs.add(new PythonVariable<>("a", PythonTypes.INT, 5)); + String code = "b = '10'\nc = 20.0 + a"; + List vars = PythonExecutioner.execAndReturnAllVariables(code, inputs); + + Assert.assertEquals("a", vars.get(0).getName()); + Assert.assertEquals(PythonTypes.INT, vars.get(0).getType()); + Assert.assertEquals(5L, (long) vars.get(0).getValue()); + + Assert.assertEquals("b", vars.get(1).getName()); + Assert.assertEquals(PythonTypes.STR, vars.get(1).getType()); + Assert.assertEquals("10", vars.get(1).getValue().toString()); + + Assert.assertEquals("c", vars.get(2).getName()); + Assert.assertEquals(PythonTypes.FLOAT, vars.get(2).getType()); + Assert.assertEquals(25.0, (double) vars.get(2).getValue(), 1e-5); + } + +} diff --git a/python4j/python4j-core/src/test/java/PythonCollectionsTest.java b/python4j/python4j-core/src/test/java/PythonCollectionsTest.java new file mode 100644 index 000000000..7e63d9d28 --- /dev/null +++ b/python4j/python4j-core/src/test/java/PythonCollectionsTest.java @@ -0,0 +1,62 @@ 
+/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + + +import org.eclipse.python4j.PythonException; +import org.eclipse.python4j.PythonObject; +import org.eclipse.python4j.PythonTypes; +import org.junit.Assert; +import org.junit.Test; + +import java.util.*; + + +@javax.annotation.concurrent.NotThreadSafe +public class PythonCollectionsTest { + + + @Test + public void testPythonDictFromMap() throws PythonException { + Map map = new HashMap(); + map.put("a", 1); + map.put(1, "a"); + map.put("list1", Arrays.asList(1, 2.0, 3, 4f)); + Map innerMap = new HashMap(); + innerMap.put("b", 2); + innerMap.put(2, "b"); + map.put("innermap", innerMap); + map.put("list2", Arrays.asList(4, "5", innerMap, false, true)); + PythonObject dict = PythonTypes.convert(map); + Map map2 = PythonTypes.DICT.toJava(dict); + Assert.assertEquals(map.toString(), map2.toString()); + } + + @Test + public void testPythonListFromList() throws PythonException{ + List list = new ArrayList<>(); + list.add(1); + list.add("2"); + list.add(Arrays.asList("a", 1.0, 2f, 10, true, false)); + Map map = new HashMap(); + map.put("a", 1); + map.put(1, "a"); + map.put("list1", Arrays.asList(1, 2.0, 3, 4f)); + list.add(map); + PythonObject dict = PythonTypes.convert(list); + List list2 = PythonTypes.LIST.toJava(dict); + Assert.assertEquals(list.toString(), list2.toString()); + } +} diff --git a/python4j/python4j-core/src/test/java/PythonContextManagerTest.java b/python4j/python4j-core/src/test/java/PythonContextManagerTest.java new file mode 100644 index 000000000..a4451764c --- /dev/null +++ b/python4j/python4j-core/src/test/java/PythonContextManagerTest.java @@ -0,0 +1,51 @@ + +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + + +import org.eclipse.python4j.Python; +import org.eclipse.python4j.PythonContextManager; +import org.eclipse.python4j.PythonExecutioner; +import org.junit.Assert; +import org.junit.Test; +import javax.annotation.concurrent.NotThreadSafe; + +@NotThreadSafe +public class PythonContextManagerTest { + + @Test + public void testInt() throws Exception{ + Python.setContext("context1"); + Python.exec("a = 1"); + Python.setContext("context2"); + Python.exec("a = 2"); + Python.setContext("context3"); + Python.exec("a = 3"); + + + Python.setContext("context1"); + Assert.assertEquals(1, PythonExecutioner.getVariable("a").toInt()); + + Python.setContext("context2"); + Assert.assertEquals(2, PythonExecutioner.getVariable("a").toInt()); + + Python.setContext("context3"); + Assert.assertEquals(3, PythonExecutioner.getVariable("a").toInt()); + + PythonContextManager.deleteNonMainContexts(); + } + +} diff --git a/python4j/python4j-core/src/test/java/PythonGCTest.java b/python4j/python4j-core/src/test/java/PythonGCTest.java new file mode 100644 index 000000000..f8c6ecba5 --- /dev/null +++ b/python4j/python4j-core/src/test/java/PythonGCTest.java @@ -0,0 +1,54 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +import org.eclipse.python4j.Python; +import org.eclipse.python4j.PythonGC; +import org.eclipse.python4j.PythonObject; +import org.junit.Assert; +import org.junit.Test; + +import javax.annotation.concurrent.NotThreadSafe; + + +@NotThreadSafe +public class PythonGCTest { + + @Test + public void testGC() throws Exception{ + PythonObject gcModule = Python.importModule("gc"); + PythonObject getObjects = gcModule.attr("get_objects"); + PythonObject pyObjCount1 = Python.len(getObjects.call()); + long objCount1 = pyObjCount1.toLong(); + PythonObject pyList = Python.list(); + pyList.attr("append").call("a"); + pyList.attr("append").call(1.0); + pyList.attr("append").call(true); + PythonObject pyObjCount2 = Python.len(getObjects.call()); + long objCount2 = pyObjCount2.toLong(); + long diff = objCount2 - objCount1; + Assert.assertTrue(diff > 2); + try(PythonGC gc = PythonGC.watch()){ + PythonObject pyList2 = Python.list(); + pyList2.attr("append").call("a"); + pyList2.attr("append").call(1.0); + pyList2.attr("append").call(true); + } + PythonObject pyObjCount3 = Python.len(getObjects.call()); + long objCount3 = pyObjCount3.toLong(); + diff = objCount3 - objCount2; + Assert.assertEquals(2, diff);// 2 objects created during function call + } +} diff --git a/python4j/python4j-core/src/test/java/PythonJobTest.java b/python4j/python4j-core/src/test/java/PythonJobTest.java new file mode 100644 index 000000000..016045a25 --- /dev/null +++ b/python4j/python4j-core/src/test/java/PythonJobTest.java @@ -0,0 +1,287 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +import org.eclipse.python4j.PythonContextManager; +import org.eclipse.python4j.PythonJob; +import org.eclipse.python4j.PythonTypes; +import org.eclipse.python4j.PythonVariable; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.List; + +import static org.junit.Assert.assertEquals; + + +@javax.annotation.concurrent.NotThreadSafe +public class PythonJobTest { + + @Test + public void testPythonJobBasic() throws Exception{ + PythonContextManager.deleteNonMainContexts(); + + String code = "c = a + b"; + PythonJob job = new PythonJob("job1", code, false); + + List inputs = new ArrayList<>(); + inputs.add(new PythonVariable<>("a", PythonTypes.INT, 2)); + inputs.add(new PythonVariable<>("b", PythonTypes.INT, 3)); + + List outputs = new ArrayList<>(); + outputs.add(new PythonVariable<>("c", PythonTypes.INT)); + + + job.exec(inputs, outputs); + assertEquals("c", outputs.get(0).getName()); + assertEquals(5L, (long)outputs.get(0).getValue()); + + inputs = new ArrayList<>(); + inputs.add(new PythonVariable<>("a", PythonTypes.FLOAT, 3.0)); + inputs.add(new PythonVariable<>("b", PythonTypes.FLOAT, 4.0)); + + outputs = new ArrayList<>(); + outputs.add(new PythonVariable<>("c", PythonTypes.FLOAT)); + + + job.exec(inputs, outputs); + + assertEquals("c", outputs.get(0).getName()); + assertEquals(7.0, (double)outputs.get(0).getValue(), 1e-5); + + + } + + @Test + public void testPythonJobReturnAllVariables()throws Exception{ + PythonContextManager.deleteNonMainContexts(); + + String code = "c = a + b"; + PythonJob job = new PythonJob("job1", code, false); + + List inputs = new ArrayList<>(); + inputs.add(new PythonVariable<>("a", PythonTypes.INT, 2)); + inputs.add(new PythonVariable<>("b", PythonTypes.INT, 3)); + + + List outputs = job.execAndReturnAllVariables(inputs); + + + assertEquals("a", outputs.get(0).getName()); + assertEquals(2L, (long)outputs.get(0).getValue()); + assertEquals("b", outputs.get(1).getName()); + assertEquals(3L, (long)outputs.get(1).getValue()); + assertEquals("c", outputs.get(2).getName()); + assertEquals(5L, (long)outputs.get(2).getValue()); + + inputs = new ArrayList<>(); + inputs.add(new PythonVariable<>("a", PythonTypes.FLOAT, 3.0)); + inputs.add(new PythonVariable<>("b", PythonTypes.FLOAT, 4.0)); + outputs = job.execAndReturnAllVariables(inputs); + assertEquals("a", outputs.get(0).getName()); + assertEquals(3.0, (double)outputs.get(0).getValue(), 1e-5); + assertEquals("b", outputs.get(1).getName()); + assertEquals(4.0, (double)outputs.get(1).getValue(), 1e-5); + assertEquals("c", outputs.get(2).getName()); + assertEquals(7.0, (double)outputs.get(2).getValue(), 1e-5); + + } + + + @Test + public void testMultiplePythonJobsParallel()throws Exception{ + PythonContextManager.deleteNonMainContexts(); + String code1 = "c = a + b"; + PythonJob job1 = new PythonJob("job1", code1, false); + + String code2 = "c = a - b"; + PythonJob job2 = new PythonJob("job2", code2, false); + + List inputs = new ArrayList<>(); + inputs.add(new PythonVariable<>("a", PythonTypes.INT, 2)); + inputs.add(new PythonVariable<>("b", PythonTypes.INT, 3)); + + + List outputs = new ArrayList<>(); + outputs.add(new PythonVariable<>("c", PythonTypes.INT)); + + job1.exec(inputs, outputs); + + assertEquals("c", outputs.get(0).getName()); + assertEquals(5L, (long)outputs.get(0).getValue()); + + + job2.exec(inputs, outputs); + + assertEquals("c", outputs.get(0).getName()); 
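+        // job2 runs c = a - b, so with a = 2 and b = 3 the expected value is -1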
+ assertEquals(-1L, (long)outputs.get(0).getValue()); + + inputs = new ArrayList<>(); + inputs.add(new PythonVariable<>("a", PythonTypes.FLOAT, 3.0)); + inputs.add(new PythonVariable<>("b", PythonTypes.FLOAT, 4.0)); + + outputs = new ArrayList<>(); + outputs.add(new PythonVariable<>("c", PythonTypes.FLOAT)); + + + job1.exec(inputs, outputs); + + assertEquals("c", outputs.get(0).getName()); + assertEquals(7.0, (double)outputs.get(0).getValue(), 1e-5); + + job2.exec(inputs, outputs); + + assertEquals("c", outputs.get(0).getName()); + assertEquals(-1., (double)outputs.get(0).getValue(), 1e-5); + + } + + + @Test + public void testPythonJobSetupRun()throws Exception{ + + PythonContextManager.deleteNonMainContexts(); + String code = "five=None\n" + + "def setup():\n" + + " global five\n"+ + " five = 5\n\n" + + "def run(a, b):\n" + + " c = a + b + five\n"+ + " return {'c':c}\n\n"; + PythonJob job = new PythonJob("job1", code, true); + + List inputs = new ArrayList<>(); + inputs.add(new PythonVariable<>("a", PythonTypes.INT, 2)); + inputs.add(new PythonVariable<>("b", PythonTypes.INT, 3)); + + List outputs = new ArrayList<>(); + outputs.add(new PythonVariable<>("c", PythonTypes.INT)); + job.exec(inputs, outputs); + + assertEquals("c", outputs.get(0).getName()); + assertEquals(10L, (long)outputs.get(0).getValue()); + + + inputs = new ArrayList<>(); + inputs.add(new PythonVariable<>("a", PythonTypes.FLOAT, 3.0)); + inputs.add(new PythonVariable<>("b", PythonTypes.FLOAT, 4.0)); + + + outputs = new ArrayList<>(); + outputs.add(new PythonVariable<>("c", PythonTypes.FLOAT)); + + job.exec(inputs, outputs); + + assertEquals("c", outputs.get(0).getName()); + assertEquals(12.0, (double)outputs.get(0).getValue(), 1e-5); + + } + @Test + public void testPythonJobSetupRunAndReturnAllVariables()throws Exception{ + PythonContextManager.deleteNonMainContexts(); + String code = "five=None\n" + + "c=None\n"+ + "def setup():\n" + + " global five\n"+ + " five = 5\n\n" + + "def run(a, b):\n" + + " global c\n" + + " c = a + b + five\n"; + PythonJob job = new PythonJob("job1", code, true); + + List inputs = new ArrayList<>(); + inputs.add(new PythonVariable<>("a", PythonTypes.INT, 2)); + inputs.add(new PythonVariable<>("b", PythonTypes.INT, 3)); + + List outputs = job.execAndReturnAllVariables(inputs); + + assertEquals("c", outputs.get(1).getName()); + assertEquals(10L, (long)outputs.get(1).getValue()); + + inputs = new ArrayList<>(); + inputs.add(new PythonVariable<>("a", PythonTypes.FLOAT, 3.0)); + inputs.add(new PythonVariable<>("b", PythonTypes.FLOAT, 4.0)); + + outputs = job.execAndReturnAllVariables(inputs); + + + assertEquals("c", outputs.get(1).getName()); + assertEquals(12.0, (double)outputs.get(1).getValue(), 1e-5); + + + + } + + @Test + public void testMultiplePythonJobsSetupRunParallel()throws Exception{ + PythonContextManager.deleteNonMainContexts(); + + String code1 = "five=None\n" + + "def setup():\n" + + " global five\n"+ + " five = 5\n\n" + + "def run(a, b):\n" + + " c = a + b + five\n"+ + " return {'c':c}\n\n"; + PythonJob job1 = new PythonJob("job1", code1, true); + + String code2 = "five=None\n" + + "def setup():\n" + + " global five\n"+ + " five = 5\n\n" + + "def run(a, b):\n" + + " c = a + b - five\n"+ + " return {'c':c}\n\n"; + PythonJob job2 = new PythonJob("job2", code2, true); + + List inputs = new ArrayList<>(); + inputs.add(new PythonVariable<>("a", PythonTypes.INT, 2)); + inputs.add(new PythonVariable<>("b", PythonTypes.INT, 3)); + + + List outputs = new ArrayList<>(); + outputs.add(new 
PythonVariable<>("c", PythonTypes.INT)); + + job1.exec(inputs, outputs); + + assertEquals("c", outputs.get(0).getName()); + assertEquals(10L, (long)outputs.get(0).getValue()); + + job2.exec(inputs, outputs); + + assertEquals("c", outputs.get(0).getName()); + assertEquals(0L, (long)outputs.get(0).getValue()); + + inputs = new ArrayList<>(); + inputs.add(new PythonVariable<>("a", PythonTypes.FLOAT, 3.0)); + inputs.add(new PythonVariable<>("b", PythonTypes.FLOAT, 4.0)); + + outputs = new ArrayList<>(); + outputs.add(new PythonVariable<>("c", PythonTypes.FLOAT)); + + + job1.exec(inputs, outputs); + + assertEquals("c", outputs.get(0).getName()); + assertEquals(12.0, (double)outputs.get(0).getValue(), 1e-5); + + job2.exec(inputs, outputs); + + assertEquals("c", outputs.get(0).getName()); + assertEquals(2.0, (double)outputs.get(0).getValue(), 1e-5); + + } + +} diff --git a/python4j/python4j-core/src/test/java/PythonMultiThreadTest.java b/python4j/python4j-core/src/test/java/PythonMultiThreadTest.java new file mode 100644 index 000000000..ec544b65f --- /dev/null +++ b/python4j/python4j-core/src/test/java/PythonMultiThreadTest.java @@ -0,0 +1,169 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +import org.eclipse.python4j.*; +import org.junit.Assert; +import org.junit.Test; + +import javax.annotation.concurrent.NotThreadSafe; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + + +@NotThreadSafe +public class PythonMultiThreadTest { + + @Test + public void testMultiThreading1()throws Throwable{ + final List exceptions = Collections.synchronizedList(new ArrayList()); + Runnable runnable = new Runnable() { + @Override + public void run() { + try(PythonGIL gil = PythonGIL.lock()){ + try(PythonGC gc = PythonGC.watch()){ + List inputs = new ArrayList<>(); + inputs.add(new PythonVariable<>("x", PythonTypes.STR, "Hello ")); + inputs.add(new PythonVariable<>("y", PythonTypes.STR, "World")); + PythonVariable out = new PythonVariable<>("z", PythonTypes.STR); + String code = "z = x + y"; + PythonExecutioner.exec(code, inputs, Collections.singletonList(out)); + Assert.assertEquals("Hello World", out.getValue()); + System.out.println(out.getValue() + " From thread " + Thread.currentThread().getId()); + } + }catch (Throwable e){ + exceptions.add(e); + } + } + }; + + int numThreads = 10; + Thread[] threads = new Thread[numThreads]; + for (int i = 0; i < threads.length; i++){ + threads[i] = new Thread(runnable); + } + for (int i = 0; i < threads.length; i++){ + threads[i].start(); + } + Thread.sleep(100); + for (int i = 0; i < threads.length; i++){ + threads[i].join(); + } + if (!exceptions.isEmpty()){ + throw(exceptions.get(0)); + } + + } + @Test + public void testMultiThreading2()throws Throwable{ + final List exceptions = Collections.synchronizedList(new ArrayList()); + Runnable runnable = new Runnable() { + @Override + public void run() { + try(PythonGIL gil = PythonGIL.lock()){ + try(PythonGC gc = PythonGC.watch()){ + PythonContextManager.reset(); + PythonContextManager.reset(); + List inputs = new ArrayList<>(); + inputs.add(new PythonVariable<>("a", PythonTypes.INT, 5)); + String code = "b = '10'\nc = 20.0 + a"; + List vars = PythonExecutioner.execAndReturnAllVariables(code, inputs); + + Assert.assertEquals("a", vars.get(0).getName()); + Assert.assertEquals(PythonTypes.INT, vars.get(0).getType()); + Assert.assertEquals(5L, (long)vars.get(0).getValue()); + + Assert.assertEquals("b", vars.get(1).getName()); + Assert.assertEquals(PythonTypes.STR, vars.get(1).getType()); + Assert.assertEquals("10", vars.get(1).getValue().toString()); + + Assert.assertEquals("c", vars.get(2).getName()); + Assert.assertEquals(PythonTypes.FLOAT, vars.get(2).getType()); + Assert.assertEquals(25.0, (double)vars.get(2).getValue(), 1e-5); + } + }catch (Throwable e){ + exceptions.add(e); + } + } + }; + + int numThreads = 10; + Thread[] threads = new Thread[numThreads]; + for (int i = 0; i < threads.length; i++){ + threads[i] = new Thread(runnable); + } + for (int i = 0; i < threads.length; i++){ + threads[i].start(); + } + Thread.sleep(100); + for (int i = 0; i < threads.length; i++){ + threads[i].join(); + } + if (!exceptions.isEmpty()){ + throw(exceptions.get(0)); + } + } + + @Test + public void testMultiThreading3() throws Throwable{ + PythonContextManager.deleteNonMainContexts(); + + String code = "c = a + b"; + final PythonJob job = new PythonJob("job1", code, false); + + final List exceptions = Collections.synchronizedList(new ArrayList()); + + class JobThread extends Thread{ + private int a, b, c; + public JobThread(int a, 
int b, int c){ + this.a = a; + this.b = b; + this.c = c; + } + @Override + public void run(){ + try{ + PythonVariable out = new PythonVariable<>("c", PythonTypes.INT); + job.exec(Arrays.asList(new PythonVariable<>("a", PythonTypes.INT, a), + new PythonVariable<>("b", PythonTypes.INT, b)), + Collections.singletonList(out)); + Assert.assertEquals(c, out.getValue().intValue()); + }catch (Exception e){ + exceptions.add(e); + } + + } + } + int numThreads = 10; + JobThread[] threads = new JobThread[numThreads]; + for (int i=0; i < threads.length; i++){ + threads[i] = new JobThread(i, i + 3, 2 * i +3); + } + + for (int i = 0; i < threads.length; i++){ + threads[i].start(); + } + Thread.sleep(100); + for (int i = 0; i < threads.length; i++){ + threads[i].join(); + } + + if (!exceptions.isEmpty()){ + throw(exceptions.get(0)); + } + } +} diff --git a/python4j/python4j-core/src/test/java/PythonPrimitiveTypesTest.java b/python4j/python4j-core/src/test/java/PythonPrimitiveTypesTest.java new file mode 100644 index 000000000..ae10ed8dc --- /dev/null +++ b/python4j/python4j-core/src/test/java/PythonPrimitiveTypesTest.java @@ -0,0 +1,82 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + + +import org.eclipse.python4j.PythonException; +import org.eclipse.python4j.PythonObject; +import org.eclipse.python4j.PythonTypes; +import org.junit.Assert; +import org.junit.Test; + +public class PythonPrimitiveTypesTest { + + @Test + public void testInt() throws PythonException { + long j = 3; + PythonObject p = PythonTypes.INT.toPython(j); + long j2 = PythonTypes.INT.toJava(p); + + Assert.assertEquals(j, j2); + + PythonObject p2 = PythonTypes.convert(j); + long j3 = PythonTypes.INT.toJava(p2); + + Assert.assertEquals(j, j3); + } + + @Test + public void testStr() throws PythonException{ + String s = "abcd"; + PythonObject p = PythonTypes.STR.toPython(s); + String s2 = PythonTypes.STR.toJava(p); + + Assert.assertEquals(s, s2); + + PythonObject p2 = PythonTypes.convert(s); + String s3 = PythonTypes.STR.toJava(p2); + + Assert.assertEquals(s, s3); + } + + @Test + public void testFloat() throws PythonException{ + double f = 7; + PythonObject p = PythonTypes.FLOAT.toPython(f); + double f2 = PythonTypes.FLOAT.toJava(p); + + Assert.assertEquals(f, f2, 1e-5); + + PythonObject p2 = PythonTypes.convert(f); + double f3 = PythonTypes.FLOAT.toJava(p2); + + Assert.assertEquals(f, f3, 1e-5); + } + + @Test + public void testBool() throws PythonException{ + boolean b = true; + PythonObject p = PythonTypes.BOOL.toPython(b); + boolean b2 = PythonTypes.BOOL.toJava(p); + + Assert.assertEquals(b, b2); + + PythonObject p2 = PythonTypes.convert(b); + boolean b3 = PythonTypes.BOOL.toJava(p2); + + Assert.assertEquals(b, b3); + } + +} diff --git a/python4j/python4j-numpy/pom.xml b/python4j/python4j-numpy/pom.xml new file mode 100644 index 000000000..527a9343f --- /dev/null +++ b/python4j/python4j-numpy/pom.xml @@ -0,0 +1,42 @@ + + + + python4j-parent + org.eclipse + 1.0.0-SNAPSHOT + + 4.0.0 + + python4j-numpy + + + + org.bytedeco + numpy-platform + ${numpy.javacpp.version} + + + org.nd4j + nd4j-native-api + ${project.version} + + + org.nd4j + nd4j-common-tests + ${nd4j.version} + test + + + + + + test-nd4j-native + + + test-nd4j-cuda-10.2 + + + + \ No newline at end of file From 880080312395c7827814fbe7619e976a248631a4 Mon Sep 17 00:00:00 2001 From: Serhii Shepel <9946053+sshepel@users.noreply.github.com> Date: Thu, 21 May 2020 10:04:05 +0300 Subject: [PATCH 11/21] Switch to static linking for Android (#472) --- libnd4j/cmake/android-arm.cmake | 2 +- libnd4j/cmake/android-arm64.cmake | 2 +- libnd4j/cmake/android-x86.cmake | 2 +- libnd4j/cmake/android-x86_64.cmake | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/libnd4j/cmake/android-arm.cmake b/libnd4j/cmake/android-arm.cmake index 427bc6a34..4db515400 100644 --- a/libnd4j/cmake/android-arm.cmake +++ b/libnd4j/cmake/android-arm.cmake @@ -3,7 +3,7 @@ set(CMAKE_SYSTEM_NAME Android) set(CMAKE_ANDROID_ARCH_ABI armeabi-v7a) set(CMAKE_ANDROID_NDK "$ENV{ANDROID_NDK}") -set(CMAKE_ANDROID_STL_TYPE c++_shared) +set(CMAKE_ANDROID_STL_TYPE c++_static) set(CMAKE_SYSTEM_VERSION "$ENV{ANDROID_VERSION}") set(CMAKE_ANDROID_NDK_TOOLCHAIN_VERSION clang) diff --git a/libnd4j/cmake/android-arm64.cmake b/libnd4j/cmake/android-arm64.cmake index 33ee454e7..68a4e60a5 100644 --- a/libnd4j/cmake/android-arm64.cmake +++ b/libnd4j/cmake/android-arm64.cmake @@ -3,7 +3,7 @@ set(CMAKE_SYSTEM_NAME Android) set(CMAKE_ANDROID_ARCH_ABI arm64-v8a) set(CMAKE_ANDROID_NDK "$ENV{ANDROID_NDK}") -set(CMAKE_ANDROID_STL_TYPE c++_shared) 
+set(CMAKE_ANDROID_STL_TYPE c++_static) set(CMAKE_SYSTEM_VERSION "$ENV{ANDROID_VERSION}") set(CMAKE_ANDROID_NDK_TOOLCHAIN_VERSION clang) diff --git a/libnd4j/cmake/android-x86.cmake b/libnd4j/cmake/android-x86.cmake index 7290b0b8d..be6600bcc 100644 --- a/libnd4j/cmake/android-x86.cmake +++ b/libnd4j/cmake/android-x86.cmake @@ -3,7 +3,7 @@ set(CMAKE_SYSTEM_NAME Android) set(CMAKE_ANDROID_ARCH_ABI x86) set(CMAKE_ANDROID_NDK "$ENV{ANDROID_NDK}") -set(CMAKE_ANDROID_STL_TYPE c++_shared) +set(CMAKE_ANDROID_STL_TYPE c++_static) set(CMAKE_SYSTEM_VERSION "$ENV{ANDROID_VERSION}") set(CMAKE_ANDROID_NDK_TOOLCHAIN_VERSION clang) diff --git a/libnd4j/cmake/android-x86_64.cmake b/libnd4j/cmake/android-x86_64.cmake index 5ff797910..ea9b5e356 100644 --- a/libnd4j/cmake/android-x86_64.cmake +++ b/libnd4j/cmake/android-x86_64.cmake @@ -3,7 +3,7 @@ set(CMAKE_SYSTEM_NAME Android) set(CMAKE_ANDROID_ARCH_ABI x86_64) set(CMAKE_ANDROID_NDK "$ENV{ANDROID_NDK}") -set(CMAKE_ANDROID_STL_TYPE c++_shared) +set(CMAKE_ANDROID_STL_TYPE c++_static) set(CMAKE_SYSTEM_VERSION "$ENV{ANDROID_VERSION}") set(CMAKE_ANDROID_NDK_TOOLCHAIN_VERSION clang) From ecdee6369dde14883ea753aa5d080d329816f6cc Mon Sep 17 00:00:00 2001 From: Fariz Rahman Date: Sun, 24 May 2020 14:47:17 +0400 Subject: [PATCH 12/21] IntIndexer -> UIntIndexer (#476) --- .../nd4j-api/src/main/java/org/nd4j/linalg/factory/Nd4j.java | 1 + 1 file changed, 1 insertion(+) diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/factory/Nd4j.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/factory/Nd4j.java index 88d0cbe44..12a76f2b9 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/factory/Nd4j.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/factory/Nd4j.java @@ -1131,6 +1131,7 @@ public class Nd4j { case LONG: return LongIndexer.create((LongPointer) pointer); case UINT32: + return UIntIndexer.create((IntPointer) pointer); case INT: return IntIndexer.create((IntPointer) pointer); case UINT16: From a18417193d2d0c9f0de516bc325f9c43c92be55e Mon Sep 17 00:00:00 2001 From: shugeo Date: Tue, 26 May 2020 14:13:48 +0300 Subject: [PATCH 13/21] Shugeo resize area fix4 (#465) * Restore resize_area test suite. Signed-off-by: shugeo * Fixed resize_area kernel for cuda platform to avoid range violation. Signed-off-by: shugeo * Fixed resizeAreaKernel start. Signed-off-by: shugeo * Fixed potential error handling with resize area cuda implementation. 
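A side note on the IntIndexer -> UIntIndexer patch above: routing the UINT32 case through IntIndexer made Java read unsigned 32-bit buffers through a signed int, so any stored value at or above 2^31 surfaced as a negative number, whereas UIntIndexer returns the unsigned value widened to a long. The sketch below illustrates the difference; it assumes JavaCPP's IntPointer/IntIndexer/UIntIndexer API as used in that patch, and the class name and values are illustrative only, not part of the change.

    import org.bytedeco.javacpp.IntPointer;
    import org.bytedeco.javacpp.indexer.IntIndexer;
    import org.bytedeco.javacpp.indexer.UIntIndexer;

    public class UIntIndexerSketch {
        public static void main(String[] args) {
            // One 32-bit slot with every bit set, i.e. 0xFFFFFFFF (4294967295 unsigned)
            IntPointer pointer = new IntPointer(1);
            pointer.put(0, -1);

            // Signed view: the UINT32 value comes back as -1
            System.out.println(IntIndexer.create(pointer).get(0));

            // Unsigned view: widened to long, the value 4294967295 is preserved
            System.out.println(UIntIndexer.create(pointer).get(0));
        }
    }

This is why the UINT32 case above now returns a UIntIndexer instead of falling through to the signed IntIndexer branch.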
Signed-off-by: shugeo --- .../declarable/helpers/cuda/image_resize.cu | 35 +++++++++++++++++-- .../layers_tests/DeclarableOpsTests11.cpp | 4 +-- 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/libnd4j/include/ops/declarable/helpers/cuda/image_resize.cu b/libnd4j/include/ops/declarable/helpers/cuda/image_resize.cu index d483f87b3..180c8ad0e 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/image_resize.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/image_resize.cu @@ -1066,7 +1066,7 @@ namespace helpers { const Nd4jLong yStart = math::nd4j_floor(inY); const Nd4jLong yEnd = math::nd4j_ceil(inY1); auto scalesDim = yEnd - yStart; - auto yScaleCache = cachePool + (batch * pSt->outWidth + y) * scalesDim * sizeof(ScaleCache); + auto yScaleCache = cachePool + (batch * pSt->outHeight + y) * pSt->outWidth; //auto startPtr = sharedPtr + y * scalesDim * sizeof(float); //float* yScales = yScalesShare + y * sizeof(float) * scalesDim;//reinterpret_cast(startPtr); //shared + y * scalesDim * y + scalesDim * sizeof(T const *) [scalesDim]; @@ -1113,14 +1113,34 @@ namespace helpers { auto outputPtr = reinterpret_cast(output->specialBuffer()); // output is always float. TO DO: provide another float types also with template declaration ImageResizerState* pSt; auto err = cudaMalloc(&pSt, sizeof(ImageResizerState)); + if (err != 0) { + throw cuda_exception::build("helpers::resizeArea: Cannot allocate memory for ImageResizerState", err); + } + err = cudaMemcpyAsync(pSt, &st, sizeof(ImageResizerState), cudaMemcpyHostToDevice, *stream); + if (err != 0) { + throw cuda_exception::build("helpers::resizeArea: Cannot copy to device memory", err); + } ScaleCache* cachePool; - err = cudaMalloc(&cachePool, sizeof(ScaleCache) * st.batchSize * st.outWidth * st.outHeight); - resizeAreaKernel<<<128, 2, 2048, *stream>>>(pSt, cache, scale, inputPtr, input->specialShapeInfo(), outputPtr, + auto cachePoolSize = sizeof(ScaleCache) * st.batchSize * st.outWidth * st.outHeight; + err = cudaMalloc(&cachePool, cachePoolSize); + if (err != 0) { + throw cuda_exception::build("helpers::resizeArea: Cannot allocate memory for cache", err); + } + resizeAreaKernel<<<128, 128, 2048, *stream>>>(pSt, cache, scale, inputPtr, input->specialShapeInfo(), outputPtr, output->specialShapeInfo(), cachePool); err = cudaStreamSynchronize(*stream); + if (err != 0) { + throw cuda_exception::build("helpers::resizeArea: An error occurred while running the kernel", err); + } err = cudaFree(cachePool); + if (err != 0) { + throw cuda_exception::build("helpers::resizeArea: Cannot deallocate memory for cache", err); + } err = cudaFree(pSt); + if (err != 0) { + throw cuda_exception::build("helpers::resizeArea: Cannot deallocate memory for ImageResizerState", err); + } } // ------------------------------------------------------------------------------------------------------------------ // template @@ -1134,11 +1154,20 @@ CachedInterpolation* xCached; //(st.outWidth); auto err = cudaMalloc(&xCached, sizeof(CachedInterpolation) * st.outWidth); + if (err != 0) { + throw cuda_exception::build("helpers::resizeAreaFunctor_: Cannot allocate memory for cached interpolations", err); + } NDArray::prepareSpecialUse({output}, {image}); fillInterpolationCache<<<128, 128, 256, *stream>>>(xCached, st.outWidth, st.inWidth, st.widthScale); resizeArea(stream, st, xCached, image, output); err = cudaStreamSynchronize(*stream); + if (err != 0) { + throw cuda_exception::build("helpers::resizeAreaFunctor_: An error occurred while running the kernel",
err); + } err = cudaFree(xCached); + if (err != 0) { + throw cuda_exception::build("helpers::resizeAreaFunctor_: Cannot deallocate memory for cached interpolations", err); + } NDArray::registerSpecialUse({output}, {image}); } diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests11.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests11.cpp index e4391c688..23c40ebae 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests11.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests11.cpp @@ -1054,7 +1054,7 @@ TEST_F(DeclarableOpsTests11, ImageResizeBicubic_Test8) { ASSERT_TRUE(testData.equalsTo(result)); } -/* + TEST_F(DeclarableOpsTests11, ImageResizeArea_Test1) { NDArray input = NDArrayFactory::create('c', {1, 3, 3, 4}); @@ -1532,7 +1532,7 @@ TEST_F(DeclarableOpsTests11, ImageResizeArea_Test15) { ASSERT_TRUE(expected.isSameShape(result)); ASSERT_TRUE(expected.equalsTo(result)); } - */ + /////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests11, summaryStatsData_test1) { From 5568b9d72ff519a3cdaa4db773a02cf72726582e Mon Sep 17 00:00:00 2001 From: Alexandre Boulanger <44292157+aboulang2002@users.noreply.github.com> Date: Wed, 27 May 2020 07:41:02 -0400 Subject: [PATCH 14/21] RL4J: Add AgentLearner (#470) Signed-off-by: Alexandre Boulanger --- .../org/deeplearning4j/rl4j/agent/Agent.java | 113 +++++++--- .../rl4j/agent/AgentLearner.java | 115 ++++++++++ .../org/deeplearning4j/rl4j/agent/IAgent.java | 55 +++++ .../rl4j/agent/IAgentLearner.java | 24 ++ .../agent/learning/ILearningBehavior.java | 49 ++++ .../rl4j/agent/learning/LearningBehavior.java | 59 +++++ .../rl4j/agent/listener/AgentListener.java | 47 +++- .../agent/listener/AgentListenerList.java | 39 ++++ .../agent/update/DQNNeuralNetUpdateRule.java | 62 +++++ .../rl4j/agent/update/Gradients.java | 26 +++ .../rl4j/agent/update/IUpdateRule.java | 37 +++ .../rl4j/environment/ActionSchema.java | 9 - .../rl4j/environment/Environment.java | 43 ++++ .../rl4j/environment/IActionSchema.java | 26 +++ .../rl4j/environment/IntegerActionSchema.java | 47 ++++ .../rl4j/environment/Schema.java | 18 +- .../rl4j/environment/StepResult.java | 15 ++ .../rl4j/experience/ExperienceHandler.java | 5 + .../ReplayMemoryExperienceHandler.java | 7 + .../StateActionExperienceHandler.java | 17 +- .../rl4j/helper/INDArrayHelper.java | 31 ++- .../learning/async/AsyncThreadDiscrete.java | 14 +- .../AsyncNStepQLearningThreadDiscrete.java | 3 +- .../discrete/QLearningUpdateAlgorithm.java | 24 +- .../rl4j/learning/sync/ExpReplay.java | 5 + .../rl4j/learning/sync/IExpReplay.java | 5 + .../learning/sync/qlearning/QLearning.java | 19 +- .../qlearning/discrete/QLearningDiscrete.java | 70 +++--- .../rl4j/mdp/CartpoleEnvironment.java | 17 +- .../deeplearning4j/rl4j/policy/EpsGreedy.java | 87 +++++++- .../rl4j/policy/INeuralNetPolicy.java | 7 + .../deeplearning4j/rl4j/policy/Policy.java | 2 +- .../rl4j/agent/AgentLearnerTest.java | 211 ++++++++++++++++++ .../deeplearning4j/rl4j/agent/AgentTest.java | 44 ++-- .../agent/learning/LearningBehaviorTest.java | 133 +++++++++++ .../ReplayMemoryExperienceHandlerTest.java | 100 ++++++--- .../StateActionExperienceHandlerTest.java | 70 +++++- .../rl4j/helper/INDArrayHelperTest.java | 21 ++ .../QLearningUpdateAlgorithmTest.java | 75 ++++--- .../discrete/QLearningDiscreteTest.java | 34 ++- 40 files changed, 1541 insertions(+), 244 deletions(-) create mode 100644 rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/AgentLearner.java create mode 100644 
rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/IAgent.java create mode 100644 rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/IAgentLearner.java create mode 100644 rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/learning/ILearningBehavior.java create mode 100644 rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/learning/LearningBehavior.java create mode 100644 rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/update/DQNNeuralNetUpdateRule.java create mode 100644 rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/update/Gradients.java create mode 100644 rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/update/IUpdateRule.java delete mode 100644 rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/environment/ActionSchema.java create mode 100644 rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/environment/IActionSchema.java create mode 100644 rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/environment/IntegerActionSchema.java create mode 100644 rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/policy/INeuralNetPolicy.java create mode 100644 rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/agent/AgentLearnerTest.java create mode 100644 rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/agent/learning/LearningBehaviorTest.java diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/Agent.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/Agent.java index 999f12e8c..198c2a1ca 100644 --- a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/Agent.java +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/Agent.java @@ -1,3 +1,18 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ package org.deeplearning4j.rl4j.agent; import lombok.AccessLevel; @@ -14,7 +29,13 @@ import org.nd4j.common.base.Preconditions; import java.util.Map; -public class Agent { +/** + * An agent implementation. The Agent will use a {@link IPolicy} to interact with an {@link Environment} and receive + * a reward. + * + * @param The type of action + */ +public class Agent implements IAgent { @Getter private final String id; @@ -37,19 +58,28 @@ public class Agent { private ACTION lastAction; @Getter - private int episodeStepNumber; + private int episodeStepCount; @Getter private double reward; protected boolean canContinue; - private Agent(Builder builder) { - this.environment = builder.environment; - this.transformProcess = builder.transformProcess; - this.policy = builder.policy; - this.maxEpisodeSteps = builder.maxEpisodeSteps; - this.id = builder.id; + /** + * @param environment The {@link Environment} to be used + * @param transformProcess The {@link TransformProcess} to be used to transform the raw observations into usable ones. 
+ * @param policy The {@link IPolicy} to be used + * @param maxEpisodeSteps The maximum number of steps an episode can have before being interrupted. Use null to have no max. + * @param id A user-supplied id to identify the instance. + */ + public Agent(@NonNull Environment environment, @NonNull TransformProcess transformProcess, @NonNull IPolicy policy, Integer maxEpisodeSteps, String id) { + Preconditions.checkArgument(maxEpisodeSteps == null || maxEpisodeSteps > 0, "maxEpisodeSteps must be null (no maximum) or greater than 0, got", maxEpisodeSteps); + + this.environment = environment; + this.transformProcess = transformProcess; + this.policy = policy; + this.maxEpisodeSteps = maxEpisodeSteps; + this.id = id; listeners = buildListenerList(); } @@ -58,10 +88,17 @@ return new AgentListenerList(); } + /** + * Add an {@link AgentListener} that will be notified when agent events happen + * @param listener The listener to add + */ public void addListener(AgentListener listener) { listeners.add(listener); } + /** + * This will run a single episode + */ public void run() { runEpisode(); } @@ -80,7 +117,7 @@ canContinue = listeners.notifyBeforeEpisode(this); - while (canContinue && !environment.isEpisodeFinished() && (maxEpisodeSteps == null || episodeStepNumber < maxEpisodeSteps)) { + while (canContinue && !environment.isEpisodeFinished() && (maxEpisodeSteps == null || episodeStepCount < maxEpisodeSteps)) { performStep(); } @@ -100,9 +137,9 @@ } protected void resetEnvironment() { - episodeStepNumber = 0; + episodeStepCount = 0; Map channelsData = environment.reset(); - this.observation = transformProcess.transform(channelsData, episodeStepNumber, false); + this.observation = transformProcess.transform(channelsData, episodeStepCount, false); } protected void resetPolicy() { @@ -125,7 +162,6 @@ } StepResult stepResult = act(action); - handleStepResult(stepResult); onAfterStep(stepResult); @@ -134,11 +170,11 @@ return; } - incrementEpisodeStepNumber(); + incrementEpisodeStepCount(); } - protected void incrementEpisodeStepNumber() { - ++episodeStepNumber; + protected void incrementEpisodeStepCount() { + ++episodeStepCount; } protected ACTION decideAction(Observation observation) { @@ -150,12 +186,15 @@ } protected StepResult act(ACTION action) { - return environment.step(action); - } + Observation observationBeforeAction = observation; - protected void handleStepResult(StepResult stepResult) { - observation = convertChannelDataToObservation(stepResult, episodeStepNumber + 1); - reward +=computeReward(stepResult); + StepResult stepResult = environment.step(action); + observation = convertChannelDataToObservation(stepResult, episodeStepCount + 1); + reward += computeReward(stepResult); + + onAfterAction(observationBeforeAction, action, stepResult); + + return stepResult; } protected Observation convertChannelDataToObservation(StepResult stepResult, int episodeStepNumberOfObs) { @@ -166,6 +205,10 @@ return stepResult.getReward(); } + protected void onAfterAction(Observation observationBeforeAction, ACTION action, StepResult stepResult) { + // Do Nothing + } + protected void onAfterStep(StepResult stepResult) { // Do Nothing } @@ -174,16 +217,24 @@ // Do Nothing } - public static Builder builder(@NonNull Environment environment, @NonNull TransformProcess transformProcess, @NonNull IPolicy policy) { + /** + * + * @param environment + * @param
transformProcess + * @param policy + * @param + * @return + */ + public static Builder builder(@NonNull Environment environment, @NonNull TransformProcess transformProcess, @NonNull IPolicy policy) { return new Builder<>(environment, transformProcess, policy); } - public static class Builder { - private final Environment environment; - private final TransformProcess transformProcess; - private final IPolicy policy; - private Integer maxEpisodeSteps = null; // Default, no max - private String id; + public static class Builder { + protected final Environment environment; + protected final TransformProcess transformProcess; + protected final IPolicy policy; + protected Integer maxEpisodeSteps = null; // Default, no max + protected String id; public Builder(@NonNull Environment environment, @NonNull TransformProcess transformProcess, @NonNull IPolicy policy) { this.environment = environment; @@ -191,20 +242,20 @@ this.policy = policy; } - public Builder maxEpisodeSteps(int maxEpisodeSteps) { + public Builder maxEpisodeSteps(int maxEpisodeSteps) { Preconditions.checkArgument(maxEpisodeSteps > 0, "maxEpisodeSteps must be greater than 0, got", maxEpisodeSteps); this.maxEpisodeSteps = maxEpisodeSteps; return this; } - public Builder id(String id) { + public Builder id(String id) { this.id = id; return this; } - public Agent build() { - return new Agent(this); + public AGENT_TYPE build() { + return (AGENT_TYPE)new Agent(environment, transformProcess, policy, maxEpisodeSteps, id); } } } \ No newline at end of file diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/AgentLearner.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/AgentLearner.java new file mode 100644 index 000000000..8fd963cda --- /dev/null +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/AgentLearner.java @@ -0,0 +1,115 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ +package org.deeplearning4j.rl4j.agent; + +import lombok.Getter; +import lombok.NonNull; +import org.deeplearning4j.rl4j.agent.learning.ILearningBehavior; +import org.deeplearning4j.rl4j.environment.Environment; +import org.deeplearning4j.rl4j.environment.StepResult; +import org.deeplearning4j.rl4j.observation.Observation; +import org.deeplearning4j.rl4j.observation.transform.TransformProcess; +import org.deeplearning4j.rl4j.policy.IPolicy; + +/** + * The AgentLearner is an {@link Agent} that delegates the learning to a {@link ILearningBehavior}.
+ * @param The type of the action + */ +public class AgentLearner extends Agent implements IAgentLearner { + + @Getter + private int totalStepCount = 0; + + private final ILearningBehavior learningBehavior; + private double rewardAtLastExperience; + + /** + * + * @param environment The {@link Environment} to be used + * @param transformProcess The {@link TransformProcess} to be used to transform the raw observations into usable ones. + * @param policy The {@link IPolicy} to be used + * @param maxEpisodeSteps The maximum number of steps an episode can have before being interrupted. Use null to have no max. + * @param id A user-supplied id to identify the instance. + * @param learningBehavior The {@link ILearningBehavior} that will be used to supervise the learning. + */ + public AgentLearner(Environment environment, TransformProcess transformProcess, IPolicy policy, Integer maxEpisodeSteps, String id, @NonNull ILearningBehavior learningBehavior) { + super(environment, transformProcess, policy, maxEpisodeSteps, id); + + this.learningBehavior = learningBehavior; + } + + @Override + protected void reset() { + super.reset(); + + rewardAtLastExperience = 0; + } + + @Override + protected void onBeforeEpisode() { + super.onBeforeEpisode(); + + learningBehavior.handleEpisodeStart(); + } + + @Override + protected void onAfterAction(Observation observationBeforeAction, ACTION action, StepResult stepResult) { + if(!observationBeforeAction.isSkipped()) { + double rewardSinceLastExperience = getReward() - rewardAtLastExperience; + learningBehavior.handleNewExperience(observationBeforeAction, action, rewardSinceLastExperience, stepResult.isTerminal()); + + rewardAtLastExperience = getReward(); + } + } + + @Override + protected void onAfterEpisode() { + learningBehavior.handleEpisodeEnd(getObservation()); + } + + @Override + protected void incrementEpisodeStepCount() { + super.incrementEpisodeStepCount(); + ++totalStepCount; + } + + // FIXME: parent is still visible + public static AgentLearner.Builder> builder(Environment environment, + TransformProcess transformProcess, + IPolicy policy, + ILearningBehavior learningBehavior) { + return new AgentLearner.Builder>(environment, transformProcess, policy, learningBehavior); + } + + public static class Builder> extends Agent.Builder { + + private final ILearningBehavior learningBehavior; + + public Builder(@NonNull Environment environment, + @NonNull TransformProcess transformProcess, + @NonNull IPolicy policy, + @NonNull ILearningBehavior learningBehavior) { + super(environment, transformProcess, policy); + + this.learningBehavior = learningBehavior; + } + + @Override + public AGENT_TYPE build() { + return (AGENT_TYPE)new AgentLearner(environment, transformProcess, policy, maxEpisodeSteps, id, learningBehavior); + } + } +} diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/IAgent.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/IAgent.java new file mode 100644 index 000000000..7cbd68a70 --- /dev/null +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/IAgent.java @@ -0,0 +1,55 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. 
+ * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ +package org.deeplearning4j.rl4j.agent; + +import org.deeplearning4j.rl4j.environment.Environment; +import org.deeplearning4j.rl4j.policy.IPolicy; + +/** + * The interface of {@link Agent} + * @param + */ +public interface IAgent { + /** + * Will play a single episode + */ + void run(); + + /** + * @return A user-supplied id to identify the IAgent instance. + */ + String getId(); + + /** + * @return The {@link Environment} instance being used by the agent. + */ + Environment getEnvironment(); + + /** + * @return The {@link IPolicy} instance being used by the agent. + */ + IPolicy getPolicy(); + + /** + * @return The step count taken in the current episode. + */ + int getEpisodeStepCount(); + + /** + * @return The cumulative reward received in the current episode. + */ + double getReward(); +} diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/IAgentLearner.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/IAgentLearner.java new file mode 100644 index 000000000..b1bdd1646 --- /dev/null +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/IAgentLearner.java @@ -0,0 +1,24 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ +package org.deeplearning4j.rl4j.agent; + +public interface IAgentLearner extends IAgent { + + /** + * @return The total count of steps taken by this AgentLearner, for all episodes. + */ + int getTotalStepCount(); +} diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/learning/ILearningBehavior.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/learning/ILearningBehavior.java new file mode 100644 index 000000000..0187d8c3a --- /dev/null +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/learning/ILearningBehavior.java @@ -0,0 +1,49 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ +package org.deeplearning4j.rl4j.agent.learning; + +import org.deeplearning4j.rl4j.observation.Observation; + +/** + * The ILearningBehavior implementations are in charge of the training. Through this interface, they are + * notified as new experience is generated. + * + * @param The type of action + */ +public interface ILearningBehavior { + + /** + * This method is called when a new episode has been started. + */ + void handleEpisodeStart(); + + /** + * This method is called when new experience is generated. + * + * @param observation The observation prior to taking the action + * @param action The action that has been taken + * @param reward The reward received by taking the action + * @param isTerminal True if the episode ended after taking the action + */ + void handleNewExperience(Observation observation, ACTION action, double reward, boolean isTerminal); + + /** + * This method is called when the episode ends or the maximum number of episode steps is reached. + * + * @param finalObservation The observation after the last action of the episode has been taken. + */ + void handleEpisodeEnd(Observation finalObservation); +} diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/learning/LearningBehavior.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/learning/LearningBehavior.java new file mode 100644 index 000000000..85c7ec4ce --- /dev/null +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/learning/LearningBehavior.java @@ -0,0 +1,59 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ +package org.deeplearning4j.rl4j.agent.learning; + +import lombok.Builder; +import org.deeplearning4j.rl4j.agent.update.IUpdateRule; +import org.deeplearning4j.rl4j.experience.ExperienceHandler; +import org.deeplearning4j.rl4j.observation.Observation; + +/** + * A generic {@link ILearningBehavior} that delegates the handling of experience to a {@link ExperienceHandler} and + * the update logic to a {@link IUpdateRule} + * + * @param The type of the action + * @param The type of experience the ExperienceHandler needs + */ +@Builder +public class LearningBehavior implements ILearningBehavior { + + @Builder.Default + private int experienceUpdateSize = 64; + + private final ExperienceHandler experienceHandler; + private final IUpdateRule updateRule; + + @Override + public void handleEpisodeStart() { + experienceHandler.reset(); + } + + @Override + public void handleNewExperience(Observation observation, ACTION action, double reward, boolean isTerminal) { + experienceHandler.addExperience(observation, action, reward, isTerminal); + if(experienceHandler.isTrainingBatchReady()) { + updateRule.update(experienceHandler.generateTrainingBatch()); + } + } + + @Override + public void handleEpisodeEnd(Observation finalObservation) { + experienceHandler.setFinalObservation(finalObservation); + if(experienceHandler.isTrainingBatchReady()) { + updateRule.update(experienceHandler.generateTrainingBatch()); + } + } +} diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/listener/AgentListener.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/listener/AgentListener.java index 898f89241..f176da144 100644 --- a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/listener/AgentListener.java +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/listener/AgentListener.java @@ -1,23 +1,66 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ package org.deeplearning4j.rl4j.agent.listener; import org.deeplearning4j.rl4j.agent.Agent; import org.deeplearning4j.rl4j.environment.StepResult; import org.deeplearning4j.rl4j.observation.Observation; +/** + * The base definition of all {@link Agent} event listeners + */ public interface AgentListener { enum ListenerResponse { /** - * Tell the learning process to continue calling the listeners and the training. + * Tell the {@link Agent} to continue calling the listeners and the processing. */ CONTINUE, /** - * Tell the learning process to stop calling the listeners and terminate the training. + * Tell the {@link Agent} to interrupt calling the listeners and stop the processing. */ STOP, } + /** + * Called when a new episode is about to start. 
+ * @param agent The agent that generated the event + * + * @return A {@link ListenerResponse}. + */ AgentListener.ListenerResponse onBeforeEpisode(Agent agent); + + /** + * Called when a step is about to be taken. + * + * @param agent The agent that generated the event + * @param observation The observation before the action is taken + * @param action The action that will be performed + * + * @return A {@link ListenerResponse}. + */ AgentListener.ListenerResponse onBeforeStep(Agent agent, Observation observation, ACTION action); + + /** + * Called after a step has been taken. + * + * @param agent The agent that generated the event + * @param stepResult The {@link StepResult} result of the step. + * + * @return A {@link ListenerResponse}. + */ AgentListener.ListenerResponse onAfterStep(Agent agent, StepResult stepResult); } diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/listener/AgentListenerList.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/listener/AgentListenerList.java index e003934d4..48538aeaf 100644 --- a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/listener/AgentListenerList.java +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/listener/AgentListenerList.java @@ -1,3 +1,18 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ package org.deeplearning4j.rl4j.agent.listener; import org.deeplearning4j.rl4j.agent.Agent; @@ -7,6 +22,10 @@ import org.deeplearning4j.rl4j.observation.Observation; import java.util.ArrayList; import java.util.List; +/** + * A class that manages a list of {@link AgentListener AgentListeners} listening to an {@link Agent}. + * @param + */ public class AgentListenerList { protected final List> listeners = new ArrayList<>(); @@ -18,6 +37,13 @@ public class AgentListenerList { listeners.add(listener); } + /** + * This method will notify all listeners that an episode is about to start. If a listener returns + * {@link AgentListener.ListenerResponse STOP}, any following listener is skipped. + * + * @param agent The agent that generated the event. + * @return False if the processing should be stopped + */ public boolean notifyBeforeEpisode(Agent agent) { for (AgentListener listener : listeners) { if (listener.onBeforeEpisode(agent) == AgentListener.ListenerResponse.STOP) { @@ -28,6 +54,13 @@ public class AgentListenerList { return true; } + /** + * + * @param agent The agent that generated the event. 
+ * @param observation The observation before the action is taken + * @param action The action that will be performed + * @return False if the processing should be stopped + */ public boolean notifyBeforeStep(Agent agent, Observation observation, ACTION action) { for (AgentListener listener : listeners) { if (listener.onBeforeStep(agent, observation, action) == AgentListener.ListenerResponse.STOP) { @@ -38,6 +71,12 @@ public class AgentListenerList { return true; } + /** + * + * @param agent The agent that generated the event. + * @param stepResult The {@link StepResult} result of the step. + * @return False if the processing should be stopped + */ public boolean notifyAfterStep(Agent agent, StepResult stepResult) { for (AgentListener listener : listeners) { if (listener.onAfterStep(agent, stepResult) == AgentListener.ListenerResponse.STOP) { diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/update/DQNNeuralNetUpdateRule.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/update/DQNNeuralNetUpdateRule.java new file mode 100644 index 000000000..46123d645 --- /dev/null +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/update/DQNNeuralNetUpdateRule.java @@ -0,0 +1,62 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ +package org.deeplearning4j.rl4j.agent.update; + +import lombok.Getter; +import org.deeplearning4j.rl4j.learning.sync.Transition; +import org.deeplearning4j.rl4j.learning.sync.qlearning.TargetQNetworkSource; +import org.deeplearning4j.rl4j.learning.sync.qlearning.discrete.TDTargetAlgorithm.DoubleDQN; +import org.deeplearning4j.rl4j.learning.sync.qlearning.discrete.TDTargetAlgorithm.ITDTargetAlgorithm; +import org.deeplearning4j.rl4j.learning.sync.qlearning.discrete.TDTargetAlgorithm.StandardDQN; +import org.deeplearning4j.rl4j.network.dqn.IDQN; +import org.nd4j.linalg.dataset.api.DataSet; + +import java.util.List; + +// Temporary class that will be replaced with a more generic class that delegates gradient computation +// and network update to sub components. +public class DQNNeuralNetUpdateRule implements IUpdateRule>, TargetQNetworkSource { + + @Getter + private final IDQN qNetwork; + + @Getter + private IDQN targetQNetwork; + private final int targetUpdateFrequency; + + private final ITDTargetAlgorithm tdTargetAlgorithm; + + @Getter + private int updateCount = 0; + + public DQNNeuralNetUpdateRule(IDQN qNetwork, int targetUpdateFrequency, boolean isDoubleDQN, double gamma, double errorClamp) { + this.qNetwork = qNetwork; + this.targetQNetwork = qNetwork.clone(); + this.targetUpdateFrequency = targetUpdateFrequency; + tdTargetAlgorithm = isDoubleDQN + ? 
new DoubleDQN(this, gamma, errorClamp) + : new StandardDQN(this, gamma, errorClamp); + } + + @Override + public void update(List> trainingBatch) { + DataSet targets = tdTargetAlgorithm.computeTDTargets(trainingBatch); + qNetwork.fit(targets.getFeatures(), targets.getLabels()); + if(++updateCount % targetUpdateFrequency == 0) { + targetQNetwork = qNetwork.clone(); + } + } +} diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/update/Gradients.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/update/Gradients.java new file mode 100644 index 000000000..4307efe1e --- /dev/null +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/update/Gradients.java @@ -0,0 +1,26 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ +package org.deeplearning4j.rl4j.agent.update; + +import lombok.Value; +import org.deeplearning4j.nn.gradient.Gradient; + +// Work in progress +@Value +public class Gradients { + private Gradient[] gradients; // Temporary: we'll need something better than a Gradient[] + private int batchSize; +} diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/update/IUpdateRule.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/update/IUpdateRule.java new file mode 100644 index 000000000..d679cba24 --- /dev/null +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/update/IUpdateRule.java @@ -0,0 +1,37 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ +package org.deeplearning4j.rl4j.agent.update; + +import java.util.List; + +/** + * The role of IUpdateRule implementations is to use an experience batch to improve the accuracy of the policy. + * Used by {@link org.deeplearning4j.rl4j.agent.AgentLearner AgentLearner} + * @param The type of the experience + */ +public interface IUpdateRule { + /** + * Perform the update + * @param trainingBatch A batch of experience + */ + void update(List trainingBatch); + + /** + * @return The total number of times the policy has been updated. In a multi-agent learning context, this total is + * for all the agents. 
+ */ + int getUpdateCount(); +} diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/environment/ActionSchema.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/environment/ActionSchema.java deleted file mode 100644 index f6521e734..000000000 --- a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/environment/ActionSchema.java +++ /dev/null @@ -1,9 +0,0 @@ -package org.deeplearning4j.rl4j.environment; - -import lombok.Value; - -@Value -public class ActionSchema { - private ACTION noOp; - //FIXME ACTION randomAction(); -} diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/environment/Environment.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/environment/Environment.java index 95ff7d2b6..7fa84cc51 100644 --- a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/environment/Environment.java +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/environment/Environment.java @@ -1,11 +1,54 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ package org.deeplearning4j.rl4j.environment; import java.util.Map; +/** + * An interface for environments used by the {@link org.deeplearning4j.rl4j.agent.Agent Agents}. + * @param The type of actions + */ public interface Environment { + + /** + * @return The {@link Schema} of the environment + */ Schema getSchema(); + + /** + * Reset the environment's state to start a new episode. + * @return + */ Map reset(); + + /** + * Perform a single step. + * + * @param action The action taken + * @return A {@link StepResult} describing the result of the step. + */ StepResult step(ACTION action); + + /** + * @return True if the episode is finished + */ boolean isEpisodeFinished(); + + /** + * Called when the agent is finished using this environment instance. + */ void close(); } diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/environment/IActionSchema.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/environment/IActionSchema.java new file mode 100644 index 000000000..9e6e81a7b --- /dev/null +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/environment/IActionSchema.java @@ -0,0 +1,26 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ +package org.deeplearning4j.rl4j.environment; + +import lombok.Value; + +// Work in progress +public interface IActionSchema { + ACTION getNoOp(); + + // Review: A schema should be data-only and not have behavior + ACTION getRandomAction(); +} diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/environment/IntegerActionSchema.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/environment/IntegerActionSchema.java new file mode 100644 index 000000000..cdf172da6 --- /dev/null +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/environment/IntegerActionSchema.java @@ -0,0 +1,47 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ +package org.deeplearning4j.rl4j.environment; + +import org.nd4j.linalg.api.rng.Random; +import org.nd4j.linalg.factory.Nd4j; + +// Work in progress +public class IntegerActionSchema implements IActionSchema { + + private final int numActions; + private final int noOpAction; + private final Random rnd; + + public IntegerActionSchema(int numActions, int noOpAction) { + this(numActions, noOpAction, Nd4j.getRandom()); + } + + public IntegerActionSchema(int numActions, int noOpAction, Random rnd) { + this.numActions = numActions; + this.noOpAction = noOpAction; + this.rnd = rnd; + } + + @Override + public Integer getNoOp() { + return noOpAction; + } + + @Override + public Integer getRandomAction() { + return rnd.nextInt(numActions); + } +} diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/environment/Schema.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/environment/Schema.java index 5ddea24cd..7768c0553 100644 --- a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/environment/Schema.java +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/environment/Schema.java @@ -1,8 +1,24 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ package org.deeplearning4j.rl4j.environment; import lombok.Value; +// Work in progress @Value public class Schema { - private ActionSchema actionSchema; + private IActionSchema actionSchema; } diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/environment/StepResult.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/environment/StepResult.java index b64dd08f5..4936625db 100644 --- a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/environment/StepResult.java +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/environment/StepResult.java @@ -1,3 +1,18 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ package org.deeplearning4j.rl4j.environment; import lombok.Value; diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/experience/ExperienceHandler.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/experience/ExperienceHandler.java index 0017925df..e15c08415 100644 --- a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/experience/ExperienceHandler.java +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/experience/ExperienceHandler.java @@ -41,6 +41,11 @@ public interface ExperienceHandler { */ int getTrainingBatchSize(); + /** + * @return True if a batch is ready for training. + */ + boolean isTrainingBatchReady(); + /** * The elements are returned in the historical order (i.e. in the order they happened) * @return The list of experience elements diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/experience/ReplayMemoryExperienceHandler.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/experience/ReplayMemoryExperienceHandler.java index 74b7e3f05..c7f7d51ae 100644 --- a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/experience/ReplayMemoryExperienceHandler.java +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/experience/ReplayMemoryExperienceHandler.java @@ -36,6 +36,7 @@ import java.util.List; public class ReplayMemoryExperienceHandler implements ExperienceHandler> { private static final int DEFAULT_MAX_REPLAY_MEMORY_SIZE = 150000; private static final int DEFAULT_BATCH_SIZE = 32; + private final int batchSize; private IExpReplay expReplay; @@ -43,6 +44,7 @@ public class ReplayMemoryExperienceHandler implements ExperienceHandler expReplay) { this.expReplay = expReplay; + this.batchSize = expReplay.getDesignatedBatchSize(); } public ReplayMemoryExperienceHandler(int maxReplayMemorySize, int batchSize, Random random) { @@ -64,6 +66,11 @@ public class ReplayMemoryExperienceHandler implements ExperienceHandler= batchSize; + } + /** * @return A batch of experience selected from the replay memory. The replay memory is unchanged after the call. 
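     * <p>Callers are expected to check {@link #isTrainingBatchReady()} before
     * generating a batch. A usage sketch (the {@code handler} and
     * {@code updateRule} names are illustrative, mirroring how LearningBehavior
     * wires the two together elsewhere in this patch):</p>
     * <pre>{@code
     * if (handler.isTrainingBatchReady()) {
     *     updateRule.update(handler.generateTrainingBatch());
     * }
     * }</pre>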
*/ diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/experience/StateActionExperienceHandler.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/experience/StateActionExperienceHandler.java index 4c6b95c89..a8fae47bc 100644 --- a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/experience/StateActionExperienceHandler.java +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/experience/StateActionExperienceHandler.java @@ -30,10 +30,18 @@ import java.util.List; */ public class StateActionExperienceHandler implements ExperienceHandler> { + private final int batchSize; + + private boolean isFinalObservationSet; + + public StateActionExperienceHandler(int batchSize) { + this.batchSize = batchSize; + } + private List> stateActionPairs = new ArrayList<>(); public void setFinalObservation(Observation observation) { - // Do nothing + isFinalObservationSet = true; } public void addExperience(Observation observation, A action, double reward, boolean isTerminal) { @@ -45,6 +53,12 @@ public class StateActionExperienceHandler implements ExperienceHandler= batchSize + || (isFinalObservationSet && stateActionPairs.size() > 0); + } + /** * The elements are returned in the historical order (i.e. in the order they happened) * Note: the experience store is cleared after calling this method. @@ -62,6 +76,7 @@ public class StateActionExperienceHandler implements ExperienceHandler(); + isFinalObservationSet = false; } } diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/helper/INDArrayHelper.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/helper/INDArrayHelper.java index b42a7c503..9c35ed6f4 100644 --- a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/helper/INDArrayHelper.java +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/helper/INDArrayHelper.java @@ -24,17 +24,38 @@ import org.nd4j.linalg.factory.Nd4j; * @author Alexandre Boulanger */ public class INDArrayHelper { - /** - * MultiLayerNetwork and ComputationGraph expects input data to be in NCHW in the case of pixels and NS in case of other data types. - * - * We must have either shape 2 (NK) or shape 4 (NCHW) + * Force the input source to have the correct shape: + *
+ * <ul>
+ *     <li>DL4J requires it to be at least 2D</li>
+ *     <li>RL4J has a convention to have the batch size on dimension 0 for all INDArrays</li>
+ * </ul>
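+ * <p>For example, a rank-1 observation of shape { 4 } becomes { 1, 4 }, an
+ * input that already has shape { 1, 4 } is returned as-is, and a { 2, 4 }
+ * input (batch-size dimension missing) becomes { 1, 2, 4 }.</p>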
+ * @param source The {@link INDArray} to be corrected. + * @return The corrected INDArray */ public static INDArray forceCorrectShape(INDArray source) { - return source.shape()[0] == 1 && source.shape().length > 1 + return source.shape()[0] == 1 && source.rank() > 1 ? source : Nd4j.expandDims(source, 0); } + + /** + * This will create a INDArray with batchSize as dimension 0 and shape as other dimensions. + * For example, if batchSize is 10 and shape is { 1, 3, 4 }, the resulting INDArray shape will be { 10, 3, 4} + * @param batchSize The size of the batch to create + * @param shape The shape of individual elements. + * Note: all shapes in RL4J should have a batch size as dimension 0; in this case the batch size should be 1. + * @return A INDArray + */ + public static INDArray createBatchForShape(long batchSize, long... shape) { + long[] batchShape; + + batchShape = new long[shape.length]; + System.arraycopy(shape, 0, batchShape, 0, shape.length); + + batchShape[0] = batchSize; + return Nd4j.create(batchShape); + } } diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/learning/async/AsyncThreadDiscrete.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/learning/async/AsyncThreadDiscrete.java index c32be6906..bf8838424 100644 --- a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/learning/async/AsyncThreadDiscrete.java +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/learning/async/AsyncThreadDiscrete.java @@ -25,6 +25,7 @@ import org.deeplearning4j.gym.StepReply; import org.deeplearning4j.rl4j.experience.ExperienceHandler; import org.deeplearning4j.rl4j.experience.StateActionExperienceHandler; import org.deeplearning4j.rl4j.learning.IHistoryProcessor; +import org.deeplearning4j.rl4j.learning.configuration.IAsyncLearningConfiguration; import org.deeplearning4j.rl4j.learning.listener.TrainingListenerList; import org.deeplearning4j.rl4j.mdp.MDP; import org.deeplearning4j.rl4j.network.NeuralNet; @@ -49,7 +50,7 @@ public abstract class AsyncThreadDiscrete asyncGlobal, MDP mdp, @@ -60,6 +61,17 @@ public abstract class AsyncThreadDiscrete ex @Override protected UpdateAlgorithm buildUpdateAlgorithm() { - int[] shape = getHistoryProcessor() == null ? 
getMdp().getObservationSpace().getShape() : getHistoryProcessor().getConf().getShape(); - return new QLearningUpdateAlgorithm(shape, getMdp().getActionSpace().getSize(), configuration.getGamma()); + return new QLearningUpdateAlgorithm(getMdp().getActionSpace().getSize(), configuration.getGamma()); } } diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/learning/async/nstep/discrete/QLearningUpdateAlgorithm.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/learning/async/nstep/discrete/QLearningUpdateAlgorithm.java index 79c9666a2..f935240dc 100644 --- a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/learning/async/nstep/discrete/QLearningUpdateAlgorithm.java +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/learning/async/nstep/discrete/QLearningUpdateAlgorithm.java @@ -17,7 +17,7 @@ package org.deeplearning4j.rl4j.learning.async.nstep.discrete; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.rl4j.experience.StateActionPair; -import org.deeplearning4j.rl4j.learning.Learning; +import org.deeplearning4j.rl4j.helper.INDArrayHelper; import org.deeplearning4j.rl4j.learning.async.UpdateAlgorithm; import org.deeplearning4j.rl4j.network.dqn.IDQN; import org.nd4j.linalg.api.ndarray.INDArray; @@ -27,15 +27,12 @@ import java.util.List; public class QLearningUpdateAlgorithm implements UpdateAlgorithm { - private final int[] shape; private final int actionSpaceSize; private final double gamma; - public QLearningUpdateAlgorithm(int[] shape, - int actionSpaceSize, + public QLearningUpdateAlgorithm(int actionSpaceSize, double gamma) { - this.shape = shape; this.actionSpaceSize = actionSpaceSize; this.gamma = gamma; } @@ -44,33 +41,34 @@ public class QLearningUpdateAlgorithm implements UpdateAlgorithm { public Gradient[] computeGradients(IDQN current, List> experience) { int size = experience.size(); - int[] nshape = Learning.makeShape(size, shape); - INDArray input = Nd4j.create(nshape); - INDArray targets = Nd4j.create(size, actionSpaceSize); - StateActionPair stateActionPair = experience.get(size - 1); + INDArray data = stateActionPair.getObservation().getData(); + INDArray features = INDArrayHelper.createBatchForShape(size, data.shape()); + INDArray targets = Nd4j.create(size, actionSpaceSize); + double r; if (stateActionPair.isTerminal()) { r = 0; } else { INDArray[] output = null; - output = current.outputAll(stateActionPair.getObservation().getData()); + output = current.outputAll(data); r = Nd4j.max(output[0]).getDouble(0); } for (int i = size - 1; i >= 0; i--) { stateActionPair = experience.get(i); + data = stateActionPair.getObservation().getData(); - input.putRow(i, stateActionPair.getObservation().getData()); + features.putRow(i, data); r = stateActionPair.getReward() + gamma * r; - INDArray[] output = current.outputAll(stateActionPair.getObservation().getData()); + INDArray[] output = current.outputAll(data); INDArray row = output[0]; row = row.putScalar(stateActionPair.getAction(), r); targets.putRow(i, row); } - return current.gradient(input, targets); + return current.gradient(features, targets); } } diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/learning/sync/ExpReplay.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/learning/sync/ExpReplay.java index 93b4d1bb5..7bfcad53d 100644 --- a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/learning/sync/ExpReplay.java +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/learning/sync/ExpReplay.java @@ -80,6 +80,11 @@ public class ExpReplay
implements IExpReplay { //log.info("size: "+storage.size()); } + @Override + public int getDesignatedBatchSize() { + return batchSize; + } + public int getBatchSize() { int storageSize = storage.size(); return Math.min(storageSize, batchSize); diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/learning/sync/IExpReplay.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/learning/sync/IExpReplay.java index eaef5f0f8..8b2133806 100644 --- a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/learning/sync/IExpReplay.java +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/learning/sync/IExpReplay.java @@ -47,4 +47,9 @@ public interface IExpReplay { * @param transition a new transition to store */ void store(Transition transition); + + /** + * @return The desired size of batches + */ + int getDesignatedBatchSize(); } diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/learning/sync/qlearning/QLearning.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/learning/sync/qlearning/QLearning.java index b2e06dc9c..d9c955e17 100644 --- a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/learning/sync/qlearning/QLearning.java +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/learning/sync/qlearning/QLearning.java @@ -51,25 +51,16 @@ import java.util.List; @Slf4j public abstract class QLearning> extends SyncLearning - implements TargetQNetworkSource, IEpochTrainer { + implements IEpochTrainer { protected abstract LegacyMDPWrapper getLegacyMDPWrapper(); - protected abstract EpsGreedy getEgPolicy(); + protected abstract EpsGreedy getEgPolicy(); public abstract MDP getMdp(); public abstract IDQN getQNetwork(); - public abstract IDQN getTargetQNetwork(); - - protected abstract void setTargetQNetwork(IDQN dqn); - - protected void updateTargetNetwork() { - log.info("Update target network"); - setTargetQNetwork(getQNetwork().clone()); - } - public IDQN getNeuralNet() { return getQNetwork(); } @@ -101,11 +92,6 @@ public abstract class QLearning scores = new ArrayList<>(); while (currentEpisodeStepCount < getConfiguration().getMaxEpochStep() && !getMdp().isDone()) { - - if (this.getStepCount() % getConfiguration().getTargetDqnUpdateFreq() == 0) { - updateTargetNetwork(); - } - QLStepReturn stepR = trainStep(obs); if (!stepR.getMaxQ().isNaN()) { @@ -146,7 +132,6 @@ public abstract class QLearning refacInitMdp() { diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/learning/sync/qlearning/discrete/QLearningDiscrete.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/learning/sync/qlearning/discrete/QLearningDiscrete.java index 771650340..4e357584d 100644 --- a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/learning/sync/qlearning/discrete/QLearningDiscrete.java +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/learning/sync/qlearning/discrete/QLearningDiscrete.java @@ -21,6 +21,10 @@ import lombok.AccessLevel; import lombok.Getter; import lombok.Setter; import org.deeplearning4j.gym.StepReply; +import org.deeplearning4j.rl4j.agent.learning.ILearningBehavior; +import org.deeplearning4j.rl4j.agent.learning.LearningBehavior; +import org.deeplearning4j.rl4j.agent.update.DQNNeuralNetUpdateRule; +import org.deeplearning4j.rl4j.agent.update.IUpdateRule; import org.deeplearning4j.rl4j.experience.ExperienceHandler; import org.deeplearning4j.rl4j.experience.ReplayMemoryExperienceHandler; import org.deeplearning4j.rl4j.learning.IHistoryProcessor; @@ -28,9 +32,6 @@ import org.deeplearning4j.rl4j.learning.Learning; import 
org.deeplearning4j.rl4j.learning.configuration.QLearningConfiguration; import org.deeplearning4j.rl4j.learning.sync.Transition; import org.deeplearning4j.rl4j.learning.sync.qlearning.QLearning; -import org.deeplearning4j.rl4j.learning.sync.qlearning.discrete.TDTargetAlgorithm.DoubleDQN; -import org.deeplearning4j.rl4j.learning.sync.qlearning.discrete.TDTargetAlgorithm.ITDTargetAlgorithm; -import org.deeplearning4j.rl4j.learning.sync.qlearning.discrete.TDTargetAlgorithm.StandardDQN; import org.deeplearning4j.rl4j.mdp.MDP; import org.deeplearning4j.rl4j.network.dqn.IDQN; import org.deeplearning4j.rl4j.space.Encodable; @@ -41,12 +42,8 @@ import org.deeplearning4j.rl4j.space.DiscreteSpace; import org.deeplearning4j.rl4j.util.LegacyMDPWrapper; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.rng.Random; -import org.nd4j.linalg.dataset.api.DataSet; import org.nd4j.linalg.factory.Nd4j; -import java.util.List; - - /** * @author rubenfiszel (ruben.fiszel@epfl.ch) 7/18/16. @@ -63,22 +60,15 @@ public abstract class QLearningDiscrete extends QLearning policy; @Getter - private EpsGreedy egPolicy; + private EpsGreedy egPolicy; @Getter final private IDQN qNetwork; - @Getter - @Setter(AccessLevel.PROTECTED) - private IDQN targetQNetwork; private int lastAction; private double accuReward = 0; - ITDTargetAlgorithm tdTargetAlgorithm; - - // TODO: User a builder and remove the setter - @Getter(AccessLevel.PROTECTED) @Setter - private ExperienceHandler> experienceHandler; + private final ILearningBehavior learningBehavior; protected LegacyMDPWrapper getLegacyMDPWrapper() { return mdp; @@ -88,21 +78,31 @@ public abstract class QLearningDiscrete extends QLearning mdp, IDQN dqn, QLearningConfiguration conf, int epsilonNbStep, Random random) { + this(mdp, dqn, conf, epsilonNbStep, buildLearningBehavior(dqn, conf, random), random); + } + public QLearningDiscrete(MDP mdp, IDQN dqn, QLearningConfiguration conf, - int epsilonNbStep, Random random) { + int epsilonNbStep, ILearningBehavior learningBehavior, Random random) { this.configuration = conf; this.mdp = new LegacyMDPWrapper<>(mdp, null); qNetwork = dqn; - targetQNetwork = dqn.clone(); policy = new DQNPolicy(getQNetwork()); egPolicy = new EpsGreedy(policy, mdp, conf.getUpdateStart(), epsilonNbStep, random, conf.getMinEpsilon(), this); - tdTargetAlgorithm = conf.isDoubleDQN() - ? new DoubleDQN(this, conf.getGamma(), conf.getErrorClamp()) - : new StandardDQN(this, conf.getGamma(), conf.getErrorClamp()); + this.learningBehavior = learningBehavior; + } + + private static ILearningBehavior buildLearningBehavior(IDQN qNetwork, QLearningConfiguration conf, Random random) { + IUpdateRule> updateRule = new DQNNeuralNetUpdateRule(qNetwork, conf.getTargetDqnUpdateFreq(), conf.isDoubleDQN(), conf.getGamma(), conf.getErrorClamp()); + ExperienceHandler> experienceHandler = new ReplayMemoryExperienceHandler(conf.getExpRepMaxSize(), conf.getBatchSize(), random); + return LearningBehavior.>builder() + .experienceHandler(experienceHandler) + .updateRule(updateRule) + .experienceUpdateSize(conf.getBatchSize()) + .build(); - experienceHandler = new ReplayMemoryExperienceHandler(conf.getExpRepMaxSize(), conf.getBatchSize(), random); } public MDP getMdp() { @@ -119,7 +119,7 @@ public abstract class QLearningDiscrete extends QLearning extends QLearning trainStep(Observation obs) { - boolean isHistoryProcessor = getHistoryProcessor() != null; - int skipFrame = isHistoryProcessor ? 
getHistoryProcessor().getConf().getSkipFrame() : 1; - int historyLength = isHistoryProcessor ? getHistoryProcessor().getConf().getHistoryLength() : 1; - int updateStart = this.getConfiguration().getUpdateStart() - + ((this.getConfiguration().getBatchSize() + historyLength) * skipFrame); - Double maxQ = Double.NaN; //ignore if Nan for stats //if step of training, just repeat lastAction @@ -160,29 +154,15 @@ public abstract class QLearningDiscrete extends QLearning updateStart) { - DataSet targets = setTarget(experienceHandler.generateTrainingBatch()); - getQNetwork().fit(targets.getFeatures(), targets.getLabels()); - } } return new QLStepReturn<>(maxQ, getQNetwork().getLatestScore(), stepReply); } - protected DataSet setTarget(List> transitions) { - if (transitions.size() == 0) - throw new IllegalArgumentException("too few transitions"); - - return tdTargetAlgorithm.computeTDTargets(transitions); - } - @Override protected void finishEpoch(Observation observation) { - experienceHandler.setFinalObservation(observation); + learningBehavior.handleEpisodeEnd(observation); } } diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/mdp/CartpoleEnvironment.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/mdp/CartpoleEnvironment.java index 1e1348b4a..86907017b 100644 --- a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/mdp/CartpoleEnvironment.java +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/mdp/CartpoleEnvironment.java @@ -2,21 +2,19 @@ package org.deeplearning4j.rl4j.mdp; import lombok.Getter; import lombok.Setter; -import org.deeplearning4j.rl4j.environment.ActionSchema; -import org.deeplearning4j.rl4j.environment.Environment; -import org.deeplearning4j.rl4j.environment.Schema; -import org.deeplearning4j.rl4j.environment.StepResult; +import org.deeplearning4j.rl4j.environment.*; +import org.nd4j.linalg.api.rng.Random; +import org.nd4j.linalg.factory.Nd4j; import java.util.HashMap; import java.util.Map; -import java.util.Random; public class CartpoleEnvironment implements Environment { private static final int NUM_ACTIONS = 2; private static final int ACTION_LEFT = 0; private static final int ACTION_RIGHT = 1; - private static final Schema schema = new Schema<>(new ActionSchema<>(ACTION_LEFT)); + private final Schema schema; public enum KinematicsIntegrators { Euler, SemiImplicitEuler }; @@ -48,11 +46,12 @@ public class CartpoleEnvironment implements Environment { private Integer stepsBeyondDone; public CartpoleEnvironment() { - rnd = new Random(); + this(Nd4j.getRandom()); } - public CartpoleEnvironment(int seed) { - rnd = new Random(seed); + public CartpoleEnvironment(Random rnd) { + this.rnd = rnd; + this.schema = new Schema(new IntegerActionSchema(NUM_ACTIONS, ACTION_LEFT, rnd)); } @Override diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/policy/EpsGreedy.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/policy/EpsGreedy.java index a7282f139..f7422be92 100644 --- a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/policy/EpsGreedy.java +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/policy/EpsGreedy.java @@ -17,16 +17,19 @@ package org.deeplearning4j.rl4j.policy; -import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.NonNull; import lombok.extern.slf4j.Slf4j; +import org.deeplearning4j.rl4j.environment.IActionSchema; import org.deeplearning4j.rl4j.learning.IEpochTrainer; import org.deeplearning4j.rl4j.mdp.MDP; import org.deeplearning4j.rl4j.network.NeuralNet; -import 
org.deeplearning4j.rl4j.space.Encodable; import org.deeplearning4j.rl4j.observation.Observation; import org.deeplearning4j.rl4j.space.ActionSpace; +import org.deeplearning4j.rl4j.space.Encodable; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.rng.Random; +import org.nd4j.linalg.factory.Nd4j; /** * @author rubenfiszel (ruben.fiszel@epfl.ch) 7/24/16. @@ -38,18 +41,60 @@ import org.nd4j.linalg.api.rng.Random; * epislon is annealed to minEpsilon over epsilonNbStep steps * */ -@AllArgsConstructor @Slf4j -public class EpsGreedy> extends Policy { +public class EpsGreedy extends Policy { - final private Policy policy; - final private MDP mdp; + final private INeuralNetPolicy policy; final private int updateStart; final private int epsilonNbStep; final private Random rnd; final private double minEpsilon; + + private final IActionSchema actionSchema; + + final private MDP> mdp; final private IEpochTrainer learning; + // Using agent's (learning's) step count is incorrect; frame skipping makes epsilon's value decrease too quickly + private int annealingStep = 0; + + @Deprecated + public > EpsGreedy(Policy policy, + MDP> mdp, + int updateStart, + int epsilonNbStep, + Random rnd, + double minEpsilon, + IEpochTrainer learning) { + this.policy = policy; + this.mdp = mdp; + this.updateStart = updateStart; + this.epsilonNbStep = epsilonNbStep; + this.rnd = rnd; + this.minEpsilon = minEpsilon; + this.learning = learning; + + this.actionSchema = null; + } + + public EpsGreedy(@NonNull Policy policy, @NonNull IActionSchema actionSchema, double minEpsilon, int updateStart, int epsilonNbStep) { + this(policy, actionSchema, minEpsilon, updateStart, epsilonNbStep, null); + } + + @Builder + public EpsGreedy(@NonNull INeuralNetPolicy policy, @NonNull IActionSchema actionSchema, double minEpsilon, int updateStart, int epsilonNbStep, Random rnd) { + this.policy = policy; + + this.rnd = rnd == null ? Nd4j.getRandom() : rnd; + this.minEpsilon = minEpsilon; + this.updateStart = updateStart; + this.epsilonNbStep = epsilonNbStep; + this.actionSchema = actionSchema; + + this.mdp = null; + this.learning = null; + } + public NeuralNet getNeuralNet() { return policy.getNeuralNet(); } @@ -57,6 +102,11 @@ public class EpsGreedy ep) @@ -66,10 +116,31 @@ public class EpsGreedy ep) { + result = policy.nextAction(observation); + } + else { + result = actionSchema.getRandomAction(); + } + + ++annealingStep; + + return result; } public double getEpsilon() { - return Math.min(1.0, Math.max(minEpsilon, 1.0 - (learning.getStepCount() - updateStart) * 1.0 / epsilonNbStep)); + int step = actionSchema != null ? 
annealingStep : learning.getStepCount(); + return Math.min(1.0, Math.max(minEpsilon, 1.0 - (step - updateStart) * 1.0 / epsilonNbStep)); } } diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/policy/INeuralNetPolicy.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/policy/INeuralNetPolicy.java new file mode 100644 index 000000000..c213396c6 --- /dev/null +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/policy/INeuralNetPolicy.java @@ -0,0 +1,7 @@ +package org.deeplearning4j.rl4j.policy; + +import org.deeplearning4j.rl4j.network.NeuralNet; + +public interface INeuralNetPolicy extends IPolicy { + NeuralNet getNeuralNet(); +} diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/policy/Policy.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/policy/Policy.java index 6a4146c94..cf369e359 100644 --- a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/policy/Policy.java +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/policy/Policy.java @@ -34,7 +34,7 @@ import org.deeplearning4j.rl4j.util.LegacyMDPWrapper; * * A Policy responsability is to choose the next action given a state */ -public abstract class Policy implements IPolicy { +public abstract class Policy implements INeuralNetPolicy { public abstract NeuralNet getNeuralNet(); diff --git a/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/agent/AgentLearnerTest.java b/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/agent/AgentLearnerTest.java new file mode 100644 index 000000000..e0c0685bf --- /dev/null +++ b/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/agent/AgentLearnerTest.java @@ -0,0 +1,211 @@ +package org.deeplearning4j.rl4j.agent; + +import org.deeplearning4j.rl4j.agent.learning.LearningBehavior; +import org.deeplearning4j.rl4j.environment.Environment; +import org.deeplearning4j.rl4j.environment.IntegerActionSchema; +import org.deeplearning4j.rl4j.environment.Schema; +import org.deeplearning4j.rl4j.environment.StepResult; +import org.deeplearning4j.rl4j.observation.Observation; +import org.deeplearning4j.rl4j.observation.transform.TransformProcess; +import org.deeplearning4j.rl4j.policy.IPolicy; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.ArgumentCaptor; +import org.mockito.Mock; +import org.mockito.invocation.InvocationOnMock; +import org.mockito.junit.MockitoJUnitRunner; +import org.mockito.stubbing.Answer; +import org.nd4j.linalg.factory.Nd4j; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static org.mockito.ArgumentMatchers.*; +import static org.mockito.Mockito.*; +import static org.junit.Assert.*; + +@RunWith(MockitoJUnitRunner.class) +public class AgentLearnerTest { + + @Mock + Environment environmentMock; + + @Mock + TransformProcess transformProcessMock; + + @Mock + IPolicy policyMock; + + @Mock + LearningBehavior learningBehaviorMock; + + @Test + public void when_episodeIsStarted_expect_learningBehaviorHandleEpisodeStartCalled() { + // Arrange + AgentLearner sut = AgentLearner.builder(environmentMock, transformProcessMock, policyMock, learningBehaviorMock) + .maxEpisodeSteps(3) + .build(); + + Schema schema = new Schema(new IntegerActionSchema(0, -1)); + when(environmentMock.reset()).thenReturn(new HashMap<>()); + when(environmentMock.getSchema()).thenReturn(schema); + StepResult stepResult = new StepResult(new HashMap<>(), 234.0, false); + when(environmentMock.step(any(Integer.class))).thenReturn(stepResult); + + when(transformProcessMock.transform(any(Map.class), 
anyInt(), anyBoolean())).thenReturn(new Observation(Nd4j.create(new double[] { 123.0 }))); + + when(policyMock.nextAction(any(Observation.class))).thenReturn(123); + + // Act + sut.run(); + + // Assert + verify(learningBehaviorMock, times(1)).handleEpisodeStart(); + } + + @Test + public void when_runIsCalled_expect_experienceHandledWithLearningBehavior() { + // Arrange + AgentLearner sut = AgentLearner.builder(environmentMock, transformProcessMock, policyMock, learningBehaviorMock) + .maxEpisodeSteps(4) + .build(); + + Schema schema = new Schema(new IntegerActionSchema(0, -1)); + when(environmentMock.getSchema()).thenReturn(schema); + when(environmentMock.reset()).thenReturn(new HashMap<>()); + + double[] reward = new double[] { 0.0 }; + when(environmentMock.step(any(Integer.class))) + .thenAnswer(a -> new StepResult(new HashMap<>(), ++reward[0], reward[0] == 4.0)); + + when(environmentMock.isEpisodeFinished()).thenAnswer(x -> reward[0] == 4.0); + + when(transformProcessMock.transform(any(Map.class), anyInt(), anyBoolean())) + .thenAnswer(new Answer() { + public Observation answer(InvocationOnMock invocation) throws Throwable { + int step = (int)invocation.getArgument(1); + boolean isTerminal = (boolean)invocation.getArgument(2); + return (step % 2 == 0 || isTerminal) + ? new Observation(Nd4j.create(new double[] { step * 1.1 })) + : Observation.SkippedObservation; + } + }); + + when(policyMock.nextAction(any(Observation.class))).thenAnswer(x -> (int)reward[0]); + + // Act + sut.run(); + + // Assert + ArgumentCaptor observationCaptor = ArgumentCaptor.forClass(Observation.class); + ArgumentCaptor actionCaptor = ArgumentCaptor.forClass(Integer.class); + ArgumentCaptor rewardCaptor = ArgumentCaptor.forClass(Double.class); + ArgumentCaptor isTerminalCaptor = ArgumentCaptor.forClass(Boolean.class); + + verify(learningBehaviorMock, times(2)).handleNewExperience(observationCaptor.capture(), actionCaptor.capture(), rewardCaptor.capture(), isTerminalCaptor.capture()); + List observations = observationCaptor.getAllValues(); + List actions = actionCaptor.getAllValues(); + List rewards = rewardCaptor.getAllValues(); + List isTerminalList = isTerminalCaptor.getAllValues(); + + assertEquals(0.0, observations.get(0).getData().getDouble(0), 0.00001); + assertEquals(0, (int)actions.get(0)); + assertEquals(0.0 + 1.0, rewards.get(0), 0.00001); + assertFalse(isTerminalList.get(0)); + + assertEquals(2.2, observations.get(1).getData().getDouble(0), 0.00001); + assertEquals(2, (int)actions.get(1)); + assertEquals(2.0 + 3.0, rewards.get(1), 0.00001); + assertFalse(isTerminalList.get(1)); + + ArgumentCaptor finalObservationCaptor = ArgumentCaptor.forClass(Observation.class); + verify(learningBehaviorMock, times(1)).handleEpisodeEnd(finalObservationCaptor.capture()); + assertEquals(4.4, finalObservationCaptor.getValue().getData().getDouble(0), 0.00001); + } + + @Test + public void when_runIsCalledMultipleTimes_expect_totalStepCountCorrect() { + // Arrange + AgentLearner sut = AgentLearner.builder(environmentMock, transformProcessMock, policyMock, learningBehaviorMock) + .maxEpisodeSteps(4) + .build(); + + Schema schema = new Schema(new IntegerActionSchema(0, -1)); + when(environmentMock.getSchema()).thenReturn(schema); + when(environmentMock.reset()).thenReturn(new HashMap<>()); + + double[] reward = new double[] { 0.0 }; + when(environmentMock.step(any(Integer.class))) + .thenAnswer(a -> new StepResult(new HashMap<>(), ++reward[0], reward[0] == 4.0)); + + when(environmentMock.isEpisodeFinished()).thenAnswer(x -> 
reward[0] == 4.0); + + when(transformProcessMock.transform(any(Map.class), anyInt(), anyBoolean())) + .thenAnswer(new Answer() { + public Observation answer(InvocationOnMock invocation) throws Throwable { + int step = (int)invocation.getArgument(1); + boolean isTerminal = (boolean)invocation.getArgument(2); + return (step % 2 == 0 || isTerminal) + ? new Observation(Nd4j.create(new double[] { step * 1.1 })) + : Observation.SkippedObservation; + } + }); + + when(policyMock.nextAction(any(Observation.class))).thenAnswer(x -> (int)reward[0]); + + // Act + sut.run(); + reward[0] = 0.0; + sut.run(); + + // Assert + assertEquals(8, sut.getTotalStepCount()); + } + + @Test + public void when_runIsCalledMultipleTimes_expect_rewardSentToLearningBehaviorToBeCorrect() { + // Arrange + AgentLearner sut = AgentLearner.builder(environmentMock, transformProcessMock, policyMock, learningBehaviorMock) + .maxEpisodeSteps(4) + .build(); + + Schema schema = new Schema(new IntegerActionSchema(0, -1)); + when(environmentMock.getSchema()).thenReturn(schema); + when(environmentMock.reset()).thenReturn(new HashMap<>()); + + double[] reward = new double[] { 0.0 }; + when(environmentMock.step(any(Integer.class))) + .thenAnswer(a -> new StepResult(new HashMap<>(), ++reward[0], reward[0] == 4.0)); + + when(environmentMock.isEpisodeFinished()).thenAnswer(x -> reward[0] == 4.0); + + when(transformProcessMock.transform(any(Map.class), anyInt(), anyBoolean())) + .thenAnswer(new Answer() { + public Observation answer(InvocationOnMock invocation) throws Throwable { + int step = (int)invocation.getArgument(1); + boolean isTerminal = (boolean)invocation.getArgument(2); + return (step % 2 == 0 || isTerminal) + ? new Observation(Nd4j.create(new double[] { step * 1.1 })) + : Observation.SkippedObservation; + } + }); + + when(policyMock.nextAction(any(Observation.class))).thenAnswer(x -> (int)reward[0]); + + // Act + sut.run(); + reward[0] = 0.0; + sut.run(); + + // Assert + ArgumentCaptor rewardCaptor = ArgumentCaptor.forClass(Double.class); + + verify(learningBehaviorMock, times(4)).handleNewExperience(any(Observation.class), any(Integer.class), rewardCaptor.capture(), any(Boolean.class)); + List rewards = rewardCaptor.getAllValues(); + + // rewardAtLastExperience at the end of 1st call to .run() should not leak into 2nd call. 
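+        // Four rewards are captured in total (two per run); indexes 2 and 3 belong
+        // to the second run and must repeat the first run's values instead of
+        // accumulating across runs.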
+ assertEquals(0.0 + 1.0, rewards.get(2), 0.00001); + assertEquals(2.0 + 3.0, rewards.get(3), 0.00001); + } +} \ No newline at end of file diff --git a/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/agent/AgentTest.java b/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/agent/AgentTest.java index a8beae640..0022e61f0 100644 --- a/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/agent/AgentTest.java +++ b/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/agent/AgentTest.java @@ -1,10 +1,7 @@ package org.deeplearning4j.rl4j.agent; import org.deeplearning4j.rl4j.agent.listener.AgentListener; -import org.deeplearning4j.rl4j.environment.ActionSchema; -import org.deeplearning4j.rl4j.environment.Environment; -import org.deeplearning4j.rl4j.environment.Schema; -import org.deeplearning4j.rl4j.environment.StepResult; +import org.deeplearning4j.rl4j.environment.*; import org.deeplearning4j.rl4j.observation.Observation; import org.deeplearning4j.rl4j.observation.transform.TransformProcess; import org.deeplearning4j.rl4j.policy.IPolicy; @@ -12,6 +9,7 @@ import org.junit.Rule; import org.junit.Test; import static org.junit.Assert.*; +import org.junit.runner.RunWith; import org.mockito.*; import org.mockito.junit.*; import org.nd4j.linalg.factory.Nd4j; @@ -23,8 +21,8 @@ import java.util.Map; import static org.mockito.ArgumentMatchers.*; import static org.mockito.Mockito.*; +@RunWith(MockitoJUnitRunner.class) public class AgentTest { - @Mock Environment environmentMock; @Mock TransformProcess transformProcessMock; @Mock IPolicy policyMock; @@ -102,7 +100,7 @@ public class AgentTest { public void when_runIsCalled_expect_agentIsReset() { // Arrange Map envResetResult = new HashMap<>(); - Schema schema = new Schema(new ActionSchema<>(-1)); + Schema schema = new Schema(new IntegerActionSchema(0, -1)); when(environmentMock.reset()).thenReturn(envResetResult); when(environmentMock.getSchema()).thenReturn(schema); @@ -119,7 +117,7 @@ public class AgentTest { sut.run(); // Assert - assertEquals(0, sut.getEpisodeStepNumber()); + assertEquals(0, sut.getEpisodeStepCount()); verify(transformProcessMock).transform(envResetResult, 0, false); verify(policyMock, times(1)).reset(); assertEquals(0.0, sut.getReward(), 0.00001); @@ -130,7 +128,7 @@ public class AgentTest { public void when_runIsCalled_expect_onBeforeAndAfterEpisodeCalled() { // Arrange Map envResetResult = new HashMap<>(); - Schema schema = new Schema(new ActionSchema<>(-1)); + Schema schema = new Schema(new IntegerActionSchema(0, -1)); when(environmentMock.reset()).thenReturn(envResetResult); when(environmentMock.getSchema()).thenReturn(schema); @@ -152,7 +150,7 @@ public class AgentTest { public void when_onBeforeEpisodeReturnsStop_expect_performStepAndOnAfterEpisodeNotCalled() { // Arrange Map envResetResult = new HashMap<>(); - Schema schema = new Schema(new ActionSchema<>(-1)); + Schema schema = new Schema(new IntegerActionSchema(0, -1)); when(environmentMock.reset()).thenReturn(envResetResult); when(environmentMock.getSchema()).thenReturn(schema); @@ -179,7 +177,7 @@ public class AgentTest { public void when_runIsCalledWithoutMaxStep_expect_agentRunUntilEpisodeIsFinished() { // Arrange Map envResetResult = new HashMap<>(); - Schema schema = new Schema(new ActionSchema<>(-1)); + Schema schema = new Schema(new IntegerActionSchema(0, -1)); when(environmentMock.reset()).thenReturn(envResetResult); when(environmentMock.getSchema()).thenReturn(schema); @@ -191,10 +189,10 @@ public class AgentTest { final Agent spy = Mockito.spy(sut); 
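        // Spying on the agent lets the test stub performStep() below while the
        // real run() loop still advances the episode-step counter.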
doAnswer(invocation -> { - ((Agent)invocation.getMock()).incrementEpisodeStepNumber(); + ((Agent)invocation.getMock()).incrementEpisodeStepCount(); return null; }).when(spy).performStep(); - when(environmentMock.isEpisodeFinished()).thenAnswer(invocation -> spy.getEpisodeStepNumber() >= 5 ); + when(environmentMock.isEpisodeFinished()).thenAnswer(invocation -> spy.getEpisodeStepCount() >= 5 ); // Act spy.run(); @@ -209,7 +207,7 @@ public class AgentTest { public void when_maxStepsIsReachedBeforeEposideEnds_expect_runTerminated() { // Arrange Map envResetResult = new HashMap<>(); - Schema schema = new Schema(new ActionSchema<>(-1)); + Schema schema = new Schema(new IntegerActionSchema(0, -1)); when(environmentMock.reset()).thenReturn(envResetResult); when(environmentMock.getSchema()).thenReturn(schema); @@ -222,7 +220,7 @@ public class AgentTest { final Agent spy = Mockito.spy(sut); doAnswer(invocation -> { - ((Agent)invocation.getMock()).incrementEpisodeStepNumber(); + ((Agent)invocation.getMock()).incrementEpisodeStepCount(); return null; }).when(spy).performStep(); @@ -239,7 +237,7 @@ public class AgentTest { public void when_initialObservationsAreSkipped_expect_performNoOpAction() { // Arrange Map envResetResult = new HashMap<>(); - Schema schema = new Schema(new ActionSchema<>(-1)); + Schema schema = new Schema(new IntegerActionSchema(0, -1)); when(environmentMock.reset()).thenReturn(envResetResult); when(environmentMock.getSchema()).thenReturn(schema); @@ -264,7 +262,7 @@ public class AgentTest { public void when_initialObservationsAreSkipped_expect_performNoOpActionAnd() { // Arrange Map envResetResult = new HashMap<>(); - Schema schema = new Schema(new ActionSchema<>(-1)); + Schema schema = new Schema(new IntegerActionSchema(0, -1)); when(environmentMock.reset()).thenReturn(envResetResult); when(environmentMock.getSchema()).thenReturn(schema); @@ -289,7 +287,7 @@ public class AgentTest { public void when_observationsIsSkipped_expect_performLastAction() { // Arrange Map envResetResult = new HashMap<>(); - Schema schema = new Schema(new ActionSchema<>(-1)); + Schema schema = new Schema(new IntegerActionSchema(0, -1)); when(environmentMock.reset()).thenReturn(envResetResult); when(environmentMock.step(any(Integer.class))).thenReturn(new StepResult(envResetResult, 0.0, false)); when(environmentMock.getSchema()).thenReturn(schema); @@ -331,7 +329,7 @@ public class AgentTest { @Test public void when_onBeforeStepReturnsStop_expect_performStepAndOnAfterEpisodeNotCalled() { // Arrange - Schema schema = new Schema(new ActionSchema<>(-1)); + Schema schema = new Schema(new IntegerActionSchema(0, -1)); when(environmentMock.reset()).thenReturn(new HashMap<>()); when(environmentMock.getSchema()).thenReturn(schema); @@ -358,7 +356,7 @@ public class AgentTest { @Test public void when_observationIsNotSkipped_expect_policyActionIsSentToEnvironment() { // Arrange - Schema schema = new Schema(new ActionSchema<>(-1)); + Schema schema = new Schema(new IntegerActionSchema(0, -1)); when(environmentMock.reset()).thenReturn(new HashMap<>()); when(environmentMock.getSchema()).thenReturn(schema); when(environmentMock.step(any(Integer.class))).thenReturn(new StepResult(new HashMap<>(), 0.0, false)); @@ -381,7 +379,7 @@ public class AgentTest { @Test public void when_stepResultIsReceived_expect_observationAndRewardUpdated() { // Arrange - Schema schema = new Schema(new ActionSchema<>(-1)); + Schema schema = new Schema(new IntegerActionSchema(0, -1)); when(environmentMock.reset()).thenReturn(new HashMap<>()); 
when(environmentMock.getSchema()).thenReturn(schema); when(environmentMock.step(any(Integer.class))).thenReturn(new StepResult(new HashMap<>(), 234.0, false)); @@ -405,7 +403,7 @@ public class AgentTest { @Test public void when_stepIsDone_expect_onAfterStepAndWithStepResult() { // Arrange - Schema schema = new Schema(new ActionSchema<>(-1)); + Schema schema = new Schema(new IntegerActionSchema(0, -1)); when(environmentMock.reset()).thenReturn(new HashMap<>()); when(environmentMock.getSchema()).thenReturn(schema); StepResult stepResult = new StepResult(new HashMap<>(), 234.0, false); @@ -430,7 +428,7 @@ public class AgentTest { @Test public void when_onAfterStepReturnsStop_expect_onAfterEpisodeNotCalled() { // Arrange - Schema schema = new Schema(new ActionSchema<>(-1)); + Schema schema = new Schema(new IntegerActionSchema(0, -1)); when(environmentMock.reset()).thenReturn(new HashMap<>()); when(environmentMock.getSchema()).thenReturn(schema); StepResult stepResult = new StepResult(new HashMap<>(), 234.0, false); @@ -458,7 +456,7 @@ public class AgentTest { @Test public void when_runIsCalled_expect_onAfterEpisodeIsCalled() { // Arrange - Schema schema = new Schema(new ActionSchema<>(-1)); + Schema schema = new Schema(new IntegerActionSchema(0, -1)); when(environmentMock.reset()).thenReturn(new HashMap<>()); when(environmentMock.getSchema()).thenReturn(schema); StepResult stepResult = new StepResult(new HashMap<>(), 234.0, false); diff --git a/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/agent/learning/LearningBehaviorTest.java b/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/agent/learning/LearningBehaviorTest.java new file mode 100644 index 000000000..1e39c63d5 --- /dev/null +++ b/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/agent/learning/LearningBehaviorTest.java @@ -0,0 +1,133 @@ +package org.deeplearning4j.rl4j.agent.learning; + +import org.deeplearning4j.rl4j.agent.update.IUpdateRule; +import org.deeplearning4j.rl4j.experience.ExperienceHandler; +import org.deeplearning4j.rl4j.observation.Observation; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.ArgumentCaptor; +import org.mockito.Mock; +import org.mockito.junit.MockitoJUnitRunner; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.factory.Nd4j; + +import java.util.ArrayList; +import java.util.List; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.*; + +@RunWith(MockitoJUnitRunner.class) +public class LearningBehaviorTest { + + @Mock + ExperienceHandler experienceHandlerMock; + + @Mock + IUpdateRule updateRuleMock; + + LearningBehavior sut; + + @Before + public void setup() { + sut = LearningBehavior.builder() + .experienceHandler(experienceHandlerMock) + .updateRule(updateRuleMock) + .build(); + } + + @Test + public void when_callingHandleEpisodeStart_expect_experienceHandlerResetCalled() { + // Arrange + LearningBehavior sut = LearningBehavior.builder() + .experienceHandler(experienceHandlerMock) + .updateRule(updateRuleMock) + .build(); + + // Act + sut.handleEpisodeStart(); + + // Assert + verify(experienceHandlerMock, times(1)).reset(); + } + + @Test + public void when_callingHandleNewExperience_expect_experienceHandlerAddExperienceCalled() { + // Arrange + INDArray observationData = Nd4j.rand(1, 1); + when(experienceHandlerMock.isTrainingBatchReady()).thenReturn(false); + + // Act + 
sut.handleNewExperience(new Observation(observationData), 1, 2.0, false); + + // Assert + ArgumentCaptor observationCaptor = ArgumentCaptor.forClass(Observation.class); + ArgumentCaptor actionCaptor = ArgumentCaptor.forClass(Integer.class); + ArgumentCaptor rewardCaptor = ArgumentCaptor.forClass(Double.class); + ArgumentCaptor isTerminatedCaptor = ArgumentCaptor.forClass(Boolean.class); + verify(experienceHandlerMock, times(1)).addExperience(observationCaptor.capture(), actionCaptor.capture(), rewardCaptor.capture(), isTerminatedCaptor.capture()); + + assertEquals(observationData.getDouble(0, 0), observationCaptor.getValue().getData().getDouble(0, 0), 0.00001); + assertEquals(1, (int)actionCaptor.getValue()); + assertEquals(2.0, (double)rewardCaptor.getValue(), 0.00001); + assertFalse(isTerminatedCaptor.getValue()); + + verify(updateRuleMock, never()).update(any(List.class)); + } + + @Test + public void when_callingHandleNewExperienceAndTrainingBatchIsReady_expect_updateRuleUpdateWithTrainingBatch() { + // Arrange + INDArray observationData = Nd4j.rand(1, 1); + when(experienceHandlerMock.isTrainingBatchReady()).thenReturn(true); + List trainingBatch = new ArrayList(); + when(experienceHandlerMock.generateTrainingBatch()).thenReturn(trainingBatch); + + // Act + sut.handleNewExperience(new Observation(observationData), 1, 2.0, false); + + // Assert + verify(updateRuleMock, times(1)).update(trainingBatch); + } + + @Test + public void when_callingHandleEpisodeEnd_expect_experienceHandlerSetFinalObservationCalled() { + // Arrange + INDArray observationData = Nd4j.rand(1, 1); + when(experienceHandlerMock.isTrainingBatchReady()).thenReturn(false); + + // Act + sut.handleEpisodeEnd(new Observation(observationData)); + + // Assert + ArgumentCaptor observationCaptor = ArgumentCaptor.forClass(Observation.class); + verify(experienceHandlerMock, times(1)).setFinalObservation(observationCaptor.capture()); + + assertEquals(observationData.getDouble(0, 0), observationCaptor.getValue().getData().getDouble(0, 0), 0.00001); + + verify(updateRuleMock, never()).update(any(List.class)); + } + + @Test + public void when_callingHandleEpisodeEndAndTrainingBatchIsNotEmpty_expect_updateRuleUpdateWithTrainingBatch() { + // Arrange + INDArray observationData = Nd4j.rand(1, 1); + when(experienceHandlerMock.isTrainingBatchReady()).thenReturn(true); + List trainingBatch = new ArrayList(); + when(experienceHandlerMock.generateTrainingBatch()).thenReturn(trainingBatch); + + // Act + sut.handleEpisodeEnd(new Observation(observationData)); + + // Assert + ArgumentCaptor observationCaptor = ArgumentCaptor.forClass(Observation.class); + verify(experienceHandlerMock, times(1)).setFinalObservation(observationCaptor.capture()); + + assertEquals(observationData.getDouble(0, 0), observationCaptor.getValue().getData().getDouble(0, 0), 0.00001); + + verify(updateRuleMock, times(1)).update(trainingBatch); + } +} diff --git a/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/experience/ReplayMemoryExperienceHandlerTest.java b/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/experience/ReplayMemoryExperienceHandlerTest.java index 765a14c8f..0d90e812d 100644 --- a/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/experience/ReplayMemoryExperienceHandlerTest.java +++ b/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/experience/ReplayMemoryExperienceHandlerTest.java @@ -4,34 +4,44 @@ import org.deeplearning4j.rl4j.learning.sync.IExpReplay; import org.deeplearning4j.rl4j.learning.sync.Transition; import 
org.deeplearning4j.rl4j.observation.Observation; import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.ArgumentCaptor; +import org.mockito.Mock; +import org.mockito.junit.MockitoJUnitRunner; import org.nd4j.linalg.factory.Nd4j; -import java.util.ArrayList; import java.util.List; -import static org.junit.Assert.assertEquals; +import static org.junit.Assert.*; +import static org.mockito.Mockito.*; +@RunWith(MockitoJUnitRunner.class) public class ReplayMemoryExperienceHandlerTest { + + @Mock + IExpReplay expReplayMock; + @Test public void when_addingFirstExperience_expect_notAddedToStoreBeforeNextObservationIsAdded() { // Arrange - TestExpReplay expReplayMock = new TestExpReplay(); + when(expReplayMock.getDesignatedBatchSize()).thenReturn(10); + ReplayMemoryExperienceHandler sut = new ReplayMemoryExperienceHandler(expReplayMock); // Act sut.addExperience(new Observation(Nd4j.create(new double[] { 1.0 })), 1, 1.0, false); - int numStoredTransitions = expReplayMock.addedTransitions.size(); + boolean isStoreCalledAfterFirstAdd = mockingDetails(expReplayMock).getInvocations().stream().anyMatch(x -> x.getMethod().getName() == "store"); sut.addExperience(new Observation(Nd4j.create(new double[] { 2.0 })), 2, 2.0, false); + boolean isStoreCalledAfterSecondAdd = mockingDetails(expReplayMock).getInvocations().stream().anyMatch(x -> x.getMethod().getName() == "store"); // Assert - assertEquals(0, numStoredTransitions); - assertEquals(1, expReplayMock.addedTransitions.size()); + assertFalse(isStoreCalledAfterFirstAdd); + assertTrue(isStoreCalledAfterSecondAdd); } @Test public void when_addingExperience_expect_transitionsAreCorrect() { // Arrange - TestExpReplay expReplayMock = new TestExpReplay(); ReplayMemoryExperienceHandler sut = new ReplayMemoryExperienceHandler(expReplayMock); // Act @@ -40,24 +50,25 @@ public class ReplayMemoryExperienceHandlerTest { sut.setFinalObservation(new Observation(Nd4j.create(new double[] { 3.0 }))); // Assert - assertEquals(2, expReplayMock.addedTransitions.size()); + ArgumentCaptor> argument = ArgumentCaptor.forClass(Transition.class); + verify(expReplayMock, times(2)).store(argument.capture()); + List> transitions = argument.getAllValues(); - assertEquals(1.0, expReplayMock.addedTransitions.get(0).getObservation().getData().getDouble(0), 0.00001); - assertEquals(1, (int)expReplayMock.addedTransitions.get(0).getAction()); - assertEquals(1.0, expReplayMock.addedTransitions.get(0).getReward(), 0.00001); - assertEquals(2.0, expReplayMock.addedTransitions.get(0).getNextObservation().getDouble(0), 0.00001); + assertEquals(1.0, transitions.get(0).getObservation().getData().getDouble(0), 0.00001); + assertEquals(1, (int)transitions.get(0).getAction()); + assertEquals(1.0, transitions.get(0).getReward(), 0.00001); + assertEquals(2.0, transitions.get(0).getNextObservation().getDouble(0), 0.00001); - assertEquals(2.0, expReplayMock.addedTransitions.get(1).getObservation().getData().getDouble(0), 0.00001); - assertEquals(2, (int)expReplayMock.addedTransitions.get(1).getAction()); - assertEquals(2.0, expReplayMock.addedTransitions.get(1).getReward(), 0.00001); - assertEquals(3.0, expReplayMock.addedTransitions.get(1).getNextObservation().getDouble(0), 0.00001); + assertEquals(2.0, transitions.get(1).getObservation().getData().getDouble(0), 0.00001); + assertEquals(2, (int)transitions.get(1).getAction()); + assertEquals(2.0, transitions.get(1).getReward(), 0.00001); + assertEquals(3.0, transitions.get(1).getNextObservation().getDouble(0), 0.00001); } @Test 
public void when_settingFinalObservation_expect_nextAddedExperienceDoNotUsePreviousObservation() { // Arrange - TestExpReplay expReplayMock = new TestExpReplay(); ReplayMemoryExperienceHandler sut = new ReplayMemoryExperienceHandler(expReplayMock); // Act @@ -66,42 +77,57 @@ public class ReplayMemoryExperienceHandlerTest { sut.addExperience(new Observation(Nd4j.create(new double[] { 3.0 })), 3, 3.0, false); // Assert - assertEquals(1, expReplayMock.addedTransitions.size()); - assertEquals(1, (int)expReplayMock.addedTransitions.get(0).getAction()); + ArgumentCaptor> argument = ArgumentCaptor.forClass(Transition.class); + verify(expReplayMock, times(1)).store(argument.capture()); + Transition transition = argument.getValue(); + + assertEquals(1, (int)transition.getAction()); } @Test public void when_addingExperience_expect_getTrainingBatchSizeReturnSize() { // Arrange - TestExpReplay expReplayMock = new TestExpReplay(); - ReplayMemoryExperienceHandler sut = new ReplayMemoryExperienceHandler(expReplayMock); + ReplayMemoryExperienceHandler sut = new ReplayMemoryExperienceHandler(10, 5, Nd4j.getRandom()); sut.addExperience(new Observation(Nd4j.create(new double[] { 1.0 })), 1, 1.0, false); sut.addExperience(new Observation(Nd4j.create(new double[] { 2.0 })), 2, 2.0, false); sut.setFinalObservation(new Observation(Nd4j.create(new double[] { 3.0 }))); // Act int size = sut.getTrainingBatchSize(); + // Assert assertEquals(2, size); } - private static class TestExpReplay implements IExpReplay { + @Test + public void when_experienceSizeIsSmallerThanBatchSize_expect_TrainingBatchIsNotReady() { + // Arrange + ReplayMemoryExperienceHandler sut = new ReplayMemoryExperienceHandler(10, 5, Nd4j.getRandom()); + sut.addExperience(new Observation(Nd4j.create(new double[] { 1.0 })), 1, 1.0, false); + sut.addExperience(new Observation(Nd4j.create(new double[] { 2.0 })), 2, 2.0, false); + sut.setFinalObservation(new Observation(Nd4j.create(new double[] { 3.0 }))); - public final List> addedTransitions = new ArrayList<>(); + // Act - @Override - public ArrayList> getBatch() { - return null; - } - - @Override - public void store(Transition transition) { - addedTransitions.add(transition); - } - - @Override - public int getBatchSize() { - return addedTransitions.size(); - } + // Assert + assertFalse(sut.isTrainingBatchReady()); } + + @Test + public void when_experienceSizeIsGreaterOrEqualToBatchSize_expect_TrainingBatchIsReady() { + // Arrange + ReplayMemoryExperienceHandler sut = new ReplayMemoryExperienceHandler(10, 5, Nd4j.getRandom()); + sut.addExperience(new Observation(Nd4j.create(new double[] { 1.0 })), 1, 1.0, false); + sut.addExperience(new Observation(Nd4j.create(new double[] { 2.0 })), 2, 2.0, false); + sut.addExperience(new Observation(Nd4j.create(new double[] { 3.0 })), 3, 3.0, false); + sut.addExperience(new Observation(Nd4j.create(new double[] { 4.0 })), 4, 4.0, false); + sut.addExperience(new Observation(Nd4j.create(new double[] { 5.0 })), 5, 5.0, false); + sut.setFinalObservation(new Observation(Nd4j.create(new double[] { 6.0 }))); + + // Act + + // Assert + assertTrue(sut.isTrainingBatchReady()); + } + } diff --git a/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/experience/StateActionExperienceHandlerTest.java b/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/experience/StateActionExperienceHandlerTest.java index 7334ff87a..2ce0d6659 100644 --- a/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/experience/StateActionExperienceHandlerTest.java +++ 
b/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/experience/StateActionExperienceHandlerTest.java @@ -13,7 +13,7 @@ public class StateActionExperienceHandlerTest { @Test public void when_addingExperience_expect_generateTrainingBatchReturnsIt() { // Arrange - StateActionExperienceHandler sut = new StateActionExperienceHandler(); + StateActionExperienceHandler sut = new StateActionExperienceHandler(Integer.MAX_VALUE); sut.reset(); Observation observation = new Observation(Nd4j.zeros(1)); sut.addExperience(observation, 123, 234.0, true); @@ -32,7 +32,7 @@ public class StateActionExperienceHandlerTest { @Test public void when_addingMultipleExperiences_expect_generateTrainingBatchReturnsItInSameOrder() { // Arrange - StateActionExperienceHandler sut = new StateActionExperienceHandler(); + StateActionExperienceHandler sut = new StateActionExperienceHandler(Integer.MAX_VALUE); sut.reset(); sut.addExperience(null, 1, 1.0, false); sut.addExperience(null, 2, 2.0, false); @@ -51,7 +51,7 @@ public class StateActionExperienceHandlerTest { @Test public void when_gettingExperience_expect_experienceStoreIsCleared() { // Arrange - StateActionExperienceHandler sut = new StateActionExperienceHandler(); + StateActionExperienceHandler sut = new StateActionExperienceHandler(Integer.MAX_VALUE); sut.reset(); sut.addExperience(null, 1, 1.0, false); @@ -67,7 +67,7 @@ public class StateActionExperienceHandlerTest { @Test public void when_addingExperience_expect_getTrainingBatchSizeReturnSize() { // Arrange - StateActionExperienceHandler sut = new StateActionExperienceHandler(); + StateActionExperienceHandler sut = new StateActionExperienceHandler(Integer.MAX_VALUE); sut.reset(); sut.addExperience(null, 1, 1.0, false); sut.addExperience(null, 2, 2.0, false); @@ -79,4 +79,66 @@ public class StateActionExperienceHandlerTest { // Assert assertEquals(3, size); } + + @Test + public void when_experienceIsEmpty_expect_TrainingBatchNotReady() { + // Arrange + StateActionExperienceHandler sut = new StateActionExperienceHandler(5); + sut.reset(); + + // Act + boolean isTrainingBatchReady = sut.isTrainingBatchReady(); + + // Assert + assertFalse(isTrainingBatchReady); + } + + @Test + public void when_experienceSizeIsGreaterOrEqualToThanBatchSize_expect_TrainingBatchIsReady() { + // Arrange + StateActionExperienceHandler sut = new StateActionExperienceHandler(5); + sut.reset(); + sut.addExperience(null, 1, 1.0, false); + sut.addExperience(null, 2, 2.0, false); + sut.addExperience(null, 3, 3.0, false); + sut.addExperience(null, 4, 4.0, false); + sut.addExperience(null, 5, 5.0, false); + + // Act + boolean isTrainingBatchReady = sut.isTrainingBatchReady(); + + // Assert + assertTrue(isTrainingBatchReady); + } + + @Test + public void when_experienceSizeIsSmallerThanBatchSizeButFinalObservationIsSet_expect_TrainingBatchIsReady() { + // Arrange + StateActionExperienceHandler sut = new StateActionExperienceHandler(5); + sut.reset(); + sut.addExperience(null, 1, 1.0, false); + sut.addExperience(null, 2, 2.0, false); + sut.setFinalObservation(null); + + // Act + boolean isTrainingBatchReady = sut.isTrainingBatchReady(); + + // Assert + assertTrue(isTrainingBatchReady); + } + + @Test + public void when_experienceSizeIsZeroAndFinalObservationIsSet_expect_TrainingBatchIsNotReady() { + // Arrange + StateActionExperienceHandler sut = new StateActionExperienceHandler(5); + sut.reset(); + sut.setFinalObservation(null); + + // Act + boolean isTrainingBatchReady = sut.isTrainingBatchReady(); + + // Assert + 
assertFalse(isTrainingBatchReady); + } + } diff --git a/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/helper/INDArrayHelperTest.java b/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/helper/INDArrayHelperTest.java index e1c5c64ed..7af15b8c4 100644 --- a/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/helper/INDArrayHelperTest.java +++ b/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/helper/INDArrayHelperTest.java @@ -49,4 +49,25 @@ public class INDArrayHelperTest { assertEquals(1, output.shape()[1]); } + @Test + public void when_callingCreateBatchForShape_expect_INDArrayWithCorrectShapeAndOriginalShapeUnchanged() { + // Arrange + long[] shape = new long[] { 1, 3, 4}; + + // Act + INDArray output = INDArrayHelper.createBatchForShape(2, shape); + + // Assert + // Output shape + assertEquals(3, output.shape().length); + assertEquals(2, output.shape()[0]); + assertEquals(3, output.shape()[1]); + assertEquals(4, output.shape()[2]); + + // Input should remain unchanged + assertEquals(1, shape[0]); + assertEquals(3, shape[1]); + assertEquals(4, shape[2]); + + } } diff --git a/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/learning/async/nstep/discrete/QLearningUpdateAlgorithmTest.java b/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/learning/async/nstep/discrete/QLearningUpdateAlgorithmTest.java index f44437d67..ae83bd1f0 100644 --- a/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/learning/async/nstep/discrete/QLearningUpdateAlgorithmTest.java +++ b/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/learning/async/nstep/discrete/QLearningUpdateAlgorithmTest.java @@ -19,10 +19,11 @@ package org.deeplearning4j.rl4j.learning.async.nstep.discrete; import org.deeplearning4j.rl4j.experience.StateActionPair; import org.deeplearning4j.rl4j.learning.async.AsyncGlobal; import org.deeplearning4j.rl4j.learning.async.UpdateAlgorithm; +import org.deeplearning4j.rl4j.network.dqn.IDQN; import org.deeplearning4j.rl4j.observation.Observation; -import org.deeplearning4j.rl4j.support.MockDQN; import org.junit.Test; import org.junit.runner.RunWith; +import org.mockito.ArgumentCaptor; import org.mockito.Mock; import org.mockito.junit.MockitoJUnitRunner; import org.nd4j.linalg.api.ndarray.INDArray; @@ -32,6 +33,9 @@ import java.util.ArrayList; import java.util.List; import static org.junit.Assert.assertEquals; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.argThat; +import static org.mockito.Mockito.*; @RunWith(MockitoJUnitRunner.class) public class QLearningUpdateAlgorithmTest { @@ -39,12 +43,24 @@ public class QLearningUpdateAlgorithmTest { @Mock AsyncGlobal mockAsyncGlobal; + @Mock + IDQN dqnMock; + + private UpdateAlgorithm sut; + + private void setup(double gamma) { + // mock a neural net output -- just invert the sign of the input + when(dqnMock.outputAll(any(INDArray.class))).thenAnswer(invocation -> new INDArray[] { invocation.getArgument(0, INDArray.class).mul(-1.0) }); + + sut = new QLearningUpdateAlgorithm(2, gamma); + } + @Test public void when_isTerminal_expect_initRewardIs0() { // Arrange - MockDQN dqnMock = new MockDQN(); - UpdateAlgorithm sut = new QLearningUpdateAlgorithm(new int[] { 1 }, 1, 1.0); - final Observation observation = new Observation(Nd4j.zeros(1)); + setup(1.0); + + final Observation observation = new Observation(Nd4j.zeros(1, 2)); List> experience = new ArrayList>() { { add(new StateActionPair(observation, 0, 0.0, true)); @@ -55,59 +71,68 @@ public class QLearningUpdateAlgorithmTest { 
sut.computeGradients(dqnMock, experience); // Assert - assertEquals(0.0, dqnMock.gradientParams.get(0).getRight().getDouble(0), 0.00001); + verify(dqnMock, times(1)).gradient(any(INDArray.class), argThat((INDArray x) -> x.getDouble(0) == 0.0)); } @Test public void when_terminalAndNoTargetUpdate_expect_initRewardWithMaxQFromCurrent() { // Arrange - UpdateAlgorithm sut = new QLearningUpdateAlgorithm(new int[] { 2 }, 2, 1.0); - final Observation observation = new Observation(Nd4j.create(new double[] { -123.0, -234.0 })); + setup(1.0); + + final Observation observation = new Observation(Nd4j.create(new double[] { -123.0, -234.0 }).reshape(1, 2)); List> experience = new ArrayList>() { { add(new StateActionPair(observation, 0, 0.0, false)); } }; - MockDQN dqnMock = new MockDQN(); // Act sut.computeGradients(dqnMock, experience); // Assert - assertEquals(2, dqnMock.outputAllParams.size()); - assertEquals(-123.0, dqnMock.outputAllParams.get(0).getDouble(0, 0), 0.00001); - assertEquals(234.0, dqnMock.gradientParams.get(0).getRight().getDouble(0), 0.00001); + ArgumentCaptor argument = ArgumentCaptor.forClass(INDArray.class); + + verify(dqnMock, times(2)).outputAll(argument.capture()); + List values = argument.getAllValues(); + assertEquals(-123.0, values.get(0).getDouble(0, 0), 0.00001); + assertEquals(-123.0, values.get(1).getDouble(0, 0), 0.00001); + + verify(dqnMock, times(1)).gradient(any(INDArray.class), argThat((INDArray x) -> x.getDouble(0) == 234.0)); } @Test public void when_callingWithMultipleExperiences_expect_gradientsAreValid() { // Arrange double gamma = 0.9; - UpdateAlgorithm sut = new QLearningUpdateAlgorithm(new int[] { 2 }, 2, gamma); + setup(gamma); + List> experience = new ArrayList>() { { - add(new StateActionPair(new Observation(Nd4j.create(new double[] { -1.1, -1.2 })), 0, 1.0, false)); - add(new StateActionPair(new Observation(Nd4j.create(new double[] { -2.1, -2.2 })), 1, 2.0, true)); + add(new StateActionPair(new Observation(Nd4j.create(new double[] { -1.1, -1.2 }).reshape(1, 2)), 0, 1.0, false)); + add(new StateActionPair(new Observation(Nd4j.create(new double[] { -2.1, -2.2 }).reshape(1, 2)), 1, 2.0, true)); } }; - MockDQN dqnMock = new MockDQN(); // Act sut.computeGradients(dqnMock, experience); // Assert + ArgumentCaptor features = ArgumentCaptor.forClass(INDArray.class); + ArgumentCaptor targets = ArgumentCaptor.forClass(INDArray.class); + verify(dqnMock, times(1)).gradient(features.capture(), targets.capture()); + // input side -- should be a stack of observations - INDArray input = dqnMock.gradientParams.get(0).getLeft(); - assertEquals(-1.1, input.getDouble(0, 0), 0.00001); - assertEquals(-1.2, input.getDouble(0, 1), 0.00001); - assertEquals(-2.1, input.getDouble(1, 0), 0.00001); - assertEquals(-2.2, input.getDouble(1, 1), 0.00001); + INDArray featuresValues = features.getValue(); + assertEquals(-1.1, featuresValues.getDouble(0, 0), 0.00001); + assertEquals(-1.2, featuresValues.getDouble(0, 1), 0.00001); + assertEquals(-2.1, featuresValues.getDouble(1, 0), 0.00001); + assertEquals(-2.2, featuresValues.getDouble(1, 1), 0.00001); // target side - INDArray target = dqnMock.gradientParams.get(0).getRight(); - assertEquals(1.0 + gamma * 2.0, target.getDouble(0, 0), 0.00001); - assertEquals(1.2, target.getDouble(0, 1), 0.00001); - assertEquals(2.1, target.getDouble(1, 0), 0.00001); - assertEquals(2.0, target.getDouble(1, 1), 0.00001); + INDArray targetsValues = targets.getValue(); + assertEquals(1.0 + gamma * 2.0, targetsValues.getDouble(0, 0), 0.00001); + 
assertEquals(1.2, targetsValues.getDouble(0, 1), 0.00001); + assertEquals(2.1, targetsValues.getDouble(1, 0), 0.00001); + assertEquals(2.0, targetsValues.getDouble(1, 1), 0.00001); } } diff --git a/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/learning/sync/qlearning/discrete/QLearningDiscreteTest.java b/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/learning/sync/qlearning/discrete/QLearningDiscreteTest.java index e19af338b..e1424c286 100644 --- a/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/learning/sync/qlearning/discrete/QLearningDiscreteTest.java +++ b/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/learning/sync/qlearning/discrete/QLearningDiscreteTest.java @@ -18,6 +18,7 @@ package org.deeplearning4j.rl4j.learning.sync.qlearning.discrete; import org.deeplearning4j.gym.StepReply; +import org.deeplearning4j.rl4j.agent.learning.ILearningBehavior; import org.deeplearning4j.rl4j.learning.IHistoryProcessor; import org.deeplearning4j.rl4j.learning.configuration.QLearningConfiguration; import org.deeplearning4j.rl4j.learning.sync.qlearning.QLearning; @@ -74,6 +75,9 @@ public class QLearningDiscreteTest { @Mock QLearningConfiguration mockQlearningConfiguration; + @Mock + ILearningBehavior learningBehavior; + // HWC int[] observationShape = new int[]{3, 10, 10}; int totalObservationSize = 1; @@ -92,18 +96,28 @@ public class QLearningDiscreteTest { } - private void mockTestContext(int maxSteps, int updateStart, int batchSize, double rewardFactor, int maxExperienceReplay) { + private void mockTestContext(int maxSteps, int updateStart, int batchSize, double rewardFactor, int maxExperienceReplay, ILearningBehavior learningBehavior) { when(mockQlearningConfiguration.getBatchSize()).thenReturn(batchSize); when(mockQlearningConfiguration.getRewardFactor()).thenReturn(rewardFactor); when(mockQlearningConfiguration.getExpRepMaxSize()).thenReturn(maxExperienceReplay); when(mockQlearningConfiguration.getSeed()).thenReturn(123L); - qLearningDiscrete = mock( - QLearningDiscrete.class, - Mockito.withSettings() - .useConstructor(mockMDP, mockDQN, mockQlearningConfiguration, 0) - .defaultAnswer(Mockito.CALLS_REAL_METHODS) - ); + if(learningBehavior != null) { + qLearningDiscrete = mock( + QLearningDiscrete.class, + Mockito.withSettings() + .useConstructor(mockMDP, mockDQN, mockQlearningConfiguration, 0, learningBehavior, Nd4j.getRandom()) + .defaultAnswer(Mockito.CALLS_REAL_METHODS) + ); + } + else { + qLearningDiscrete = mock( + QLearningDiscrete.class, + Mockito.withSettings() + .useConstructor(mockMDP, mockDQN, mockQlearningConfiguration, 0) + .defaultAnswer(Mockito.CALLS_REAL_METHODS) + ); + } } private void mockHistoryProcessor(int skipFrames) { @@ -136,7 +150,7 @@ public class QLearningDiscreteTest { public void when_singleTrainStep_expect_correctValues() { // Arrange - mockTestContext(100,0,2,1.0, 10); + mockTestContext(100,0,2,1.0, 10, null); // An example observation and 2 Q values output (2 actions) Observation observation = new Observation(Nd4j.zeros(observationShape)); @@ -162,7 +176,7 @@ public class QLearningDiscreteTest { @Test public void when_singleTrainStepSkippedFrames_expect_correctValues() { // Arrange - mockTestContext(100,0,2,1.0, 10); + mockTestContext(100,0,2,1.0, 10, learningBehavior); Observation skippedObservation = Observation.SkippedObservation; Observation nextObservation = new Observation(Nd4j.zeros(observationShape)); @@ -180,8 +194,8 @@ public class QLearningDiscreteTest { assertEquals(0, stepReply.getReward(), 1e-5); 
assertFalse(stepReply.isDone()); assertFalse(stepReply.getObservation().isSkipped()); - assertEquals(0, qLearningDiscrete.getExperienceHandler().getTrainingBatchSize()); + verify(learningBehavior, never()).handleNewExperience(any(Observation.class), any(Integer.class), any(Double.class), any(Boolean.class)); verify(mockDQN, never()).output(any(INDArray.class)); } From 2aed216c2a880174266ef52df9ff7edac5337886 Mon Sep 17 00:00:00 2001 From: shugeo Date: Wed, 27 May 2020 21:15:03 +0300 Subject: [PATCH 15/21] Eliminated error with resize implementation. (#418) * Eliminated error with resize implementation. Signed-off-by: shugeo * Refactored resize caller implementation. Signed-off-by: shugeo * Refactored image.resize op helper. Signed-off-by: shugeo * Added stub implementations for the missing resize methods. Signed-off-by: shugeo * Added resize_images op. Refactored image_resize op. Signed-off-by: shugeo * Refactored matrix_band_part op and test. Signed-off-by: shugeo * Refactored resize_images op to comply with preserve_aspect_ratio flag properly. Signed-off-by: shugeo * Refactored resize_images and tests for resizeArea method. Signed-off-by: shugeo * Refactored resize methods and test. Signed-off-by: shugeo * Added new methods for TF2 resize op. Signed-off-by: shugeo * Ported a portion of the resize algorithms from TF2. Signed-off-by: shugeo * Added routine to process resize with given algorithm. Signed-off-by: shugeo * Added new image resize via scale and translate process helper. Signed-off-by: shugeo * CPU implementation for V2 image resize operation helpers. Signed-off-by: shugeo * Added implementation for lanczos5 algorithm of resize and test. Signed-off-by: shugeo * Added prints for span computing. Signed-off-by: shugeo * The first working implementation and tests for lanczos5 resize. Signed-off-by: shugeo * Eliminated leftover debug prints. Signed-off-by: shugeo * Refactored image_resize op and tests. Signed-off-by: shugeo * Lanczos3 resize implementation and tests. Signed-off-by: shugeo * Implemented bicubic resize algorithm and tests. Signed-off-by: shugeo * Added a couple of tests and cosmetic changes with image resize helper. Signed-off-by: shugeo * Added bilinear implementation for image resize. Signed-off-by: shugeo * Refactored bicubic algorithm and also implemented area and neighbor algorithms for image resize on the CPU arch. Signed-off-by: shugeo * Added a couple of tests for nearest neighbor and area resize. Signed-off-by: shugeo * Cosmetic changes for the CPU implementation and added the CUDA implementation for resize methods. Signed-off-by: shugeo * Separated CUDA implementation of v2 image resize. Signed-off-by: shugeo * Added kernels for span calculation and span gathering with new image resize CUDA implementation. Signed-off-by: shugeo * Refactored CUDA implementation of image resize kernels. Signed-off-by: shugeo * Finished the first working implementation of image resize op and tests. Signed-off-by: shugeo * Fixed resize_images and image_resize ops. Signed-off-by: shugeo * Refactored shape construction and output validation. Signed-off-by: shugeo * Fixed test to be properly initialized with float. Signed-off-by: shugeo * Added 3D input support for resize ops. Signed-off-by: shugeo * Fixed test for resize_images op. Signed-off-by: shugeo * Fixed test and call for resize_images op. Signed-off-by: shugeo * Refactored image_resize op output data type handling for the nearest neighbors method and tests. Signed-off-by: shugeo * Fixed issue with wrong resize method.
Signed-off-by: shugeo * Added checkup for wrong resize methods for resize ops. Signed-off-by: shugeo * Refactored resize methods and test. Signed-off-by: shugeo * Added output data type validation for given resize method. Signed-off-by: shugeo * - ResizeMethod rearranged in order to match C++ side - minor test fix Signed-off-by: raver119@gmail.com * Refactored resize_images op. Signed-off-by: shugeo Co-authored-by: raver119@gmail.com --- .../generic/images/image_resize.cpp | 58 +- .../generic/images/resize_images.cpp | 135 +++++ .../generic/linalg/matrix_band_part.cpp | 26 +- .../include/ops/declarable/headers/images.h | 155 +++++- .../ops/declarable/headers/parity_ops.h | 124 ----- .../declarable/helpers/cpu/image_resize.cpp | 399 +++++++++++++- .../declarable/helpers/cuda/image_resize.cu | 25 +- .../helpers/cuda/image_resize_v2.cu | 497 ++++++++++++++++++ .../ops/declarable/helpers/image_resize.h | 17 +- .../layers_tests/DeclarableOpsTests10.cpp | 88 ++++ .../layers_tests/DeclarableOpsTests11.cpp | 34 +- .../layers_tests/DeclarableOpsTests12.cpp | 325 ++++++++++++ .../org/nd4j/enums/ImageResizeMethod.java | 15 +- .../java/org/nd4j/nativeblas/Nd4jCuda.java | 14 +- .../java/org/nd4j/nativeblas/Nd4jCpu.java | 266 ++-------- .../opvalidation/TransformOpValidation.java | 6 +- 16 files changed, 1753 insertions(+), 431 deletions(-) create mode 100644 libnd4j/include/ops/declarable/generic/images/resize_images.cpp create mode 100644 libnd4j/include/ops/declarable/helpers/cuda/image_resize_v2.cu diff --git a/libnd4j/include/ops/declarable/generic/images/image_resize.cpp b/libnd4j/include/ops/declarable/generic/images/image_resize.cpp index 3ceba93d8..4e680b337 100644 --- a/libnd4j/include/ops/declarable/generic/images/image_resize.cpp +++ b/libnd4j/include/ops/declarable/generic/images/image_resize.cpp @@ -1,5 +1,5 @@ /******************************************************************************* - * Copyright (c) 2019 Konduit K.K. + * Copyright (c) 2020 Konduit K.K. 
* * This program and the accompanying materials are made available under the * terms of the Apache License, Version 2.0 which is available at @@ -32,57 +32,65 @@ namespace sd { auto size = INPUT_VARIABLE(1); auto output = OUTPUT_VARIABLE(0); + int width; int height; - bool preserveAspectRatio = false; // - default value bool antialias = false; - REQUIRE_TRUE(size->lengthOf() == 2, 0, "resize_bilinear: Resize params is a pair of values, not %lld.", size->lengthOf()); - width = size->e(0); - height = size->e(1); - if (block.getBArguments()->size()) { - preserveAspectRatio = B_ARG(0); - if (block.getBArguments()->size() > 1) - antialias = B_ARG(1); + REQUIRE_TRUE(size->lengthOf() == 2, 0, "image_resize: Resize params is a pair of values, not %lld.", size->lengthOf()); + width = size->e(1); + height = size->e(0); + if (block.numB() == 2) { + antialias = B_ARG(1); } auto method = helpers::ImageResizeMethods::kResizeBilinear; if (block.numI() == 1) { method = (helpers::ImageResizeMethods)INT_ARG(0); } + REQUIRE_TRUE(method == helpers::ImageResizeMethods::kResizeNearest || output->dataType() == DataType::FLOAT32, 0, "image_resize: Output data type should be FLOAT32 for this method %i", (int)method ); + REQUIRE_TRUE(method >= helpers::ImageResizeMethods::kResizeFirst && method <= helpers::ImageResizeMethods::kResizeLast, 0, "image_resize: Resize method should be between %i and %i, but %i was given.", (int)helpers::ImageResizeMethods::kResizeFirst, (int)helpers::ImageResizeMethods::kResizeLast, (int)method); + auto inRank = image->rankOf(); + REQUIRE_TRUE(inRank >=3 && inRank <=4, 0, "image_resize: Input rank should be 4 or 3, but %i given.", image->rankOf()); + auto source = inRank == 4?image->reshape(image->ordering(), {image->sizeAt(0), image->sizeAt(1), image->sizeAt(2), image->sizeAt(3)}):image->reshape(image->ordering(), {1, image->sizeAt(0), image->sizeAt(1), image->sizeAt(2)}); + auto target = inRank == 4?output->reshape(output->ordering(), {output->sizeAt(0), output->sizeAt(1), output->sizeAt(2), output->sizeAt(3)}, false) : output->reshape(output->ordering(), {1, output->sizeAt(0), output->sizeAt(1), output->sizeAt(2)}, false); - return helpers::resizeFunctor(block.launchContext(), image, width, height, method, preserveAspectRatio, antialias, output); + return helpers::resizeFunctor(block.launchContext(), image, width, height, method, antialias, output); } DECLARE_SHAPE_FN(image_resize) { - auto shapeList = SHAPELIST(); auto in = inputShape->at(0); Nd4jLong* outputShape; + auto method = helpers::ImageResizeMethods::kResizeBilinear; + if (block.numI() == 1) { + method = (helpers::ImageResizeMethods)INT_ARG(0); + } int width; int height; + double ratio = shape::sizeAt(in, 1) / (0.0 + shape::sizeAt(in, 2)); auto newImageSize = INPUT_VARIABLE(1); REQUIRE_TRUE(newImageSize->lengthOf() == 2, 0, "resize_bilinear: Resize params is a pair of values, not %i.", newImageSize->lengthOf()); REQUIRE_TRUE(block.numI() <= 1, 0, "resize_bilinear: Resize params already given by the second param. 
Int params are expensive."); - width = newImageSize->e(0); - height = newImageSize->e(1); - - ALLOCATE(outputShape, block.getWorkspace(), shape::shapeInfoLength(4), Nd4jLong); - outputShape[0] = 4; - outputShape[1] = in[1]; - outputShape[2] = width; - outputShape[3] = height; - outputShape[4] = in[4]; - ShapeUtils::updateStridesAndType(outputShape, in, shape::order(in)); + width = newImageSize->e(1); + height = newImageSize->e(0); + if (block.numB() > 0) { + if (B_ARG(0)) { + width = math::nd4j_ceil(height / ratio); + } + } + auto dtype = DataType::FLOAT32; + if (method == helpers::ImageResizeMethods::kResizeNearest) + dtype = ArrayOptions::dataType(in); + auto shape = ConstantShapeHelper::getInstance()->createShapeInfo(dtype, 'c', shape::rank(in) == 4?std::vector{in[1], height, width, in[4]}:std::vector{ height, width, in[4]}); - shapeList->push_back(CONSTANT(outputShape)); - return shapeList; + return SHAPELIST(shape); } DECLARE_TYPES(image_resize) { getOpDescriptor() - ->setAllowedInputTypes(0, {ALL_FLOATS}) + ->setAllowedInputTypes(0, {ALL_INTS, ALL_FLOATS}) ->setAllowedInputTypes(1, {ALL_INTS}) - ->setAllowedOutputTypes({ALL_FLOATS}); + ->setAllowedOutputTypes({ALL_FLOATS, ALL_INTS}); } } diff --git a/libnd4j/include/ops/declarable/generic/images/resize_images.cpp b/libnd4j/include/ops/declarable/generic/images/resize_images.cpp new file mode 100644 index 000000000..c3f9ae8f1 --- /dev/null +++ b/libnd4j/include/ops/declarable/generic/images/resize_images.cpp @@ -0,0 +1,135 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author sgazeos@gmail.com +// + +#include +#if NOT_EXCLUDED(OP_resize_images) + +#include +#include + +namespace sd { + namespace ops { + CUSTOM_OP_IMPL(resize_images, 1, 1, false, 0, 0) { + + auto image = INPUT_VARIABLE(0); + + auto output = OUTPUT_VARIABLE(0); + int width = output->sizeAt(2); + int height = output->sizeAt(1); + int method = helpers::ImageResizeMethods::kResizeBilinear; + if (block.width() > 1) { + auto size = INPUT_VARIABLE(1); + REQUIRE_TRUE(size->lengthOf() == 2, 0, "resize_images: Resize params is a pair of values, not %lld.", size->lengthOf()); +// width = size->e(1); +// height = size->e(0); + if (block.width() > 2) { + auto methodT = INPUT_VARIABLE(2); + + REQUIRE_TRUE(methodT->isZ() && methodT->isScalar(), 0, "resize_images: Method tensor should be integer scalar, but rank of %i tensor given.", methodT->rankOf()); + method = methodT->e(0); + } + else if (block.numI() == 1) { + method = I_ARG(0); + } + } + else { + REQUIRE_TRUE(block.numI() > 1 && block.numI() < 4, 0, "resize_images: Method and size should be given properly."); + if(block.numI() == 3) { // full stack of args +// height = I_ARG(0); +// width = I_ARG(1); + method = I_ARG(2); + } + else if (block.numI() == 2) { +// height = I_ARG(0); +// width = I_ARG(1); + } + } + bool preserveAspectRatio = false; // - default value + bool alignCorners = false; + if (block.numB()) { + alignCorners = B_ARG(0); + if (block.numB() > 1) + preserveAspectRatio = B_ARG(1); + } + REQUIRE_TRUE(method >= helpers::ImageResizeMethods::kResizeFirst && method <= helpers::ImageResizeMethods::kResizeOldLast, 0, "resize_images: Resize method should be between %i and %i, but %i was given.", (int)helpers::ImageResizeMethods::kResizeFirst, (int)helpers::ImageResizeMethods::kResizeOldLast, (int)method); + REQUIRE_TRUE(method == helpers::ImageResizeMethods::kResizeNearest || output->dataType() == DataType::FLOAT32, 0, "image_resize: Output data type should be FLOAT32 for this method %i", (int)method ); + + auto inRank = image->rankOf(); + REQUIRE_TRUE(inRank >=3 && inRank <=4, 0, "image_resize: Input rank should be 4 or 3, but %i given.", inRank); + + auto source = inRank == 4?image->reshape(image->ordering(), {image->sizeAt(0), image->sizeAt(1), image->sizeAt(2), image->sizeAt(3)}):image->reshape(image->ordering(), {1, image->sizeAt(0), image->sizeAt(1), image->sizeAt(2)}); + auto target = inRank == 4?output->reshape(output->ordering(), {output->sizeAt(0), output->sizeAt(1), output->sizeAt(2), output->sizeAt(3)}, false) : output->reshape(output->ordering(), {1, output->sizeAt(0), output->sizeAt(1), output->sizeAt(2)}, false); + + return helpers::resizeImagesFunctor(block.launchContext(), &source, width, height, (helpers::ImageResizeMethods)method, alignCorners, &target); + } + + DECLARE_SHAPE_FN(resize_images) { + auto shapeList = SHAPELIST(); + auto in = inputShape->at(0); + + Nd4jLong* outputShape; + + int width; + int height; + if (block.width() > 1) { + auto size = INPUT_VARIABLE(1); + REQUIRE_TRUE(size->lengthOf() == 2, 0, "resize_images: Resize params is a pair of values, not %lld.", size->lengthOf()); + width = size->e(1); + height = size->e(0); + } + else { + REQUIRE_TRUE(block.numI() > 1 && block.numI() < 4, 0, "resize_images: Method and size should be given properly."); + if(block.numI() == 3) { // full stack of args + height = I_ARG(0); + width = I_ARG(1); + } + else if (block.numI() == 2) { + 
height = I_ARG(0); + width = I_ARG(1); + } + } + + double ratio = shape::sizeAt(in, 1) / (0.0 + shape::sizeAt(in, 2)); + if (block.numB() > 1) { + if (B_ARG(1)) { + width = math::nd4j_ceil(height / ratio); + } + } + + std::vector shape; + if (shape::rank(in) == 4) + shape = {in[1], height, width, in[4]}; + else if (shape::rank(in) == 3) + shape = {height, width, in[3]}; + + auto outShape = ConstantShapeHelper::getInstance()->createShapeInfo(DataType::FLOAT32, shape::order(in), shape); + return SHAPELIST(outShape); + } + DECLARE_TYPES(resize_images) { + getOpDescriptor() + ->setAllowedInputTypes(0, {ALL_FLOATS, ALL_INTS}) + ->setAllowedInputTypes(1, {ALL_INTS}) + ->setAllowedOutputTypes({DataType::FLOAT32}); + } + + } +} + +#endif \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/generic/linalg/matrix_band_part.cpp b/libnd4j/include/ops/declarable/generic/linalg/matrix_band_part.cpp index 08e059c37..51beff4c8 100644 --- a/libnd4j/include/ops/declarable/generic/linalg/matrix_band_part.cpp +++ b/libnd4j/include/ops/declarable/generic/linalg/matrix_band_part.cpp @@ -25,14 +25,27 @@ namespace sd { namespace ops { - CONFIGURABLE_OP_IMPL(matrix_band_part, 1, 1, true, 0, 2) { + CONFIGURABLE_OP_IMPL(matrix_band_part, 1, 1, true, 0, 0) { auto input = INPUT_VARIABLE(0); auto output = OUTPUT_VARIABLE(0); - Nd4jLong minLower = INT_ARG(0); - Nd4jLong maxUpper = INT_ARG(1); + Nd4jLong minLower(0LL); + Nd4jLong maxUpper(0LL); + if (block.width() == 1) { + REQUIRE_TRUE(block.numI() == 2, 0, "matrix_band_part: min and max band numbers should be given before."); + minLower = INT_ARG(0); + maxUpper = INT_ARG(1); + } + else { + REQUIRE_TRUE(block.width() == 3, 0, "matrix_band_part: min and max band numbers should be given as scalars before."); + auto minLowerT = INPUT_VARIABLE(1); + auto maxUpperT = INPUT_VARIABLE(2); + REQUIRE_TRUE(minLowerT->isScalar() && maxUpperT->isScalar(), 0, "matrix_band_part: min and max should be scalars, but %i and %i ranks given", minLowerT->rankOf(), maxUpperT->rankOf()); + minLower = minLowerT->e(0); + maxUpper = maxUpperT->e(0); + } REQUIRE_TRUE(input->rankOf() >= 2, 0, "matrix_band_part: Input rank should be 2 or greater."); Nd4jLong N = input->sizeAt(-2); Nd4jLong M = input->sizeAt(-1); @@ -49,9 +62,10 @@ namespace sd { DECLARE_TYPES(matrix_band_part) { getOpDescriptor() - ->setAllowedInputTypes({ALL_INTS, ALL_FLOATS}) - ->setAllowedInputTypes({ALL_INTS, ALL_FLOATS}) - ->setSameMode(true); + ->setAllowedInputTypes(0, {ALL_INTS, ALL_FLOATS}) + ->setAllowedInputTypes(1, {ALL_INTS}) + ->setAllowedInputTypes(2, {ALL_INTS}) + ->setAllowedInputTypes({ALL_INTS, ALL_FLOATS}); } } diff --git a/libnd4j/include/ops/declarable/headers/images.h b/libnd4j/include/ops/declarable/headers/images.h index 41974901a..aa2114540 100644 --- a/libnd4j/include/ops/declarable/headers/images.h +++ b/libnd4j/include/ops/declarable/headers/images.h @@ -85,6 +85,7 @@ namespace ops { */ #if NOT_EXCLUDED(OP_rgb_to_yuv) DECLARE_CONFIGURABLE_OP(yuv_to_rgb, 1, 1, true, 0, 0); +#endif /** * Rgb To Yiq @@ -108,8 +109,156 @@ namespace ops { DECLARE_CONFIGURABLE_OP(yiq_to_rgb, 1, 1, true, 0, 0); #endif -} -} +/** + * resize_images - resize image with given size and method + * there are 4 methods allowed: RESIZE_BILINEAR(0), RESIZE_NEIGHBOR(1), RESIZE_AREA(2) and RESIZE_BICUBIC(3) + * inputs: + * 0 - 4D tensor with shape {batch, height, width, channels} + * 1 - 1D integer tensor with {new_height, new_width} (optional) + * 2 - 0D integer tensor with method (0 to 3) (optional) + * + * int args: + 
* 0 - new_height + * 1 - new_width + * 2 - method + * + * bool args: + * 0 - align corners (default false) - optional + * 1 - preserve_aspect_ratio (default false) - optional + * + * CAUTION: size and method can be passed either as tensors or as int args, but only one of the two ways at a time + * + * output: + * 0 - 4D float32 tensor with shape {batch, new_height, new_width, channels} + * + */ +#if NOT_EXCLUDED(OP_resize_images) + DECLARE_CUSTOM_OP(resize_images, 1,1,false, 0, 0); +#endif -#endif + /** + * This op makes bilinear or nearest neighbor interpolated resize for given tensor + * + * input array: + * 0 - 4D-Tensor with shape (batch, sizeX, sizeY, channels) numeric type + * 1 - 2D-Tensor with shape (num_boxes, 4) float type + * 2 - 1D-Tensor with shape (num_boxes) int type + * 3 - 1D-Tensor with 2 values (newWidth, newHeight) (optional) int type + * + * float arguments (optional) + * 0 - extrapolation_value (optional) default 0.f + * + * int arguments: (optional) + * 0 - mode (default 0 - bilinear interpolation) + * + * output array: + * the 4D-Tensor with the given images resized to crop_size - float type + */ + #if NOT_EXCLUDED(OP_crop_and_resize) + DECLARE_CUSTOM_OP(crop_and_resize, 4, 1, false, -1, -1); + #endif + + /** + * This op makes bilinear interpolated resize for given tensor + * + * input array: + * 0 - 4D-Tensor with shape (batch, sizeX, sizeY, channels) + * 1 - 1D-Tensor with 2 values (newWidth, newHeight) (optional) + * + * int arguments: (optional) + * 0 - new width + * 1 - new height + * + * output array: + * the 4D-Tensor with resized image (shape is {batch, newWidth, newHeight, channels}) + * + * CAUTION: either size tensor or a pair of int params should be provided. + */ + + #if NOT_EXCLUDED(OP_resize_bilinear) + DECLARE_CUSTOM_OP(resize_bilinear, 1, 1, false, 0, -2); + #endif + + /** + * This op makes nearest neighbor interpolated resize for given tensor + * + * input array: + * 0 - 4D-Tensor with shape (batch, sizeX, sizeY, channels) + * 1 - 1D-Tensor with 2 values (newWidth, newHeight) (optional) + * + * int arguments: (optional) + * 0 - new width + * 1 - new height + * + * output array: + * the 4D-Tensor with resized image (shape is {batch, newWidth, newHeight, channels}) + * + * CAUTION: either size tensor or a pair of int params should be provided. + */ + + #if NOT_EXCLUDED(OP_resize_nearest_neighbor) + DECLARE_CUSTOM_OP(resize_nearest_neighbor, 1, 1, false, 0, -2); + #endif + + /** + * This op makes bicubic interpolated resize for given tensor + * + * input array: + * 0 - 4D-Tensor with shape (batch, sizeX, sizeY, channels) + * 1 - 1D-Tensor with 2 values (newWidth, newHeight) + * + * output array: + * the 4D-Tensor with resized image (shape is {batch, newWidth, newHeight, channels}) + * + */ + #if NOT_EXCLUDED(OP_resize_bicubic) + DECLARE_CUSTOM_OP(resize_bicubic, 1, 1, false, 0, -2); + #endif + + /** + * This op makes area interpolated resize (as OpenCV INTER_AREA algorithm) for given tensor + * + * input array: + * 0 - images - 4D-Tensor with shape (batch, sizeX, sizeY, channels) + * 1 - size - 1D-Tensor with 2 values (newWidth, newHeight) (if missing, a pair of integer args should be provided). + * + * int args: - provided only when size tensor is missing + * 0 - new height + * 1 - new width + * boolean args: + * 0 - align_corners - optional (default is false) + * + * output array: + * the 4D-Tensor with resized image (shape is {batch, newWidth, newHeight, channels}) + * + */ + #if NOT_EXCLUDED(OP_resize_area) + DECLARE_CUSTOM_OP(resize_area, 1, 1, false, 0, -2); + #endif + + /** + * This op makes interpolated resize for given tensor with given algorithm. + * Supported algorithms are bilinear, bicubic, nearest_neighbor, lanczos5, gaussian, area and mitchellcubic. + * + * input array: + * 0 - 4D-Tensor with shape (batch, sizeX, sizeY, channels) + * 1 - 1D-Tensor with 2 values (newWidth, newHeight) + * + * optional int args: + * 0 - algorithm - bilinear by default + * optional bool args: + * 0 - preserve_aspect_ratio - default False + * 1 - antialias - default False + * + * output array: + * the 4D-Tensor with the image resized by the given algorithm (shape is {batch, newWidth, newHeight, channels}) + * + */ + + #if NOT_EXCLUDED(OP_image_resize) + DECLARE_CUSTOM_OP(image_resize, 2, 1, false, 0, 0); + #endif + +} +} #endif diff --git a/libnd4j/include/ops/declarable/headers/parity_ops.h b/libnd4j/include/ops/declarable/headers/parity_ops.h index 74221133c..27c012214 100644 --- a/libnd4j/include/ops/declarable/headers/parity_ops.h +++ b/libnd4j/include/ops/declarable/headers/parity_ops.h @@ -1771,130 +1771,6 @@ namespace sd { DECLARE_CUSTOM_OP(reduce_logsumexp, 1, 1, false, 0, 0); #endif - /** - * This op make bilinear or nearest neighbor interpolated resize for given tensor - * - * input array: - * 0 - 4D-Tensor with shape (batch, sizeX, sizeY, channels) numeric type - * 1 - 2D-Tensor with shape (num_boxes, 4) float type - * 2 - 1D-Tensor with shape (num_boxes) int type - * 3 - 1D-Tensor with 2 values (newWidth, newHeight) (optional) int type - * - * float arguments (optional) - * 0 - exprapolation_value (optional) default 0.f - * - * int arguments: (optional) - * 0 - mode (default 0 - bilinear interpolation) - * - * output array: - * the 4D-Tensor with resized to crop_size images given - float type - */ - #if NOT_EXCLUDED(OP_crop_and_resize) - DECLARE_CUSTOM_OP(crop_and_resize, 4, 1, false, -1, -1); - #endif - - /** - * This op make bilinear interpolated resize for given tensor - * - * input array: - * 0 - 4D-Tensor with shape (batch, sizeX, sizeY, channels) - * 1 - 1D-Tensor with 2 values (newWidth, newHeight) (optional) - * - * int arguments: (optional) - * 0 - new width - * 1 - new height - * - * output array: - * the 4D-Tensor with calculated backproped dots - * - * CAUTION: either size tensor or a pair of int params should be provided.
- */ - - #if NOT_EXCLUDED(OP_resize_nearest_neighbor) - DECLARE_CUSTOM_OP(resize_nearest_neighbor, 1, 1, false, 0, -2); - #endif - - /** - * This op make bicubic interpolated resize for given tensor - * - * input array: - * 0 - 4D-Tensor with shape (batch, sizeX, sizeY, channels) - * 1 - 1D-Tensor with 2 values (newWidth, newHeight) - * - * output array: - * the 4D-Tensor with resized image (shape is {batch, newWidth, newHeight, channels}) - * - */ - #if NOT_EXCLUDED(OP_resize_bicubic) - DECLARE_CUSTOM_OP(resize_bicubic, 1, 1, false, 0, -2); - #endif - - /** - * This op make area interpolated resize (as OpenCV INTER_AREA algorithm) for given tensor - * - * input array: - * 0 - images - 4D-Tensor with shape (batch, sizeX, sizeY, channels) - * 1 - size - 1D-Tensor with 2 values (newWidth, newHeight) (if missing a pair of integer args should be provided). - * - * int args: - proveded only when size tensor is missing - * 0 - new height - * 1 - new width - * boolean args: - * 0 - align_corners - optional (default is false) - * - * output array: - * the 4D-Tensor with resized image (shape is {batch, newWidth, newHeight, channels}) - * - */ - #if NOT_EXCLUDED(OP_resize_area) - DECLARE_CUSTOM_OP(resize_area, 1, 1, false, 0, -2); - #endif - - /** - * This op make interpolated resize for given tensor with given algorithm. - * Supported algorithms are bilinear, bicubic, nearest_neighbor. - * Need to implement to full compatibility with TF: lanczos5, gaussian, area and mitchellcubic - * - * input array: - * 0 - 4D-Tensor with shape (batch, sizeX, sizeY, channels) - * 1 - 1D-Tensor with 2 values (newWidth, newHeight) - * - * optional int args: - * 0 - algorithm - bilinear by default - * optional bool args: - * 0 - preserve_aspect_ratio - default False - * 1 - antialias - default False - * - * output array: - * the 4D-Tensor with resized by given algorithm image (shape is {batch, newWidth, newHeight, channels}) - * - */ - - #if NOT_EXCLUDED(OP_image_resize) - DECLARE_CUSTOM_OP(image_resize, 2, 1, false, 0, 0); - #endif - /** * Copy a tensor setting everything outside a central band in each innermost matrix * diff --git a/libnd4j/include/ops/declarable/helpers/cpu/image_resize.cpp b/libnd4j/include/ops/declarable/helpers/cpu/image_resize.cpp index 68b2130ac..7206b03e5 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/image_resize.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/image_resize.cpp @@ -418,17 +418,17 @@ namespace helpers { // Allocate and initialize coefficients table using Bicubic // convolution algorithm. 
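// For reference: row i of this table caches the Keys cubic evaluated at the fractional offset x = i / kTableSize (near tap, coeffsTable[i * 2]) and at x + 1 (far tap, coeffsTable[i * 2 + 1]). A minimal sketch of how one side of the 4-tap bicubic stencil is then read back; the names t, i, wNear and wFar are illustrative assumptions, not identifiers from this file:
//   float t = sampleFloat - std::floor(sampleFloat);    // fractional offset in [0, 1)
//   int   i = static_cast<int>(t * kTableSize + 0.5f);  // nearest precomputed row
//   float wNear = coeffsTable[i * 2];                   // kernel value at distance t
//   float wFar  = coeffsTable[i * 2 + 1];               // kernel value at distance t + 1
// The mirrored pair is read the same way at offset 1 - t, so the cubic polynomial is never re-evaluated per output pixel; the table trades (kTableSize + 1) * 2 floats of memory for that.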
// https://en.wikipedia.org/wiki/Bicubic_interpolation - float* coeffs_table = new float[(kTableSize + 1) * 2]; + float* coeffsTable = new float[(kTableSize + 1) * 2]; auto func = PRAGMA_THREADS_FOR { for (auto i = start; i <= stop; ++i) { float x = i * 1.0 / kTableSize; - coeffs_table[i * 2] = ((a + 2) * x - (a + 3)) * x * x + 1; + coeffsTable[i * 2] = ((a + 2) * x - (a + 3)) * x * x + 1; x += 1.0; - coeffs_table[i * 2 + 1] = ((a * x - 5 * a) * x + 8 * a) * x - 4 * a; + coeffsTable[i * 2 + 1] = ((a * x - 5 * a) * x + 8 * a) * x - 4 * a; } }; samediff::Threads::parallel_for(func, 0, kTableSize); - return coeffs_table; + return coeffsTable; } const float* getCoeffsTable(const bool use_keys_cubic) { @@ -988,25 +988,392 @@ namespace helpers { return res; } - int resizeAreaFunctor(sd::LaunchContext * context, NDArray const* image, int const width, int const height, - bool const alignCorners, NDArray* output) { + int resizeAreaFunctor(sd::LaunchContext * context, NDArray const* image, int const width, int const height, bool const alignCorners, NDArray* output) { BUILD_SINGLE_SELECTOR(image->dataType(), return resizeAreaFunctor_, (context, image, width, height, alignCorners, output), NUMERIC_TYPES); } + /** + * resize as TF v.2.x implemented (with preserve aspect ratio and antialias flags routines + * */ + // An interface for integrated scale functors. + struct IKernelFunc { + virtual float operator()(float x) const = 0; + virtual float radius() const = 0; + }; + + struct LanczosKernelFunc : public IKernelFunc { + // Pass 1 for Lanczos1 kernel, 3 for Lanczos3 etc. + explicit LanczosKernelFunc(float const radius) : _radius(radius) {} + float operator()(float x) const { + float const kPI = 3.141592653589793f; + x = math::nd4j_abs(x); + if (x > _radius) return 0.f; + // Need to special case the limit case of sin(x) / x when x is zero. + if (x <= 1.e-3f) { + return 1.f; + } + return _radius * std::sin(kPI * x) * std::sin(kPI * x / _radius) / (kPI * kPI * x * x); + } + float radius() const { return _radius; } + const float _radius; + }; + + struct GaussianKernelFunc : public IKernelFunc { + static constexpr float kRadiusMultiplier = 3.0f; + // https://en.wikipedia.org/wiki/Gaussian_function + // We use sigma = 0.5, as suggested on p. 4 of Ken Turkowski's "Filters + // for Common Resampling Tasks" for kernels with a support of 3 pixels: + // www.realitypixels.com/turk/computergraphics/ResamplingFilters.pdf + // This implies a radius of 1.5, + explicit GaussianKernelFunc(float radius = 1.5f) + : _radius(radius), _sigma(radius / kRadiusMultiplier) {} + float operator()(float x) const { + x = math::nd4j_abs(x); + if (x >= _radius) return 0.0f; + return std::exp(-x * x / (2.0 * _sigma * _sigma)); + } + float radius() const { return _radius; } + const float _radius; + const float _sigma; // Gaussian standard deviation + }; + + struct BoxKernelFunc : public IKernelFunc { + float operator()(float x) const { + x = math::nd4j_abs(x); + return x < 0.5f ? 1.f : x == 0.5f ? 0.5f : 0.f; + } + float radius() const { return 1.f; } + }; + + struct TriangleKernelFunc : public IKernelFunc { + // https://en.wikipedia.org/wiki/Triangle_function + float operator()(float x) const { + x = math::nd4j_abs(x); + return x < 1.f ? 1.f - x : 0.f; + } + float radius() const { return 1.f; } + }; + + struct KeysCubicKernelFunc : public IKernelFunc { + // http://ieeexplore.ieee.org/document/1163711/ + // R. G. Keys. Cubic convolution interpolation for digital image + // processing. 
IEEE Transactions on Acoustics, Speech, and Signal + // Processing, 29(6):1153–1160, 1981. + float operator()(float x) const { + x = math::nd4j_abs(x); + if (x >= 2.0f) { + return 0.0f; + } else if (x >= 1.0f) { + return ((-0.5f * x + 2.5f) * x - 4.0f) * x + 2.0f; + } else { + return ((1.5f * x - 2.5f) * x) * x + 1.0f; + } + } + float radius() const { return 2.f; } + }; + + struct MitchellCubicKernelFunc : public IKernelFunc { + // https://doi.org/10.1145/378456.378514 + // D. P. Mitchell and A. N. Netravali. Reconstruction filters in computer + // graphics. Computer Graphics (Proceedings of ACM SIGGRAPH 1988), + // 22(4):221–228, 1988. + float operator()(float x) const { + x = math::nd4j_abs(x); + if (x >= 2.f) { + return 0.f; + } else if (x >= 1.f) { + return (((-7.f / 18.f) * x + 2.f) * x - 10.f / 3.f) * x + 16.f / 9.f; + } else { + return (((7.f / 6.f) * x - 2.f) * x) * x + 8.f / 9.f; + } + } + float radius() const { return 2.f; } + }; + + // A pre-computed span of pixels along a single dimension. + // The output pixel will be the weighted sum of pixels starting from start. + struct Spans { + // The maximum span size of any output pixel. + int _spanSize; + // int32 tensor with shape {outputSize}. + NDArray _starts; + + // float32 tensor of size {outputSize, spanSize}. + // The output pixel at x is computed as: + // dot_product(input[starts[x]:starts[x]+span_size], weights[x]). + NDArray _weights; + }; + + static int + computeSpans(IKernelFunc* kernel, Nd4jLong const outSize, Nd4jLong const inSize, float const scale, float const translate, bool const antialias, Spans& spans) { + // When sampling, we need the inverse scale and translation, to map from an + // output to an input pixel. + float const invScale = 1.f / scale; + float const invTranslate = -invScale * translate; + // When downsampling the kernel should be scaled since we want to low pass + // filter and interpolate, but when upsampling it should not be since we only + // want to interpolate. + float const kernelScale = antialias ? math::nd4j_max(invScale, 1.f) : 1.f; + spans._spanSize = math::nd4j_min(2 * static_cast(std::ceil(kernel->radius() * kernelScale)) + 1, static_cast(inSize)); + spans._starts = NDArrayFactory::create('c', {outSize}); + spans._weights = NDArrayFactory::create('c', {outSize, spans._spanSize}); + + auto startsVec = spans._starts.bufferAsT(); + auto weightsVector = spans._weights.bufferAsT(); + spans._weights.nullify(); + + const float invKernelScale = 1.f / kernelScale; + int maxSpanSize = 0; + std::vector tempWeights; + + // return value if within bounds or bounds otherwise + auto boundsAmp = [](Nd4jLong const low, Nd4jLong const high, Nd4jLong const value) { + if (high < value) return high; + if (value < low) return low; + return value; + }; + + for (auto x = 0LL; x < outSize; ++x) { + const float columnFloat = x + 0.5f; + const float sampleFloat = columnFloat * invScale + invTranslate; + + // Don't sample when the sampling location is outside the source image. + if (sampleFloat < 0 || sampleFloat > inSize) { + // Add an empty span. 
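+ // (When the sample falls outside the source image, the weight row simply stays zeroed and the start index is parked at 0 just below.)
+ // A worked example of the span arithmetic that follows, with assumed values rather than numbers from any test: for the triangle kernel (radius 1), a 2x downscale and antialias on, invScale = 2.0 and kernelScale = 2.0, so spanSize = 2 * ceil(1 * 2) + 1 = 5.
+ // For output column x = 3: sampleFloat = (3 + 0.5) * 2.0 = 7.0, spanStart = ceil(7.0 - 2.0 - 0.5) = 5, spanEnd = floor(7.0 + 2.0 - 0.5) + 1 = 9,
+ // so source pixels 5..8 contribute, with raw triangle weights 0.25, 0.75, 0.75, 0.25 normalized below to 0.125, 0.375, 0.375, 0.125.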
+ startsVec[x] = 0; + continue; + } + Nd4jLong spanStart = math::nd4j_ceil(sampleFloat - kernel->radius() * kernelScale - 0.5f); + Nd4jLong spanEnd = math::nd4j_floor(sampleFloat + kernel->radius() * kernelScale - 0.5f); + spanStart = boundsAmp(0LL, inSize - 1, spanStart); + spanEnd = boundsAmp(0LL, inSize - 1, spanEnd) + 1; + int const spanSize = spanEnd - spanStart; + if (spanSize > spans._spanSize) { + return Status::CODE(ND4J_STATUS_BAD_INPUT, "Span is too large: "); // + spanSize + " vs " + spans._spanSize);//, spanSize, spans._spanSize)); + } + float totalWeightSum = 0.f; + tempWeights.clear(); + for (int source = spanStart; source < spanEnd; ++source) { + float kernelPos = static_cast(source) + 0.5f - sampleFloat; + float weight = (*kernel)(kernelPos * invKernelScale); + totalWeightSum += weight; + tempWeights.push_back(weight); + } + maxSpanSize = std::max(maxSpanSize, spanSize); + if (math::nd4j_abs(totalWeightSum) >= 1000.f * DataTypeUtils::min()) { // + auto totalWeightSumInverted = 1.0f / totalWeightSum; + auto outIndex = spans._spanSize * x; + for (auto weight : tempWeights) { + weightsVector[outIndex] = weight * totalWeightSumInverted; + ++outIndex; + } + } + startsVec[x] = spanStart; + } + return Status::OK(); + } + + template + static void gatherRows(int const spanSize, int const* starts, Z const* weights, X const* imagePtr, Nd4jLong const inputHeight, Nd4jLong const inputWidth, Nd4jLong const outputHeight, + Nd4jLong const outputWidth, Nd4jLong const channels, Z* outputPtr) { + auto inRowSize = inputWidth * channels; + auto outRowSize = outputWidth * channels; + + auto addScaledVector = [](const X* inVector, int vectorLen, Z weight, Z* outVector) { + Z* outVecEnd = outVector + vectorLen; + for (; outVector != outVecEnd; ++outVector, ++inVector) { + *outVector += weight * static_cast(*inVector); + } + }; + + for (int y = 0; y < outputHeight; ++y) { + Z* outRowData = outputPtr + outRowSize * y; + memset(outRowData, '\0', outRowSize * sizeof(Z));// std::fill(outRowData, outRowData + outRowSize, 0.f); + int inRow = starts[y]; + auto inRowData = imagePtr + inRowSize * inRow; + auto weightsStart = weights + y * spanSize; + auto realSpanSize = math::nd4j_min(starts[y] + spanSize, static_cast(inputHeight)) - starts[y]; + auto weightsEnd = weightsStart + realSpanSize; + for (auto weightPtr = weightsStart; weightPtr != weightsEnd; ++weightPtr) { + addScaledVector(inRowData, inRowSize, *weightPtr, outRowData); + inRowData += inRowSize; + } + } + } + + template + static void gatherColumns(int const spanSize, int const* starts, Z const* weights, Z const* imagesPtr, Nd4jLong const inputHeight, Nd4jLong const inputWidth, Nd4jLong const outputHeight, Nd4jLong const outputWidth, Nd4jLong channels, Z* outputPtr) { + auto inRowSize = inputWidth * channels; + auto outRowSize = outputWidth * channels; + + for (auto y = 0LL; y < outputHeight; ++y) { + auto inputRowStart = imagesPtr + inRowSize * y; + auto outPixels = outputPtr + outRowSize * y; + for (auto x = 0LL; x < outputWidth; ++x, outPixels += channels) { + auto inPixels = inputRowStart + starts[x] * channels; + auto weightsStart = weights + x * spanSize; + auto realSpanSize = math::nd4j_min(starts[x] + spanSize, static_cast(inputWidth)) - starts[x]; + auto weightsEnd = weightsStart + realSpanSize; + for (int c = 0; c < channels; ++c) { + outPixels[c] = 0.0f; + } + for (auto weightPtr = weightsStart; weightPtr != weightsEnd; ++weightPtr) { + Z w = *weightPtr; + for (int c = 0; c < channels; ++c) { + outPixels[c] += w * 
static_cast(inPixels[c]); + } + inPixels += channels; + } + } + } + } + + template + static void gatherSpans(int const rowSpanSize, NDArray const& rowStarts, NDArray const& rowWeights, int const colSpanSize, NDArray const& columnStarts, NDArray const& columnWeights, NDArray const* images, NDArray& intermediate, NDArray* output) { + auto batchSize = images->sizeAt(0); + auto inputHeight = images->sizeAt(1); + auto inputWidth = images->sizeAt(2); + auto channels = images->sizeAt(3); + + auto outputHeight = output->sizeAt(1); + auto outputWidth = output->sizeAt(2); + + auto inputPixPerBatch = inputWidth * inputHeight * channels; + auto intermediatePixPerBatch = inputWidth * outputHeight * channels; + auto outputPixPerBatch = outputWidth * outputHeight * channels; + Z* intermediatePtr = intermediate.bufferAsT(); + + const X* imagePtr = images->bufferAsT(); + Z* outPtr = output->bufferAsT(); + for (int b = 0; b < batchSize; ++b, imagePtr += inputPixPerBatch, + intermediatePtr += intermediatePixPerBatch, + outPtr += outputPixPerBatch) { + gatherRows(rowSpanSize, rowStarts.bufferAsT(), rowWeights.bufferAsT(), + imagePtr, inputHeight, inputWidth, outputHeight, + inputWidth, channels, intermediatePtr); + gatherColumns(colSpanSize, columnStarts.bufferAsT(), columnWeights.bufferAsT(), + intermediatePtr, outputHeight, inputWidth, outputHeight, outputWidth, channels, outPtr); + } + } + + template + static int resizeKernel(IKernelFunc* transformationKernel, NDArray const* input, Nd4jLong outWidth, Nd4jLong outHeight, bool antialias, NDArray* output) { + Nd4jLong const batchSize = input->sizeAt(0); + Nd4jLong const inputHeight = input->sizeAt(1); + Nd4jLong const inputWidth = input->sizeAt(2); + Nd4jLong const channels = input->sizeAt(3); + + Z rowScale = Z(outHeight) / Z(inputHeight); + Z columnScale = Z(outWidth) / Z(inputWidth); + + // Return if the output is empty. 
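+ // A sketch of the flow below (the cost figures are the usual separable-resampling argument, not measurements from this patch): the 2-D resize is split into two 1-D passes. computeSpans() precomputes, per output row and per output column, which source indices contribute and with what normalized kernel weights; gatherSpans() then reduces rows into an intermediate of shape {batchSize, outHeight, inputWidth, channels} and reduces its columns into the final output. With k taps per span this costs about (outHeight * inputWidth + outHeight * outWidth) * k multiply-adds per image, versus outHeight * outWidth * k * k for a direct 2-D filter.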
+ if (output->lengthOf() == 0) return Status::OK(); + + Spans colSpans; + + auto res = computeSpans(transformationKernel, outWidth, inputWidth, columnScale, 0.f, antialias, colSpans); + if (res != Status::OK()) return res; + Spans rowSpans; + res = computeSpans(transformationKernel, outHeight, inputHeight, rowScale, 0.f, antialias, rowSpans); + + NDArray intermediate = NDArrayFactory::create('c', {batchSize, outHeight, inputWidth, channels}); + + //const functor::Spans& const_row_spans = row_spans; + //typename TTypes::ConstTensor row_starts( + //const_row_spans.starts.tensor()); + auto& rowStarts = rowSpans._starts; // shape {outHeight} + auto& rowWeights = rowSpans._weights; // shape {outHeight, spanSize} + auto& columnStarts = colSpans._starts; // shape {outWidth} + auto& columnWeights = colSpans._weights; // shape {outWidth, spanSize} + + gatherSpans(rowSpans._spanSize, rowStarts, rowWeights, colSpans._spanSize, columnStarts, columnWeights, input, intermediate, output); + return res; + } + + static int resizeBilinear(sd::LaunchContext * context, NDArray const* image, int const width, int const height, bool const antialias, NDArray* output) { + auto kernel = std::unique_ptr(new TriangleKernelFunc()); + BUILD_DOUBLE_SELECTOR(image->dataType(), output->dataType(), return resizeKernel, + (kernel.get(), image, (Nd4jLong) width, (Nd4jLong) height, antialias, output), + NUMERIC_TYPES, FLOAT_TYPES_1); + return Status::CODE(ND4J_STATUS_VALIDATION, "helpers::resizeBilinear: Unknown error occurred."); + } + + static int resizeBicubic(sd::LaunchContext * context, NDArray const* image, int const width, int const height, bool const antialias, NDArray* output) { + if (antialias) { + auto kernel = std::unique_ptr(new KeysCubicKernelFunc()); + BUILD_DOUBLE_SELECTOR(image->dataType(), output->dataType(), return resizeKernel, + (kernel.get(), image, (Nd4jLong) width, (Nd4jLong) height, antialias, output), + NUMERIC_TYPES, FLOAT_TYPES_1); + } + else { + return resizeBicubicFunctorA(context, image, width, height, false, true, output); + } + return Status::CODE(ND4J_STATUS_VALIDATION, "helpers::resizeBicubic: Unknown error occurred."); + } + + static int resizeNeighbor(sd::LaunchContext * context, NDArray const* image, int const width, int const height, bool const antialias, NDArray* output) { + return resizeNeighborFunctor(context, image, width, height, false, true, output); + } + + static int resizeArea(sd::LaunchContext * context, NDArray const* image, int const width, int const height, bool const antialias, NDArray* output) { + return resizeAreaFunctor(context, image, width, height, false, output); + } + + static int resizeLanczos3(sd::LaunchContext * context, NDArray const* image, int const width, int const height, bool const antialias, NDArray* output) { + auto kernel = std::unique_ptr(new LanczosKernelFunc(3.f)); + BUILD_DOUBLE_SELECTOR(image->dataType(), output->dataType(), return resizeKernel, (kernel.get(), image, (Nd4jLong)width, (Nd4jLong)height, antialias, output), NUMERIC_TYPES, FLOAT_TYPES_1); + return Status::CODE(ND4J_STATUS_VALIDATION, "helpers::resizeLanczos3: Unknown error occurred."); + } + + static int resizeLanczos5(sd::LaunchContext * context, NDArray const* image, int const width, int const height, bool const antialias, NDArray* output) { + auto kernel = std::unique_ptr(new LanczosKernelFunc(5.f)); + BUILD_DOUBLE_SELECTOR(image->dataType(), output->dataType(), return resizeKernel, (kernel.get(), image, (Nd4jLong)width, (Nd4jLong)height, antialias, output), NUMERIC_TYPES, FLOAT_TYPES_1); + return Status::CODE(ND4J_STATUS_VALIDATION, "helpers::resizeLanczos5: Unknown error occurred."); + } + + static int resizeGaussian(sd::LaunchContext * context, NDArray const* image, int const width, int const height, bool const antialias, NDArray* output) { + auto kernel = std::unique_ptr(new GaussianKernelFunc()); + BUILD_DOUBLE_SELECTOR(image->dataType(), output->dataType(), return resizeKernel, (kernel.get(), image, (Nd4jLong)width, (Nd4jLong)height, antialias, output), NUMERIC_TYPES, FLOAT_TYPES_1); + return Status::CODE(ND4J_STATUS_VALIDATION, "helpers::resizeGaussian: Unknown error occurred."); + } + + static int resizeMitchellcubic(sd::LaunchContext * context, NDArray const* image, int const width, int const height, bool const antialias, NDArray* output) { + auto kernel = std::unique_ptr(new MitchellCubicKernelFunc()); + BUILD_DOUBLE_SELECTOR(image->dataType(), output->dataType(), return resizeKernel, (kernel.get(), image, (Nd4jLong)width, (Nd4jLong)height, antialias, output), NUMERIC_TYPES, FLOAT_TYPES_1); + return Status::CODE(ND4J_STATUS_VALIDATION, "helpers::resizeMitchellcubic: Unknown error occurred."); + } + +// ------------------------------------------------------------------------------------------------------------------ // + int resizeImagesFunctor(sd::LaunchContext * context, NDArray const* image, int const width, int const height, + ImageResizeMethods method, bool alignCorners, NDArray* output) { + switch (method) { + case kResizeBilinear: + return resizeBilinearFunctor(context, image, width, height, alignCorners, false, output); + case kResizeNearest: + return resizeNeighborFunctor(context, image, width, height, alignCorners, false, output); + case kResizeBicubic: + return resizeBicubicFunctor(context, image, width, height, alignCorners, false, output); + case kResizeArea: + return resizeAreaFunctor(context, image, width, height, alignCorners, output); + } + nd4j_printf("helper::resizeImagesFunctor: Wrong resize method %i\n", (int)method); + return Status::CODE(ND4J_STATUS_BAD_INPUT, "helper::resizeImagesFunctor: Wrong resize method"); + } // ------------------------------------------------------------------------------------------------------------------ // int resizeFunctor(sd::LaunchContext * context, NDArray const* image, int const width, int const height, - ImageResizeMethods method, bool preserveAspectRatio, bool antialias, NDArray* output) { + ImageResizeMethods method, bool antialias, NDArray* output) { switch (method) { - case kResizeBilinear: return resizeBilinearFunctor(context, image, width, height, false, false, output); break; - case kResizeNearest: return resizeNeighborFunctor(context, image, width, height, false, false, output); break; - case kResizeBicubic: return resizeBicubicFunctor(context, image, width, height, preserveAspectRatio, antialias, output); break; - case kResizeArea: return resizeAreaFunctor(context, image, width, height, preserveAspectRatio, output); - case kResizeLanczos5: - case kResizeGaussian: - case kResizeMitchelcubic: - throw std::runtime_error("helper::resizeFunctor: Non implemented yet."); + case kResizeBilinear: return resizeBilinear(context, image, width, height, antialias, output); + case kResizeNearest: return resizeNeighbor(context, image, width, height, antialias, output); + case kResizeBicubic: return resizeBicubic(context, image, width, height, antialias, output); + case kResizeArea: return resizeArea(context, image, width, height, antialias, output); + case kResizeLanczos3: return resizeLanczos3(context,
image, width, height, antialias, output); + case kResizeLanczos5: return resizeLanczos5(context, image, width, height, antialias, output); + case kResizeGaussian: return resizeGaussian(context, image, width, height, antialias, output); + case kResizeMitchellcubic: return resizeMitchellcubic(context, image, width, height, antialias, output); } - return ND4J_STATUS_OK; + nd4j_printf("helper::resizeFunctor: Wrong resize method %i\n", (int)method); + return Status::CODE(ND4J_STATUS_BAD_INPUT, "helper::resizeFunctor: Wrong resize method"); } diff --git a/libnd4j/include/ops/declarable/helpers/cuda/image_resize.cu b/libnd4j/include/ops/declarable/helpers/cuda/image_resize.cu index 180c8ad0e..3365d5d62 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/image_resize.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/image_resize.cu @@ -35,6 +35,7 @@ limitations under the License. #include #include +#include namespace sd { namespace ops { @@ -1203,20 +1204,22 @@ namespace helpers { BUILD_SINGLE_TEMPLATE(template int resizeBicubicFunctorA_, (sd::LaunchContext * context, NDArray const* image, int width, int height, bool const alignCorners, bool const halfPixelCenters, NDArray* output), NUMERIC_TYPES); -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - int resizeFunctor(sd::LaunchContext * context, NDArray const* image, int width, int height, - ImageResizeMethods method, bool preserveAspectRatio, bool antialias, NDArray* output) { + +// ------------------------------------------------------------------------------------------------------------------ // + int resizeImagesFunctor(sd::LaunchContext * context, NDArray const* image, int const width, int const height, + ImageResizeMethods method, bool alignCorners, NDArray* output) { switch (method) { - case kResizeBilinear: return resizeBilinearFunctor(context, image, width, height, false, false, output); break; - case kResizeNearest: return resizeNeighborFunctor(context, image, width, height, false, false, output); break; - case kResizeBicubic: return resizeBicubicFunctor(context, image, width, height, preserveAspectRatio, antialias, output); break; - case kResizeLanczos5: - case kResizeGaussian: + case kResizeBilinear: + return resizeBilinearFunctor(context, image, width, height, alignCorners, false, output); + case kResizeNearest: + return resizeNeighborFunctor(context, image, width, height, alignCorners, false, output); + case kResizeBicubic: + return resizeBicubicFunctor(context, image, width, height, alignCorners, false, output); case kResizeArea: - case kResizeMitchelcubic: - throw std::runtime_error("helper::resizeFunctor: Non implemented yet."); + return resizeAreaFunctor(context, image, width, height, alignCorners, output); + default: + throw std::runtime_error("helper::resizeImagesFunctor: Wrong resize method."); } - return ND4J_STATUS_OK; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/libnd4j/include/ops/declarable/helpers/cuda/image_resize_v2.cu b/libnd4j/include/ops/declarable/helpers/cuda/image_resize_v2.cu new file mode 100644 index 000000000..b727822c9 --- /dev/null +++ b/libnd4j/include/ops/declarable/helpers/cuda/image_resize_v2.cu @@ -0,0 +1,497 @@ +#include +#include +#include +#include + +namespace sd { + namespace ops { + namespace helpers { +// -------------------------------------------------------------------------------------------------------------- // +// 
resize v2 implementation //
+// -------------------------------------------------------------------------------------------------------------- //
+// A functional interface for scale kernels.
+//struct IKernelFunc {
+// _CUDA_HD virtual float operator()(float x) const = 0;
+// _CUDA_HD virtual float radius() const = 0;
+// _CUDA_HD virtual size_t size() const = 0;
+//};
+
+struct LanczosKernelFunc /*: public IKernelFunc*/ {
+ // Pass 1 for Lanczos1 kernel, 3 for Lanczos3 etc.
+ explicit LanczosKernelFunc(float const radius) : _radius(radius) {}
+ _CUDA_HD float operator()(float x) const {
+   float const kPI = 3.141592653589793f;
+   x = math::nd4j_abs(x);
+   if (x > _radius) return 0.f;
+   // Need to special case the limit case of sin(x) / x when x is zero.
+   if (x <= 1.e-3f) {
+     return 1.f;
+   }
+   return _radius * std::sin(kPI * x) * std::sin(kPI * x / _radius) / (kPI * kPI * x * x);
+ }
+ _CUDA_HD float radius() const { return _radius; }
+ const float _radius;
+};
+
+struct GaussianKernelFunc /*: public IKernelFunc*/ {
+ static constexpr float kRadiusMultiplier = 3.0f;
+ // https://en.wikipedia.org/wiki/Gaussian_function
+ // We use sigma = 0.5, as suggested on p. 4 of Ken Turkowski's "Filters
+ // for Common Resampling Tasks" for kernels with a support of 3 pixels:
+ // www.realitypixels.com/turk/computergraphics/ResamplingFilters.pdf
+ // This implies a radius of 1.5.
+ explicit GaussianKernelFunc(float radius = 1.5f)
+         : _radius(radius), _sigma(radius / kRadiusMultiplier) {}
+ _CUDA_HD float operator()(float x) const {
+   x = math::nd4j_abs(x);
+   if (x >= _radius) return 0.0f;
+   return std::exp(-x * x / (2.0 * _sigma * _sigma));
+ }
+ _CUDA_HD float radius() const { return _radius; }
+ const float _radius;
+ const float _sigma; // Gaussian standard deviation
+};
+
+struct BoxKernelFunc /*: public IKernelFunc*/ {
+ _CUDA_HD float operator()(float x) const {
+   x = math::nd4j_abs(x);
+   return x < 0.5f ? 1.f : x == 0.5f ? 0.5f : 0.f;
+ }
+ _CUDA_HD float radius() const { return 1.f; }
+ _CUDA_HD size_t size() const { return sizeof(BoxKernelFunc); }
+};
+
+struct TriangleKernelFunc /*: public IKernelFunc*/ {
+ // https://en.wikipedia.org/wiki/Triangle_function
+ _CUDA_HD float operator()(float x) const {
+   x = math::nd4j_abs(x);
+   return x < 1.f ? 1.f - x : 0.f;
+ }
+ _CUDA_HD float radius() const { return 1.f; }
+};
+
+struct KeysCubicKernelFunc /*: public IKernelFunc*/ {
+ // http://ieeexplore.ieee.org/document/1163711/
+ // R. G. Keys. Cubic convolution interpolation for digital image
+ // processing. IEEE Transactions on Acoustics, Speech, and Signal
+ // Processing, 29(6):1153–1160, 1981.
+ _CUDA_HD float operator()(float x) const {
+   x = math::nd4j_abs(x);
+   if (x >= 2.0f) {
+     return 0.0f;
+   } else if (x >= 1.0f) {
+     return ((-0.5f * x + 2.5f) * x - 4.0f) * x + 2.0f;
+   } else {
+     return ((1.5f * x - 2.5f) * x) * x + 1.0f;
+   }
+ }
+ _CUDA_HD float radius() const { return 2.f; }
+};
+
+struct MitchellCubicKernelFunc/* : public IKernelFunc*/ {
+ // https://doi.org/10.1145/378456.378514
+ // D. P. Mitchell and A. N. Netravali. Reconstruction filters in computer
+ // graphics. Computer Graphics (Proceedings of ACM SIGGRAPH 1988),
+ // 22(4):221–228, 1988.
+ _CUDA_HD float operator()(float x) const {
+   x = math::nd4j_abs(x);
+   if (x >= 2.f) {
+     return 0.f;
+   } else if (x >= 1.f) {
+     return (((-7.f / 18.f) * x + 2.f) * x - 10.f / 3.f) * x + 16.f / 9.f;
+   } else {
+     return (((7.f / 6.f) * x - 2.f) * x) * x + 8.f / 9.f;
+   }
+ }
+ _CUDA_HD float radius() const { return 2.f; }
+};
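Every functor above follows the same contract: evaluate the kernel at signed distances from the back-projected sample position, then normalize the weights so they sum to one. A minimal standalone illustration of that contract for the triangle kernel (hypothetical sizes, not part of the patch and not called anywhere):

    // Upscale a 5-pixel row to 7 pixels; look at output column x = 2.
    // sample = 2.5 * 5/7 = 1.7857, so inputs {1, 2} contribute with
    // normalized weights {0.714286, 0.285714}.
    static void kernelWeightsExample() {
        TriangleKernelFunc kernel;
        const float invScale = 5.f / 7.f;            // inSize / outSize
        const float sample = (2 + 0.5f) * invScale;  // back-projected column center
        const long start = static_cast<long>(std::ceil(sample - kernel.radius() - 0.5f));
        const long end = static_cast<long>(std::floor(sample + kernel.radius() - 0.5f));
        float w[8], sum = 0.f;
        for (long s = start; s <= end; ++s) { w[s - start] = kernel(s + 0.5f - sample); sum += w[s - start]; }
        for (long s = start; s <= end; ++s)
            printf("input %ld -> weight %f\n", s, w[s - start] / sum);
    }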
+
+// A pre-computed span of pixels along a single dimension.
+// The output pixel will be the weighted sum of pixels starting from start.
+struct Spans {
+ // The maximum span size of any output pixel.
+ int _spanSize;
+ // int32 tensor with shape {outputSize}.
+ NDArray _starts;
+
+ // float32 tensor of size {outputSize, spanSize}.
+ // The output pixel at x is computed as:
+ //   dot_product(input[starts[x]:starts[x]+span_size], weights[x]).
+ NDArray _weights;
+};
+
+// return value if within bounds, or the nearest bound otherwise
+static inline _CUDA_HD Nd4jLong boundsAmp(Nd4jLong const low, Nd4jLong const high, Nd4jLong const value) {
+ if (high < value) return high;
+ if (value < low) return low;
+ return value;
+}
+
+template <typename TKernelFunc>
+static __global__ void computeSpansKernel(TKernelFunc* kernel, int* startsVec, float* weightsVector, Nd4jLong outSize, Nd4jLong inSize, float kernelScale, int spanSize, float const invScale, float const invTranslate, float invKernelScale, float* tempWeightsBuf) {
+
+ auto tid = threadIdx.x + blockIdx.x * blockDim.x;
+ auto step = blockDim.x * gridDim.x;
+ __shared__ int maxSpanSize;
+
+ if (threadIdx.x == 0 && blockIdx.x == 0) {
+   maxSpanSize = 0;
+ }
+ __syncthreads();
+
+ for (auto x = tid; x < outSize; x += step) {
+   const float columnFloat = x + 0.5f;
+   const float sampleFloat = columnFloat * invScale + invTranslate;
+
+   // Don't sample when the sampling location is outside the source image.
+   if (sampleFloat < 0 || sampleFloat > inSize) {
+     // Add an empty span.
+     startsVec[x] = 0;
+     continue;
+   }
+   Nd4jLong spanStart = math::nd4j_ceil(sampleFloat - kernel->radius() * kernelScale - 0.5f);
+   Nd4jLong spanEnd = math::nd4j_floor(sampleFloat + kernel->radius() * kernelScale - 0.5f);
+   spanStart = boundsAmp(0LL, inSize - 1, spanStart);
+   spanEnd = boundsAmp(0LL, inSize - 1, spanEnd) + 1;
+   int const thisSpanSize = spanEnd - spanStart;
+   if (thisSpanSize > spanSize) {
+     return; // span would overflow the preallocated {outSize, spanSize} weights buffer
+   }
+   float totalWeightSum = 0.f;
+   auto tempWeights = &tempWeightsBuf[x * spanSize];
+   auto actualWeights = 0;
+   for (int source = spanStart; source < spanEnd; ++source) {
+     float kernelPos = static_cast<float>(source) + 0.5f - sampleFloat;
+     float weight = (*kernel)(kernelPos * invKernelScale);
+     totalWeightSum += weight;
+     tempWeights[actualWeights++] = weight;
+   }
+   maxSpanSize = math::nd4j_max(maxSpanSize, thisSpanSize);
+   if (math::nd4j_abs(totalWeightSum) >= 1000.f * DataTypeUtils::min<float>()) {
+     auto totalWeightSumInverted = 1.0f / totalWeightSum;
+     auto outIndex = spanSize * x;
+     for (auto weightIndex = 0; weightIndex < actualWeights; ++weightIndex) {
+       weightsVector[outIndex] = tempWeights[weightIndex] * totalWeightSumInverted;
+       ++outIndex;
+     }
+   }
+   startsVec[x] = spanStart;
+ }
+
+}
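On the consuming side, each output pixel is a dot product over at most spanSize contiguous inputs, which is what gatherRows/gatherColumns below implement per row and per column. A single-channel sketch of that step (hypothetical buffers, not part of the patch):

    // output[x] = dot(input[starts[x] : starts[x]+spanSize], weights row x)
    static float resampleOne(const float* input, long inSize, const int* starts,
                             const float* weights, int spanSize, long x) {
        float acc = 0.f;
        const float* w = weights + x * spanSize;  // row x of the {outSize, spanSize} weight table
        for (int i = 0; i < spanSize && starts[x] + i < inSize; ++i)
            acc += w[i] * input[starts[x] + i];
        return acc;
    }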
+template <typename TKernelFunc>
+static int computeSpans(LaunchContext* context, TKernelFunc& kernel, Nd4jLong const outSize, Nd4jLong const inSize, float const scale, float const translate, bool const antialias, Spans& spans) {
+ // When sampling, we need the inverse scale and translation, to map from an
+ // output to an input pixel.
+ float const invScale = 1.f / scale;
+ float const invTranslate = -invScale * translate;
+ // When downsampling the kernel should be scaled since we want to low pass
+ // filter and interpolate, but when upsampling it should not be since we only
+ // want to interpolate.
+ float const kernelScale = antialias ? math::nd4j_max(invScale, 1.f) : 1.f;
+ spans._spanSize = math::nd4j_min(2 * static_cast<int>(std::ceil(kernel.radius() * kernelScale)) + 1, static_cast<int>(inSize));
+ spans._starts = NDArrayFactory::create<int>('c', {outSize}); spans._starts.syncToHost();
+ spans._weights = NDArrayFactory::create<float>('c', {outSize, spans._spanSize}); spans._weights.syncToHost();
+
+ auto startsVec = reinterpret_cast<int*>(spans._starts.buffer());
+ auto weightsVector = reinterpret_cast<float*>(spans._weights.buffer());
+ spans._weights.nullify();
+
+ const float invKernelScale = 1.f / kernelScale;
+// NDArray tempWeights = NDArrayFactory::create<float>('c', {outSize, spans._spanSize});
+// auto tempWeightsBuf = reinterpret_cast<float*>(tempWeights.specialBuffer());
+// PointersManager mg(context, "ops::helpers::computeSpans");
+// auto specialKernel = reinterpret_cast<TKernelFunc*>(mg.replicatePointer(&kernel, sizeof(TKernelFunc)));
+ auto stream = context->getCudaStream();
+ //computeSpansKernel<<<1, 1, 128, *stream>>>(specialKernel, startsVec, weightsVector, outSize, inSize, kernelScale, spans._spanSize, invScale, invTranslate, invKernelScale, tempWeightsBuf);
+ auto maxSpanSize = 0;
+ std::vector<float> tempWeights;
+ for (auto x = 0; x < outSize; x ++) {
+   const float columnFloat = x + 0.5f;
+   const float sampleFloat = columnFloat * invScale + invTranslate;
+
+   // Don't sample when the sampling location is outside the source image.
+   if (sampleFloat < 0 || sampleFloat > inSize) {
+     // Add an empty span.
+     startsVec[x] = 0;
+     continue;
+   }
+   Nd4jLong spanStart = math::nd4j_ceil(sampleFloat - kernel.radius() * kernelScale - 0.5f);
+   Nd4jLong spanEnd = math::nd4j_floor(sampleFloat + kernel.radius() * kernelScale - 0.5f);
+   spanStart = boundsAmp(0LL, inSize - 1, spanStart);
+   spanEnd = boundsAmp(0LL, inSize - 1, spanEnd) + 1;
+   int const spanSize = spanEnd - spanStart;
+   if (spanSize > spans._spanSize) {
+     return Status::CODE(ND4J_STATUS_BAD_INPUT, "Span is too large");
+   }
+   float totalWeightSum = 0.f;
+   tempWeights.clear();
+
+   for (int source = spanStart; source < spanEnd; ++source) {
+     float kernelPos = static_cast<float>(source) + 0.5f - sampleFloat;
+     float weight = kernel(kernelPos * invKernelScale);
+     totalWeightSum += weight;
+     tempWeights.push_back(weight);
+   }
+   maxSpanSize = math::nd4j_max(maxSpanSize, spanSize);
+   if (math::nd4j_abs(totalWeightSum) >= 1000.f * DataTypeUtils::min<float>()) {
+     auto totalWeightSumInverted = 1.0f / totalWeightSum;
+     auto outIndex = spans._spanSize * x;
+     for (size_t weightIndex = 0; weightIndex < tempWeights.size(); ++weightIndex) {
+       weightsVector[outIndex++] = tempWeights[weightIndex] * totalWeightSumInverted;
+     }
+   }
+   startsVec[x] = spanStart;
+ }
+ spans._starts.tickWriteHost(); spans._weights.tickWriteHost();
+ spans._starts.syncToDevice();
+ spans._weights.syncToDevice();
+// cudaStreamSynchronize(*stream);
+ return Status::OK();
+}
+
+//template int computeSpans(LaunchContext* context, TriangleKernelFunc& kernel, Nd4jLong const outSize, Nd4jLong const inSize, float const scale, float const translate, bool const antialias, Spans& spans);
+
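A usage sketch for computeSpans (hypothetical values, not part of the patch; the printed starts assume the triangle kernel, outSize 7, inSize 5, no antialiasing):

    static void computeSpansExample(sd::LaunchContext* ctx) {
        TriangleKernelFunc kernel;
        Spans spans;
        // scale is outSize/inSize, so invScale = inSize/outSize maps output -> input
        auto status = computeSpans(ctx, kernel, /*outSize=*/7, /*inSize=*/5,
                                   /*scale=*/7.f / 5.f, /*translate=*/0.f,
                                   /*antialias=*/false, spans);
        if (status == Status::OK()) {
            // _spanSize == min(2 * ceil(1) + 1, 5) == 3
            spans._starts.printIndexedBuffer("span starts"); // expected: 0, 0, 1, 1, 2, 3, 4
        }
    }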
+template <typename X, typename Z>
+static __device__ void gatherRows(int const spanSize, int const* starts, Z const* weights, X const* imagePtr, Nd4jLong const inputHeight, Nd4jLong const inputWidth, Nd4jLong const outputHeight,
+                                  Nd4jLong const outputWidth, Nd4jLong const channels, Z* outputPtr) {
+ auto inRowSize = inputWidth * channels;
+ auto outRowSize = outputWidth * channels;
+
+ auto addScaledVector = [](const X* inVector, int vectorLen, Z weight, Z* outVector) {
+   Z* outVecEnd = outVector + vectorLen;
+   for (; outVector != outVecEnd; ++outVector, ++inVector) {
+     *outVector += weight * static_cast<Z>(*inVector);
+   }
+ };
+
+ for (int y = 0; y < outputHeight; ++y) {
+   Z* outRowData = outputPtr + outRowSize * y;
+   memset(outRowData, '\0', outRowSize * sizeof(Z));// std::fill(outRowData, outRowData + outRowSize, 0.f);
+   int inRow = starts[y];
+   auto inRowData = imagePtr + inRowSize * inRow;
+   auto weightsStart = weights + y * spanSize;
+   auto realSpanSize = math::nd4j_min(starts[y] + spanSize, static_cast<int>(inputHeight)) - starts[y];
+   auto weightsEnd = weightsStart + realSpanSize;
+   for (auto weightPtr = weightsStart; weightPtr != weightsEnd; ++weightPtr) {
+     addScaledVector(inRowData, inRowSize, *weightPtr, outRowData);
+     inRowData += inRowSize;
+   }
+ }
+}
+
+template <typename Z>
+static __device__ void gatherColumns(int const spanSize, int const* starts, Z const* weights, Z const* imagesPtr, Nd4jLong const inputHeight, Nd4jLong const inputWidth, Nd4jLong const outputHeight, Nd4jLong const outputWidth, Nd4jLong channels, Z* outputPtr) {
+ auto inRowSize = inputWidth * channels;
+ auto outRowSize = outputWidth * channels;
+
+ for (auto y = 0LL; y < outputHeight; ++y) {
+   auto inputRowStart = imagesPtr + inRowSize * y;
+   auto outPixels = outputPtr + outRowSize * y;
+   for (auto x = 0LL; x < outputWidth; ++x, outPixels += channels) {
+     auto inPixels = inputRowStart + starts[x] * channels;
+     auto weightsStart = weights + x * spanSize;
+     auto realSpanSize = math::nd4j_min(starts[x] + spanSize, static_cast<int>(inputWidth)) - starts[x];
+     auto weightsEnd = weightsStart + realSpanSize;
+     for (int c = 0; c < channels; ++c) {
+       outPixels[c] = 0.0f;
+     }
+     for (auto weightPtr = weightsStart; weightPtr != weightsEnd; ++weightPtr) {
+       Z w = *weightPtr;
+       for (int c = 0; c < channels; ++c) {
+         outPixels[c] += w * static_cast<Z>(inPixels[c]);
+       }
+       inPixels += channels;
+     }
+   }
+ }
+}
+
+template <typename X, typename Z>
+static __global__ void batchedGatherSpan(Nd4jLong batchSize, Nd4jLong inputWidth, Nd4jLong inputHeight, Nd4jLong outputWidth, Nd4jLong outputHeight, Nd4jLong channels, int rowSpanSize, int const* rowStartsBuf, Z const* rowWeightBuf, int columnSpanSize, int const* columnStartsBuf, Z const* columnWeightBuf, X const* pImages, Z* pIntermediate, Z* pOutput,
+                                         Nd4jLong inputPixPerBatch, Nd4jLong intermediatePixPerBatch, Nd4jLong outputPixPerBatch) {
+
+ auto tid = threadIdx.x + blockIdx.x * blockDim.x;
+ auto step = blockDim.x * gridDim.x;
+
+ for (int b = tid; b < batchSize; b += step) {
+   auto imagePtr = pImages + b * inputPixPerBatch;
+   auto intermediatePtr = pIntermediate + b * intermediatePixPerBatch;
+   auto outputPtr = pOutput + b * outputPixPerBatch;
+   gatherRows(rowSpanSize, rowStartsBuf, rowWeightBuf,
+              imagePtr, inputHeight, inputWidth, outputHeight,
+              inputWidth, channels, intermediatePtr);
+   gatherColumns(columnSpanSize, columnStartsBuf, columnWeightBuf,
+                 intermediatePtr, outputHeight, inputWidth, outputHeight, outputWidth, channels, outputPtr);
+ }
+}
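One subtlety in batchedGatherSpan above: the row pass writes an intermediate that keeps the input width, so for a 4x5 RGB image resized to 7x9 the buffers flow as (sizes hypothetical)

    input        {1, 4, 5, 3}
    intermediate {1, 7, 5, 3}   // gatherRows: height 4 -> 7, width unchanged
    output       {1, 7, 9, 3}   // gatherColumns: width 5 -> 9

which is why the gatherRows call passes inputWidth as the row pass's output width.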
+template <typename X, typename Z>
+static void gatherSpans(LaunchContext* context, int const rowSpanSize, NDArray const& rowStarts, NDArray const& rowWeights, int const colSpanSize, NDArray const& columnStarts, NDArray const& columnWeights, NDArray const* images, NDArray& intermediate, NDArray* output) {
+ auto batchSize = images->sizeAt(0);
+ auto inputHeight = images->sizeAt(1);
+ auto inputWidth = images->sizeAt(2);
+ auto channels = images->sizeAt(3);
+
+ auto outputHeight = output->sizeAt(1);
+ auto outputWidth = output->sizeAt(2);
+
+ auto inputPixPerBatch = inputWidth * inputHeight * channels;
+ auto intermediatePixPerBatch = inputWidth * outputHeight * channels;
+ auto outputPixPerBatch = outputWidth * outputHeight * channels;
+ auto intermediatePtr = reinterpret_cast<Z*>(intermediate.specialBuffer());
+
+ auto imagePtr = reinterpret_cast<X const*>(images->specialBuffer());
+ auto outputPtr = reinterpret_cast<Z*>(output->specialBuffer());
+ auto stream = context->getCudaStream();
+ auto rowStartsBuf = reinterpret_cast<int const*>(rowStarts.specialBuffer());
+ auto rowWeightBuf = reinterpret_cast<Z const*>(rowWeights.specialBuffer());
+ auto columnStartsBuf = reinterpret_cast<int const*>(columnStarts.specialBuffer());
+ auto columnWeightBuf = reinterpret_cast<Z const*>(columnWeights.specialBuffer());
+ batchedGatherSpan<<<128, 128, 256, *stream>>>(batchSize, inputWidth, inputHeight, outputWidth, outputHeight, channels, rowSpanSize, rowStartsBuf, rowWeightBuf, colSpanSize, columnStartsBuf, columnWeightBuf, imagePtr, intermediatePtr, outputPtr, inputPixPerBatch, intermediatePixPerBatch, outputPixPerBatch);
+}
+
+template <typename X, typename Z>
+static int resizeKernel(LaunchContext* context, ImageResizeMethods method, NDArray const* input, Nd4jLong outWidth, Nd4jLong outHeight, bool antialias, NDArray* output) {
+ Nd4jLong const batchSize = input->sizeAt(0);
+ Nd4jLong const inputHeight = input->sizeAt(1);
+ Nd4jLong const inputWidth = input->sizeAt(2);
+ Nd4jLong const channels = input->sizeAt(3);
+ NDArray::prepareSpecialUse({output}, {input});
+ Z rowScale = Z(outHeight) / Z(inputHeight);
+ Z columnScale = Z(outWidth) / Z(inputWidth);
+
+ // Return if the output is empty.
+ if (output->lengthOf() == 0) return Status::OK();
+
+ Spans colSpans;
+ Spans rowSpans;
+ auto res = Status::OK();
+ switch(method) {
+   case kResizeBilinear: {
+     TriangleKernelFunc kernel;
+     res = computeSpans(context, kernel, outWidth, inputWidth, columnScale, 0.f, antialias, colSpans);
+     if (res != Status::OK()) return res;
+     res = computeSpans(context, kernel, outHeight, inputHeight, rowScale, 0.f, antialias, rowSpans);
+   }
+   break;
+   case kResizeBicubic: {
+     KeysCubicKernelFunc kernel;
+     res = computeSpans(context, kernel, outWidth, inputWidth, columnScale, 0.f, antialias, colSpans);
+     if (res != Status::OK()) return res;
+     res = computeSpans(context, kernel, outHeight, inputHeight, rowScale, 0.f, antialias, rowSpans);
+   } break;
+   case kResizeLanczos3:{
+     LanczosKernelFunc kernel(3.f);
+     res = computeSpans(context, kernel, outWidth, inputWidth, columnScale, 0.f, antialias, colSpans);
+     if (res != Status::OK()) return res;
+     res = computeSpans(context, kernel, outHeight, inputHeight, rowScale, 0.f, antialias, rowSpans);
+   } break;
+
+   case kResizeLanczos5: {
+     LanczosKernelFunc kernel(5.f);
+     res = computeSpans(context, kernel, outWidth, inputWidth, columnScale, 0.f, antialias, colSpans);
+     if (res != Status::OK()) return res;
+     res = computeSpans(context, kernel, outHeight, inputHeight, rowScale, 0.f, antialias, rowSpans);
+   } break;
+   case kResizeGaussian: {
+     GaussianKernelFunc kernel;
+     res = computeSpans(context, kernel, outWidth, inputWidth, columnScale, 0.f, antialias, colSpans);
+     if (res != Status::OK()) return res;
+     res = computeSpans(context, kernel, outHeight, inputHeight, rowScale, 0.f, antialias, rowSpans);
+   } break;
+   case kResizeMitchellcubic:{
+     MitchellCubicKernelFunc kernel;
+     res = computeSpans(context, kernel, outWidth, inputWidth, columnScale, 0.f, antialias, colSpans);
+     if (res != Status::OK()) return res;
+     res = computeSpans(context, kernel, outHeight, inputHeight, rowScale, 0.f, antialias, rowSpans);
+   } break;
+ };
+
+ NDArray intermediate = NDArrayFactory::create<Z>('c', {batchSize, outHeight, inputWidth, channels});
+
+ auto& rowStarts = rowSpans._starts;       // shape {outHeight}
+ auto& rowWeights = rowSpans._weights;     // shape {outHeight, spanSize}
+ auto& columnStarts = colSpans._starts;    // shape {outWidth}
+ auto& columnWeights = colSpans._weights;  // shape {outWidth, spanSize}
+
+ gatherSpans<X, Z>(context, rowSpans._spanSize, rowStarts, rowWeights, colSpans._spanSize, columnStarts, columnWeights, input, intermediate, output);
+
+ NDArray::registerSpecialUse({output}, {input});
+ return res;
+}
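The wrappers below lean on BUILD_DOUBLE_SELECTOR to pick a resizeKernel<X, Z> instantiation from the two runtime data types; conceptually each call expands to a nested switch along these lines (illustrative sketch only, not the generated code):

    // switch (image->dataType()) {
    //   case DataType::FLOAT32:
    //     switch (output->dataType()) {
    //       case DataType::FLOAT32:
    //         return resizeKernel<float, float>(context, kResizeBilinear, image, width, height, antialias, output);
    //       // ... one case per FLOAT_TYPES_1 entry
    //     }
    //   // ... one case per NUMERIC_TYPES entry
    // }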
+
+
+static int resizeTriangle(sd::LaunchContext * context, NDArray const* image, int const width, int const height, bool const antialias, NDArray* output) {
+// std::unique_ptr kernel(new TriangleKernelFunc);
+ BUILD_DOUBLE_SELECTOR(image->dataType(), output->dataType(), return resizeKernel,(context, kResizeBilinear, image, width, height, antialias, output), NUMERIC_TYPES, FLOAT_TYPES_1);
+ return Status::CODE(ND4J_STATUS_VALIDATION, "helpers::resizeTriangle: This resize method is available in future versions");
+}
+
+static int resizeLanczos3(sd::LaunchContext * context, NDArray const* image, int const width, int const height, bool const antialias, NDArray* output) {
+// std::unique_ptr kernel(new LanczosKernelFunc(3.f));
+ BUILD_DOUBLE_SELECTOR(image->dataType(), output->dataType(), return resizeKernel,(context, kResizeLanczos3, image, width, height, antialias, output), NUMERIC_TYPES, FLOAT_TYPES_1);
+ return Status::CODE(ND4J_STATUS_VALIDATION, "helpers::resizeLanczos3: This resize method is available in future versions");
+}
+
+static int resizeLanczos5(sd::LaunchContext * context, NDArray const* image, int const width, int const height, bool const antialias, NDArray* output) {
+// std::unique_ptr kernel(new LanczosKernelFunc(5.f));
+ BUILD_DOUBLE_SELECTOR(image->dataType(), output->dataType(), return resizeKernel,(context, kResizeLanczos5, image, width, height, antialias, output), NUMERIC_TYPES, FLOAT_TYPES_1);
+ return Status::CODE(ND4J_STATUS_VALIDATION, "helpers::resizeLanczos5: This resize method is available in future versions");
+}
+
+static int resizeGaussian(sd::LaunchContext * context, NDArray const* image, int const width, int const height, bool const antialias, NDArray* output) {
+ BUILD_DOUBLE_SELECTOR(image->dataType(), output->dataType(), return resizeKernel,(context, kResizeGaussian, image, width, height, antialias, output), NUMERIC_TYPES, FLOAT_TYPES_1);
+ return Status::CODE(ND4J_STATUS_VALIDATION, "helpers::resizeGaussian: This resize method is available in future versions");
+}
+static int resizeMitchellcubic(sd::LaunchContext * context, NDArray const* image, int const width, int const height, bool const antialias, NDArray* output) {
+ BUILD_DOUBLE_SELECTOR(image->dataType(), output->dataType(), return resizeKernel,(context, kResizeMitchellcubic, image, width, height, antialias, output), NUMERIC_TYPES, FLOAT_TYPES_1);
+ return Status::CODE(ND4J_STATUS_VALIDATION, "helpers::resizeMitchellcubic: This resize method is available in future versions");
+}
+static int resizeKeycubic(sd::LaunchContext * context, NDArray const* image, int const width, int const height, bool const antialias, NDArray* output) {
+ if (!antialias)
+   return resizeBicubicFunctorA(context, image, width, height, false, true, output);
+ BUILD_DOUBLE_SELECTOR(image->dataType(), output->dataType(), return resizeKernel,(context, kResizeBicubic, image, width, height, antialias, output), NUMERIC_TYPES, FLOAT_TYPES_1);
+ return Status::CODE(ND4J_STATUS_VALIDATION, "helpers::resizeKeycubic: This resize method is available in future versions");
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+int resizeFunctor(sd::LaunchContext * context, NDArray const* image, int width, int height,
+                  ImageResizeMethods method, bool antialias, NDArray* output) {
+ switch (method) {
+   case kResizeBilinear: return resizeTriangle(context, image, width, height, antialias, output);
+   case kResizeNearest: return resizeNeighborFunctor(context, image, width, height, false, true, output);
+   case kResizeBicubic: return resizeKeycubic(context, image, width, height, antialias, output);
+   case kResizeLanczos3: return resizeLanczos3(context, image, width, height, antialias, output);
+   case kResizeLanczos5: return resizeLanczos5(context, image, width, height, antialias, output);
+   case kResizeGaussian: return resizeGaussian(context, image, width, height, antialias, output);
+   case kResizeArea: return resizeAreaFunctor(context, image, width, height, false, output);
+   case kResizeMitchellcubic: return resizeMitchellcubic(context, image, width, height, antialias, output);
+   default:
+     nd4j_printf("helper::resizeFunctor: Wrong resize method %i\n", (int)method);
+     throw std::runtime_error("helper::resizeFunctor: Wrong resize method.");
+ }
+ return ND4J_STATUS_OK;
+}
+
+
+ }
+ }
+}
\ No newline at end of file
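The header change that follows renumbers ImageResizeMethods so the values line up with the ordinals of the re-ordered Java ImageResizeMethod enum later in this patch, since Java passes the method as a plain integer argument. An illustrative guard for that contract (not in the patch):

    static_assert(kResizeBilinear == 0 && kResizeNearest == 1 && kResizeBicubic == 2 &&
                  kResizeArea == 3 && kResizeGaussian == 4 && kResizeLanczos3 == 5 &&
                  kResizeLanczos5 == 6 && kResizeMitchellcubic == 7,
                  "ImageResizeMethods must stay in sync with org.nd4j.enums.ImageResizeMethod");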
diff --git a/libnd4j/include/ops/declarable/helpers/image_resize.h b/libnd4j/include/ops/declarable/helpers/image_resize.h index c11e94ed4..bd9e10b58 100644
--- a/libnd4j/include/ops/declarable/helpers/image_resize.h
+++ b/libnd4j/include/ops/declarable/helpers/image_resize.h
@@ -28,13 +28,17 @@ namespace ops {
 namespace helpers {
 enum ImageResizeMethods {
-   kResizeBilinear = 1,
-   kResizeBicubic,
+   kResizeBilinear = 0, // as Java requires
    kResizeNearest,
+   kResizeBicubic,
+   kResizeArea,
    kResizeGaussian,
+   kResizeLanczos3,
    kResizeLanczos5,
-   kResizeMitchelcubic,
-   kResizeArea
+   kResizeMitchellcubic,
+   kResizeFirst = kResizeBilinear,
+   kResizeLast = kResizeMitchellcubic,
+   kResizeOldLast = kResizeArea
 };
 int resizeBilinearFunctor(sd::LaunchContext * context, NDArray const* image, int const width, int const height,
@@ -49,7 +53,10 @@ namespace helpers {
 bool const alignCorners, NDArray* output);
 int resizeFunctor(sd::LaunchContext * context, NDArray const* image, int const width, int const height,
-                  ImageResizeMethods method, bool preserveAspectRatio, bool antialias, NDArray* output);
+                  ImageResizeMethods method, bool antialias, NDArray* output);
+
+ int resizeImagesFunctor(sd::LaunchContext * context, NDArray const* image, int const width, int const height,
+                  ImageResizeMethods method, bool alignCorners, NDArray* output);
 }
 }
 }
diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests10.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests10.cpp index 6d89bd182..2ffc2c22d 100644
--- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests10.cpp
+++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests10.cpp
@@ -396,6 +396,29 @@ TEST_F(DeclarableOpsTests10, TestMarixBandPart_Test_1) {
 ASSERT_TRUE(exp.equalsTo(results.at(0)));
 }
+///////////////////////////////////////////////////////////////////
+TEST_F(DeclarableOpsTests10, TestMarixBandPart_Test_2) {
+
+ auto x = NDArrayFactory::create('c', {2, 3, 3});
+ auto minD = NDArrayFactory::create(1);
+ auto maxD = NDArrayFactory::create(1);
+ auto exp = NDArrayFactory::create('c', {2, 3, 3});
+ x.linspace(1);
+ exp.linspace(1);
+ exp.p(0, 0, 2, 0.);
+ exp.p(1, 0, 2, 0.);
+ exp.p(0, 2, 0, 0.);
+ exp.p(1, 2, 0, 0.);
+
+ sd::ops::matrix_band_part op;
+ auto results = op.evaluate({&x, &minD, &maxD}, {}, {});
+
+ ASSERT_EQ(ND4J_STATUS_OK, results.status());
+ //results.at(0)->printIndexedBuffer("MBP Test1");
+ //exp.printIndexedBuffer("MBP Expec");
+ ASSERT_TRUE(exp.equalsTo(results.at(0)));
+}
+
 //////////////////////////////////////////////////////////////////////////////
 TEST_F(DeclarableOpsTests10, atan2_test1) {
@@ -1528,6 +1551,71 @@ TEST_F(DeclarableOpsTests10, ImageResizeBilinear_Test01) {
 }
+TEST_F(DeclarableOpsTests10, ResizeImages_Test1) {
+
+ NDArray input = NDArrayFactory::create('c', {2, 4, 5, 3});
+ input.linspace(1.);
+
+ auto expected = NDArrayFactory::create('c', {2, 7, 9, 3}, {
+ 1.f, 2.f, 3.f, 2.6666667f, 3.6666667f, 4.666667f, 4.3333335f, 5.3333335f, 6.3333335f, 6.f,
+ 7.f, 8.f, 7.666667f, 8.666667f, 9.666667f, 9.333334f, 10.333334f, 11.333334f, 11.f, 12.f,
+ 13.f, 12.666667f, 13.666667f, 14.666667f, 13.f, 14.f, 15.f, 9.571429f, 10.571429f, 11.571429f,
+ 11.238095f, 12.238095f, 13.238095f, 12.904762f, 13.904762f, 14.904762f, 14.571429f, 15.571429f, 16.57143f,
+ 16.238096f, 17.238096f, 18.238096f, 17.904762f, 18.904762f, 19.904762f, 19.57143f, 20.57143f, 21.57143f,
+ 21.238096f, 22.238096f, 23.238096f, 21.57143f, 22.57143f, 23.57143f, 18.142859f, 19.142859f, 20.142859f,
+
19.809525f, 20.809525f, 21.809525f, 21.476192f, 22.476192f, 23.476192f, 23.142859f, 24.142859f, 25.142859f, + 24.809526f, 25.809526f, 26.809526f, 26.476192f, 27.476192f, 28.476192f, 28.142859f, 29.142859f, 30.142859f, + 29.809526f, 30.809526f, 31.809526f, 30.142859f, 31.142859f, 32.142857f, 26.714287f, 27.714287f, 28.714287f, + 28.380955f, 29.380955f, 30.380955f, 30.04762f, 31.04762f, 32.047623f, 31.714287f, 32.714287f, 33.714287f, + 33.380955f, 34.380955f, 35.380955f, 35.047623f, 36.047623f, 37.047623f, 36.714287f, 37.714287f, 38.714287f, + 38.380955f, 39.380955f, 40.380955f, 38.714287f, 39.714287f, 40.714287f, 35.285717f, 36.285717f, 37.285717f, + 36.952385f, 37.952385f, 38.952385f, 38.61905f, 39.61905f, 40.61905f, 40.285717f, 41.285717f, 42.285717f, + 41.952385f, 42.952385f, 43.952385f, 43.61905f, 44.61905f, 45.61905f, 45.285717f, 46.285717f, 47.285717f, + 46.952385f, 47.952385f, 48.952385f, 47.285717f, 48.285717f, 49.285717f, 43.857143f, 44.857143f, 45.857143f, + 45.52381f, 46.52381f, 47.52381f, 47.190475f, 48.190475f, 49.190475f, 48.857143f, 49.857143f, 50.857143f, + 50.52381f, 51.52381f, 52.52381f, 52.190475f, 53.190475f, 54.190475f, 53.857143f, 54.857143f, 55.857143f, + 55.52381f, 56.52381f, 57.52381f, 55.857143f, 56.857143f, 57.857143f, 46.f, 47.f, 48.f, + 47.666668f, 48.666668f, 49.666668f, 49.333332f, 50.333332f, 51.333332f, 51.f, 52.f, 53.f, + 52.666668f, 53.666668f, 54.666668f, 54.333332f, 55.333332f, 56.333332f, 56.f, 57.f, 58.f, + 57.666668f, 58.666668f, 59.666668f, 58.f, 59.f, 60.f, 61.f, 62.f, 63.f, + 62.666668f, 63.666668f, 64.666664f, 64.333336f, 65.333336f, 66.333336f, 66.f, 67.f, 68.f, + 67.666664f, 68.666664f, 69.666664f, 69.333336f, 70.333336f, 71.333336f, 71.f, 72.f, 73.f, + 72.666664f, 73.666664f, 74.666664f, 73.f, 74.f, 75.f, 69.57143f, 70.57143f, 71.57143f, + 71.2381f, 72.2381f, 73.23809f, 72.90476f, 73.90476f, 74.90476f, 74.57143f, 75.57143f, 76.57143f, + 76.23809f, 77.23809f, 78.23809f, 77.90476f, 78.90476f, 79.90476f, 79.57143f, 80.57143f, 81.57143f, + 81.23809f, 82.23809f, 83.23809f, 81.57143f, 82.57143f, 83.57143f, 78.14286f, 79.14286f, 80.14286f, + 79.809525f, 80.809525f, 81.809525f, 81.4762f, 82.4762f, 83.4762f, 83.14286f, 84.14286f, 85.14286f, + 84.809525f, 85.809525f, 86.809525f, 86.4762f, 87.4762f, 88.4762f, 88.14286f, 89.14286f, 90.14286f, + 89.809525f, 90.809525f, 91.809525f, 90.14286f, 91.14286f, 92.14286f, 86.71429f, 87.71429f, 88.71429f, + 88.38095f, 89.38095f, 90.38095f, 90.04762f, 91.04762f, 92.04762f, 91.71429f, 92.71429f, 93.71429f, + 93.38095f, 94.38095f, 95.38095f, 95.04762f, 96.04762f, 97.04762f, 96.71429f, 97.71429f, 98.71429f, + 98.38095f, 99.38095f, 100.38095f, 98.71429f, 99.71429f, 100.71429f, 95.28571f, 96.28571f, 97.28571f, + 96.95238f, 97.95238f, 98.95238f, 98.61905f, 99.61905f, 100.61905f, 100.28571f, 101.28571f, 102.28571f, + 101.95238f, 102.95238f, 103.95238f, 103.61905f, 104.61905f, 105.61905f, 105.28571f, 106.28571f, 107.28571f, + 106.95238f, 107.95238f, 108.95238f, 107.28571f, 108.28571f, 109.28571f, 103.85715f, 104.85715f, 105.85715f, + 105.5238f, 106.5238f, 107.5238f,107.190475f,108.190475f,109.190475f, 108.85715f, 109.85715f, 110.85715f, + 110.5238f, 111.5238f, 112.5238f,112.190475f,113.190475f,114.190475f, 113.85715f, 114.85715f, 115.85715f, + 115.5238f, 116.5238f, 117.5238f, 115.85715f, 116.85715f, 117.85715f, 106.f, 107.f, 108.f, + 107.666664f,108.666664f,109.666664f,109.333336f,110.333336f,111.333336f, 111.f, 112.f, 113.f, + 112.666664f,113.666664f,114.666664f,114.333336f,115.333336f,116.333336f, 116.f, 117.f, 118.f, + 
117.666664f,118.666664f,119.666664f, 118.f, 119.f, 120.f + }); + + auto size = NDArrayFactory::create({7, 11}); + sd::ops::resize_images op; + auto results = op.evaluate({&input, &size}, {}, {0}, {false, true}); // resize with bilinear method + + ASSERT_EQ(ND4J_STATUS_OK, results.status()); + + NDArray *result = results.at(0); + +// result->printBuffer("Resized to 7x9"); +// expected.printBuffer("Expect for 7x9"); +// result.printShapeInfo("Output shape"); +// expected.printShapeInfo("Expect shape"); + ASSERT_TRUE(expected.isSameShape(result)); + ASSERT_TRUE(expected.equalsTo(result)); +} TEST_F(DeclarableOpsTests10, ImageResizeBilinear_Test02) { NDArray input = NDArrayFactory::create('c', {2, 5,5,3}, { diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests11.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests11.cpp index 23c40ebae..97dcf7574 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests11.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests11.cpp @@ -25,6 +25,7 @@ #include #include #include +#include using namespace sd; @@ -1346,6 +1347,34 @@ TEST_F(DeclarableOpsTests11, ImageResizeArea_Test8) { ASSERT_TRUE(expected.equalsTo(result)); } +TEST_F(DeclarableOpsTests11, ResizeImages_Test8) { + + NDArray input = NDArrayFactory::create('c', {1, 3, 3, 1}, { + 1, 2, 3, 4, 5, 6, 7, 8, 9 + }); + + NDArray expected = NDArrayFactory::create('c', {1, 6, 6, 1}, { +// 1.f, 1.f, 2.f, 2.f, 3.f, 3.f, 1.f, 1.f, 2.f, 2.f, 3.f, 3.f, 4.f, 4.f, 5.f, 5.f, 6.f, 6.f, 4.f, 4.f, 5.f, 5.f, +// 6.f, 6.f, 7.f, 7.f, 8.f, 8.f, 9.f, 9.f, 7.f, 7.f, 8.f, 8.f, 9.f, 9.f + 1.f , 1.f , 1.5f, 2.f , 2.f, 3.f, 1.f , 1.f , 1.5f, 2.f , 2.f, 3.f, + 2.5f, 2.5f, 3.f, 3.5f, 3.5f, 4.5f, 4.f , 4.f , 4.5f , 5.f, 5.f, 6.f , + 4.f, 4.f, 4.5f , 5.f, 5.f, 6.f, 7.f , 7.f , 7.5f , 8.f , 8.f , 9.f + }); + //input.linspace(1); +// auto size = NDArrayFactory::create({6, 6}); + sd::ops::resize_images op; + auto results = op.evaluate({&input}, {}, {6, 8, ops::helpers::kResizeArea}, {true, true}); // resize_area to 6x8 with align corners and preserve aspect ratio of input image + + ASSERT_EQ(ND4J_STATUS_OK, results.status()); + + NDArray* result = results.at(0); + +// result->printBuffer("Area Resized to 6x6"); +// expected.printBuffer("Area Expect for 6x6"); + ASSERT_TRUE(expected.isSameShape(result)); + ASSERT_TRUE(expected.equalsTo(result)); +} + /////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests11, ImageResizeArea_Test9) { @@ -1354,7 +1383,10 @@ TEST_F(DeclarableOpsTests11, ImageResizeArea_Test9) { }); NDArray expected = NDArrayFactory::create('c', {1, 10, 10, 4}, { - 1.000000f, 2.000000f, 3.000000f, 4.000000f, 1.000000f, 2.000000f, 3.000000f, 4.000000f, 1.000000f, 2.000000f, 3.000000f, 4.000000f, 3.666667f, 4.666667f, 5.666667f, 6.666667f, 5.000000f, 6.000000f, 7.000000f, 8.000000f, 5.000000f, 6.000000f, 7.000000f, 8.000000f, 6.333336f, 7.333336f, 8.333336f, 9.333337f, 9.000000f, 10.000000f, 11.000000f, 12.000000f, 9.000000f, 10.000000f, 11.000000f, 12.000000f, 8.999998f, 9.999998f, 10.999998f, 11.999998f, 1.000000f, 2.000000f, 3.000000f, 4.000000f, 1.000000f, 2.000000f, 3.000000f, 4.000000f, 1.000000f, 2.000000f, 3.000000f, 4.000000f, 3.666667f, 4.666667f, 5.666667f, 6.666667f, 5.000000f, 6.000000f, 7.000000f, 8.000000f, 5.000000f, 6.000000f, 7.000000f, 8.000000f, 6.333336f, 7.333336f, 8.333336f, 9.333337f, 9.000000f, 10.000000f, 11.000000f, 12.000000f, 9.000000f, 10.000000f, 11.000000f, 12.000000f, 8.999998f, 9.999998f, 10.999998f, 11.999998f, 1.000000f, 2.000000f, 
3.000000f, 4.000000f, 1.000000f, 2.000000f, 3.000000f, 4.000000f, 1.000000f, 2.000000f, 3.000000f, 4.000000f, 3.666667f, 4.666667f, 5.666667f, 6.666667f, 5.000000f, 6.000000f, 7.000000f, 8.000000f, 5.000000f, 6.000000f, 7.000000f, 8.000000f, 6.333336f, 7.333336f, 8.333336f, 9.333337f, 9.000000f, 10.000000f, 11.000000f, 12.000000f, 9.000000f, 10.000000f, 11.000000f, 12.000000f, 8.999998f, 9.999998f, 10.999998f, 11.999998f, 1.000000f, 2.000000f, 3.000000f, 4.000000f, 1.000000f, 2.000000f, 3.000000f, 4.000000f, 1.000000f, 2.000000f, 3.000000f, 4.000000f, 3.666667f, 4.666667f, 5.666667f, 6.666667f, 5.000000f, 6.000000f, 7.000000f, 8.000000f, 5.000000f, 6.000000f, 7.000000f, 8.000000f, 6.333336f, 7.333336f, 8.333336f, 9.333337f, 9.000000f, 10.000000f, 11.000000f, 12.000000f, 9.000000f, 10.000000f, 11.000000f, 12.000000f, 8.999998f, 9.999998f, 10.999998f, 11.999998f, 1.000000f, 2.000000f, 3.000000f, 4.000000f, 1.000000f, 2.000000f, 3.000000f, 4.000000f, 1.000000f, 2.000000f, 3.000000f, 4.000000f, 3.666667f, 4.666667f, 5.666667f, 6.666667f, 5.000000f, 6.000000f, 7.000000f, 8.000000f, 5.000000f, 6.000000f, 7.000000f, 8.000000f, 6.333336f, 7.333336f, 8.333336f, 9.333336f, 8.999999f, 9.999999f, 11.000000f, 11.999999f, 8.999999f, 9.999999f, 11.000000f, 11.999999f, 8.999998f, 9.999997f, 10.999997f, 11.999997f, 13.000003f, 14.000004f, 15.000003f, 16.000004f, 13.000003f, 14.000004f, 15.000003f, 16.000004f, 13.000003f, 14.000004f, 15.000003f, 16.000004f, 15.666671f, 16.666672f, 17.666672f, 18.666672f, 17.000006f, 18.000004f, 19.000006f, 20.000004f, 17.000006f, 18.000004f, 19.000006f, 20.000004f, 18.333344f, 19.333344f, 20.333345f, 21.333344f, 21.000006f, 22.000006f, 23.000006f, 24.000006f, 21.000006f, 22.000006f, 23.000006f, 24.000006f, 21.000002f, 22.000000f, 23.000002f, 24.000000f, 13.000000f, 14.000001f, 15.000000f, 16.000000f, 13.000000f, 14.000001f, 15.000000f, 16.000000f, 13.000000f, 14.000001f, 15.000000f, 16.000000f, 15.666667f, 16.666668f, 17.666668f, 18.666668f, 17.000002f, 18.000000f, 19.000002f, 20.000000f, 17.000002f, 18.000000f, 19.000002f, 20.000000f, 18.333340f, 19.333340f, 20.333342f, 21.333340f, 21.000002f, 22.000000f, 22.999998f, 24.000000f, 21.000002f, 22.000000f, 22.999998f, 24.000000f, 20.999996f, 21.999996f, 22.999994f, 23.999996f, 13.000000f, 14.000001f, 15.000000f, 16.000000f, 13.000000f, 14.000001f, 15.000000f, 16.000000f, 13.000000f, 14.000001f, 15.000000f, 16.000000f, 15.666667f, 16.666668f, 17.666668f, 18.666668f, 17.000002f, 18.000000f, 19.000002f, 20.000000f, 17.000002f, 18.000000f, 19.000002f, 20.000000f, 18.333340f, 19.333340f, 20.333342f, 21.333340f, 21.000002f, 22.000000f, 22.999998f, 24.000000f, 21.000002f, 22.000000f, 22.999998f, 24.000000f, 20.999996f, 21.999996f, 22.999994f, 23.999996f, 13.000000f, 14.000001f, 15.000000f, 16.000000f, 13.000000f, 14.000001f, 15.000000f, 16.000000f, 13.000000f, 14.000001f, 15.000000f, 16.000000f, 15.666667f, 16.666668f, 17.666668f, 18.666668f, 17.000002f, 18.000000f, 19.000002f, 20.000000f, 17.000002f, 18.000000f, 19.000002f, 20.000000f, 18.333340f, 19.333340f, 20.333342f, 21.333340f, 21.000002f, 22.000000f, 22.999998f, 24.000000f, 21.000002f, 22.000000f, 22.999998f, 24.000000f, 20.999996f, 21.999996f, 22.999994f, 23.999996f, 12.999995f, 13.999995f, 14.999994f, 15.999994f, 12.999995f, 13.999995f, 14.999994f, 15.999994f, 12.999995f, 13.999995f, 14.999994f, 15.999994f, 15.666661f, 16.666662f, 17.666660f, 18.666660f, 16.999994f, 17.999994f, 18.999992f, 19.999992f, 16.999994f, 17.999994f, 18.999992f, 19.999992f, 18.333334f, 19.333332f, 
20.333334f, 21.333332f, 20.999992f, 21.999992f, 22.999990f, 23.999992f, 20.999992f, 21.999992f, 22.999990f, 23.999992f, 20.999989f, 21.999989f, 22.999987f, 23.999987f + 1.000000f, 2.000000f, 3.000000f, 4.000000f, 1.000000f, 2.000000f, 3.000000f, 4.000000f, 1.000000f, 2.000000f, + 3.000000f, 4.000000f, 3.666667f, 4.666667f, 5.666667f, 6.666667f, 5.000000f, 6.000000f, 7.000000f, 8.000000f, + 5.000000f, 6.000000f, 7.000000f, 8.000000f, 6.333336f, 7.333336f, 8.333336f, 9.333337f, 9.000000f, 10.000000f, + 11.000000f, 12.000000f, 9.000000f, 10.000000f, 11.000000f, 12.000000f, 8.999998f, 9.999998f, 10.999998f, 11.999998f, 1.000000f, 2.000000f, 3.000000f, 4.000000f, 1.000000f, 2.000000f, 3.000000f, 4.000000f, 1.000000f, 2.000000f, 3.000000f, 4.000000f, 3.666667f, 4.666667f, 5.666667f, 6.666667f, 5.000000f, 6.000000f, 7.000000f, 8.000000f, 5.000000f, 6.000000f, 7.000000f, 8.000000f, 6.333336f, 7.333336f, 8.333336f, 9.333337f, 9.000000f, 10.000000f, 11.000000f, 12.000000f, 9.000000f, 10.000000f, 11.000000f, 12.000000f, 8.999998f, 9.999998f, 10.999998f, 11.999998f, 1.000000f, 2.000000f, 3.000000f, 4.000000f, 1.000000f, 2.000000f, 3.000000f, 4.000000f, 1.000000f, 2.000000f, 3.000000f, 4.000000f, 3.666667f, 4.666667f, 5.666667f, 6.666667f, 5.000000f, 6.000000f, 7.000000f, 8.000000f, 5.000000f, 6.000000f, 7.000000f, 8.000000f, 6.333336f, 7.333336f, 8.333336f, 9.333337f, 9.000000f, 10.000000f, 11.000000f, 12.000000f, 9.000000f, 10.000000f, 11.000000f, 12.000000f, 8.999998f, 9.999998f, 10.999998f, 11.999998f, 1.000000f, 2.000000f, 3.000000f, 4.000000f, 1.000000f, 2.000000f, 3.000000f, 4.000000f, 1.000000f, 2.000000f, 3.000000f, 4.000000f, 3.666667f, 4.666667f, 5.666667f, 6.666667f, 5.000000f, 6.000000f, 7.000000f, 8.000000f, 5.000000f, 6.000000f, 7.000000f, 8.000000f, 6.333336f, 7.333336f, 8.333336f, 9.333337f, 9.000000f, 10.000000f, 11.000000f, 12.000000f, 9.000000f, 10.000000f, 11.000000f, 12.000000f, 8.999998f, 9.999998f, 10.999998f, 11.999998f, 1.000000f, 2.000000f, 3.000000f, 4.000000f, 1.000000f, 2.000000f, 3.000000f, 4.000000f, 1.000000f, 2.000000f, 3.000000f, 4.000000f, 3.666667f, 4.666667f, 5.666667f, 6.666667f, 5.000000f, 6.000000f, 7.000000f, 8.000000f, 5.000000f, 6.000000f, 7.000000f, 8.000000f, 6.333336f, 7.333336f, 8.333336f, 9.333336f, 8.999999f, 9.999999f, 11.000000f, 11.999999f, 8.999999f, 9.999999f, 11.000000f, 11.999999f, 8.999998f, 9.999997f, 10.999997f, 11.999997f, 13.000003f, 14.000004f, 15.000003f, 16.000004f, 13.000003f, 14.000004f, 15.000003f, 16.000004f, 13.000003f, 14.000004f, 15.000003f, 16.000004f, 15.666671f, 16.666672f, 17.666672f, 18.666672f, 17.000006f, 18.000004f, 19.000006f, 20.000004f, 17.000006f, 18.000004f, 19.000006f, 20.000004f, 18.333344f, 19.333344f, 20.333345f, 21.333344f, 21.000006f, 22.000006f, 23.000006f, 24.000006f, 21.000006f, 22.000006f, 23.000006f, 24.000006f, 21.000002f, 22.000000f, 23.000002f, 24.000000f, 13.000000f, 14.000001f, 15.000000f, 16.000000f, 13.000000f, 14.000001f, 15.000000f, 16.000000f, 13.000000f, 14.000001f, 15.000000f, 16.000000f, 15.666667f, 16.666668f, 17.666668f, 18.666668f, 17.000002f, 18.000000f, 19.000002f, 20.000000f, 17.000002f, 18.000000f, 19.000002f, 20.000000f, 18.333340f, 19.333340f, 20.333342f, 21.333340f, 21.000002f, 22.000000f, 22.999998f, 24.000000f, 21.000002f, 22.000000f, 22.999998f, 24.000000f, 20.999996f, 21.999996f, 22.999994f, 23.999996f, 13.000000f, 14.000001f, 15.000000f, 16.000000f, 13.000000f, 14.000001f, 15.000000f, 16.000000f, 13.000000f, 14.000001f, 15.000000f, 16.000000f, 15.666667f, 16.666668f, 17.666668f, 
18.666668f, 17.000002f, 18.000000f, 19.000002f, 20.000000f, 17.000002f, 18.000000f, 19.000002f, 20.000000f, 18.333340f, 19.333340f, 20.333342f, 21.333340f, 21.000002f, 22.000000f, 22.999998f, 24.000000f, 21.000002f, 22.000000f, 22.999998f, 24.000000f, 20.999996f, 21.999996f, 22.999994f, 23.999996f, 13.000000f, 14.000001f, 15.000000f, 16.000000f, 13.000000f, 14.000001f, 15.000000f, 16.000000f, 13.000000f, 14.000001f, 15.000000f, 16.000000f, 15.666667f, 16.666668f, 17.666668f, 18.666668f, 17.000002f, 18.000000f, 19.000002f, 20.000000f, 17.000002f, 18.000000f, 19.000002f, 20.000000f, 18.333340f, 19.333340f, 20.333342f, 21.333340f, 21.000002f, 22.000000f, 22.999998f, 24.000000f, 21.000002f, 22.000000f, 22.999998f, 24.000000f, 20.999996f, 21.999996f, 22.999994f, 23.999996f, 12.999995f, 13.999995f, 14.999994f, 15.999994f, 12.999995f, 13.999995f, 14.999994f, 15.999994f, 12.999995f, 13.999995f, 14.999994f, 15.999994f, 15.666661f, 16.666662f, 17.666660f, 18.666660f, 16.999994f, 17.999994f, 18.999992f, 19.999992f, 16.999994f, 17.999994f, 18.999992f, 19.999992f, 18.333334f, 19.333332f, 20.333334f, 21.333332f, 20.999992f, 21.999992f, 22.999990f, 23.999992f, 20.999992f, 21.999992f, 22.999990f, 23.999992f, 20.999989f, 21.999989f, 22.999987f, 23.999987f });
+ //input.linspace(1);
diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests12.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests12.cpp index 2bca43ae9..66762f79d 100644
--- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests12.cpp
+++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests12.cpp
@@ -27,6 +27,7 @@
 #include
 #include
 #include
+#include
 using namespace sd;
@@ -2821,6 +2822,330 @@ TEST_F(DeclarableOpsTests12, QR_Test_2) {
 }
+
+TEST_F(DeclarableOpsTests12, ImageResize_Test1) {
+
+ NDArray input = NDArrayFactory::create('c', {1, 5, 5, 1}, {
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25
+ });
+ auto size = NDArrayFactory::create({7, 8});
+ NDArray expected = NDArrayFactory::create('c', {1, 7, 8, 1}, {
+ 0.628328f, 0.97913796f, 1.8058043f, 2.563919f, 2.844548f,
+ 3.6026628f, 4.4293294f, 4.7801394f, 2.9474494f, 3.2982588f,
+ 4.1249247f, 4.8830395f, 5.1636696f, 5.9217834f, 6.7484493f,
+ 7.09926f, 8.165832f, 8.516642f, 9.3433075f, 10.101422f,
+ 10.382052f, 11.140167f, 11.966835f, 12.317646f, 10.924093f,
+ 11.274903f, 12.10157f, 12.859686f, 13.140315f, 13.898429f,
+ 14.725095f, 15.075906f, 13.682358f, 14.033167f, 14.859833f,
+ 15.617949f, 15.898578f, 16.656693f, 17.48336f, 17.834171f,
+ 18.900742f, 19.251549f, 20.078213f, 20.83633f, 21.11696f,
+ 21.875074f, 22.701742f, 23.052553f, 21.219858f, 21.57067f,
+ 22.397337f, 23.155449f, 23.436079f, 24.194195f, 25.020863f,
+ 25.371672f
+ });
+
+ sd::ops::image_resize op;
+ // resize with lanczos5 without antialiasing and aspect ratio preserving
+ auto results = op.evaluate({&input, &size}, {}, {ops::helpers::kResizeLanczos5}, {false, false});
+
+ ASSERT_EQ(ND4J_STATUS_OK, results.status());
+
+ auto result = results[0];///.at(0);
+// result->printBuffer("Lanczos5 Resized to 7x8");
+// expected.printBuffer("Lanczos5 Expect for 7x8");
+ ASSERT_TRUE(expected.isSameShape(result));
+ ASSERT_TRUE(expected.equalsTo(result));
+}
+
+TEST_F(DeclarableOpsTests12, ImageResize_Test2) {
+
+ NDArray input = NDArrayFactory::create('c', {1, 5, 5, 1}, {
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25
+ });
+ auto size = NDArrayFactory::create({7, 8});
+ NDArray expected = NDArrayFactory::create('c', {1, 7, 8, 1}, {
+ 0.628328f, 0.97913796f, 1.8058043f, 2.563919f, 2.844548f,
+ 3.6026628f, 4.4293294f, 4.7801394f, 2.9474494f, 3.2982588f,
+ 4.1249247f, 4.8830395f, 5.1636696f, 5.9217834f, 6.7484493f,
+ 7.09926f, 8.165832f, 8.516642f, 9.3433075f, 10.101422f,
+ 10.382052f, 11.140167f, 11.966835f, 12.317646f, 10.924093f,
+ 11.274903f, 12.10157f, 12.859686f, 13.140315f, 13.898429f,
+ 14.725095f, 15.075906f, 13.682358f, 14.033167f, 14.859833f,
+ 15.617949f, 15.898578f, 16.656693f, 17.48336f, 17.834171f,
+ 18.900742f, 19.251549f, 20.078213f, 20.83633f, 21.11696f,
+ 21.875074f, 22.701742f, 23.052553f, 21.219858f, 21.57067f,
+ 22.397337f, 23.155449f, 23.436079f, 24.194195f, 25.020863f,
+ 25.371672f
+ });
+
+ sd::ops::image_resize op;
+ // resize with lanczos5 without antialiasing and aspect ratio preserving
+ auto results = op.evaluate({&input, &size}, {}, {ops::helpers::kResizeLanczos5}, {false, false});
+
+ ASSERT_EQ(ND4J_STATUS_OK, results.status());
+
+ auto result = results[0];///.at(0);
+// result.printBuffer("Lanczos5 Resized to 7x8");
+// expected.printBuffer("Lanczos5 Expect for 7x8");
+ ASSERT_TRUE(expected.isSameShape(result));
+ ASSERT_TRUE(expected.equalsTo(result));
+}
+
+TEST_F(DeclarableOpsTests12, ImageResize_Test3) {
+
+ NDArray input = NDArrayFactory::create('c', {1, 5, 5, 1}, {
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25
+ });
+ auto size = NDArrayFactory::create({7, 8});
+ NDArray expected = NDArrayFactory::create('c', {1, 7, 8, 1}, {
+ 0.6537938f, 1.0309073f, 1.8018917f, 2.4606667f, 2.9888396f, 3.6476145f, 4.418599f,
+ 4.7957115f, 3.1913466f, 3.5684595f, 4.3394437f, 4.998219f, 5.526393f, 6.185168f,
+ 6.956152f, 7.3332644f, 7.626866f, 8.00398f, 8.774965f, 9.433739f, 9.961912f,
+ 10.620688f, 11.391673f, 11.7687845f, 10.929041f, 11.306154f, 12.077138f, 12.735914f,
+ 13.264087f, 13.922862f, 14.693848f, 15.07096f, 14.231217f, 14.60833f, 15.379314f,
+ 16.038086f, 16.56626f, 17.225037f, 17.996023f, 18.373135f, 18.666735f, 19.043848f,
+ 19.814833f, 20.473606f, 21.00178f, 21.660557f, 22.431541f, 22.808653f, 21.204287f,
+ 21.581398f, 22.352386f, 23.01116f, 23.539333f, 24.19811f, 24.969095f, 25.346205f
+ });
+
+ sd::ops::image_resize op;
+ // resize with lanczos3 without antialiasing and aspect ratio preserving
+ auto results = op.evaluate({&input, &size}, {}, {ops::helpers::kResizeLanczos3}, {false, false});
+
+ ASSERT_EQ(ND4J_STATUS_OK, results.status());
+
+ auto result = results[0];///.at(0);
+// result.printBuffer("Lanczos3 Resized to 7x8");
+// expected.printBuffer("Lanczos3 Expect for 7x8");
+ ASSERT_TRUE(expected.isSameShape(result));
+ ASSERT_TRUE(expected.equalsTo(result));
+}
+
+TEST_F(DeclarableOpsTests12, ImageResize_Test4) {
+
+ NDArray input = NDArrayFactory::create('c', {1, 5, 5, 1}, {
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25
+ });
+ auto size = NDArrayFactory::create({7, 8});
+ NDArray expected = NDArrayFactory::create('c', {1, 7, 8, 1}, {
+ 1.4150869f, 1.7928237f, 2.4084527f, 3.0680697f, 3.6419308f, 4.301548f, 4.9171767f,
+ 5.294914f, 4.012885f, 4.390622f, 5.0062513f, 5.6658688f, 6.23973f, 6.899347f,
+ 7.514975f, 7.8927126f, 7.358912f, 7.736648f, 8.352278f, 9.011895f, 9.585756f,
+ 10.245375f, 10.861001f, 11.238739f, 11.060086f, 11.437822f, 12.0534525f, 12.713069f,
+ 13.28693f, 13.946548f, 14.562176f, 14.939912f, 14.761261f, 15.138998f, 15.754629f,
+ 16.414246f, 16.988108f, 17.647724f, 18.263351f, 18.641088f, 18.107288f, 18.485023f,
+ 19.100655f, 19.760273f, 20.334133f, 20.993752f, 21.609377f,
21.987114f, 20.705086f,
+ 21.082823f, 21.698452f, 22.35807f, 22.93193f, 23.591549f, 24.207174f, 24.584913f
+ });
+
+ sd::ops::image_resize op;
+ // resize with gaussian without antialiasing and aspect ratio preserving
+ auto results = op.evaluate({&input, &size}, {}, {ops::helpers::kResizeGaussian}, {false, false});
+
+ ASSERT_EQ(ND4J_STATUS_OK, results.status());
+
+ auto result = results[0];///.at(0);
+// result.printBuffer("Gaussian Resized to 7x8");
+// expected.printBuffer("Gaussian Expect for 7x8");
+ ASSERT_TRUE(expected.isSameShape(result));
+ ASSERT_TRUE(expected.equalsTo(result));
+}
+
+TEST_F(DeclarableOpsTests12, ImageResize_Test5) {
+
+ NDArray input = NDArrayFactory::create('c', {1, 5, 5, 1}, {
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25
+ });
+ auto size = NDArrayFactory::create({7, 8});
+ NDArray expected = NDArrayFactory::create('c', {1, 7, 8, 1}, {
+ 0.6372399f, 1.0536414f, 1.7716959f, 2.3966959f, 3.0216959f, 3.6466963f, 4.3647504f, 4.781152f,
+ 3.3926036f, 3.8090053f, 4.5270596f, 5.1520596f, 5.7770596f, 6.4020596f, 7.1201134f, 7.5365143f,
+ 7.358708f, 7.7751093f, 8.493164f, 9.118163f, 9.743165f, 10.368165f, 11.086218f, 11.502619f,
+ 10.928043f, 11.344445f, 12.0625f, 12.6875f, 13.3125f, 13.9375f, 14.655554f, 15.071955f,
+ 14.49738f, 14.913782f, 15.631836f, 16.256836f, 16.881836f, 17.506836f, 18.22489f, 18.64129f,
+ 18.463486f, 18.879889f, 19.597942f, 20.222942f, 20.847942f, 21.472942f, 22.190996f, 22.607397f,
+ 21.218851f, 21.635252f, 22.353308f, 22.978308f, 23.603308f, 24.228308f, 24.946362f, 25.362762f
+ });
+
+ sd::ops::image_resize op;
+ // resize with bicubic without antialiasing and aspect ratio preserving
+ auto results = op.evaluate({&input, &size}, {}, {ops::helpers::kResizeBicubic}, {false, false});
+
+ ASSERT_EQ(ND4J_STATUS_OK, results.status());
+
+ auto result = results[0];///.at(0);
+// result->printBuffer("Bicubic Resized to 7x8");
+// expected.printBuffer("Bicubic Expect for 7x8");
+ ASSERT_TRUE(expected.isSameShape(result));
+ ASSERT_TRUE(expected.equalsTo(result));
+}
+
+TEST_F(DeclarableOpsTests12, ImageResize_Test6) {
+
+ NDArray input = NDArrayFactory::create('c', {1, 5, 5, 1}, {
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25
+ });
+ auto size = NDArrayFactory::create({7, 8});
+ NDArray expected = NDArrayFactory::create('c', {1, 7, 8, 1}, {
+ 0.63678247f, 1.0531839f, 1.7712381f, 2.396238f, 3.021238f, 3.646238f, 4.364292f, 4.780694f,
+ 3.3934183f, 3.8098197f, 4.5278745f, 5.1528745f, 5.7778745f, 6.402874f, 7.1209283f, 7.5373297f,
+ 7.3566165f, 7.7730184f, 8.491073f, 9.116073f, 9.741073f, 10.366074f, 11.084127f, 11.500528f,
+ 10.928043f, 11.344445f, 12.0625f, 12.6875f, 13.3125f, 13.9375f, 14.655554f, 15.071955f,
+ 14.499474f, 14.915876f, 15.633932f, 16.25893f, 16.883932f, 17.508932f, 18.226984f, 18.643385f,
+ 18.46267f, 18.87907f, 19.597128f, 20.222126f, 20.847128f, 21.472126f, 22.190182f, 22.606583f,
+ 21.219305f, 21.635706f, 22.353762f, 22.978762f, 23.603762f, 24.228764f, 24.946815f, 25.363216f
+ });
+
+ sd::ops::image_resize op;
+ // resize with bicubic with antialiasing and without aspect ratio preserving
+ auto results = op.evaluate({&input, &size}, {}, {ops::helpers::kResizeBicubic}, {false, true});
+
+ ASSERT_EQ(ND4J_STATUS_OK, results.status());
+
+ auto result = results[0];///.at(0);
+// result->printBuffer("Bicubic Resized to 7x8");
+// expected.printBuffer("Bicubic Expect for 7x8");
+ ASSERT_TRUE(expected.isSameShape(result));
+ ASSERT_TRUE(expected.equalsTo(result));
+}
+
+TEST_F(DeclarableOpsTests12, ImageResize_Test7) {
+
+ NDArray input = NDArrayFactory::create('c', {1, 5, 5, 1}, {
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25
+ });
+ auto size = NDArrayFactory::create({7, 8});
+ NDArray expected = NDArrayFactory::create('c', {1, 7, 8, 1}, {
+ 0.98593485f, 1.3872082f, 2.0625007f, 2.6875007f, 3.3125012f, 3.937501f, 4.612794f, 5.014066f,
+ 3.6096964f, 4.01097f, 4.6862626f, 5.311262f, 5.936263f, 6.561262f, 7.2365556f, 7.637828f,
+ 7.4145045f, 7.8157787f, 8.491071f, 9.116072f, 9.741073f, 10.366072f, 11.041365f, 11.4426365f,
+ 10.985933f, 11.387209f, 12.062499f, 12.687501f, 13.312502f, 13.9375f, 14.612794f, 15.014066f,
+ 14.557361f, 14.958637f, 15.633926f, 16.25893f, 16.88393f, 17.508926f, 18.18422f, 18.585491f,
+ 18.36217f, 18.763443f, 19.438736f, 20.063736f, 20.688738f, 21.313736f, 21.98903f, 22.3903f,
+ 20.985931f, 21.387209f, 22.0625f, 22.6875f, 23.3125f, 23.937498f, 24.612793f, 25.014061f
+ });
+
+ sd::ops::image_resize op;
+ // resize with Mitchell cubic with antialiasing and without aspect ratio preserving
+ auto results = op.evaluate({&input, &size}, {}, {ops::helpers::kResizeMitchellcubic}, {false, true});
+
+ ASSERT_EQ(ND4J_STATUS_OK, results.status());
+
+ auto result = results[0];///.at(0);
+// result->printBuffer("Mitchell cubic Resized to 7x8");
+// expected.printBuffer("Mitchell cubic Expect for 7x8");
+ ASSERT_TRUE(expected.isSameShape(result));
+ ASSERT_TRUE(expected.equalsTo(result));
+}
+
+TEST_F(DeclarableOpsTests12, ImageResize_Test8) {
+
+ NDArray input = NDArrayFactory::create('c', {1, 5, 5, 1}, {
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25
+ });
+ auto size = NDArrayFactory::create({7, 8});
+ NDArray expected = NDArrayFactory::create('c', {1, 7, 8, 1}, {
+ 1.f, 1.4375f, 2.0625f, 2.6875f, 3.3125f, 3.9375f, 4.5625f, 5.f,
+ 3.8571427f, 4.2946424f, 4.9196424f, 5.5446424f, 6.1696424f, 6.7946424f, 7.4196424f, 7.8571424f,
+ 7.4285717f, 7.8660717f, 8.491072f, 9.116072f, 9.741072f, 10.366072f, 10.991072f, 11.428572f,
+ 11.f, 11.4375f, 12.0625f, 12.6875f, 13.3125f, 13.9375f, 14.5625f, 15.f,
+ 14.571429f, 15.008929f, 15.633929f, 16.25893f, 16.88393f, 17.50893f, 18.13393f, 18.57143f,
+ 18.142857f, 18.580357f, 19.205357f, 19.830357f, 20.455357f, 21.080357f, 21.705357f, 22.142857f,
+ 21.f, 21.4375f, 22.0625f, 22.6875f, 23.3125f, 23.9375f, 24.5625f, 25.f
+ });
+
+ sd::ops::image_resize op;
+ // resize with bilinear without antialiasing and aspect ratio preserving
+ auto results = op.evaluate({&input, &size}, {}, {ops::helpers::kResizeBilinear}, {false, false});
+
+ ASSERT_EQ(ND4J_STATUS_OK, results.status());
+
+ auto result = results[0];///.at(0);
+// result->printBuffer("Bilinear Resized to 7x8");
+// expected.printBuffer("Bilinear Expect for 7x8");
+ ASSERT_TRUE(expected.isSameShape(result));
+ ASSERT_TRUE(expected.equalsTo(result));
+}
+
+TEST_F(DeclarableOpsTests12, ImageResize_Test9) {
+
+ NDArray input = NDArrayFactory::create('c', {1, 5, 5, 1}, {
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25
+ });
+ auto size = NDArrayFactory::create({7, 8});
+ NDArray expected = NDArrayFactory::create('c', {1, 7, 8, 1}, {
+ 1.f, 1.4f, 2.f, 2.8f, 3.2f, 4.f, 4.6f, 5.f,
+ 4.f, 4.4f, 5.f, 5.8f, 6.2f, 7.f, 7.6f, 8.f,
+ 6.999998f, 7.399998f, 7.999998f, 8.799997f, 9.199997f, 9.999997f, 10.599997f, 10.999996f,
+ 11.f, 11.399999f,
12.f, 12.799999f, 13.199999f, 13.999998f, 14.599998f, 14.999999f, + 15.f, 15.4f, 16.f, 16.8f, 17.2f, 18.f, 18.6f, 19.f, 17.999989f, + 18.399990f, 18.999989f, 19.799988f, 20.199987f, 20.999989f, 21.599989f, 21.999989f, 21.f, + 21.4f, 22.f, 22.8f, 23.2f, 24.f, 24.6f, 25.f + }); + + sd::ops::image_resize op; + // resize with area without antialising and aspect ratio preserving + auto results = op.evaluate({&input, &size}, {}, {ops::helpers::kResizeArea}, {false, false}); + + ASSERT_EQ(ND4J_STATUS_OK, results.status()); + + auto result = results[0];///.at(0); +// result->printBuffer("Area Resized to 7x8"); +// expected.printBuffer("Area Expect for 7x8"); + ASSERT_TRUE(expected.isSameShape(result)); + ASSERT_TRUE(expected.equalsTo(result)); +} + +TEST_F(DeclarableOpsTests12, ImageResize_Test10) { + + NDArray input = NDArrayFactory::create('c', {1, 5, 5, 1}, { + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 + }); + auto size = NDArrayFactory::create({7, 8}); + NDArray expected = NDArrayFactory::create('c', {1, 7, 8, 1}, { + 1, 1, 2, 3, 3, 4, 5, 5, 6, 6, 7, 8, 8, 9, 10, 10, 6, + 6, 7, 8, 8, 9, 10, 10, 11, 11, 12, 13, 13, 14, 15, 15, 16, 16, + 17, 18, 18, 19, 20, 20, 16, 16, 17, 18, 18, 19, 20, 20, 21, 21, 22, + 23, 23, 24, 25, 25 + }); + + sd::ops::image_resize op; + // resize with nearest neigbors without antialising and aspect ratio preserving + auto results = op.evaluate({&input, &size}, {}, {ops::helpers::kResizeNearest}, {false, false}); + + ASSERT_EQ(ND4J_STATUS_OK, results.status()); + + auto result = results[0];///.at(0); +// result->printBuffer("Nearest neighbor Resized to 7x8"); +// expected.printBuffer("Nearest neighbor Expect for 7x8"); + ASSERT_TRUE(expected.isSameShape(result)); + ASSERT_TRUE(expected.equalsTo(result)); +} + +TEST_F(DeclarableOpsTests12, ImageResize_Test11) { + + NDArray input = NDArrayFactory::create('c', {1, 5, 5, 1}, { + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 + }); + auto size = NDArrayFactory::create({7, 8}); + NDArray expected = NDArrayFactory::create('c', {1, 7, 8, 1}, { + 1, 1, 2, 3, 3, 4, 5, 5, 6, 6, 7, 8, 8, 9, 10, 10, 6, + 6, 7, 8, 8, 9, 10, 10, 11, 11, 12, 13, 13, 14, 15, 15, 16, 16, + 17, 18, 18, 19, 20, 20, 16, 16, 17, 18, 18, 19, 20, 20, 21, 21, 22, + 23, 23, 24, 25, 25 + }); + + sd::ops::image_resize op; + // resize with nearest neigbors without antialising and aspect ratio preserving + auto results = op.evaluate({&input, &size}, {}, {ops::helpers::kResizeNearest}, {false, false}); + + ASSERT_EQ(ND4J_STATUS_OK, results.status()); + + auto result = results[0];///.at(0); +// result->printBuffer("Nearest neighbor Resized to 7x8"); +// expected.printBuffer("Nearest neighbor Expect for 7x8"); + ASSERT_TRUE(expected.isSameShape(result)); + ASSERT_TRUE(expected.equalsTo(result)); +} + //////////////////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests12, TriangularSolve_Test_1) { diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/enums/ImageResizeMethod.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/enums/ImageResizeMethod.java index 42043dad7..951e87fdc 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/enums/ImageResizeMethod.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/enums/ImageResizeMethod.java @@ -27,17 +27,12 @@ package org.nd4j.enums; * ResizeArea: Anti-aliased resampling with area interpolation. 
'antialias' has no effect when used with area interpolation; it always anti-aliases. * ResizeMitchelcubic: Mitchell-Netravali Cubic non-interpolating filter. For synthetic images (especially those lacking proper prefiltering), less ringing than Keys cubic kernel but less sharp. */ public enum ImageResizeMethod { - ResizeBilinear, - - ResizeBicubic, - + ResizeBilinear, // as Java requires ResizeNearest, - + ResizeBicubic, + ResizeArea, ResizeGaussian, - + ResizeLanczos3, ResizeLanczos5, - - ResizeMitchelcubic, - - ResizeArea + ResizeMitchellcubic; } diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCuda.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCuda.java index 1307ab0ae..59496d780 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCuda.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCuda.java @@ -4417,7 +4417,7 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); /** * fill target matrix with given value in one or two directions from main diagonal: - * - down from main diagonal starting at subdiagonal number "lower" if direction = 'd' (down) or 'b' (both) + * - down from main diagonal starting at subdiagonal number "lower" if direction = 'l' (down) or 'b' (both) * - up from main diagonal starting at superdiagonal number "upper"if direction = 'u' (up) or 'b' (both) * direction - in what direction to fill matrix. There are 3 possible directions: * 'u' - fill up, mathematically this corresponds to lower triangular matrix, subdiagonal "lower" unaffected @@ -4830,9 +4830,11 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); //////////////////////////////////////////////////////////////////////// - +//////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////// - // #ifndef __JAVACPP_HACK__ // #endif @@ -7349,9 +7351,9 @@ public static final int PREALLOC_SIZE = 33554432; * Returns the element wise stride for this information * buffer */ - @Namespace("shape") public static native @Cast("Nd4jLong") long elementWiseStride(@Cast("const Nd4jLong*") LongPointer buffer); - @Namespace("shape") public static native @Cast("Nd4jLong") long elementWiseStride(@Cast("const Nd4jLong*") LongBuffer buffer); - @Namespace("shape") public static native @Cast("Nd4jLong") long elementWiseStride(@Cast("const Nd4jLong*") long[] buffer); + @Namespace("shape") public static native @Cast("Nd4jLong") long elementWiseStride(@Cast("const Nd4jLong*") LongPointer shapeInfo); + @Namespace("shape") public static native @Cast("Nd4jLong") long elementWiseStride(@Cast("const Nd4jLong*") LongBuffer shapeInfo); + @Namespace("shape") public static native @Cast("Nd4jLong") long elementWiseStride(@Cast("const Nd4jLong*") long[] shapeInfo); /** diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java index b4ef3cb05..b9e4adb5a 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java @@ -4421,7 +4421,7 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); /** * fill target matrix with given value in one or two
directions from main diagonal: - * - down from main diagonal starting at subdiagonal number "lower" if direction = 'd' (down) or 'b' (both) + * - down from main diagonal starting at subdiagonal number "lower" if direction = 'l' (down) or 'b' (both) * - up from main diagonal starting at superdiagonal number "upper"if direction = 'u' (up) or 'b' (both) * direction - in what direction to fill matrix. There are 3 possible directions: * 'u' - fill up, mathematically this corresponds to lower triangular matrix, subdiagonal "lower" unaffected @@ -4834,9 +4834,11 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); //////////////////////////////////////////////////////////////////////// - +//////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////// - // #ifndef __JAVACPP_HACK__ // #endif @@ -7353,9 +7355,9 @@ public static final int PREALLOC_SIZE = 33554432; * Returns the element wise stride for this information * buffer */ - @Namespace("shape") public static native @Cast("Nd4jLong") long elementWiseStride(@Cast("const Nd4jLong*") LongPointer buffer); - @Namespace("shape") public static native @Cast("Nd4jLong") long elementWiseStride(@Cast("const Nd4jLong*") LongBuffer buffer); - @Namespace("shape") public static native @Cast("Nd4jLong") long elementWiseStride(@Cast("const Nd4jLong*") long[] buffer); + @Namespace("shape") public static native @Cast("Nd4jLong") long elementWiseStride(@Cast("const Nd4jLong*") LongPointer shapeInfo); + @Namespace("shape") public static native @Cast("Nd4jLong") long elementWiseStride(@Cast("const Nd4jLong*") LongBuffer shapeInfo); + @Namespace("shape") public static native @Cast("Nd4jLong") long elementWiseStride(@Cast("const Nd4jLong*") long[] shapeInfo); /** @@ -21173,214 +21175,6 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD(); } // #endif - /** - * This op make bilinear or nearest neighbor interpolated resize for given tensor - * - * input array: - * 0 - 4D-Tensor with shape (batch, sizeX, sizeY, channels) numeric type - * 1 - 2D-Tensor with shape (num_boxes, 4) float type - * 2 - 1D-Tensor with shape (num_boxes) int type - * 3 - 1D-Tensor with 2 values (newWidth, newHeight) (optional) int type - * - * float arguments (optional) - * 0 - exprapolation_value (optional) default 0.f - * - * int arguments: (optional) - * 0 - mode (default 0 - bilinear interpolation) - * - * output array: - * the 4D-Tensor with resized to crop_size images given - float type - */ -// #if NOT_EXCLUDED(OP_crop_and_resize) - @Namespace("sd::ops") public static class crop_and_resize extends DeclarableCustomOp { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public crop_and_resize(Pointer p) { super(p); } - /** Native array allocator. Access with {@link Pointer#position(long)}. 
*/ - public crop_and_resize(long size) { super((Pointer)null); allocateArray(size); } - private native void allocateArray(long size); - @Override public crop_and_resize position(long position) { - return (crop_and_resize)super.position(position); - } - - public crop_and_resize() { super((Pointer)null); allocate(); } - private native void allocate(); - public native ShapeList calculateOutputShape(ShapeList inputShape, @ByRef Context block); - } -// #endif - - /** - * This op make bilinear interpolated resize for given tensor - * - * input array: - * 0 - 4D-Tensor with shape (batch, sizeX, sizeY, channels) - * 1 - 1D-Tensor with 2 values (newWidth, newHeight) (optional) - * - * int arguments: (optional) - * 0 - new width - * 1 - new height - * - * output array: - * the 4D-Tensor with calculated backproped dots - * - * CAUTION: either size tensor or a pair of int params should be provided. - */ - -// #if NOT_EXCLUDED(OP_resize_bilinear) - @Namespace("sd::ops") public static class resize_bilinear extends DeclarableCustomOp { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public resize_bilinear(Pointer p) { super(p); } - /** Native array allocator. Access with {@link Pointer#position(long)}. */ - public resize_bilinear(long size) { super((Pointer)null); allocateArray(size); } - private native void allocateArray(long size); - @Override public resize_bilinear position(long position) { - return (resize_bilinear)super.position(position); - } - - public resize_bilinear() { super((Pointer)null); allocate(); } - private native void allocate(); - public native ShapeList calculateOutputShape(ShapeList inputShape, @ByRef Context block); - } -// #endif - - /** - * This op make nearest neighbor interpolated resize for given tensor - * - * input array: - * 0 - 4D-Tensor with shape (batch, sizeX, sizeY, channels) - * 1 - 1D-Tensor with 2 values (newWidth, newHeight) (optional) - * - * int arguments: (optional) - * 0 - new width - * 1 - new height - * - * output array: - * the 4D-Tensor with resized image (shape is {batch, newWidth, newHeight, channels}) - * - * CAUTION: either size tensor or a pair of int params should be provided. - */ - -// #if NOT_EXCLUDED(OP_resize_nearest_neighbor) - @Namespace("sd::ops") public static class resize_nearest_neighbor extends DeclarableCustomOp { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public resize_nearest_neighbor(Pointer p) { super(p); } - /** Native array allocator. Access with {@link Pointer#position(long)}. 
*/ - public resize_nearest_neighbor(long size) { super((Pointer)null); allocateArray(size); } - private native void allocateArray(long size); - @Override public resize_nearest_neighbor position(long position) { - return (resize_nearest_neighbor)super.position(position); - } - - public resize_nearest_neighbor() { super((Pointer)null); allocate(); } - private native void allocate(); - public native ShapeList calculateOutputShape(ShapeList inputShape, @ByRef Context block); - } -// #endif - - /** - * This op make bicubic interpolated resize for given tensor - * - * input array: - * 0 - 4D-Tensor with shape (batch, sizeX, sizeY, channels) - * 1 - 1D-Tensor with 2 values (newWidth, newHeight) - * - * output array: - * the 4D-Tensor with resized image (shape is {batch, newWidth, newHeight, channels}) - * - */ -// #if NOT_EXCLUDED(OP_resize_bicubic) - @Namespace("sd::ops") public static class resize_bicubic extends DeclarableCustomOp { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public resize_bicubic(Pointer p) { super(p); } - /** Native array allocator. Access with {@link Pointer#position(long)}. */ - public resize_bicubic(long size) { super((Pointer)null); allocateArray(size); } - private native void allocateArray(long size); - @Override public resize_bicubic position(long position) { - return (resize_bicubic)super.position(position); - } - - public resize_bicubic() { super((Pointer)null); allocate(); } - private native void allocate(); - public native ShapeList calculateOutputShape(ShapeList inputShape, @ByRef Context block); - } -// #endif - - /** - * This op make area interpolated resize (as OpenCV INTER_AREA algorithm) for given tensor - * - * input array: - * 0 - images - 4D-Tensor with shape (batch, sizeX, sizeY, channels) - * 1 - size - 1D-Tensor with 2 values (newWidth, newHeight) (if missing a pair of integer args should be provided). - * - * int args: - proveded only when size tensor is missing - * 0 - new height - * 1 - new width - * boolean args: - * 0 - align_corners - optional (default is false) - * - * output array: - * the 4D-Tensor with resized image (shape is {batch, newWidth, newHeight, channels}) - * - */ -// #if NOT_EXCLUDED(OP_resize_area) - @Namespace("sd::ops") public static class resize_area extends DeclarableCustomOp { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public resize_area(Pointer p) { super(p); } - /** Native array allocator. Access with {@link Pointer#position(long)}. */ - public resize_area(long size) { super((Pointer)null); allocateArray(size); } - private native void allocateArray(long size); - @Override public resize_area position(long position) { - return (resize_area)super.position(position); - } - - public resize_area() { super((Pointer)null); allocate(); } - private native void allocate(); - public native ShapeList calculateOutputShape(ShapeList inputShape, @ByRef Context block); - } -// #endif - - /** - * This op make interpolated resize for given tensor with given algorithm. - * Supported algorithms are bilinear, bicubic, nearest_neighbor. 
- * Need to implement to full compatibility with TF: lanczos5, gaussian, area and mitchellcubic - * - * input array: - * 0 - 4D-Tensor with shape (batch, sizeX, sizeY, channels) - * 1 - 1D-Tensor with 2 values (newWidth, newHeight) - * - * optional int args: - * 0 - algorithm - bilinear by default - * optional bool args: - * 0 - preserve_aspect_ratio - default False - * 1 - antialias - default False - * - * output array: - * the 4D-Tensor with resized by given algorithm image (shape is {batch, newWidth, newHeight, channels}) - * - */ - -// #if NOT_EXCLUDED(OP_image_resize) - @Namespace("sd::ops") public static class image_resize extends DeclarableCustomOp { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public image_resize(Pointer p) { super(p); } - /** Native array allocator. Access with {@link Pointer#position(long)}. */ - public image_resize(long size) { super((Pointer)null); allocateArray(size); } - private native void allocateArray(long size); - @Override public image_resize position(long position) { - return (image_resize)super.position(position); - } - - public image_resize() { super((Pointer)null); allocate(); } - private native void allocate(); - public native ShapeList calculateOutputShape(ShapeList inputShape, @ByRef Context block); - } -// #endif - /** * Copy a tensor setting everything outside a central band in each innermost matrix * @@ -22783,7 +22577,7 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD(); // #define LIBND4J_HEADERS_BLAS_H // #include - + /** * This op is general matmum implementation. Depending on inputs dimensionality output result might be different. * matrix x matrix = BLAS gemm @@ -22904,11 +22698,11 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD(); * alpha: vector of T * beta: vector of T * ...: A, B matrices sequentially. i.e: AAAAABBBBB - * + * * Integer arguments: * transA, transB, M, N, K, ldA, ldB, ldC - usual BLAS gemm arguments * batchCount - number of operations in this batch - * + * * PLEASE NOTE: M, N, K, ldA, ldB, ldC should be equal for all matrices within batch. */ // #if NOT_EXCLUDED(OP_batched_gemm) @@ -22931,22 +22725,22 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD(); /** * performs singular value decomposition (SVD) of one or more matrices, evaluates the SVD of each inner-most 2D matrix in input array: - * x[..., :, :] = u[..., :, :] * s[...,:] * transpose(v[..., :, :]) + * x[..., :, :] = u[..., :, :] * s[...,:] * transpose(v[..., :, :]) * * Input array: * x[..., Rows, Cols], the necessary condition is: rank of x >= 2 - * + * * Outputs arrays: * s[..., diagSize] - array with singular values which are stored in decreasing order, diagSize is smaller among Rows and Cols * u[..., Rows, Rows] if IArgs[1] is true, else u[..., Rows, diagSize] - array with right singular vectors * v[..., Cols, Cols] if IArgs[1] is true, else v[..., Cols, diagSize] - array with left singular vectors - * + * * Integer arguments: * IArgs[0] - bool, whether to calculate u and v, s is calculated in any case * IArgs[1] - bool, whether to calculate full-sized u and v * IArgs[2] - the number of cols or rows which determines what algorithm to use. More precisely: * if diagSize < IArgs[2] then Jacobi algorithm is used, in opposite case the Divide-And-Conquer is applied - * Recommended value is 16. + * Recommended value is 16. 
*/ // #if NOT_EXCLUDED(OP_svd) @Namespace("sd::ops") public static class svd extends DeclarableCustomOp { @@ -22963,7 +22757,35 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD(); public svd() { super((Pointer)null); allocate(); } private native void allocate(); public native ShapeList calculateOutputShape(ShapeList inputShape, @ByRef Context block); - } + } +// #endif + + /** + * calculates square root of matrix such that + * x[..., M, M] = z[..., M, M] x z[..., M, M] + * + * Input array: + * x[..., M, M], the necessary condition is: rank of x >= 2 and equality of last two dimensions + * + * Outputs arrays: + * z - same shape as x + */ +// #if NOT_EXCLUDED(OP_sqrtm) + @Namespace("sd::ops") public static class sqrtm extends DeclarableOp { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public sqrtm(Pointer p) { super(p); } + /** Native array allocator. Access with {@link Pointer#position(long)}. */ + public sqrtm(long size) { super((Pointer)null); allocateArray(size); } + private native void allocateArray(long size); + @Override public sqrtm position(long position) { + return (sqrtm)super.position(position); + } + + public sqrtm() { super((Pointer)null); allocate(); } + private native void allocate(); + public native ShapeList calculateOutputShape(ShapeList inputShape, @ByRef Context block); + } // #endif diff --git a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/opvalidation/TransformOpValidation.java b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/opvalidation/TransformOpValidation.java index c42b7f7a5..54f78dbf9 100644 --- a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/opvalidation/TransformOpValidation.java +++ b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/opvalidation/TransformOpValidation.java @@ -2107,14 +2107,16 @@ public class TransformOpValidation extends BaseOpValidation { //TODO: Methods failed ResizeLanczos5, ResizeMitchelcubic, ResizeArea for (ImageResizeMethod method : ImageResizeMethod.values()) { - if (method==ImageResizeMethod.ResizeLanczos5 || method==ImageResizeMethod.ResizeArea || method==ImageResizeMethod.ResizeMitchelcubic) + if (method==ImageResizeMethod.ResizeLanczos5 || method==ImageResizeMethod.ResizeArea || method==ImageResizeMethod.ResizeMitchellcubic) {continue;} + log.info("Trying {}", method); + Nd4j.getRandom().setSeed(12345); SameDiff sd = SameDiff.create(); boolean preserveAspectRatio = true; boolean antialias = true; - SDVariable inputImage = sd.var(Nd4j.rand(1, 5, 5, 3)); + SDVariable inputImage = sd.var(Nd4j.rand(DataType.FLOAT, 1, 5, 5, 3)); // NHWC format long[] expectedShape = new long[]{1, 3, 3, 3}; SDVariable requestedSize = sd.constant(Nd4j.createFromArray( new long[]{3, 3})); From 1233acf2ab53b273e6054f6cc2863c57342c43b8 Mon Sep 17 00:00:00 2001 From: Serhii Shepel <9946053+sshepel@users.noreply.github.com> Date: Fri, 29 May 2020 11:01:02 +0300 Subject: [PATCH 16/21] Fix formatting, remove obsolete files (#439) * Update/remove obsolete files * Fix nd4j-parameter-server-parent folder and module name * Fix formatting for libnd4j pom * Remove LICENSE file check for libnd4j build * Temp revert removing encoding and version for nd4j-parameter-server-model, nd4j-parameter-server-node, nd4j-parameter-server-client --- arbiter/.travis.yml | 24 -- arbiter/ci/build-linux-x86_64.sh | 28 -- arbiter/ci/settings.xml | 28 -- {arbiter/contrib => contrib}/formatter.xml | 0 datavec/.travis.yml | 28 -- datavec/LICENSE | 201 
---------- datavec/ci/build-linux-x86_64.sh | 29 -- datavec/ci/settings.xml | 28 -- datavec/contrib/formatter.xml | 353 ------------------ deeplearning4j/.travis.yml | 32 -- deeplearning4j/LICENSE.txt | 202 ---------- deeplearning4j/ci/build-linux-x86_64.sh | 30 -- deeplearning4j/ci/settings.xml | 28 -- deeplearning4j/contrib/formatter.xml | 353 ------------------ jumpy/.travis.yml | 25 -- jumpy/LICENSE | 201 ---------- libnd4j/CMakeLists.txt | 1 - libnd4j/LICENSE | 202 ---------- libnd4j/pom.xml | 35 +- nd4j/.appveyor.yml | 45 --- nd4j/.codeclimate.yml | 32 -- nd4j/.travis.yml | 96 ----- nd4j/LICENSE | 201 ---------- nd4j/VERSION | 1 - nd4j/buildAllversions.sh | 23 -- nd4j/ci/build-android.sh | 47 --- nd4j/ci/build-ios.sh | 49 --- nd4j/ci/build-linux-x86_64.sh | 77 ---- nd4j/ci/build-macosx-x86_64.sh | 71 ---- nd4j/ci/build-windows-x86_64.cmd | 49 --- nd4j/ci/settings.xml | 28 -- nd4j/contrib/formatter.xml | 353 ------------------ .../nd4j-parameter-server-client/pom.xml | 28 +- .../pom.xml | 21 +- .../model/MasterConnectionInfo.java | 0 .../parameterserver/model/MasterStatus.java | 0 .../parameterserver/model/ServerState.java | 0 .../parameterserver/model/ServerType.java | 0 .../parameterserver/model/ServerTypeJson.java | 0 .../model/SlaveConnectionInfo.java | 0 .../parameterserver/model/SlaveStatus.java | 0 .../model/SubscriberState.java | 0 .../nd4j-parameter-server-node/pom.xml | 40 +- .../pom.xml | 26 +- .../nd4j-parameter-server-status/pom.xml | 28 +- .../nd4j-parameter-server/pom.xml | 35 +- nd4j/nd4j-parameter-server-parent/pom.xml | 65 ++-- nd4j/nd4j-remote/README.md | 0 nd4j/nd4j-serde/nd4j-aeron/LICENSE | 201 ---------- nd4j/nd4j-serde/nd4j-aeron/README.md | 1 - nd4s/.travis.yml | 17 - nd4s/LICENSE | 202 ---------- perform-release.sh | 6 +- pydatavec/.travis.yml | 25 -- pydatavec/LICENSE | 201 ---------- pydatavec/README.md | 1 - pydl4j/LICENSE | 201 ---------- pydl4j/README.md | 1 - rl4j/LICENSE.txt | 201 ---------- rl4j/README.md | 8 +- rl4j/contrib/formatter.xml | 353 ------------------ rl4j/{ => docs/images}/cartpole.gif | Bin rl4j/{ => docs/images}/doom.gif | Bin rl4j/{ => docs/images}/malmo.gif | Bin rl4j/scoregraph.png | Bin 90128 -> 0 bytes scalnet/.travis.yml | 11 - 66 files changed, 125 insertions(+), 4447 deletions(-) delete mode 100644 arbiter/.travis.yml delete mode 100755 arbiter/ci/build-linux-x86_64.sh delete mode 100644 arbiter/ci/settings.xml rename {arbiter/contrib => contrib}/formatter.xml (100%) delete mode 100644 datavec/.travis.yml delete mode 100644 datavec/LICENSE delete mode 100755 datavec/ci/build-linux-x86_64.sh delete mode 100644 datavec/ci/settings.xml delete mode 100644 datavec/contrib/formatter.xml delete mode 100644 deeplearning4j/.travis.yml delete mode 100755 deeplearning4j/LICENSE.txt delete mode 100755 deeplearning4j/ci/build-linux-x86_64.sh delete mode 100644 deeplearning4j/ci/settings.xml delete mode 100644 deeplearning4j/contrib/formatter.xml delete mode 100644 jumpy/.travis.yml delete mode 100644 jumpy/LICENSE delete mode 100755 libnd4j/LICENSE delete mode 100644 nd4j/.appveyor.yml delete mode 100644 nd4j/.codeclimate.yml delete mode 100644 nd4j/.travis.yml delete mode 100644 nd4j/LICENSE delete mode 100644 nd4j/VERSION delete mode 100755 nd4j/buildAllversions.sh delete mode 100755 nd4j/ci/build-android.sh delete mode 100755 nd4j/ci/build-ios.sh delete mode 100755 nd4j/ci/build-linux-x86_64.sh delete mode 100755 nd4j/ci/build-macosx-x86_64.sh delete mode 100644 nd4j/ci/build-windows-x86_64.cmd delete mode 100644 nd4j/ci/settings.xml 
delete mode 100644 nd4j/contrib/formatter.xml rename nd4j/nd4j-parameter-server-parent/{nd4j-parameterserver-model => nd4j-parameter-server-model}/pom.xml (78%) rename nd4j/nd4j-parameter-server-parent/{nd4j-parameterserver-model => nd4j-parameter-server-model}/src/main/java/org/nd4j/parameterserver/model/MasterConnectionInfo.java (100%) rename nd4j/nd4j-parameter-server-parent/{nd4j-parameterserver-model => nd4j-parameter-server-model}/src/main/java/org/nd4j/parameterserver/model/MasterStatus.java (100%) rename nd4j/nd4j-parameter-server-parent/{nd4j-parameterserver-model => nd4j-parameter-server-model}/src/main/java/org/nd4j/parameterserver/model/ServerState.java (100%) rename nd4j/nd4j-parameter-server-parent/{nd4j-parameterserver-model => nd4j-parameter-server-model}/src/main/java/org/nd4j/parameterserver/model/ServerType.java (100%) rename nd4j/nd4j-parameter-server-parent/{nd4j-parameterserver-model => nd4j-parameter-server-model}/src/main/java/org/nd4j/parameterserver/model/ServerTypeJson.java (100%) rename nd4j/nd4j-parameter-server-parent/{nd4j-parameterserver-model => nd4j-parameter-server-model}/src/main/java/org/nd4j/parameterserver/model/SlaveConnectionInfo.java (100%) rename nd4j/nd4j-parameter-server-parent/{nd4j-parameterserver-model => nd4j-parameter-server-model}/src/main/java/org/nd4j/parameterserver/model/SlaveStatus.java (100%) rename nd4j/nd4j-parameter-server-parent/{nd4j-parameterserver-model => nd4j-parameter-server-model}/src/main/java/org/nd4j/parameterserver/model/SubscriberState.java (100%) delete mode 100644 nd4j/nd4j-remote/README.md delete mode 100644 nd4j/nd4j-serde/nd4j-aeron/LICENSE delete mode 100644 nd4j/nd4j-serde/nd4j-aeron/README.md delete mode 100644 nd4s/.travis.yml delete mode 100644 nd4s/LICENSE delete mode 100644 pydatavec/.travis.yml delete mode 100644 pydatavec/LICENSE delete mode 100644 pydl4j/LICENSE delete mode 100644 rl4j/LICENSE.txt delete mode 100644 rl4j/contrib/formatter.xml rename rl4j/{ => docs/images}/cartpole.gif (100%) rename rl4j/{ => docs/images}/doom.gif (100%) rename rl4j/{ => docs/images}/malmo.gif (100%) delete mode 100644 rl4j/scoregraph.png delete mode 100644 scalnet/.travis.yml diff --git a/arbiter/.travis.yml b/arbiter/.travis.yml deleted file mode 100644 index 30638a6a9..000000000 --- a/arbiter/.travis.yml +++ /dev/null @@ -1,24 +0,0 @@ -branches: - only: - - master -notifications: - email: false -dist: trusty -sudo: false -cache: - directories: - - $HOME/.m2 -language: java -jdk: - - openjdk8 -matrix: - include: - - os: linux - env: OS=linux-x86_64 SCALA=2.10 - install: true - script: bash ./ci/build-linux-x86_64.sh - - os: linux - env: OS=linux-x86_64 SCALA=2.11 - install: true - script: bash ./ci/build-linux-x86_64.sh - diff --git a/arbiter/ci/build-linux-x86_64.sh b/arbiter/ci/build-linux-x86_64.sh deleted file mode 100755 index 7b230df6f..000000000 --- a/arbiter/ci/build-linux-x86_64.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/bash -################################################################################ -# Copyright (c) 2015-2018 Skymind, Inc. -# -# This program and the accompanying materials are made available under the -# terms of the Apache License, Version 2.0 which is available at -# https://www.apache.org/licenses/LICENSE-2.0. -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the -# License for the specific language governing permissions and limitations -# under the License. -# -# SPDX-License-Identifier: Apache-2.0 -################################################################################ - -set -evx - -if [[ $TRAVIS_PULL_REQUEST == "false" ]]; then - MAVEN_PHASE="deploy" -else - MAVEN_PHASE="install" -fi - -bash change-scala-versions.sh $SCALA -mvn clean $MAVEN_PHASE -B -U --settings ./ci/settings.xml -Dmaven.test.skip=true -Dlocal.software.repository=sonatype - diff --git a/arbiter/ci/settings.xml b/arbiter/ci/settings.xml deleted file mode 100644 index e6fd58339..000000000 --- a/arbiter/ci/settings.xml +++ /dev/null @@ -1,28 +0,0 @@ -[28 lines of Maven settings.xml deleted; the XML markup was lost in extraction. The file defined a single server entry: id sonatype-nexus-snapshots, username ${env.SONATYPE_USERNAME}, password ${env.SONATYPE_PASSWORD}] diff --git a/arbiter/contrib/formatter.xml b/contrib/formatter.xml similarity index 100% rename from arbiter/contrib/formatter.xml rename to contrib/formatter.xml diff --git a/datavec/.travis.yml b/datavec/.travis.yml deleted file mode 100644 index 96c7989e8..000000000 --- a/datavec/.travis.yml +++ /dev/null @@ -1,28 +0,0 @@ -branches: - only: - - master -notifications: - email: false -dist: trusty -sudo: false -cache: - directories: - - $HOME/.m2 -language: java -jdk: - - openjdk8 -matrix: - include: - - os: linux - env: OS=linux-x86_64 SCALA=2.10 SPARK=1 - install: true - script: bash ./ci/build-linux-x86_64.sh - - os: linux - env: OS=linux-x86_64 SCALA=2.11 SPARK=1 - install: true - script: bash ./ci/build-linux-x86_64.sh - - os: linux - env: OS=linux-x86_64 SCALA=2.11 SPARK=2 - install: true - script: bash ./ci/build-linux-x86_64.sh - diff --git a/datavec/LICENSE b/datavec/LICENSE deleted file mode 100644 index ad410e113..000000000 --- a/datavec/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ -Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below).
- - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "{}" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright {yyyy} {name of copyright owner} - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. \ No newline at end of file diff --git a/datavec/ci/build-linux-x86_64.sh b/datavec/ci/build-linux-x86_64.sh deleted file mode 100755 index d28bc32ad..000000000 --- a/datavec/ci/build-linux-x86_64.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/bash -################################################################################ -# Copyright (c) 2015-2018 Skymind, Inc. -# -# This program and the accompanying materials are made available under the -# terms of the Apache License, Version 2.0 which is available at -# https://www.apache.org/licenses/LICENSE-2.0. -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-# -# SPDX-License-Identifier: Apache-2.0 -################################################################################ - -set -evx - -if [[ $TRAVIS_PULL_REQUEST == "false" ]]; then - MAVEN_PHASE="deploy" -else - MAVEN_PHASE="install" -fi - -bash change-scala-versions.sh $SCALA -bash change-spark-versions.sh $SPARK -mvn clean $MAVEN_PHASE -B -U --settings ./ci/settings.xml -Dmaven.test.skip=true -Dlocal.software.repository=sonatype - diff --git a/datavec/ci/settings.xml b/datavec/ci/settings.xml deleted file mode 100644 index e6fd58339..000000000 --- a/datavec/ci/settings.xml +++ /dev/null @@ -1,28 +0,0 @@ -[28 lines of Maven settings.xml deleted; XML markup lost in extraction. Same sonatype-nexus-snapshots server entry as arbiter/ci/settings.xml above] diff --git a/datavec/contrib/formatter.xml b/datavec/contrib/formatter.xml deleted file mode 100644 index d6cc96bf6..000000000 --- a/datavec/contrib/formatter.xml +++ /dev/null @@ -1,353 +0,0 @@ -[353 lines of Eclipse code-formatter XML deleted; the markup was lost in extraction, leaving no recoverable content] diff --git a/deeplearning4j/.travis.yml b/deeplearning4j/.travis.yml deleted file mode 100644 index 1df95a97f..000000000 --- a/deeplearning4j/.travis.yml +++ /dev/null @@ -1,32 +0,0 @@ -branches: - only: - - master -notifications: - email: false -dist: trusty -sudo: false -cache: - directories: - - $HOME/.m2 -language: java -jdk: - - openjdk8 -matrix: - include: - - os: linux - env: OS=linux-x86_64 CUDA=8.0 CUDNN=6 SCALA=2.10 SPARK=1 - install: true - script: bash ./ci/build-linux-x86_64.sh - - os: linux - env: OS=linux-x86_64 CUDA=9.0 CUDNN=7 SCALA=2.11 SPARK=1 - install: true - script: bash ./ci/build-linux-x86_64.sh - - os: linux - env: OS=linux-x86_64 CUDA=9.0 CUDNN=7 SCALA=2.11 SPARK=2 - install: true - script: bash ./ci/build-linux-x86_64.sh - - os: linux - env: OS=linux-x86_64 CUDA=9.1 CUDNN=7 SCALA=2.11 SPARK=2 - install: true - script: bash ./ci/build-linux-x86_64.sh - diff --git a/deeplearning4j/LICENSE.txt b/deeplearning4j/LICENSE.txt deleted file mode 100755 index f0d9c68a3..000000000 --- a/deeplearning4j/LICENSE.txt +++ /dev/null @@ -1,202 +0,0 @@ -[202 lines deleted: the Apache License, Version 2.0 text, matching the datavec/LICENSE text above]
- diff --git a/deeplearning4j/ci/build-linux-x86_64.sh b/deeplearning4j/ci/build-linux-x86_64.sh deleted file mode 100755 index b8474a793..000000000 --- a/deeplearning4j/ci/build-linux-x86_64.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/bash -################################################################################ -# Copyright (c) 2015-2018 Skymind, Inc. -# -# This program and the accompanying materials are made available under the -# terms of the Apache License, Version 2.0 which is available at -# https://www.apache.org/licenses/LICENSE-2.0. -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# -# SPDX-License-Identifier: Apache-2.0 -################################################################################ - -set -evx - -if [[ $TRAVIS_PULL_REQUEST == "false" ]]; then - MAVEN_PHASE="deploy" -else - MAVEN_PHASE="install" -fi - -bash change-cuda-versions.sh $CUDA -bash change-scala-versions.sh $SCALA -bash change-spark-versions.sh $SPARK -mvn clean $MAVEN_PHASE -B -U --settings ./ci/settings.xml -Dmaven.test.skip=true -Dlocal.software.repository=sonatype - diff --git a/deeplearning4j/ci/settings.xml b/deeplearning4j/ci/settings.xml deleted file mode 100644 index e6fd58339..000000000 --- a/deeplearning4j/ci/settings.xml +++ /dev/null @@ -1,28 +0,0 @@ -[28 lines of Maven settings.xml deleted; XML markup lost in extraction. Same sonatype-nexus-snapshots server entry as arbiter/ci/settings.xml above] diff --git a/deeplearning4j/contrib/formatter.xml b/deeplearning4j/contrib/formatter.xml deleted file mode 100644 index d6cc96bf6..000000000 --- a/deeplearning4j/contrib/formatter.xml +++ /dev/null @@ -1,353 +0,0 @@ -[353 lines of Eclipse code-formatter XML deleted; the markup was lost in extraction, leaving no recoverable content] diff --git a/jumpy/.travis.yml b/jumpy/.travis.yml deleted file mode 100644 index 42fb54b1a..000000000 --- a/jumpy/.travis.yml +++ /dev/null @@ -1,25 +0,0 @@ -sudo: required -dist: trusty -language: python -python: - - "2.7" - - "3.6" -before_install: - - sudo apt-get install -y python-dev python-pip python-virtualenv pkg-config -install: - - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then - wget https://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh; - else - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh; - fi - - bash miniconda.sh -b -p $HOME/miniconda - - export PATH="$HOME/miniconda/bin:$PATH" - - hash -r - - conda config --set always_yes yes --set changeps1 no - - conda update -q conda - - conda info -a - - travis_retry conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION nose - - source activate test-environment - - pip install Cython --install-option="--no-cython-compile" - - pip install -e .[tests]
-script: py.test --pep8 -m pep8 \ No newline at end of file diff --git a/jumpy/LICENSE b/jumpy/LICENSE deleted file mode 100644 index 8dada3eda..000000000 --- a/jumpy/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. 
Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "{}" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright {yyyy} {name of copyright owner} - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/libnd4j/CMakeLists.txt b/libnd4j/CMakeLists.txt index 9610d2890..3376bd6b6 100755 --- a/libnd4j/CMakeLists.txt +++ b/libnd4j/CMakeLists.txt @@ -251,7 +251,6 @@ set(CPACK_PACKAGE_VERSION_MINOR "8") set(CPACK_PACKAGE_VERSION_PATCH "0") set(CPACK_PACKAGE_VERSION "${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}") set(CPACK_PACKAGE_INSTALL_DIRECTORY "libnd4j") -set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE") set(CPACK_RESOURCE_FILE_README "${CMAKE_CURRENT_SOURCE_DIR}/README.md") # Determine distribution and release — may require redhat-lsb-core installed on CentOS / RH diff --git a/libnd4j/LICENSE b/libnd4j/LICENSE deleted file mode 100755 index 8f71f43fe..000000000 --- a/libnd4j/LICENSE +++ /dev/null @@ -1,202 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "{}" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright {yyyy} {name of copyright owner} - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
- diff --git a/libnd4j/pom.xml b/libnd4j/pom.xml index d682da24c..d1d9944fa 100644 --- a/libnd4j/pom.xml +++ b/libnd4j/pom.xml @@ -1,5 +1,4 @@ -<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> +<project xmlns="http://maven.apache.org/POM/4.0.0" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + + <modelVersion>4.0.0</modelVersion> <parent> <groupId>org.deeplearning4j</groupId> @@ -26,8 +27,6 @@ <version>1.0.0-SNAPSHOT</version> </parent> - <modelVersion>4.0.0</modelVersion> - <groupId>org.nd4j</groupId> <artifactId>libnd4j</artifactId> <packaging>pom</packaging> @@ -38,14 +37,6 @@ <url>http://nd4j.org/</url> - - <licenses> - <license> - <name>Apache License, Version 2.0</name> - <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url> - <distribution>repo</distribution> - </license> - </licenses> - <developer> <id>agibsonccc</id> @@ -105,7 +96,6 @@ - <groupId>org.bytedeco</groupId> <artifactId>javacpp</artifactId> @@ -124,8 +114,10 @@ /org/bytedeco/openblas/${libnd4j.platform}/ - /${javacpp.platform.library.path}/include/ - /org/bytedeco/openblas/${libnd4j.platform}/include/ + /${javacpp.platform.library.path}/include/ + + /org/bytedeco/openblas/${libnd4j.platform}/include/ + /${javacpp.platform.library.path}/ @@ -234,7 +226,6 @@ - <id>build-windows</id> @@ -247,7 +238,6 @@ sh - <id>build-unix</id> @@ -258,8 +248,6 @@ bash - - <id>libnd4j-single-thread</id> @@ -292,7 +280,8 @@ - ${libnd4j.platform}-${libnd4j.chip}-${cuda.version} + ${libnd4j.platform}-${libnd4j.chip}-${cuda.version} + @@ -395,7 +384,6 @@ <groupId>org.codehaus.mojo</groupId> <artifactId>exec-maven-plugin</artifactId> <version>1.6.0</version> - <id>libnd4j-test-clean</id> @@ -416,9 +404,6 @@ - - - <id>libnd4j-helper-avx2</id>
diff --git a/nd4j/.appveyor.yml b/nd4j/.appveyor.yml deleted file mode 100644 index 28eb2bd76..000000000 --- a/nd4j/.appveyor.yml +++ /dev/null @@ -1,45 +0,0 @@ -branches: - only: - - master -version: '{build}' -image: Visual Studio 2015 -cache: - - '%USERPROFILE%\.m2' -clone_depth: 50 -environment: - matrix: - - EXT: - SCALA: 2.10 - - EXT: avx2 - SCALA: 2.11 -# - EXT: avx512 -# SCALA: 2.11 - - CUDA: 8.0 - SCALA: 2.10 - - CUDA: 9.0 - SCALA: 2.11 - - CUDA: 9.1 - SCALA: 2.11 - -init: - - wmic computersystem set AutomaticManagedPagefile=False - - wmic pagefile list /format:list -# - wmic pagefileset create name="C:\pagefile.sys" - - wmic pagefileset where name="C:\\pagefile.sys" set InitialSize=8192,MaximumSize=8192 - - wmic pagefileset list /format:list - - ps: write-host "Restarting..." - - ps: sleep 5 - - ps: restart-computer -f - - ps: sleep 5 - - ps: write-host "Restarted." - - wmic pagefile list /format:list - -install: - -build_script: - - '%APPVEYOR_BUILD_FOLDER%\ci\build-windows-x86_64.cmd' - -test_script: - -on_finish: -
diff --git a/nd4j/.codeclimate.yml b/nd4j/.codeclimate.yml deleted file mode 100644 index 5ce6e83a5..000000000 --- a/nd4j/.codeclimate.yml +++ /dev/null @@ -1,32 +0,0 @@ -version: "2" # required to adjust maintainability checks -checks: - argument-count: - config: - threshold: 4 - complex-logic: - config: - threshold: 4 - file-lines: - config: - threshold: 5000 - method-complexity: - config: - threshold: 10 - method-count: - config: - threshold: 500 - method-lines: - config: - threshold: 50 - nested-control-flow: - config: - threshold: 4 - return-statements: - config: - threshold: 20 - similar-code: - config: - threshold: # language-specific defaults. an override will affect all languages. - identical-code: - config: - threshold: # language-specific defaults. an override will affect all languages.
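Every Travis build script deleted in this patch gates its Maven phase on the same check: pull-request builds stop at install, while pushes to master deploy snapshots to Sonatype using the credentials from ci/settings.xml. A minimal standalone sketch of that shared convention (condensed from the deleted scripts above; not itself part of the patch, and the TRAVIS_* and SCALA variables are assumed to come from the CI environment):

#!/bin/bash
# Sketch of the deploy-vs-install convention used by the deleted ci/build-*.sh scripts.
set -evx

if [[ "${TRAVIS_PULL_REQUEST:-false}" == "false" ]]; then
  BRANCH="${TRAVIS_BRANCH:-master}"          # push/merge build; nd4j scripts use BRANCH to pick the libnd4j branch
  MAVEN_PHASE="deploy"                       # publish snapshots to Sonatype
else
  BRANCH="${TRAVIS_PULL_REQUEST_BRANCH:-}"   # pull-request build
  MAVEN_PHASE="install"                      # build and verify only
fi

bash change-scala-versions.sh "$SCALA"
mvn clean "$MAVEN_PHASE" -B -U --settings ./ci/settings.xml \
    -Dmaven.test.skip=true -Dlocal.software.repository=sonatype

The AppVeyor script below (nd4j/ci/build-windows-x86_64.cmd) applies the same split, keyed on %APPVEYOR_PULL_REQUEST_NUMBER% instead.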
diff --git a/nd4j/.travis.yml b/nd4j/.travis.yml deleted file mode 100644 index f74ab9ba0..000000000 --- a/nd4j/.travis.yml +++ /dev/null @@ -1,96 +0,0 @@ -branches: - only: - - master -notifications: - email: false -dist: trusty -sudo: required -cache: - directories: - - $HOME/.m2 -language: java -services: - - docker -matrix: - include: - - os: linux - env: OS=android-arm SCALA=2.10 - install: true - script: bash ./ci/build-android.sh - - os: linux - env: OS=android-arm64 SCALA=2.11 - install: true - script: bash ./ci/build-android.sh - - os: linux - env: OS=android-x86 SCALA=2.10 - install: true - script: bash ./ci/build-android.sh - - os: linux - env: OS=android-x86_64 SCALA=2.11 - install: true - script: bash ./ci/build-android.sh - - os: osx - osx_image: xcode7.3 - env: OS=ios-arm64 SCALA=2.10 - install: true - script: bash ./ci/build-ios.sh - - os: osx - osx_image: xcode7.3 - env: OS=ios-x86_64 SCALA=2.11 - install: true - script: bash ./ci/build-ios.sh - - os: linux - env: OS=linux-x86_64 SCALA=2.10 - install: true - script: bash ./ci/build-linux-x86_64.sh - - os: linux - env: OS=linux-x86_64 EXT=avx2 SCALA=2.11 - install: true - script: bash ./ci/build-linux-x86_64.sh - - os: linux - env: OS=linux-x86_64 EXT=avx512 SCALA=2.11 - install: true - script: bash ./ci/build-linux-x86_64.sh - - os: linux - env: OS=linux-x86_64 CUDA=8.0 SCALA=2.10 - install: true - script: bash ./ci/build-linux-x86_64.sh - - os: linux - env: OS=linux-x86_64 CUDA=9.0 SCALA=2.11 - install: true - script: bash ./ci/build-linux-x86_64.sh - - os: linux - env: OS=linux-x86_64 CUDA=9.1 SCALA=2.11 - install: true - script: bash ./ci/build-linux-x86_64.sh - - os: osx - osx_image: xcode7.3 - env: OS=macosx-x86_64 SCALA=2.10 - install: true - script: bash ./ci/build-macosx-x86_64.sh - - os: osx - osx_image: xcode7.3 - env: OS=macosx-x86_64 EXT=avx2 SCALA=2.11 - install: true - script: bash ./ci/build-macosx-x86_64.sh - - os: osx - osx_image: xcode9.2 - env: OS=macosx-x86_64 EXT=avx512 SCALA=2.11 - install: true - script: bash ./ci/build-macosx-x86_64.sh - - os: osx - osx_image: xcode7.3 - env: OS=macosx-x86_64 CUDA=8.0 SCALA=2.10 - install: true - script: bash ./ci/build-macosx-x86_64.sh - - os: osx - osx_image: xcode8.3 - env: OS=macosx-x86_64 CUDA=9.0 SCALA=2.11 - install: true - script: bash ./ci/build-macosx-x86_64.sh - - os: osx - osx_image: xcode8.3 - env: OS=macosx-x86_64 CUDA=9.1 SCALA=2.11 - install: true - script: bash ./ci/build-macosx-x86_64.sh - diff --git a/nd4j/LICENSE b/nd4j/LICENSE deleted file mode 100644 index 5c304d1a4..000000000 --- a/nd4j/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ -Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. 
- - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. 
Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "{}" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright {yyyy} {name of copyright owner} - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/nd4j/VERSION b/nd4j/VERSION deleted file mode 100644 index a1a513826..000000000 --- a/nd4j/VERSION +++ /dev/null @@ -1 +0,0 @@ -0.4-rc3.9-SNAPSHOT diff --git a/nd4j/buildAllversions.sh b/nd4j/buildAllversions.sh deleted file mode 100755 index 4a99b8f74..000000000 --- a/nd4j/buildAllversions.sh +++ /dev/null @@ -1,23 +0,0 @@ -#! 
/bin/bash -################################################################################ -# Copyright (c) 2015-2018 Skymind, Inc. -# -# This program and the accompanying materials are made available under the -# terms of the Apache License, Version 2.0 which is available at -# https://www.apache.org/licenses/LICENSE-2.0. -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# -# SPDX-License-Identifier: Apache-2.0 -################################################################################ - -set -eu -./change-cuda-versions.sh 9.0 # should be idempotent, this is the default -./buildmultiplescalaversions.sh "$@" -./change-cuda-versions.sh 8.0 -./buildmultiplescalaversions.sh "$@" -./change-cuda-versions.sh 9.0 #back to default diff --git a/nd4j/ci/build-android.sh b/nd4j/ci/build-android.sh deleted file mode 100755 index e31c4f4bb..000000000 --- a/nd4j/ci/build-android.sh +++ /dev/null @@ -1,47 +0,0 @@ -#!/bin/bash -################################################################################ -# Copyright (c) 2015-2018 Skymind, Inc. -# -# This program and the accompanying materials are made available under the -# terms of the Apache License, Version 2.0 which is available at -# https://www.apache.org/licenses/LICENSE-2.0. -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# -# SPDX-License-Identifier: Apache-2.0 -################################################################################ - -set -evx - -while true; do echo .; sleep 60; done & - -if [[ $TRAVIS_PULL_REQUEST == "false" ]]; then - BRANCH=$TRAVIS_BRANCH - MAVEN_PHASE="deploy" -else - BRANCH=$TRAVIS_PULL_REQUEST_BRANCH - MAVEN_PHASE="install" -fi - -if ! git -C $TRAVIS_BUILD_DIR/.. clone https://github.com/deeplearning4j/libnd4j/ --depth=50 --branch=$BRANCH; then - git -C $TRAVIS_BUILD_DIR/.. clone https://github.com/deeplearning4j/libnd4j/ --depth=50 -fi - -mkdir $HOME/Android/ -curl --retry 10 -L https://dl.google.com/android/repository/android-ndk-r16b-linux-x86_64.zip -o $HOME/Android/android-ndk.zip -unzip -qq $HOME/Android/android-ndk.zip -d $HOME/Android/ -ln -s $HOME/Android/android-ndk-r16b $HOME/Android/android-ndk -export ANDROID_NDK=$HOME/Android/android-ndk - -cd $TRAVIS_BUILD_DIR/../libnd4j/ -sed -i /cmake_minimum_required/d CMakeLists.txt -MAKEJ=2 bash buildnativeoperations.sh -platform $OS -cd $TRAVIS_BUILD_DIR/ -bash change-scala-versions.sh $SCALA -mvn clean $MAVEN_PHASE -B -U --settings ./ci/settings.xml -Dmaven.javadoc.skip=true -Dmaven.test.skip=true -Dlocal.software.repository=sonatype \ - -Djavacpp.platform=$OS -pl '!nd4j-uberjar,!nd4j-backends/nd4j-backend-impls/nd4j-cuda,!nd4j-backends/nd4j-backend-impls/nd4j-cuda-platform,!nd4j-backends/nd4j-tests' - diff --git a/nd4j/ci/build-ios.sh b/nd4j/ci/build-ios.sh deleted file mode 100755 index aff29566c..000000000 --- a/nd4j/ci/build-ios.sh +++ /dev/null @@ -1,49 +0,0 @@ -#!/bin/bash -################################################################################ -# Copyright (c) 2015-2018 Skymind, Inc. 
-# -# This program and the accompanying materials are made available under the -# terms of the Apache License, Version 2.0 which is available at -# https://www.apache.org/licenses/LICENSE-2.0. -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# -# SPDX-License-Identifier: Apache-2.0 -################################################################################ - -set -evx - -while true; do echo .; sleep 60; done & - -if [[ $TRAVIS_PULL_REQUEST == "false" ]]; then - BRANCH=$TRAVIS_BRANCH - MAVEN_PHASE="deploy" -else - BRANCH=$TRAVIS_PULL_REQUEST_BRANCH - MAVEN_PHASE="install" -fi - -if ! git -C $TRAVIS_BUILD_DIR/.. clone https://github.com/deeplearning4j/libnd4j/ --depth=50 --branch=$BRANCH; then - git -C $TRAVIS_BUILD_DIR/.. clone https://github.com/deeplearning4j/libnd4j/ --depth=50 -fi - -brew update -brew upgrade maven || true -brew install gcc || true -brew link --overwrite gcc - -/usr/local/bin/gcc-? --version -mvn -version - -cd $TRAVIS_BUILD_DIR/../libnd4j/ -sed -i="" /cmake_minimum_required/d CMakeLists.txt -MAKEJ=2 bash buildnativeoperations.sh -platform $OS -cd $TRAVIS_BUILD_DIR/ -bash change-scala-versions.sh $SCALA -mvn clean $MAVEN_PHASE -B -U --settings ./ci/settings.xml -Dmaven.javadoc.skip=true -Dmaven.test.skip=true -Dlocal.software.repository=sonatype \ - -Djavacpp.platform=$OS -Djavacpp.platform.compiler=clang++ -pl '!nd4j-uberjar,!nd4j-backends/nd4j-backend-impls/nd4j-cuda,!nd4j-backends/nd4j-backend-impls/nd4j-cuda-platform,!nd4j-backends/nd4j-backend-impls/nd4j-native-platform,!nd4j-backends/nd4j-tests' - diff --git a/nd4j/ci/build-linux-x86_64.sh b/nd4j/ci/build-linux-x86_64.sh deleted file mode 100755 index 8fb6c7ae4..000000000 --- a/nd4j/ci/build-linux-x86_64.sh +++ /dev/null @@ -1,77 +0,0 @@ -#!/bin/bash -################################################################################ -# Copyright (c) 2015-2018 Skymind, Inc. -# -# This program and the accompanying materials are made available under the -# terms of the Apache License, Version 2.0 which is available at -# https://www.apache.org/licenses/LICENSE-2.0. -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# -# SPDX-License-Identifier: Apache-2.0 -################################################################################ - -set -evx - -while true; do echo .; sleep 60; done & - -sudo fallocate -l 4GB /swapfile -sudo chmod 600 /swapfile -sudo mkswap /swapfile -sudo swapon /swapfile - -if [[ $TRAVIS_PULL_REQUEST == "false" ]]; then - BRANCH=$TRAVIS_BRANCH - MAVEN_PHASE="deploy" -else - BRANCH=$TRAVIS_PULL_REQUEST_BRANCH - MAVEN_PHASE="install" -fi - -if ! git -C $TRAVIS_BUILD_DIR/.. clone https://github.com/deeplearning4j/libnd4j/ --depth=50 --branch=$BRANCH; then - git -C $TRAVIS_BUILD_DIR/.. clone https://github.com/deeplearning4j/libnd4j/ --depth=50 -fi - -PROTOBUF=3.5.1 -curl --retry 10 -L https://github.com/google/protobuf/releases/download/v$PROTOBUF/protobuf-cpp-$PROTOBUF.tar.gz -o $HOME/protobuf-$PROTOBUF.tar.gz -tar -C $TRAVIS_BUILD_DIR/.. 
--totals -xf $HOME/protobuf-$PROTOBUF.tar.gz - -if [[ -n "${EXT:-}" ]]; then - DEVTOOLSET=6 -else - DEVTOOLSET=4 -fi - -if [[ -n "${CUDA:-}" ]]; then - DOCKER_IMAGE=nvidia/cuda:$CUDA-devel-centos6 -else - DOCKER_IMAGE=centos:6 -fi - -docker run -ti -e SONATYPE_USERNAME -e SONATYPE_PASSWORD -v $HOME/.m2:/root/.m2 -v $TRAVIS_BUILD_DIR/..:/build $DOCKER_IMAGE /bin/bash -evxc "\ - yum -y install centos-release-scl-rh epel-release; \ - yum -y install devtoolset-$DEVTOOLSET-toolchain rh-maven33 cmake3 git java-1.8.0-openjdk-devel; \ - source scl_source enable devtoolset-$DEVTOOLSET rh-maven33 || true; \ - cd /build/protobuf-$PROTOBUF/; \ - ./configure; \ - make -j2; \ - cd /build/libnd4j/; \ - sed -i /cmake_minimum_required/d CMakeLists.txt; \ - if [[ -n \"${CUDA:-}\" ]]; then \ - MAKEJ=1 bash buildnativeoperations.sh -c cuda -v $CUDA -cc 30; \ - cd /build/nd4j/; \ - bash change-cuda-versions.sh $CUDA; \ - EXTRA_OPTIONS='-pl !nd4j-uberjar,!nd4j-backends/nd4j-backend-impls/nd4j-native,!nd4j-backends/nd4j-backend-impls/nd4j-native-platform,!nd4j-backends/nd4j-tests'; \ - else \ - MAKEJ=2 bash buildnativeoperations.sh -c cpu -e ${EXT:-}; \ - cd /build/nd4j/; \ - EXTRA_OPTIONS='-pl !nd4j-uberjar,!nd4j-backends/nd4j-backend-impls/nd4j-cuda,!nd4j-backends/nd4j-backend-impls/nd4j-cuda-platform,!nd4j-backends/nd4j-tests'; \ - fi; \ - bash change-scala-versions.sh $SCALA; \ - mvn clean $MAVEN_PHASE -B -U --settings ./ci/settings.xml -Dmaven.test.skip=true -Dlocal.software.repository=sonatype \ - -Dlibnd4j.extension=${EXT:-} \$EXTRA_OPTIONS -DprotocCommand=/build/protobuf-$PROTOBUF/src/protoc;" - diff --git a/nd4j/ci/build-macosx-x86_64.sh b/nd4j/ci/build-macosx-x86_64.sh deleted file mode 100755 index 98abb17c4..000000000 --- a/nd4j/ci/build-macosx-x86_64.sh +++ /dev/null @@ -1,71 +0,0 @@ -#!/bin/bash -################################################################################ -# Copyright (c) 2015-2018 Skymind, Inc. -# -# This program and the accompanying materials are made available under the -# terms of the Apache License, Version 2.0 which is available at -# https://www.apache.org/licenses/LICENSE-2.0. -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# -# SPDX-License-Identifier: Apache-2.0 -################################################################################ - -set -evx - -while true; do echo .; sleep 60; done & - -if [[ $TRAVIS_PULL_REQUEST == "false" ]]; then - BRANCH=$TRAVIS_BRANCH - MAVEN_PHASE="deploy" -else - BRANCH=$TRAVIS_PULL_REQUEST_BRANCH - MAVEN_PHASE="install" -fi - -if ! git -C $TRAVIS_BUILD_DIR/.. clone https://github.com/deeplearning4j/libnd4j/ --depth=50 --branch=$BRANCH; then - git -C $TRAVIS_BUILD_DIR/.. clone https://github.com/deeplearning4j/libnd4j/ --depth=50 -fi - -brew update -brew upgrade maven || true -brew install gcc || true -brew link --overwrite gcc - -/usr/local/bin/gcc-? 
--version -mvn -version - -if [[ "${CUDA:-}" == "8.0" ]]; then - CUDA_URL=https://developer.nvidia.com/compute/cuda/8.0/Prod2/local_installers/cuda_8.0.61_mac-dmg -elif [[ "${CUDA:-}" == "9.0" ]]; then - CUDA_URL=https://developer.nvidia.com/compute/cuda/9.0/Prod/local_installers/cuda_9.0.176_mac-dmg -elif [[ "${CUDA:-}" == "9.1" ]]; then - CUDA_URL=https://developer.nvidia.com/compute/cuda/9.1/Prod/local_installers/cuda_9.1.85_mac -fi -if [[ -n ${CUDA_URL:-} ]]; then - curl --retry 10 -L -o $HOME/cuda.dmg $CUDA_URL - hdiutil mount $HOME/cuda.dmg - sleep 5 - sudo /Volumes/CUDAMacOSXInstaller/CUDAMacOSXInstaller.app/Contents/MacOS/CUDAMacOSXInstaller --accept-eula --no-window -fi - -cd $TRAVIS_BUILD_DIR/../libnd4j/ -sed -i="" /cmake_minimum_required/d CMakeLists.txt -if [[ -n "${CUDA:-}" ]]; then - MAKEJ=1 bash buildnativeoperations.sh -c cuda -v $CUDA -cc 30 - cd $TRAVIS_BUILD_DIR/ - bash change-cuda-versions.sh $CUDA - EXTRA_OPTIONS='-pl !nd4j-uberjar,!nd4j-backends/nd4j-backend-impls/nd4j-native,!nd4j-backends/nd4j-backend-impls/nd4j-native-platform,!nd4j-backends/nd4j-tests' -else - MAKEJ=2 bash buildnativeoperations.sh -c cpu -e ${EXT:-} - cd $TRAVIS_BUILD_DIR/ - EXTRA_OPTIONS='-pl !nd4j-uberjar,!nd4j-backends/nd4j-backend-impls/nd4j-cuda,!nd4j-backends/nd4j-backend-impls/nd4j-cuda-platform,!nd4j-backends/nd4j-tests' -fi -bash change-scala-versions.sh $SCALA -mvn clean $MAVEN_PHASE -B -U --settings ./ci/settings.xml -Dmaven.javadoc.skip=true -Dmaven.test.skip=true -Dlocal.software.repository=sonatype \ - -Dlibnd4j.extension=${EXT:-} $EXTRA_OPTIONS - diff --git a/nd4j/ci/build-windows-x86_64.cmd b/nd4j/ci/build-windows-x86_64.cmd deleted file mode 100644 index 5f7af7bd8..000000000 --- a/nd4j/ci/build-windows-x86_64.cmd +++ /dev/null @@ -1,49 +0,0 @@ -call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64 -echo on - -if "%APPVEYOR_PULL_REQUEST_NUMBER%" == "" ( - set BRANCH=%APPVEYOR_REPO_BRANCH% - set MAVEN_PHASE=deploy -) else ( - set BRANCH=%APPVEYOR_PULL_REQUEST_HEAD_REPO_BRANCH% - set MAVEN_PHASE=install -) - -git -C "%APPVEYOR_BUILD_FOLDER%\.." clone https://github.com/deeplearning4j/libnd4j/ --depth=50 --branch=%BRANCH% -if %ERRORLEVEL% neq 0 ( - git -C "%APPVEYOR_BUILD_FOLDER%\.." 
clone https://github.com/deeplearning4j/libnd4j/ --depth=50 -) - -if "%CUDA%" == "8.0" ( - set "CUDA_URL=https://developer.nvidia.com/compute/cuda/8.0/Prod2/local_installers/cuda_8.0.61_windows-exe" -) -if "%CUDA%" == "9.0" ( - set "CUDA_URL=https://developer.nvidia.com/compute/cuda/9.0/Prod/local_installers/cuda_9.0.176_windows-exe" -) -if "%CUDA%" == "9.1" ( - set "CUDA_URL=https://developer.nvidia.com/compute/cuda/9.1/Prod/local_installers/cuda_9.1.85_windows" -) -if not "%CUDA_URL%" == "" ( - curl --retry 10 -L -o cuda.exe %CUDA_URL% - cuda.exe -s - set "CUDA_PATH=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA%" - set "CUDA_PATH_V%CUDA:.=_%=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA%" - set "PATH=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA%\bin;%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA%\libnvvp;%PATH%" -) - -set "PATH=C:\msys64\usr\bin\core_perl;C:\msys64\mingw64\bin;C:\msys64\usr\bin;%PATH%" -bash -lc "pacman -Syu --noconfirm" -bash -lc "pacman -Su --noconfirm" -bash -lc "pacman -S --needed --noconfirm base-devel make mingw-w64-x86_64-cmake mingw-w64-x86_64-gcc" - -if not "%CUDA%" == "" ( - bash -c "cd ../libnd4j/; MAKEJ=1 bash buildnativeoperations.sh -c cuda -v $CUDA -cc 30" - bash -c "bash change-cuda-versions.sh $CUDA" - set "EXTRA_OPTIONS=-pl !nd4j-uberjar,!nd4j-backends/nd4j-backend-impls/nd4j-native,!nd4j-backends/nd4j-backend-impls/nd4j-native-platform,!nd4j-backends/nd4j-tests" -) else ( - bash -c "cd ../libnd4j/; MAKEJ=2 bash buildnativeoperations.sh -c cpu -e $EXT" - set "EXTRA_OPTIONS=-pl !nd4j-uberjar,!nd4j-backends/nd4j-backend-impls/nd4j-cuda,!nd4j-backends/nd4j-backend-impls/nd4j-cuda-platform,!nd4j-backends/nd4j-tests" -) -bash -c "bash change-scala-versions.sh $SCALA" -call mvn clean %MAVEN_PHASE% -B -U --settings .\ci\settings.xml -Dmaven.test.skip=true -Dlocal.software.repository=sonatype ^ - -Dlibnd4j.extension=%EXT% %EXTRA_OPTIONS%
diff --git a/nd4j/ci/settings.xml b/nd4j/ci/settings.xml deleted file mode 100644 index e6fd58339..000000000 --- a/nd4j/ci/settings.xml +++ /dev/null @@ -1,28 +0,0 @@ -<settings> - <servers> - <server> - <id>sonatype-nexus-snapshots</id> - <username>${env.SONATYPE_USERNAME}</username> - <password>${env.SONATYPE_PASSWORD}</password> - </server> - </servers> -</settings>
diff --git a/nd4j/contrib/formatter.xml b/nd4j/contrib/formatter.xml deleted file mode 100644 index d6cc96bf6..000000000 --- a/nd4j/contrib/formatter.xml +++ /dev/null @@ -1,353 +0,0 @@ [353 deleted lines of code-formatter profile XML; element markup not recoverable]
diff --git a/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-client/pom.xml b/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-client/pom.xml index 5b237f65c..75024af0f 100644 --- a/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-client/pom.xml +++ b/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-client/pom.xml @@ -1,3 +1,4 @@ +<?xml version="1.0" encoding="UTF-8"?> - <parent> - <artifactId>nd4j-parameter-server-parent</artifactId> - <groupId>org.nd4j</groupId> - <version>1.0.0-SNAPSHOT</version> - </parent> + <modelVersion>4.0.0</modelVersion> + <parent> + <groupId>org.nd4j</groupId> + <artifactId>nd4j-parameter-server-parent</artifactId> +
<version>1.0.0-SNAPSHOT</version> + </parent> + <artifactId>nd4j-parameter-server-client</artifactId> - <packaging>jar</packaging> <name>nd4j-parameter-server-client</name> @@ -34,7 +37,6 @@ <artifactId>unirest-java</artifactId> <version>${unirest.version}</version> - <groupId>org.nd4j</groupId> <artifactId>nd4j-parameter-server-model</artifactId> @@ -58,22 +60,17 @@ <groupId>org.nd4j</groupId> <artifactId>nd4j-parameter-server</artifactId> - <version>${project.version}</version> <scope>test</scope> - <groupId>ch.qos.logback</groupId> <artifactId>logback-classic</artifactId> <version>${logback.version}</version> <scope>test</scope> - <groupId>org.nd4j</groupId> <artifactId>nd4j-common-tests</artifactId> - <version>${project.version}</version> - <scope>test</scope> @@ -81,11 +78,9 @@ <id>testresources</id> - <id>nd4j-testresources</id> - <id>nd4j-tests-cpu</id> @@ -116,7 +111,6 @@ - <id>nd4j-tests-cuda</id>
diff --git a/nd4j/nd4j-parameter-server-parent/nd4j-parameterserver-model/pom.xml b/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-model/pom.xml similarity index 78% rename from nd4j/nd4j-parameter-server-parent/nd4j-parameterserver-model/pom.xml rename to nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-model/pom.xml index 350f53806..07d99e966 100644 --- a/nd4j/nd4j-parameter-server-parent/nd4j-parameterserver-model/pom.xml +++ b/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-model/pom.xml @@ -1,3 +1,4 @@ +<?xml version="1.0" encoding="UTF-8"?> - <parent> - <artifactId>nd4j-parameter-server-parent</artifactId> - <groupId>org.nd4j</groupId> - <version>1.0.0-SNAPSHOT</version> - </parent> + <modelVersion>4.0.0</modelVersion> + <parent> + <groupId>org.nd4j</groupId> + <artifactId>nd4j-parameter-server-parent</artifactId> + <version>1.0.0-SNAPSHOT</version> + </parent> + <artifactId>nd4j-parameter-server-model</artifactId> - <packaging>jar</packaging> <name>nd4j-parameter-server-model</name> <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> - <maven.compiler.source>1.8</maven.compiler.source> - <maven.compiler.target>1.8</maven.compiler.target>
diff --git a/nd4j/nd4j-parameter-server-parent/nd4j-parameterserver-model/src/main/java/org/nd4j/parameterserver/model/MasterConnectionInfo.java b/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-model/src/main/java/org/nd4j/parameterserver/model/MasterConnectionInfo.java similarity index 100% rename from nd4j/nd4j-parameter-server-parent/nd4j-parameterserver-model/src/main/java/org/nd4j/parameterserver/model/MasterConnectionInfo.java rename to nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-model/src/main/java/org/nd4j/parameterserver/model/MasterConnectionInfo.java
diff --git a/nd4j/nd4j-parameter-server-parent/nd4j-parameterserver-model/src/main/java/org/nd4j/parameterserver/model/MasterStatus.java b/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-model/src/main/java/org/nd4j/parameterserver/model/MasterStatus.java similarity index 100% rename from nd4j/nd4j-parameter-server-parent/nd4j-parameterserver-model/src/main/java/org/nd4j/parameterserver/model/MasterStatus.java rename to nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-model/src/main/java/org/nd4j/parameterserver/model/MasterStatus.java
diff --git a/nd4j/nd4j-parameter-server-parent/nd4j-parameterserver-model/src/main/java/org/nd4j/parameterserver/model/ServerState.java b/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-model/src/main/java/org/nd4j/parameterserver/model/ServerState.java similarity index 100% rename from nd4j/nd4j-parameter-server-parent/nd4j-parameterserver-model/src/main/java/org/nd4j/parameterserver/model/ServerState.java rename to nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-model/src/main/java/org/nd4j/parameterserver/model/ServerState.java
diff --git a/nd4j/nd4j-parameter-server-parent/nd4j-parameterserver-model/src/main/java/org/nd4j/parameterserver/model/ServerType.java b/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-model/src/main/java/org/nd4j/parameterserver/model/ServerType.java similarity index 100% rename from nd4j/nd4j-parameter-server-parent/nd4j-parameterserver-model/src/main/java/org/nd4j/parameterserver/model/ServerType.java rename to nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-model/src/main/java/org/nd4j/parameterserver/model/ServerType.java
diff --git
a/nd4j/nd4j-parameter-server-parent/nd4j-parameterserver-model/src/main/java/org/nd4j/parameterserver/model/ServerTypeJson.java b/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-model/src/main/java/org/nd4j/parameterserver/model/ServerTypeJson.java
similarity index 100%
rename from nd4j/nd4j-parameter-server-parent/nd4j-parameterserver-model/src/main/java/org/nd4j/parameterserver/model/ServerTypeJson.java
rename to nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-model/src/main/java/org/nd4j/parameterserver/model/ServerTypeJson.java
diff --git a/nd4j/nd4j-parameter-server-parent/nd4j-parameterserver-model/src/main/java/org/nd4j/parameterserver/model/SlaveConnectionInfo.java b/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-model/src/main/java/org/nd4j/parameterserver/model/SlaveConnectionInfo.java
similarity index 100%
rename from nd4j/nd4j-parameter-server-parent/nd4j-parameterserver-model/src/main/java/org/nd4j/parameterserver/model/SlaveConnectionInfo.java
rename to nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-model/src/main/java/org/nd4j/parameterserver/model/SlaveConnectionInfo.java
diff --git a/nd4j/nd4j-parameter-server-parent/nd4j-parameterserver-model/src/main/java/org/nd4j/parameterserver/model/SlaveStatus.java b/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-model/src/main/java/org/nd4j/parameterserver/model/SlaveStatus.java
similarity index 100%
rename from nd4j/nd4j-parameter-server-parent/nd4j-parameterserver-model/src/main/java/org/nd4j/parameterserver/model/SlaveStatus.java
rename to nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-model/src/main/java/org/nd4j/parameterserver/model/SlaveStatus.java
diff --git a/nd4j/nd4j-parameter-server-parent/nd4j-parameterserver-model/src/main/java/org/nd4j/parameterserver/model/SubscriberState.java b/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-model/src/main/java/org/nd4j/parameterserver/model/SubscriberState.java
similarity index 100%
rename from nd4j/nd4j-parameter-server-parent/nd4j-parameterserver-model/src/main/java/org/nd4j/parameterserver/model/SubscriberState.java
rename to nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-model/src/main/java/org/nd4j/parameterserver/model/SubscriberState.java
diff --git a/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-node/pom.xml b/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-node/pom.xml
index f1c168c33..07a04f80d 100644
--- a/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-node/pom.xml
+++ b/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-node/pom.xml
@@ -1,3 +1,4 @@ + - - - nd4j-parameter-server-parent - org.nd4j - 1.0.0-SNAPSHOT - + + 4.0.0 + + org.nd4j + nd4j-parameter-server-parent + 1.0.0-SNAPSHOT + + nd4j-parameter-server-node_2.11 - jar nd4j-parameter-server-node - - - - org.apache.maven.plugins - maven-compiler-plugin - - 8 - 8 - - - -
@@ -64,27 +55,21 @@ org.nd4j nd4j-parameter-server - ${project.version} - ch.qos.logback logback-classic ${logback.version} test - io.reactivex.rxjava2 rxjava 2.2.0 - org.nd4j nd4j-common-tests - ${project.version} - test
@@ -92,11 +77,9 @@ testresources - nd4j-testresources - nd4j-tests-cpu
@@ -127,7 +110,6 @@ - nd4j-tests-cuda
diff --git a/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-rocksdb-storage/pom.xml b/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-rocksdb-storage/pom.xml
index 3ba5a156a..733d1ae1b 100644
--- a/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-rocksdb-storage/pom.xml
+++
b/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-rocksdb-storage/pom.xml
@@ -1,3 +1,4 @@ + - - - nd4j-parameter-server-parent - org.nd4j - 1.0.0-SNAPSHOT - + + 4.0.0 + + org.nd4j + nd4j-parameter-server-parent + 1.0.0-SNAPSHOT + + nd4j-parameter-server-rocksdb-storage - jar nd4j-parameter-server-rocksdb-storage - org.rocksdb
@@ -38,20 +40,14 @@ org.nd4j nd4j-parameter-server - ${project.version} - junit junit - test - org.nd4j nd4j-common-tests - ${project.version} - test
diff --git a/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-status/pom.xml b/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-status/pom.xml
index 7c2783904..f24a1de89 100644
--- a/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-status/pom.xml
+++ b/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-status/pom.xml
@@ -1,3 +1,4 @@ + - - - nd4j-parameter-server-parent - org.nd4j - 1.0.0-SNAPSHOT - + + 4.0.0 + + org.nd4j + nd4j-parameter-server-parent + 1.0.0-SNAPSHOT + + nd4j-parameter-server-status_2.11 - jar nd4j-parameter-server-status
@@ -41,25 +44,19 @@ mapdb ${mapdb.version} - org.nd4j nd4j-parameter-server - ${project.version} - junit junit - test - com.typesafe.play play-netty-server_2.11 ${playframework.version} - com.typesafe.play play-java_2.11
@@ -95,12 +92,9 @@ - org.nd4j nd4j-common-tests - ${project.version} - test
diff --git a/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server/pom.xml b/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server/pom.xml
index d96eb02f4..0325f2d52 100644
--- a/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server/pom.xml
+++ b/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server/pom.xml
@@ -1,3 +1,4 @@ + - - - nd4j-parameter-server-parent - org.nd4j - 1.0.0-SNAPSHOT - + + 4.0.0 + + org.nd4j + nd4j-parameter-server-parent + 1.0.0-SNAPSHOT + + nd4j-parameter-server - jar nd4j-parameter-server
@@ -34,6 +37,11 @@ nd4j-parameter-server-model ${project.version} + + org.nd4j + nd4j-aeron + ${project.version} + org.slf4j slf4j-log4j12
@@ -43,11 +51,6 @@ junit junit - - org.nd4j - nd4j-aeron - ${project.version} - com.beust jcommander
@@ -58,12 +61,9 @@ unirest-java ${unirest.version} - org.nd4j nd4j-common-tests - ${project.version} - test
@@ -71,11 +71,9 @@ testresources - nd4j-testresources - nd4j-tests-cpu
@@ -106,7 +104,6 @@ - nd4j-tests-cuda
diff --git a/nd4j/nd4j-parameter-server-parent/pom.xml b/nd4j/nd4j-parameter-server-parent/pom.xml
index 4d77e3e96..3a160d2e3 100644
--- a/nd4j/nd4j-parameter-server-parent/pom.xml
+++ b/nd4j/nd4j-parameter-server-parent/pom.xml
@@ -1,3 +1,4 @@ + - + + + 4.0.0 + - nd4j org.nd4j + nd4j 1.0.0-SNAPSHOT - 4.0.0 nd4j-parameter-server-parent pom nd4j-parameter-server-parent + + nd4j-parameter-server + nd4j-parameter-server-client + nd4j-parameter-server-model + nd4j-parameter-server-status + nd4j-parameter-server-rocksdb-storage + nd4j-parameter-server-node + + + + 1.8 + 1.8 + + + + + + org.nd4j + nd4j-common-tests + ${project.version} + test + + + org.nd4j + nd4j-parameter-server + ${project.version} + + +
@@ -52,12 +85,10 @@ - com.google.code.play2-maven-plugin play2-maven-plugin ${maven-play2-plugin.version} -
[remaining deleted XML plugin configuration; markup not recoverable]
diff --git a/rl4j/cartpole.gif b/rl4j/docs/images/cartpole.gif
similarity index 100%
rename from rl4j/cartpole.gif
rename to rl4j/docs/images/cartpole.gif
diff --git a/rl4j/doom.gif b/rl4j/docs/images/doom.gif
similarity index 100%
rename from rl4j/doom.gif
rename to rl4j/docs/images/doom.gif
diff --git a/rl4j/malmo.gif b/rl4j/docs/images/malmo.gif
similarity index 100%
rename from rl4j/malmo.gif
rename to rl4j/docs/images/malmo.gif
diff --git a/rl4j/scoregraph.png b/rl4j/scoregraph.png
deleted file mode 100644
index dd317734e30c3098de168d3184b1f5edd04aa49b..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001
literal 90128 [base85-encoded binary payload of the deleted scoregraph.png omitted]
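A note on the pom.xml hunks above: the child modules (nd4j-parameter-server-client, -node, -rocksdb-storage, -status, and nd4j-parameter-server itself) repeatedly delete "- ${project.version}" and "- test" from their nd4j-parameter-server and nd4j-common-tests dependencies, while the nd4j-parameter-server-parent hunk adds those same coordinates once, under a dependencyManagement section. The XML markup was stripped from the hunks during extraction, so the following parent-pom sketch reconstructs the element names; treat the exact tags as an assumption rather than the literal patch content.

<!-- Sketch of the dependencyManagement block implied by the added (+) lines
     in the nd4j-parameter-server-parent/pom.xml hunk above; tag names are
     reconstructed, not taken verbatim from the patch. -->
<dependencyManagement>
  <dependencies>
    <!-- Version and test scope are declared once here and inherited. -->
    <dependency>
      <groupId>org.nd4j</groupId>
      <artifactId>nd4j-common-tests</artifactId>
      <version>${project.version}</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.nd4j</groupId>
      <artifactId>nd4j-parameter-server</artifactId>
      <version>${project.version}</version>
    </dependency>
  </dependencies>
</dependencyManagement>

With that in place a child pom only needs the groupId/artifactId pair, which is exactly why the child hunks shrink to bare coordinates.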
zz_h7&BbqVf%;T}}w?%&O5o8J8Z~a5<-gz&7c`jY$bUxdp)~o=(S0!Msv{S^USQT*AZ9GXKpnrO07( zdD$#`@#IL(O$B@-mxG_f8*peKK#efN=~G{VunM3rHB=>wHP^5Dg`D#0<@s-gF=KO_jn#p) zr*8v~JFl?r5L)cO8UHS?i5in50pE=r8pu{10bkGsxqtE)_Ao8g(XA%HzCU4kvXB%A zGGRsY)l}&GFpc6wMa;S}1Uv6>Ri4>aZ_-1dOaZMFi*4v2$9q3(hy|5T=?D~1m}dJu zw-1Ir)j7sfDf}P`Tv)r`jA1mdDqYqKS*?_wk&dudCd4K%hE-qq9W)WNo_(%|M75I@ z^WN`paQ8M$0Hks3w!Ky4A6&jpDoENzTI~U1eZO0NUfmqQIR>DJKTSXr+T|R zU6oz$yzUIS>lY`Ih`A3`wD+o2(OCk%56^O*4dWu5;QGNIGOmHtP0Hca4w=ucqEuW} zSFK#$2Q}?YWxS9)(H1L;-CNGdDN5(&a&_I^sk9^9Oduz)q5OaZC?o$PkUR%iYO<$2 z_InGQ5y{{p594i|Kb86%_il=(*-9}|GDMiY0kg`JdiTVm7K*1>rA^qv@20;Sf_cqE zTh3mNeYsD=q+V+}efe=GJAStjl&ntMLJl08I|N8+DY1}(>6yB=kytk(K}{i)V8e0z zvd+)$19RY5y_<9k*_AuctVOZ=LNNC;=dh{gAVyPRb)KDSlx<`4wtrBnRqDiY>W4?R z3Mv49YU=0gOhA|AGBW#|L$$;GU5+2bWKo;5HpoGro4udS&2vh&mjekSOT z^BqbR#Fxb2lS8_zY+T!h13;J;W{u$mCJIa%t`uZSbZ($9aFzFxPBl*fD@Lw`6z@%3 znJhy`Hof$Q@q)P!m}%~f!|c{9!G?Rv!o|{Me#C)OgfmSbvV3N_c5#p>8CGCqO_h#B z9(cHF1L^d-oY$1^93ew%0>UX6Aa%dpBYo@HM0uFbRpJgL<0Fa;R=samgHfd3X=-_q z5Ih&n?k+s{pog{RUF?l4D!>)ah>zP+N<$77fyrg3KTQh&bl1tLITN zdF*uJ_ola=fv?Rzn-zvpG>Ow@!%wKXv*pi)xeuYI|TUE^2z zj~*EY;}FPSUIfR6QCXw2kosLXbl#5_w2QOXb^73+T*ngYm^d0n+ek*khh4;Q47j=; zZ}fY5A-cW4jnz|V7XEt{a?*-i0=bpEAtd(o)Wyh+dWN9AhV6XfW1$j?<+Pv7j5jEp z#dmxZrRQT|+$cu5FI{qd5Zw%QbEspFr^nlo0)+{_jn|IxIQTx;LZ9Kyn(I0rTD0w% z?J6*QUmx31X4Isko9B3Hi6@_R(nu_qO?a{F9(MIb6oU?77O_pcWcd-f*KZMGc$ColcCyzD0R`b|JDv|>g zncQ^mo`b(6p|oPwtOP?#H+DAmy~8M4))<{T8~f1e%Wc;AMVw&i!@HE5Cw%a_x;L~K zm$P&`cGAAYZpz-e=;QIy%N@KijQj2#nUb`auv%PcZa#!TOdm(A^mQe+i=uSAbHQgH zFH487?LPI&cS$mike*wCer>KAxou2uo^<6N^bx1(V4KA&@}C+5Gy{Wy@ivuE5axP0 z*%Dij1jm->uiren;0rEJ7At5r+}W&2p8q@pcuA3544iSj`VOxvF2+xc`CbGHv+eY^ z>_e&@Caz>BQ00&tNdg8XWFqMfxCvy&yG1L@{ZC6b3rG7&I0P=fM_j#&!xW5O-(E-0>G~cKq+U67uC*dOTkz7KqBmAGa*&mu^go4e`x}IC6CfjYls)LClBh6EE#bl{GgHdge8f_ zK;iC}51w=c=Lp_LU97~{C0C+y+(TkLNJ1}*q7a0hEE)uSL&3+rc`iSXo3LJzV6$nP zsGQDoEBL&KF0~D&!0-jXwtI&T10M9V_szd3I8Eb9umF#<%Zgc2zHT>!NLQ&=7QfpA@+gm@`qm z4wt@V&5L!8n3mEZ+zBE=1N4V%W*tsZ9u~}1uhV}xUk31Ib-u$-p;nX)P1}9YR$cn$ z2h{scciXVkTP^#*538KP4X08bz-qdAjRL3Mx3pv70i#U%Q!8!!+MV-Dfo(uZ#BS(A zA~?LRJFaAzD6l}+z`}*RXSZlSBzv6FigR%OC*?Q>;E0MXF`}+Nn8)z3OZbCTeX`}_Z4&$u z*N7;LGDgXqE@$*!t5a$5knaz-aIG@Zw(U0`vZL!@jK2+?zlfw8{7gD>ug-dR?0|FW ziLKG-F=(P#=EpkD_o;pF)W^Z1*z|Tm4o-LNRikS&h{!(EU5C$k&B8TTOQ%9G_v#A6 z4^af!c##03>$mQ_k`+_5mCN>Ca;E01^=4IToP*!fYaE7e@X%y-9*NO;g}HS1hv$e* z*3!|OMH2zXhsV?#1El~2D}-(8?a#xXDZD5pp0rLaxZnG4aJ*!{P0a&=kQI}DlY~^l z-p`%m(Lbw@*HgmqMkDVCPvzpt4h*x~g`w22Y7xP*7v3fuiI zjP@KsgfD>Y#C{twJDy%x5iyMQx+NPsf9UuMfg4tUF@#2%NjmPHzdZi*$G5Lp;cX28 zm=1H*06pmS4Pmsdh(3Sos-qc7ZPvqZkB6h^K}R%AZ($NlWEppEnt#ZOB`HLT~hNu`2|?JA_;Ip%82mg#bt=e+P6~XP?Lk(pGp)&< z-Dz=5gkWD$VY2!*KgC0IfMf+7atZQ;XP)FydeE9Ww3n<$VrR4TPd`ibq0A8m?=tut zAv8~I|4+6rt*&(7COL8aiqC1d29X!62Qya>0Imu`@aL>l{9*iT%bkxa zj93%ZVf$YhdT-ih*O5Pt?mBI5pe0ey?ZmL#^C<%^oQc;$G`7EjsVb?$Ll4e*9K~ue zF^@ZS01p=H%U5`|ecTqFm-5P}|IvwADQNno)U$4}>h;&Hnw|0zsSx9H_2kkKGCWQB z-)Tn=*e)$_Ex+al5oGzZIiH4`Lh-&~iDmg%8|2|6dS+mv+)+Eq*1UCgFA=xcoz(y6 zL-Qw}b~VLEvC1|cCPFGlF7`ye{5p-lVE&_EYZj%%cwfyLn;n^N+Zp-4v03;`fmdP~%a^^(kvqFNu+_brRCs3-6es zlXOWWz-}VYrTXsEaAto}>_?d^6@|b5CFE&+~ZmU0|`)(_Q zsLv6v$jA*68Iz#XSY($W-k@$V6&y>Fq11$2iQ0pg0!lMw2T`ha-VJPTr_KJDN(tf{ zQ7L7D_8R45bPrT&1U7WaJaQ(xwRkQp6z-t#`fX7;VzFiHb9gC>z}5ZtnxbjQCaeT_ z-Q{!Tgj(!g<)7#Dr~RTxwV$VyUK9G3f4eLQzj6XIH|1eRi+YcR;_cc?9na2-wYXdn zz@(SHWx~+l4%&<*_yA4b!r)B;B?>YZNy2``qTYMQ5*j{sV(DPe)?)?QE1VIhE4@#` zd_8?i*#zP(Z^d%BgSp;11zP=y15jaYJwR1s@}qWc9HOb@6YRG%3D7+`M2e%Tpx}@$ z*mldw34VaUyS}f_=Nn;bdH669SXi_<^}nd5VCdn@BFHDL0_)F2#ed=z`UCod21Rr) 
zZ)s?qUEgM2X>_-X=v7+2$No%ALme`C29b)J67D>Ux2LVft|DQF$nOlNCpl?j14p)J z293xFF;{)>2rdZHI9FlZGuY6>VE1b z__#0F#-OMNM^Dnkfimp>k%quO(ulG%upm>M+TRNu2NDCxVWI2~xhV$Qr2)bRSsH4Y+`XXZad%Z2e`aCm&WRJq!qc}F#i%zLPL0RXNGORVZlJi1LP zaeu4#(#hA(t2De=o~cIoqS^^~XyfiH8&fgfF_{^VGP3`;AA=nueeiyz3MN~(sHBMx zy~y=0uq`Aj=Mj_kNKR6YfRj?hPwQ@1n=c$!+k8gw2%(Y&C(a%MLutXfb1A@Z{S zA;~NWNH@1u=k5O1Ce`Nra(xUG6Vd1VR9Vi5j(Wq+oqb3+|mhU3PpI!nT@U1zcQvVxON(*%>A4Yt-Aj zQPUB{%e{h_9!pILVtsEEt|0ph0aU@P!8EcsZqVD##c%UC@EC)7iBD|F&&Fu6u-t2N zMe~3}6MnH1?!3%rM_5J($Z#`ES3+9H*!j9P+iD@RhufT1crj~;4^K9vAtZFxxuVu2 zY;((hPvM7$)3;Y^pNPToRdkJ;L+YP*m>hOk0qQLrD>jBl2L&YFlLV#FPg!s;gl^kZ zbBiFkRbgtrx1^_IE+wR+9FnxeKM+6R7Ny?A`(l7br5aWRCm}Dz|9$jG*A?_-uyL<1 zI-(*+gIM_36P70%?v5eR}Ngav*lJcVDvtpk=WLPbp z^Yk{PW8}O`tM)-#BfB)QzR2=Gju4KfBqkqAimzxEf^#>L$Nuqt85c9*PK!dBx;+); zKj{$EU}v$w{*o4uaH-GEusNJ5DzWJDxg+!+Rl~R7-%j{_0F^*~JT0Js7MT|{i zzLz+Sw~6$30!Wi2WEEBN6*3soL-^2Jg#BOj|3~uw97Z2%G!$9=jOybsSQ8&d>scU8h0){d;Xtu{QDdQe3<2S0ll5F|5r}bG%$&A(k-^% zEdL+jL1kN_F2Fms%NwKdXZm17K6X~zko)OIn<4UB4WV_^AG?ELd!`71zi&}!d(&KI zK-UBjRqSlb=S%I8`AB2c#qFK9ej^6f8EaRMr6a}){m={TKas!U8b0YA9(bXz{Jxen zDEo*f_p^J8{$r@p=(P&C26orOtHkLJa<3_iAr)(tZB zCiCxYPJ$%xfjYd8-AV1L{Pw~zMKy7Xr_+j)4gv-j_F3(!a$55D%9c zE|+ET<#irUYOES6v}DLW4 zpe>ZJtDjZ^B!DbQideBCYgik0s#vuiZ8N=@IvcnTYLjPeO~_PU*G-j+~+y>%$b=p^P4ks?rEX+9XV?if)>S( zh0OB#bz$sdq4(3%dq}d5IdUUav819bP+HI>ABnj)^{X-e8aqoJOifY|#4OQBl$+eB zmdmTWi9~LOw76iEbGqS>_R=IXpan^pW1lnR(J>ll==qkvsou(7hMqlm`tl z3-7EB7n#=+z#^gh;6ag_eGdi4c_+Ky4k^q{q6$LZiuxZF22lN(0H0A0lU`4F;PjU5 zKy4tD_*z*i0&y-RyAX(x%4tkn zp$=OZ*LvW$ZHHD_bMqE4Q?%R^SJLBHhnBmSc1}zO#xoEpO&uaJeypz= zolpg}NtA6)2_VHPR{B2hyG=Q18a)**tk<{FZoW-_N;SvSLZYYwS#Z=L3)?ZstEbqm zQ5M-uj76?=|B*QqTWy96pQ;-%{)jU^f8NMvpKo3D5JODpl5lH0W#3?DqGX%WPk&A313f4 z@ey0a-tB$`$m$08a}pB#jTY>RSmL2wR-%93*;FqL)AW*&dNZofG|vzbtTbf;o`M(h zeWbw2Z5kr~=o%!Eo0ASO30PY1RCk2f2B$Q`7GCThVbhfxhg6BZq3I=~vGd>QvO(-`4$_?e+=Rt$FXuk!JBmUpEx&*~7iL z0ZT+5Q2XxaN#S7m-=JtMie;#w$aZc{+1%H_COWpm9QSO2&lCNb<#gjH`y}Z3#|%_=uOa&6?3u=}(sB2XPm4}3 zyd;0hQpBh3%v)JpUb;Q8VKRAzR=_ZO6IY_-LaYRxkXEW(Z<;YXDwvOHzVxY(ca_Oue z0BO|hz6$)CEKU={O)#KKI;-S-9voCR(#6R>;TYc$XDKtc*=?#3k(HTvwfi$?9Q=ac z|0e7~;j3JNbc}g@6~q`0x+0iB&-K~1t3sQN|1+m@&q1kL$7=+5RcnY zyo(-nN+4s2i{-je1xm;oqiX@Wf*m@bsI`p}8YKy|BtpkyETL=jjPvu@W)l|(rOP=>i8R9P6cLu}Db?;XDa#Q#hf$Rz_w8|hqgTIsJ}j3oU0V#3 ztl&&YL5*(f&`A@t{mPOCTEXQr127IB*U;c^&`X4r{(?rHE*Qk0uEVCZ$l@SmqQNqK zLWh==%EL+VVd^8Lu#k1}Z_@dnC$pP2^Jmnp^1Ms_5Pg{?QSc<%zeMwUqKLUGsELhk}xM3N$MgC5UWt|V+WJa zhEawjh~)_GAFeD~RNq7ZVTyGTG`cGRC}o9~e2t7DAEsnJ&I8QhyP++q0}T8db`~Sw zzhR^h=o~YUcvUL8lU$i)*i;aVd6hlN;K6~Is(=XP7d$Q6P#N4cLzQilL1mXbj6}7& zU|Lmv^VQ78KB5o+JKUz|fsI^6Z3gSw&==|j91UBu6ms0)H<=IKeVG?gFh-$b4ShOb zY2*}fckqC+RuC+LhEpxZz7K6RFY_$*Cu5LvI7NinRGq9}}pyp8>DVQjK~vNQKO< z-!GkK|9bBdFgrAiJ29tQIpSXryg{DIRx+qkddB2|AKc<>Db#;^#*1wh$tV;ppb-E{ zbw+UUv}2rJ0g_!3B2X3?A#sG{vOdk8TE6f=s&2tzsTX@JQB$jGm}WXGhZQ)Bh)J6*v2*|`~=UUed=9Yus4tM^oR@Xj*tgN8n*+Mw}<&EF~d}VqaK9=rEN$1N6 z!mEAW!2W2m;jdVOvc88VKg9|ih=UX|-TX2*@IaEd4Yd!k<<(9^Wj~2X=6DUsYbt#! 
diff --git a/scalnet/.travis.yml b/scalnet/.travis.yml
deleted file mode 100644
index b6879f83d..000000000
--- a/scalnet/.travis.yml
+++ /dev/null
@@ -1,11 +0,0 @@
-language: scala
-scala: dummy
-jdk: oraclejdk8
-
-script:
-  - sbt ++2.11.12 clean compile test
-  - sbt ++2.10.7 clean compile test
-  - ./change-scala-versions.sh "2.11"
-  - mvn -q clean package
-  - ./change-scala-versions.sh "2.10"
-  - mvn -q clean package

From 966642c1c9b0fd60ef98b8d7a0f6804ca45d7794 Mon Sep 17 00:00:00 2001
From: raver119
Date: Sat, 30 May 2020 21:13:33 +0300
Subject: [PATCH 17/21] Rng tweaks (#479)

* initial commit

Signed-off-by: raver119@gmail.com

* Java Random.getFloat()/getDouble() methods mapped to C++

Signed-off-by: raver119@gmail.com

* Refactored relativeT for float and double data types.

Signed-off-by: shugeo

* Refactored float relativeT method.

Signed-off-by: shugeo

* Refactored relativeT

Signed-off-by: shugeo

* - additional rng tests
- float/double uniform generation methods slightly changed

Signed-off-by: raver119@gmail.com

* use bitset instead of manual conversion

Signed-off-by: raver119@gmail.com

* rollback valueBits changes

Signed-off-by: raver119@gmail.com

* remove unused shapelist

Signed-off-by: raver119@gmail.com

* update KMeans ground truth test

Signed-off-by: raver119@gmail.com

* dedicated union to make MSVC happy

Signed-off-by: raver119

* minor tweaks

Signed-off-by: raver119

* .seh_savexmm workaround?

Signed-off-by: raver119

* don't use march=native in tests on windows

Signed-off-by: raver119

Co-authored-by: shugeo
---
 .../clustering/kmeans/KMeansTest.java         |   8 +-
 libnd4j/include/array/NDArray.hXX             |   6 +-
 libnd4j/include/graph/RandomGenerator.h       | 103 ++++++++++--------
 libnd4j/include/helpers/StringUtils.h         |  11 ++
 libnd4j/include/helpers/impl/BitwiseUtils.cpp |  22 ++--
 libnd4j/include/helpers/impl/StringUtils.cpp  |  26 +++++
 libnd4j/include/legacy/NativeOps.h            |   2 +
 libnd4j/include/legacy/cpu/NativeOps.cpp      |   8 ++
 libnd4j/include/legacy/cuda/NativeOps.cu      |   8 ++
 .../generic/images/resize_images.cpp          |   1 -
 libnd4j/include/types/u32.h                   |  40 +++++++
 libnd4j/tests_cpu/layers_tests/CMakeLists.txt |   4 +-
 libnd4j/tests_cpu/layers_tests/RNGTests.cpp   | 103 +++++++++++++++++-
 .../tests_cpu/layers_tests/StringTests.cpp    |  12 ++
 .../java/org/nd4j/nativeblas/NativeOps.java   |   2 +
 .../main/java/org/nd4j/rng/NativeRandom.java  |   8 +-
 .../linalg/jcublas/rng/CudaNativeRandom.java  |  10 ++
 .../java/org/nd4j/nativeblas/Nd4jCuda.java    |  13 ++-
 .../cpu/nativecpu/rng/CpuNativeRandom.java    |  10 ++
 .../java/org/nd4j/nativeblas/Nd4jCpu.java     |  13 ++-
 .../java/org/nd4j/linalg/rng/RandomTests.java |  22 ++
 21 files changed, 350 insertions(+), 82 deletions(-)
 create mode 100644 libnd4j/include/types/u32.h
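The central change in this patch is how uniform floats are produced: rather than dividing an integer draw by its maximum value, RandomGenerator now assembles an IEEE-754 number directly from random bits, type-punned through the u32/u64 unions (a dedicated union rather than a cast, "to make MSVC happy" per the log above). Setting the exponent field to 127 (bit pattern 0x3f800000) and filling the 23 mantissa bits with random data yields a value in [1.0, 2.0); subtracting 1.0f maps it onto [0.0, 1.0), so the result can never reach 1.0. A minimal standalone sketch of the idea (names are illustrative, not from this patch):

    #include <cstdint>
    #include <cstdio>

    union F32Bits {              // stand-in for the sd::u32 union added below
        uint32_t _u32;
        float    _f32;
    };

    // Map 32 random bits onto a uniform float in [0.0f, 1.0f).
    static float bitsToUniform(uint32_t randomBits) {
        F32Bits u;
        u._u32 = 0x3f800000 | (randomBits >> 9);  // exponent 127 + 23 random mantissa bits -> [1, 2)
        return u._f32 - 1.0f;                     // shift down to [0, 1)
    }

    int main() {
        printf("%.8f\n", bitsToUniform(0u));           // 0.00000000
        printf("%.8f\n", bitsToUniform(0xffffffffu));  // 0.99999988, strictly below 1.0
    }

Reading the other union member is the same type-punning the patch itself relies on; the upper bound being strictly below 1.0 is what the new RNGTests below assert.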
diff --git a/deeplearning4j/deeplearning4j-nearestneighbors-parent/nearestneighbor-core/src/test/java/org/deeplearning4j/clustering/kmeans/KMeansTest.java b/deeplearning4j/deeplearning4j-nearestneighbors-parent/nearestneighbor-core/src/test/java/org/deeplearning4j/clustering/kmeans/KMeansTest.java
index abbfa04bc..e01274a71 100644
--- a/deeplearning4j/deeplearning4j-nearestneighbors-parent/nearestneighbor-core/src/test/java/org/deeplearning4j/clustering/kmeans/KMeansTest.java
+++ b/deeplearning4j/deeplearning4j-nearestneighbors-parent/nearestneighbor-core/src/test/java/org/deeplearning4j/clustering/kmeans/KMeansTest.java
@@ -273,10 +273,10 @@ public class KMeansTest extends BaseDL4JTest {
         ClusterSet clusterSet = kMeansClustering.applyTo(points);
 
         double[] centroid1 = {2.44e8, 2.71e8, 2.98e8, 3.25e8};
-        double[] centroid2 = {5.14e8, 5.41e8, 5.68e8, 5.95e8};
-        double[] centroid3 = {1000000.0, 2.8E7, 5.5E7, 8.2E7};
-        double[] centroid4 = {7.03E8, 7.3E8, 7.57E8, 7.84E8};
-        double[] centroid5 = {3.79E8, 4.06E8, 4.33E8, 4.6E8};
+        double[] centroid2 = {1000000.0, 2.8E7, 5.5E7, 8.2E7};
+        double[] centroid3 = {5.95E8, 6.22e8, 6.49e8, 6.76e8};
+        double[] centroid4 = {3.79E8, 4.06E8, 4.33E8, 4.6E8};
+        double[] centroid5 = {5.5E7, 8.2E7, 1.09E8, 1.36E8};
 
         assertArrayEquals(centroid1, clusterSet.getClusters().get(0).getCenter().getArray().toDoubleVector(), 1e-4);
         assertArrayEquals(centroid2, clusterSet.getClusters().get(1).getCenter().getArray().toDoubleVector(), 1e-4);

diff --git a/libnd4j/include/array/NDArray.hXX b/libnd4j/include/array/NDArray.hXX
index 773d845ab..9e48b05de 100644
--- a/libnd4j/include/array/NDArray.hXX
+++ b/libnd4j/include/array/NDArray.hXX
@@ -1671,11 +1671,11 @@ void NDArray::printLinearBuffer() const {
     }
     else if(this->dataType() == sd::DataType::FLOAT32) {
         for(Nd4jLong e = 0; e < len; e++)
-            printf("%.3f, ", this->bufferAsT<float>()[e * ews]);
+            printf("%.8f, ", this->bufferAsT<float>()[e * ews]);
     }
     else if(this->dataType() == sd::DataType::DOUBLE) {
         for(Nd4jLong e = 0; e < len; e++)
-            printf("%.3f, ", this->bufferAsT<double>()[e * ews]);
+            printf("%.8f, ", this->bufferAsT<double>()[e * ews]);
     }
     else
         throw std::invalid_argument("NDArray::printLinearBuffer: not implemented yet for this data type !");
@@ -1773,7 +1773,7 @@ void NDArray::printIndexedBuffer(const char* msg, Nd4jLong limit) const {
         if (this->isZ())
             printf("%lld\n", this->e<Nd4jLong>(0));
         else if (this->isR())
-            printf("%f\n", this->e<float>(0));
+            printf("%.8f\n", this->e<float>(0));
         else if (this->isB()) {
             printf("%s\n", this->e<bool>(0)?"true":"false");
         }
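The print-precision bump above is not cosmetic: at three decimals, two single-precision values that differ only in their low mantissa bits print identically, which hides RNG regressions in test output. A tiny illustration (the values are hypothetical, not from the patch):

    #include <cstdio>

    int main() {
        float a = 0.12345678f;   // hypothetical RNG output
        float b = 0.12349999f;   // a nearby but different output
        printf("%.3f vs %.3f\n", a, b);  // prints "0.123 vs 0.123" - indistinguishable
        printf("%.8f vs %.8f\n", a, b);  // the difference shows from the 5th decimal on
    }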
diff --git a/libnd4j/include/graph/RandomGenerator.h b/libnd4j/include/graph/RandomGenerator.h
index ef06c345d..407993a09 100644
--- a/libnd4j/include/graph/RandomGenerator.h
+++ b/libnd4j/include/graph/RandomGenerator.h
@@ -22,6 +22,7 @@
 #define LIBND4J_GRAPH_RNG_H
 
 #include
+#include
 #include
 #include
 #include
@@ -29,6 +30,7 @@
 #include
 #include
 #include
+#include <types/u32.h>
 
 #ifdef __CUDACC__
 #include
@@ -79,9 +81,9 @@ namespace sd {
          */
         static FORCEINLINE Nd4jLong currentMilliseconds();
 
-
-        FORCEINLINE _CUDA_HD uint32_t xoroshiro32(Nd4jLong index);
-        FORCEINLINE _CUDA_HD uint64_t xoroshiro64(Nd4jLong index);
+    public:
+        FORCEINLINE _CUDA_HD uint32_t xoroshiro32(uint64_t index);
+        FORCEINLINE _CUDA_HD uint64_t xoroshiro64(uint64_t index);
 
         /**
          * This method returns integer value between 0 and MAX_UINT
          */
@@ -119,7 +121,7 @@ namespace sd {
         FORCEINLINE _CUDA_HD int relativeInt(Nd4jLong index);
         FORCEINLINE _CUDA_HD Nd4jLong relativeLong(Nd4jLong index);
 
-        FORCEINLINE _CUDA_HD void rewindH(Nd4jLong steps);
+        FORCEINLINE _CUDA_HD void rewindH(uint64_t steps);
 
         /**
          * These methods set up only node states, with non-changed root ones
@@ -172,6 +174,24 @@ namespace sd {
         return v;
     }
 
+    template <>
+    _CUDA_HD FORCEINLINE float RandomGenerator::relativeT<float>(Nd4jLong index) {
+        u32 u;
+        u._u32 = (0x3f800000 | (this->xoroshiro32(index) >> 9));
+        return u._f32 - 1.0f;
+    }
+
+    template <>
+    _CUDA_HD FORCEINLINE double RandomGenerator::relativeT<double>(Nd4jLong index) {
+#ifdef __DOUBLE_RNG__
+        u64 u;
+        u._ulong = ((UINT64_C(0x3FF) << 52) | (this->xoroshiro64(index) >> 12));
+        return u._double - 1.0;
+#else
+        return (double) relativeT<float>(index);
+#endif
+    }
+
     template <>
     _CUDA_HD FORCEINLINE uint64_t RandomGenerator::relativeT<uint64_t>(Nd4jLong index) {
         return this->xoroshiro64(index);
@@ -184,16 +204,14 @@ namespace sd {
 
     template <>
     _CUDA_HD FORCEINLINE int RandomGenerator::relativeT<int>(Nd4jLong index) {
-        auto x = this->relativeT<uint32_t>(index);
-        auto r = static_cast<int>(x % DataTypeUtils::max<int>());
-        return r;
+        auto r = relativeT<uint32_t>(index);
+        return r <= DataTypeUtils::max<int>() ? r : r % DataTypeUtils::max<int>();
     }
 
     template <>
     _CUDA_HD FORCEINLINE Nd4jLong RandomGenerator::relativeT<Nd4jLong>(Nd4jLong index) {
-        auto x = this->relativeT<uint64_t>(index);
-        auto r = static_cast<Nd4jLong>(x % DataTypeUtils::max<Nd4jLong>());
-        return r;
+        auto r = relativeT<uint64_t>(index);
+        return r <= DataTypeUtils::max<Nd4jLong>() ? r : r % DataTypeUtils::max<Nd4jLong>();
     }
 
     template <typename T>
@@ -220,24 +238,18 @@ namespace sd {
 
     template <typename T>
     _CUDA_HD FORCEINLINE T RandomGenerator::relativeT(Nd4jLong index) {
         // This is default implementation for floating point types
-#ifdef __DOUBLE_RNG__
-        auto i = static_cast<double>(this->relativeT<uint64_t>(index));
-        auto r = i / static_cast<double>(DataTypeUtils::max<uint64_t>());
-        return static_cast<T>(r);
-#else
-        auto i = static_cast<float>(this->relativeT<uint32_t>(index));
-        auto r = i / static_cast<float>(DataTypeUtils::max<uint32_t>());
-        return static_cast<T>(r);
-#endif
+        return static_cast<T>(relativeT<float>(index));
     }
 
     _CUDA_HD FORCEINLINE int RandomGenerator::relativeInt(Nd4jLong index) {
-        return relativeT<int>(index);
+        auto r = relativeT<uint32_t>(index);
+        return r <= DataTypeUtils::max<int>() ? r : r % DataTypeUtils::max<int>();
     }
 
     _CUDA_HD FORCEINLINE Nd4jLong RandomGenerator::relativeLong(Nd4jLong index) {
-        return relativeT<Nd4jLong>(index);
+        auto r = relativeT<uint64_t>(index);
+        return r <= DataTypeUtils::max<Nd4jLong>() ? r : r % DataTypeUtils::max<Nd4jLong>();
     }
 
     //////
@@ -249,23 +261,12 @@ namespace sd {
         return (x << k) | (x >> (64 - k));
     }
 
-    _CUDA_HD FORCEINLINE uint32_t RandomGenerator::xoroshiro32(Nd4jLong index) {
-
-        auto s0 = _rootState._ulong;
-        auto s1 = _nodeState._ulong;
-
-        // xor by idx
-        s0 |= ((index + 2) * (s1 + 24243287));
-        s1 ^= ((index + 2) * (s0 + 723829));
-
-        unsigned long val = 0;
-        val = s1 ^ s0;
-        int* pHalf = reinterpret_cast<int*>(&val);
-
-        return rotl(*pHalf * 0x9E3779BB, 5) * 5;
+    static FORCEINLINE _CUDA_HD uint32_t next(uint32_t s0, uint32_t s1, uint32_t s2, uint32_t s3) {
+        const uint32_t result = rotl(s0 + s3, 7) + s0;
+        return result;
     }
 
-    _CUDA_HD FORCEINLINE uint64_t RandomGenerator::xoroshiro64(Nd4jLong index) {
+    _CUDA_HD FORCEINLINE uint32_t RandomGenerator::xoroshiro32(uint64_t index) {
         auto s0 = _rootState._ulong;
         auto s1 = _nodeState._ulong;
@@ -273,23 +274,29 @@
         s0 |= ((index + 2) * (s1 + 24243287));
         s1 ^= ((index + 2) * (s0 + 723829));
 
-        // since we're not modifying state - do rotl step right here
-        s1 ^= s0;
-        s0 = rotl(s0, 55) ^ s1 ^ (s1 << 14);
-        s1 = rotl(s1, 36);
+        unsigned long val = 0;
+        val = s1 ^ s0;
+        int* pHalf = reinterpret_cast<int*>(&val);
 
-        return s0 + s1;
+        return rotl(*pHalf * 0x9E3779BB, 5) * 5;
     }
 
-    _CUDA_HD FORCEINLINE void RandomGenerator::rewindH(Nd4jLong steps) {
-        auto s0 = _nodeState._du32._v0;
-        auto s1 = _nodeState._du32._v1;
+    _CUDA_HD FORCEINLINE uint64_t RandomGenerator::xoroshiro64(uint64_t index) {
+        uint64_t upper = ((uint64_t) xoroshiro32(index)) << 32;
+        uint32_t lower = xoroshiro32(sd::math::nd4j_rotl<uint64_t>(index, 32));
+        return upper + lower;
+    }
 
-        s1 ^= s0;
-        _nodeState._du32._v0 = rotl(s0, 26) ^ s1 ^ (s1 << 9); // a, b
-        _nodeState._du32._v1 = rotl(s1, 13); // c
+    _CUDA_HD FORCEINLINE void RandomGenerator::rewindH(uint64_t steps) {
+        // we only update node state, if any
+        auto s0 = _nodeState._du32._v0;
+        auto s1 = _nodeState._du32._v1;
 
-        _nodeState._long ^= (steps ^ 0xdeadbeef);
+        s1 ^= s0;
+        _nodeState._du32._v0 = rotl(s0, 26) ^ s1 ^ (s1 << 9); // a, b
+        _nodeState._du32._v1 = rotl(s1, 13); // c
+
+        _nodeState._long ^= (steps ^ 0xdeadbeef);
     }
 }
}
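Note the recurring clamp in the integer paths above: a raw 32-bit draw, viewed as unsigned, can exceed the signed maximum, so values above the limit are folded back with a modulo while smaller values pass through untouched (replacing the unconditional modulo of the old code). Restated standalone, with std::numeric_limits playing the role of DataTypeUtils::max (an assumed equivalence for illustration):

    #include <cstdint>
    #include <limits>

    // Fold an unsigned 32-bit draw into the non-negative int range,
    // mirroring the "r <= max ? r : r % max" pattern in the diff above.
    static int foldToInt(uint32_t r) {
        constexpr uint32_t kMax = static_cast<uint32_t>(std::numeric_limits<int>::max());
        return static_cast<int>(r <= kMax ? r : r % kMax);
    }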
diff --git a/libnd4j/include/helpers/StringUtils.h b/libnd4j/include/helpers/StringUtils.h
index ef9586637..e5f9f2990 100644
--- a/libnd4j/include/helpers/StringUtils.h
+++ b/libnd4j/include/helpers/StringUtils.h
@@ -44,6 +44,14 @@ namespace sd {
             return os.str();
         }
 
+        /**
+         * These methods convert integer values to string with 0s and 1s
+         * @param value
+         * @return
+         */
+        template <typename T>
+        static std::string bitsToString(T value);
+
         /**
          * This method just concatenates error message with a given graphId
          * @param message
@@ -137,6 +145,9 @@ namespace sd {
          * @return boolean status
          */
         static bool u32StringToU8String(const std::u32string& u32, std::string& u8);
+
+        template <typename T>
+        static std::string vectorToString(const std::vector<T>& vec);
    };
}

diff --git a/libnd4j/include/helpers/impl/BitwiseUtils.cpp b/libnd4j/include/helpers/impl/BitwiseUtils.cpp
index e3f4ce92a..9bd3fa8cf 100644
--- a/libnd4j/include/helpers/impl/BitwiseUtils.cpp
+++ b/libnd4j/include/helpers/impl/BitwiseUtils.cpp
@@ -49,31 +49,29 @@ namespace sd {
             return -1;
     }
 
-
     std::vector<int> BitwiseUtils::valueBits(int holder) {
         std::vector<int> bits;
 
         if (holder == 0) {
-           for (int e = 0; e < 32; e++)
-               bits.emplace_back(0);
+            for (int e = 0; e < 32; e++)
+                bits.emplace_back(0);
 
-           return bits;
+            return bits;
         }
 
-
 #ifdef REVERSE_BITS
         for (int e = 32; e >= 0; e--) {
 #else
         for (int e = 0; e < 32; e++) {
 #endif
-           bool isOne = (holder & 1 << e) != 0;
+            bool isOne = (holder & 1 << e) != 0;
 
-           if (isOne)
-               bits.emplace_back(1);
-           else
-               bits.emplace_back(0);
-       }
+            if (isOne)
+                bits.emplace_back(1);
+            else
+                bits.emplace_back(0);
+        }
 
-       return bits;
+        return bits;
     }
 
     sd::ByteOrder BitwiseUtils::asByteOrder() {

diff --git a/libnd4j/include/helpers/impl/StringUtils.cpp b/libnd4j/include/helpers/impl/StringUtils.cpp
index 5ac2fd8cc..757def763 100644
--- a/libnd4j/include/helpers/impl/StringUtils.cpp
+++ b/libnd4j/include/helpers/impl/StringUtils.cpp
@@ -21,7 +21,9 @@
 //
 
 #include
+#include <bitset>
 #include
+#include
 
 namespace sd {
     static FORCEINLINE bool match(const uint8_t *haystack, const uint8_t *needle, uint64_t length) {

         return true;
     }
 
+    template <typename T>
+    std::string StringUtils::bitsToString(T value) {
+        return std::bitset<sizeof(T) * 8>(value).to_string();
+    }
+
+template std::string StringUtils::bitsToString(int value);
+template std::string StringUtils::bitsToString(uint32_t value);
+template std::string StringUtils::bitsToString(Nd4jLong value);
+template std::string StringUtils::bitsToString(uint64_t value);
+
     uint64_t StringUtils::countSubarrays(const void *vhaystack, uint64_t haystackLength, const void *vneedle, uint64_t needleLength) {
         auto haystack = reinterpret_cast<const uint8_t*>(vhaystack);
         auto needle = reinterpret_cast<const uint8_t*>(vneedle);
@@ -155,4 +168,17 @@
         return true;
     }
 
+    template <typename T>
+    std::string StringUtils::vectorToString(const std::vector<T>& vec) {
+        std::string result;
+        for (auto v:vec)
+            result += valueToString(v);
+
+        return result;
+    }
+
+    template std::string StringUtils::vectorToString(const std::vector<int>& vec);
+    template std::string StringUtils::vectorToString(const std::vector<Nd4jLong>& vec);
+    template std::string StringUtils::vectorToString(const std::vector<uint32_t>& vec);
+    template std::string StringUtils::vectorToString(const std::vector<uint64_t>& vec);
}
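The new StringUtils::bitsToString helper is exercised by test_bit_string_1 in StringTests further down; std::bitset does the heavy lifting. A self-contained sketch of the same pattern (the bit width sizeof(T) * 8 mirrors the reconstruction above and matches the 32-character expectation in that test):

    #include <bitset>
    #include <cstdint>
    #include <iostream>
    #include <string>

    template <typename T>
    std::string bitsToString(T value) {
        // render the integer MSB-first as a string of '0'/'1' characters
        return std::bitset<sizeof(T) * 8>(value).to_string();
    }

    int main() {
        std::cout << bitsToString<uint32_t>(1u) << "\n";  // 31 zeros followed by a single 1
    }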
diff --git a/libnd4j/include/legacy/NativeOps.h b/libnd4j/include/legacy/NativeOps.h
index 17affd1c3..c72b0d535 100755
--- a/libnd4j/include/legacy/NativeOps.h
+++ b/libnd4j/include/legacy/NativeOps.h
@@ -1606,6 +1606,8 @@ ND4J_EXPORT OpaqueRandomGenerator* createRandomGenerator(Nd4jLong rootSeed = 0,
 ND4J_EXPORT Nd4jLong getRandomGeneratorRootState(OpaqueRandomGenerator* ptr);
 ND4J_EXPORT Nd4jLong getRandomGeneratorNodeState(OpaqueRandomGenerator* ptr);
 ND4J_EXPORT void setRandomGeneratorStates(OpaqueRandomGenerator* ptr, Nd4jLong rootSeed = 0, Nd4jLong nodeSeed = 0);
+ND4J_EXPORT float getRandomGeneratorRelativeFloat(OpaqueRandomGenerator* ptr, Nd4jLong index);
+ND4J_EXPORT double getRandomGeneratorRelativeDouble(OpaqueRandomGenerator* ptr, Nd4jLong index);
 ND4J_EXPORT int getRandomGeneratorRelativeInt(OpaqueRandomGenerator* ptr, Nd4jLong index);
 ND4J_EXPORT Nd4jLong getRandomGeneratorRelativeLong(OpaqueRandomGenerator* ptr, Nd4jLong index);
 ND4J_EXPORT void deleteRandomGenerator(OpaqueRandomGenerator* ptr);

diff --git a/libnd4j/include/legacy/cpu/NativeOps.cpp b/libnd4j/include/legacy/cpu/NativeOps.cpp
index 799351ccc..ae8a22a6a 100644
--- a/libnd4j/include/legacy/cpu/NativeOps.cpp
+++ b/libnd4j/include/legacy/cpu/NativeOps.cpp
@@ -2832,6 +2832,14 @@ void setRandomGeneratorStates(sd::graph::RandomGenerator* ptr, Nd4jLong rootSeed
     ptr->setStates(rootSeed, nodeSeed);
 }
 
+float getRandomGeneratorRelativeFloat(sd::graph::RandomGenerator* ptr, Nd4jLong index) {
+    return ptr->relativeT<float>(index);
+}
+
+double getRandomGeneratorRelativeDouble(sd::graph::RandomGenerator* ptr, Nd4jLong index) {
+    return ptr->relativeT<double>(index);
+}
+
 int getRandomGeneratorRelativeInt(sd::graph::RandomGenerator* ptr, Nd4jLong index) {
     return ptr->relativeInt(index);
 }

diff --git a/libnd4j/include/legacy/cuda/NativeOps.cu b/libnd4j/include/legacy/cuda/NativeOps.cu
index 8be9b3bfd..465029207 100755
--- a/libnd4j/include/legacy/cuda/NativeOps.cu
+++ b/libnd4j/include/legacy/cuda/NativeOps.cu
@@ -3515,6 +3515,14 @@ void setRandomGeneratorStates(sd::graph::RandomGenerator* ptr, Nd4jLong rootSeed
     ptr->setStates(rootSeed, nodeSeed);
 }
 
+float getRandomGeneratorRelativeFloat(sd::graph::RandomGenerator* ptr, Nd4jLong index) {
+    return ptr->relativeT<float>(index);
+}
+
+double getRandomGeneratorRelativeDouble(sd::graph::RandomGenerator* ptr, Nd4jLong index) {
+    return ptr->relativeT<double>(index);
+}
+
 int getRandomGeneratorRelativeInt(sd::graph::RandomGenerator* ptr, Nd4jLong index) {
     return ptr->relativeInt(index);
 }

diff --git a/libnd4j/include/ops/declarable/generic/images/resize_images.cpp b/libnd4j/include/ops/declarable/generic/images/resize_images.cpp
index c3f9ae8f1..18d048450 100644
--- a/libnd4j/include/ops/declarable/generic/images/resize_images.cpp
+++ b/libnd4j/include/ops/declarable/generic/images/resize_images.cpp
@@ -81,7 +81,6 @@ namespace sd {
     }
 
     DECLARE_SHAPE_FN(resize_images) {
-        auto shapeList = SHAPELIST();
         auto in = inputShape->at(0);
 
         Nd4jLong* outputShape;

diff --git a/libnd4j/include/types/u32.h b/libnd4j/include/types/u32.h
new file mode 100644
index 000000000..115b207cb
--- /dev/null
+++ b/libnd4j/include/types/u32.h
@@ -0,0 +1,40 @@
+/*******************************************************************************
+ * Copyright (c) 2020 Konduit K.K.
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Apache License, Version 2.0 which is available at
+ * https://www.apache.org/licenses/LICENSE-2.0.
+ * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@protonmail.com +// +#ifndef SD_U32_H +#define SD_U32_H + +#include +#include + + +namespace sd { + union u32 { + bool _bool; + int8_t _s8; + uint8_t _u8; + int16_t _s16; + uint16_t _u16; + int32_t _s32; + uint32_t _u32; + float _f32; + }; +} + +#endif \ No newline at end of file diff --git a/libnd4j/tests_cpu/layers_tests/CMakeLists.txt b/libnd4j/tests_cpu/layers_tests/CMakeLists.txt index 5ae202542..563bf58f6 100644 --- a/libnd4j/tests_cpu/layers_tests/CMakeLists.txt +++ b/libnd4j/tests_cpu/layers_tests/CMakeLists.txt @@ -45,7 +45,7 @@ if (APPLE) set(CMAKE_CXX_FLAGS " -fPIC -D__APPLE_OS__=true") elseif(WIN32) if (SD_CPU) - set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -fPIC -march=native -mtune=native -O3") + set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -fPIC -mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -msse4 -mavx -mavx2 -O3") endif() if (SD_CPU AND LINUX) @@ -61,7 +61,7 @@ else() endif() if (SD_CPU AND SD_SANITIZE) - set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS} -fsanitize=address") + set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fsanitize=address") else() # CUDA? endif() diff --git a/libnd4j/tests_cpu/layers_tests/RNGTests.cpp b/libnd4j/tests_cpu/layers_tests/RNGTests.cpp index 37facc43c..469cc77be 100644 --- a/libnd4j/tests_cpu/layers_tests/RNGTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/RNGTests.cpp @@ -49,8 +49,8 @@ public: //_bufferB = new Nd4jLong[100000]; //_rngA = (sd::random::RandomBuffer *) initRandom(nullptr, _seed, 100000, (Nd4jPointer) _bufferA); //_rngB = (sd::random::RandomBuffer *) initRandom(nullptr, _seed, 100000, (Nd4jPointer) _bufferB); - _rngA.setStates(_seed, _seed); - _rngB.setStates(_seed, _seed); + _rngA.setStates(_seed * 0xDEADBEEF * 13, _seed * 0xDEADBEEF * 7); + _rngB.setStates(_seed * 0xDEADBEEF * 13, _seed * 0xDEADBEEF * 7); nexp0->assign(-1.0f); nexp1->assign(-2.0f); nexp2->assign(-3.0f); @@ -204,6 +204,9 @@ TEST_F(RNGTests, Test_Uniform_1) { RandomLauncher::fillUniform(LaunchContext::defaultContext(), _rngA, &x0, 1.0f, 2.0f); RandomLauncher::fillUniform(LaunchContext::defaultContext(), _rngB, &x1, 1.0f, 2.0f); + x0.printLinearBuffer(); + x1.printLinearBuffer(); + ASSERT_TRUE(x0.equalsTo(&x1)); ASSERT_FALSE(x0.equalsTo(nexp0)); @@ -212,10 +215,82 @@ TEST_F(RNGTests, Test_Uniform_1) { for (int e = 0; e < x0.lengthOf(); e++) { float v = x0.e(e); + nd4j_printf("%f\n", v); ASSERT_TRUE(v >= 1.0f && v <= 2.0f); } } +TEST_F(RNGTests, Test_Uniform_10) { + auto x = NDArrayFactory::create('c', {10000, 10000}); + auto z = NDArrayFactory::create(0.0f); + + RandomLauncher::fillUniform(LaunchContext::defaultContext(), _rngA, &x, 0.0f, 1.0f); + + sd::ops::reduce_max op; + auto status = op.execute({&x}, {&z}); + ASSERT_EQ(Status::OK(), status); + + ASSERT_LT(z.t(0), 1.0f); +} + +TEST_F(RNGTests, Test_Uniform_10_double) { + auto x = NDArrayFactory::create('c', {10000, 10000}); + auto z = NDArrayFactory::create(0.0f); + + RandomLauncher::fillUniform(LaunchContext::defaultContext(), _rngA, &x, 0.0f, 1.0f); + + sd::ops::reduce_max op; + auto status = 
op.execute({&x}, {&z}); + ASSERT_EQ(Status::OK(), status); + + ASSERT_LT(z.t(0), 1.0); +} + +TEST_F(RNGTests, Test_Uniform_11) { + uint32_t max = 0; + for (int e = 0; e < 100000000; e++) { + auto v = _rngA.xoroshiro32(e) >> 8; + if (v > max) + max = v; + } + + nd4j_printf("Max value: %i\n", (int) max); +} + +TEST_F(RNGTests, Test_Uniform_12) { + float max = -std::numeric_limits::infinity(); + float min = std::numeric_limits::infinity(); + for (int e = 0; e < 100000000; e++) { + auto v = _rngA.relativeT(e); + if (v > max) + max = v; + + if (v < min) + min = v; + } + + nd4j_printf("Max value: %.8f; Min value: %.8f\n", (float) max, (float) min); + ASSERT_LT(max, 1.0f); + ASSERT_GE(min, 0.0); +} + +TEST_F(RNGTests, Test_Uniform_13) { + double max = -std::numeric_limits::infinity(); + double min = std::numeric_limits::infinity(); + for (int e = 0; e < 100000000; e++) { + auto v = _rngA.relativeT(e); + if (v > max) + max = v; + + if (v < min) + min = v; + } + + nd4j_printf("Max value: %.8f; Min value: %.8f\n", (float) max, (float) min); + ASSERT_LT(max, 1.0); + ASSERT_GE(min, 0.0); +} + TEST_F(RNGTests, Test_Uniform_3) { auto x0 = NDArrayFactory::create('c', {1000000}); @@ -258,8 +333,8 @@ TEST_F(RNGTests, Test_Gaussian_1) { } TEST_F(RNGTests, Test_Gaussian_21) { - auto x0 = NDArrayFactory::create('c', {10, 10}); - auto x1 = NDArrayFactory::create('c', {10, 10}); + auto x0 = NDArrayFactory::create('c', {1000, 1000}); + auto x1 = NDArrayFactory::create('c', {1000, 1000}); RandomLauncher::fillGaussian(LaunchContext::defaultContext(), _rngA, &x0, 0.0f, 1.0f); RandomLauncher::fillGaussian(LaunchContext::defaultContext(), _rngB, &x1, 0.0f, 1.0f); @@ -983,6 +1058,26 @@ TEST_F(RNGTests, Test_UniformDistribution_04) { } +TEST_F(RNGTests, Test_UniformDistribution_05) { + auto x = NDArrayFactory::create('c', {2}, {10000, 10000}); + auto al = NDArrayFactory::create(0.f); + auto be = NDArrayFactory::create(1.f); + auto exp0 = NDArrayFactory::create('c', {10000, 10000}); + + + sd::ops::randomuniform op; + auto result = op.evaluate({&x, &al, &be}, {}, {},{}, {DataType::FLOAT32}); + ASSERT_EQ(Status::OK(), result.status()); + + auto z = result.at(0); + ASSERT_TRUE(exp0.isSameShape(z)); + ASSERT_FALSE(exp0.equalsTo(z)); + + sd::ops::reduce_max checkOp; + auto checkResult = checkOp.evaluate({z}); + checkResult[0]->printIndexedBuffer("Max on uniform with 0 to 1 on 100M cases is"); +} + namespace sd { namespace tests { static void fillList(Nd4jLong seed, int numberOfArrays, std::vector &shape, std::vector &list, sd::graph::RandomGenerator *rng) { diff --git a/libnd4j/tests_cpu/layers_tests/StringTests.cpp b/libnd4j/tests_cpu/layers_tests/StringTests.cpp index 272c410c7..41352246e 100644 --- a/libnd4j/tests_cpu/layers_tests/StringTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/StringTests.cpp @@ -25,6 +25,8 @@ #include #include "testlayers.h" #include +#include +#include using namespace sd; @@ -863,3 +865,13 @@ TEST_F(StringTests, Basic_cast_UTF8toUTF32) { ASSERT_EQ(u8, z0); ASSERT_EQ(u32, z1); } + +TEST_F(StringTests, test_bit_string_1) { + // check bits -> vector conversion first + auto vec = BitwiseUtils::valueBits(1); + + // check bits -> string conversion next; + auto str = StringUtils::bitsToString(1); + ASSERT_EQ(32, str.length()); + ASSERT_EQ(std::string("00000000000000000000000000000001"), str); +} \ No newline at end of file diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-native-api/src/main/java/org/nd4j/nativeblas/NativeOps.java 
b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-native-api/src/main/java/org/nd4j/nativeblas/NativeOps.java index c7789d7dc..ae9ff1e94 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-native-api/src/main/java/org/nd4j/nativeblas/NativeOps.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-native-api/src/main/java/org/nd4j/nativeblas/NativeOps.java @@ -1154,6 +1154,8 @@ public interface NativeOps { long getRandomGeneratorRootState(OpaqueRandomGenerator ptr); long getRandomGeneratorNodeState(OpaqueRandomGenerator ptr); void setRandomGeneratorStates(OpaqueRandomGenerator ptr, @Cast("Nd4jLong") long rootSeed/*=0*/, @Cast("Nd4jLong") long nodeSeed/*=0*/); + float getRandomGeneratorRelativeFloat(OpaqueRandomGenerator ptr, @Cast("Nd4jLong") long index); + double getRandomGeneratorRelativeDouble(OpaqueRandomGenerator ptr, @Cast("Nd4jLong") long index); int getRandomGeneratorRelativeInt(OpaqueRandomGenerator ptr, @Cast("Nd4jLong") long index); long getRandomGeneratorRelativeLong(OpaqueRandomGenerator ptr, @Cast("Nd4jLong") long index); void deleteRandomGenerator(OpaqueRandomGenerator ptr); diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-native-api/src/main/java/org/nd4j/rng/NativeRandom.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-native-api/src/main/java/org/nd4j/rng/NativeRandom.java index 563fe2e45..04f9c7499 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-native-api/src/main/java/org/nd4j/rng/NativeRandom.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-native-api/src/main/java/org/nd4j/rng/NativeRandom.java @@ -107,14 +107,10 @@ public abstract class NativeRandom implements Random { } @Override - public float nextFloat() { - return (float) nextInt() / (float) Integer.MAX_VALUE; - } + public abstract float nextFloat(); @Override - public double nextDouble() { - return (double) nextInt() / (double) Integer.MAX_VALUE; - } + public abstract double nextDouble(); @Override public double nextGaussian() { diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/rng/CudaNativeRandom.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/rng/CudaNativeRandom.java index edb5d291a..e5067c9c9 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/rng/CudaNativeRandom.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/rng/CudaNativeRandom.java @@ -81,6 +81,16 @@ public class CudaNativeRandom extends NativeRandom { return seed; } + @Override + public float nextFloat() { + return nativeOps.getRandomGeneratorRelativeFloat((OpaqueRandomGenerator)statePointer, currentPosition.getAndIncrement()); + } + + @Override + public double nextDouble() { + return nativeOps.getRandomGeneratorRelativeDouble((OpaqueRandomGenerator)statePointer, currentPosition.getAndIncrement()); + } + @Override public int nextInt() { return nativeOps.getRandomGeneratorRelativeInt((OpaqueRandomGenerator)statePointer, currentPosition.getAndIncrement()); diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCuda.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCuda.java index 59496d780..ad9503849 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCuda.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCuda.java @@ -3098,6 +3098,8 @@ public native @Cast("Nd4jLong") long 
getRandomGeneratorRootState(OpaqueRandomGen public native @Cast("Nd4jLong") long getRandomGeneratorNodeState(OpaqueRandomGenerator ptr); public native void setRandomGeneratorStates(OpaqueRandomGenerator ptr, @Cast("Nd4jLong") long rootSeed/*=0*/, @Cast("Nd4jLong") long nodeSeed/*=0*/); public native void setRandomGeneratorStates(OpaqueRandomGenerator ptr); +public native float getRandomGeneratorRelativeFloat(OpaqueRandomGenerator ptr, @Cast("Nd4jLong") long index); +public native double getRandomGeneratorRelativeDouble(OpaqueRandomGenerator ptr, @Cast("Nd4jLong") long index); public native int getRandomGeneratorRelativeInt(OpaqueRandomGenerator ptr, @Cast("Nd4jLong") long index); public native @Cast("Nd4jLong") long getRandomGeneratorRelativeLong(OpaqueRandomGenerator ptr, @Cast("Nd4jLong") long index); public native void deleteRandomGenerator(OpaqueRandomGenerator ptr); @@ -5048,6 +5050,7 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); // #include // #include // #include +// #include // #ifdef __CUDACC__ // #endif @@ -5064,6 +5067,8 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); return (RandomGenerator)super.position(position); } + public native @Cast("uint32_t") int xoroshiro32(@Cast("uint64_t") long index); + public native @Cast("uint64_t") long xoroshiro64(@Cast("uint64_t") long index); public RandomGenerator(@Cast("Nd4jLong") long rootSeed/*=0*/, @Cast("Nd4jLong") long nodeSeed/*=0*/) { super((Pointer)null); allocate(rootSeed, nodeSeed); } private native void allocate(@Cast("Nd4jLong") long rootSeed/*=0*/, @Cast("Nd4jLong") long nodeSeed/*=0*/); public RandomGenerator() { super((Pointer)null); allocate(); } @@ -5094,7 +5099,7 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); public native int relativeInt(@Cast("Nd4jLong") long index); public native @Cast("Nd4jLong") long relativeLong(@Cast("Nd4jLong") long index); - public native void rewindH(@Cast("Nd4jLong") long steps); + public native void rewindH(@Cast("uint64_t") long steps); /** * These methods set up only node states, with non-changed root ones @@ -5126,6 +5131,10 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); + + + + @@ -5141,6 +5150,8 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); @Namespace("sd::graph") public static native @Cast("uint64_t") long rotl(@Cast("const uint64_t") long x, int k); + @Namespace("sd::graph") public static native @Cast("uint32_t") int next(@Cast("uint32_t") int s0, @Cast("uint32_t") int s1, @Cast("uint32_t") int s2, @Cast("uint32_t") int s3); + diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/linalg/cpu/nativecpu/rng/CpuNativeRandom.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/linalg/cpu/nativecpu/rng/CpuNativeRandom.java index 2a2cff200..96219e8ff 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/linalg/cpu/nativecpu/rng/CpuNativeRandom.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/linalg/cpu/nativecpu/rng/CpuNativeRandom.java @@ -75,6 +75,16 @@ public class CpuNativeRandom extends NativeRandom { return nativeOps.getRandomGeneratorRelativeInt((OpaqueRandomGenerator)statePointer, currentPosition.getAndIncrement()); } + @Override + public float nextFloat() { + return nativeOps.getRandomGeneratorRelativeFloat((OpaqueRandomGenerator)statePointer, currentPosition.getAndIncrement()); + } + + @Override + public double nextDouble() { + return 
nativeOps.getRandomGeneratorRelativeDouble((OpaqueRandomGenerator)statePointer, currentPosition.getAndIncrement()); + } + @Override public long nextLong() { return nativeOps.getRandomGeneratorRelativeLong((OpaqueRandomGenerator)statePointer, currentPosition.getAndIncrement()); diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java index b9e4adb5a..402b096c6 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java @@ -3102,6 +3102,8 @@ public native @Cast("Nd4jLong") long getRandomGeneratorRootState(OpaqueRandomGen public native @Cast("Nd4jLong") long getRandomGeneratorNodeState(OpaqueRandomGenerator ptr); public native void setRandomGeneratorStates(OpaqueRandomGenerator ptr, @Cast("Nd4jLong") long rootSeed/*=0*/, @Cast("Nd4jLong") long nodeSeed/*=0*/); public native void setRandomGeneratorStates(OpaqueRandomGenerator ptr); +public native float getRandomGeneratorRelativeFloat(OpaqueRandomGenerator ptr, @Cast("Nd4jLong") long index); +public native double getRandomGeneratorRelativeDouble(OpaqueRandomGenerator ptr, @Cast("Nd4jLong") long index); public native int getRandomGeneratorRelativeInt(OpaqueRandomGenerator ptr, @Cast("Nd4jLong") long index); public native @Cast("Nd4jLong") long getRandomGeneratorRelativeLong(OpaqueRandomGenerator ptr, @Cast("Nd4jLong") long index); public native void deleteRandomGenerator(OpaqueRandomGenerator ptr); @@ -5052,6 +5054,7 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); // #include // #include // #include +// #include // #ifdef __CUDACC__ // #endif @@ -5068,6 +5071,8 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); return (RandomGenerator)super.position(position); } + public native @Cast("uint32_t") int xoroshiro32(@Cast("uint64_t") long index); + public native @Cast("uint64_t") long xoroshiro64(@Cast("uint64_t") long index); public RandomGenerator(@Cast("Nd4jLong") long rootSeed/*=0*/, @Cast("Nd4jLong") long nodeSeed/*=0*/) { super((Pointer)null); allocate(rootSeed, nodeSeed); } private native void allocate(@Cast("Nd4jLong") long rootSeed/*=0*/, @Cast("Nd4jLong") long nodeSeed/*=0*/); public RandomGenerator() { super((Pointer)null); allocate(); } @@ -5098,7 +5103,7 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); public native int relativeInt(@Cast("Nd4jLong") long index); public native @Cast("Nd4jLong") long relativeLong(@Cast("Nd4jLong") long index); - public native void rewindH(@Cast("Nd4jLong") long steps); + public native void rewindH(@Cast("uint64_t") long steps); /** * These methods set up only node states, with non-changed root ones @@ -5130,6 +5135,10 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); + + + + @@ -5145,6 +5154,8 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); @Namespace("sd::graph") public static native @Cast("uint64_t") long rotl(@Cast("const uint64_t") long x, int k); + @Namespace("sd::graph") public static native @Cast("uint32_t") int next(@Cast("uint32_t") int s0, @Cast("uint32_t") int s1, @Cast("uint32_t") int s2, @Cast("uint32_t") int s3); + diff --git a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/rng/RandomTests.java b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/rng/RandomTests.java index 
d784fb390..4e885db96 100644 --- a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/rng/RandomTests.java +++ b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/rng/RandomTests.java @@ -1514,6 +1514,28 @@ public class RandomTests extends BaseNd4jTest { assertEquals(res[0], res1[0]); } + + @Test + public void testRandom() { + val r1 = new java.util.Random(119); + val r2 = Nd4j.getRandom(); + r2.setSeed(119); + float jmax = 0.0f; + float nmax = 0.0f; + for (int e = 0; e < 100_000_000; e++) { + val f = r1.nextFloat(); + val n = r2.nextFloat(); + if (f > jmax) + jmax = f; + + if (n > nmax) + nmax = n; + } + + assertTrue(jmax < 1.0); + assertTrue(nmax < 1.0); + } + @Override public char ordering() { return 'c'; From c783a5938a57a45bc12743af0fad45e51e040ff0 Mon Sep 17 00:00:00 2001 From: Abdelrauf Date: Mon, 1 Jun 2020 12:47:21 +0400 Subject: [PATCH 18/21] Cmake compilation helper module (#478) * Cmake: generate compilation units Corrections. Added loops/cpu compilation units Config files Signed-off-by: Abdelrauf * Cmake compilation helper module: fix cmake variable scope and some typos Signed-off-by: Abdelrauf * Cmake compilation helper: added other generations. should fix cuda compilation Signed-off-by: Abdelrauf * crop_and_resize adopted new compilation units setup Signed-off-by: raver119@gmail.com * Cmake Compilation Helper: Added comments and some configurations file Signed-off-by: Abdelrauf * minor fix Signed-off-by: Abdelrauf Co-authored-by: raver119@gmail.com --- libnd4j/CMakeLists.txt | 4 + libnd4j/blas/CMakeLists.txt | 27 +++-- libnd4j/cmake/GenCompilation.cmake | 103 ++++++++++++++++++ ...0.cpp => IndexReductionLoops_int32.cpp.in} | 7 +- .../cpu/loops/IndexReductionLoops_int32_2.cpp | 24 ---- .../cpu/loops/IndexReductionLoops_int32_3.cpp | 24 ---- .../cpu/loops/IndexReductionLoops_int32_4.cpp | 24 ---- .../cpu/loops/IndexReductionLoops_int32_5.cpp | 24 ---- .../cpu/loops/IndexReductionLoops_int32_6.cpp | 24 ---- .../cpu/loops/IndexReductionLoops_int32_7.cpp | 24 ---- .../cpu/loops/IndexReductionLoops_int32_8.cpp | 24 ---- .../cpu/loops/IndexReductionLoops_int32_9.cpp | 24 ---- ...1.cpp => IndexReductionLoops_int64.cpp.in} | 6 +- .../cpu/loops/IndexReductionLoops_int64_0.cpp | 24 ---- .../cpu/loops/IndexReductionLoops_int64_1.cpp | 24 ---- .../cpu/loops/IndexReductionLoops_int64_2.cpp | 24 ---- .../cpu/loops/IndexReductionLoops_int64_3.cpp | 24 ---- .../cpu/loops/IndexReductionLoops_int64_4.cpp | 24 ---- .../cpu/loops/IndexReductionLoops_int64_5.cpp | 24 ---- .../cpu/loops/IndexReductionLoops_int64_6.cpp | 24 ---- .../cpu/loops/IndexReductionLoops_int64_7.cpp | 24 ---- .../cpu/loops/IndexReductionLoops_int64_8.cpp | 24 ---- .../cpu/loops/IndexReductionLoops_int64_9.cpp | 24 ---- .../cpu/loops/Reduction3Loops.cpp.in} | 9 +- ...uction3Loops_0.cpp => Reduction3Loops.hpp} | 3 +- .../helpers/cpu/loops/Reduction3Loops_1.cpp | 60 ---------- .../helpers/cpu/loops/Reduction3Loops_2.cpp | 60 ---------- .../helpers/cpu/loops/Reduction3Loops_3.cpp | 60 ---------- .../cpu/loops/ReductionLoops_float.cpp.in} | 12 +- ...s_float_0.cpp => ReductionLoops_float.hpp} | 3 +- .../cpu/loops/ReductionLoops_float_1.cpp | 52 --------- .../cpu/loops/ReductionLoops_float_2.cpp | 49 --------- .../cpu/loops/ReductionLoops_float_3.cpp | 49 --------- ...st_bool_p0.cpp => broadcast_bool_p.cpp.in} | 5 +- .../compilation_units/broadcast_bool_p1.cpp | 27 ----- .../compilation_units/broadcast_bool_p2.cpp | 27 ----- .../compilation_units/broadcast_bool_p3.cpp | 27 ----- 
.../compilation_units/broadcast_bool_p4.cpp | 27 ----- .../compilation_units/broadcast_bool_p5.cpp | 27 ----- .../compilation_units/broadcast_bool_p6.cpp | 27 ----- .../compilation_units/broadcast_bool_p7.cpp | 27 ----- .../compilation_units/broadcast_bool_p8.cpp | 27 ----- .../compilation_units/broadcast_bool_p9.cpp | 27 ----- ...cast_int_p0.cpp => broadcast_int_p.cpp.in} | 5 +- .../compilation_units/broadcast_int_p1.cpp | 27 ----- .../compilation_units/broadcast_int_p2.cpp | 27 ----- .../compilation_units/broadcast_int_p3.cpp | 27 ----- .../compilation_units/broadcast_int_p4.cpp | 27 ----- .../compilation_units/broadcast_int_p5.cpp | 27 ----- .../compilation_units/broadcast_int_p6.cpp | 27 ----- .../compilation_units/broadcast_int_p7.cpp | 27 ----- .../{broadcast_p0.cpp => broadcast_p.cpp.in} | 6 +- .../cpu/compilation_units/broadcast_p1.cpp | 27 ----- .../cpu/compilation_units/broadcast_p10.cpp | 27 ----- .../cpu/compilation_units/broadcast_p11.cpp | 27 ----- .../cpu/compilation_units/broadcast_p12.cpp | 27 ----- .../cpu/compilation_units/broadcast_p2.cpp | 27 ----- .../cpu/compilation_units/broadcast_p3.cpp | 27 ----- .../cpu/compilation_units/broadcast_p4.cpp | 27 ----- .../cpu/compilation_units/broadcast_p5.cpp | 27 ----- .../cpu/compilation_units/broadcast_p6.cpp | 27 ----- .../cpu/compilation_units/broadcast_p7.cpp | 27 ----- .../cpu/compilation_units/broadcast_p8.cpp | 27 ----- .../cpu/compilation_units/broadcast_p9.cpp | 27 ----- ...e_int32_0.cpp => indexreduce_int32.cpp.in} | 6 +- .../compilation_units/indexreduce_int32_2.cpp | 28 ----- .../compilation_units/indexreduce_int32_3.cpp | 28 ----- .../compilation_units/indexreduce_int32_4.cpp | 28 ----- .../compilation_units/indexreduce_int32_5.cpp | 28 ----- .../compilation_units/indexreduce_int32_6.cpp | 28 ----- .../compilation_units/indexreduce_int32_7.cpp | 28 ----- .../compilation_units/indexreduce_int32_8.cpp | 28 ----- .../compilation_units/indexreduce_int32_9.cpp | 28 ----- ...e_int32_1.cpp => indexreduce_int64.cpp.in} | 6 +- .../compilation_units/indexreduce_int64_0.cpp | 28 ----- .../compilation_units/indexreduce_int64_1.cpp | 28 ----- .../compilation_units/indexreduce_int64_2.cpp | 28 ----- .../compilation_units/indexreduce_int64_3.cpp | 28 ----- .../compilation_units/indexreduce_int64_4.cpp | 28 ----- .../compilation_units/indexreduce_int64_5.cpp | 28 ----- .../compilation_units/indexreduce_int64_6.cpp | 28 ----- .../compilation_units/indexreduce_int64_7.cpp | 28 ----- .../compilation_units/indexreduce_int64_8.cpp | 28 ----- .../compilation_units/indexreduce_int64_9.cpp | 28 ----- .../{pairwise_p0.cpp => pairwise_p.cpp.in} | 6 +- .../cpu/compilation_units/pairwise_p1.cpp | 28 ----- .../cpu/compilation_units/pairwise_p10.cpp | 27 ----- .../cpu/compilation_units/pairwise_p11.cpp | 27 ----- .../cpu/compilation_units/pairwise_p12.cpp | 27 ----- .../cpu/compilation_units/pairwise_p2.cpp | 27 ----- .../cpu/compilation_units/pairwise_p3.cpp | 27 ----- .../cpu/compilation_units/pairwise_p4.cpp | 27 ----- .../cpu/compilation_units/pairwise_p5.cpp | 27 ----- .../cpu/compilation_units/pairwise_p6.cpp | 27 ----- .../cpu/compilation_units/pairwise_p7.cpp | 27 ----- .../cpu/compilation_units/pairwise_p8.cpp | 27 ----- .../cpu/compilation_units/pairwise_p9.cpp | 27 ----- .../{random_0.cpp => random.cpp.in} | 6 +- .../loops/cpu/compilation_units/random_1.cpp | 27 ----- .../loops/cpu/compilation_units/random_2.cpp | 27 ----- .../loops/cpu/compilation_units/random_3.cpp | 27 ----- ...bfloat16_0.cpp => reduce3_bfloat16.cpp.in} | 6 +- 
.../compilation_units/reduce3_bfloat16_4.cpp | 28 ----- .../compilation_units/reduce3_bfloat16_5.cpp | 28 ----- .../compilation_units/reduce3_bfloat16_6.cpp | 28 ----- .../compilation_units/reduce3_bfloat16_7.cpp | 28 ----- .../compilation_units/reduce3_bfloat16_8.cpp | 28 ----- .../compilation_units/reduce3_bfloat16_9.cpp | 28 ----- ...3_bfloat16_1.cpp => reduce3_double.cpp.in} | 6 +- .../compilation_units/reduce3_double_0.cpp | 28 ----- .../compilation_units/reduce3_double_1.cpp | 28 ----- .../compilation_units/reduce3_double_2.cpp | 28 ----- .../compilation_units/reduce3_double_3.cpp | 28 ----- .../compilation_units/reduce3_double_4.cpp | 28 ----- .../compilation_units/reduce3_double_5.cpp | 28 ----- .../compilation_units/reduce3_double_6.cpp | 28 ----- .../compilation_units/reduce3_double_7.cpp | 28 ----- .../compilation_units/reduce3_double_8.cpp | 28 ----- .../compilation_units/reduce3_double_9.cpp | 28 ----- ...e3_bfloat16_2.cpp => reduce3_float.cpp.in} | 6 +- ..._bfloat16_3.cpp => reduce3_float16.cpp.in} | 6 +- .../compilation_units/reduce3_float16_0.cpp | 28 ----- .../compilation_units/reduce3_float16_1.cpp | 28 ----- .../compilation_units/reduce3_float16_2.cpp | 28 ----- .../compilation_units/reduce3_float16_3.cpp | 28 ----- .../compilation_units/reduce3_float16_4.cpp | 28 ----- .../compilation_units/reduce3_float16_5.cpp | 28 ----- .../compilation_units/reduce3_float16_6.cpp | 28 ----- .../compilation_units/reduce3_float16_7.cpp | 28 ----- .../compilation_units/reduce3_float16_8.cpp | 28 ----- .../compilation_units/reduce3_float16_9.cpp | 28 ----- .../cpu/compilation_units/reduce3_float_0.cpp | 28 ----- .../cpu/compilation_units/reduce3_float_1.cpp | 28 ----- .../cpu/compilation_units/reduce3_float_2.cpp | 28 ----- .../cpu/compilation_units/reduce3_float_3.cpp | 28 ----- .../cpu/compilation_units/reduce3_float_4.cpp | 28 ----- .../cpu/compilation_units/reduce3_float_5.cpp | 28 ----- .../cpu/compilation_units/reduce3_float_6.cpp | 28 ----- .../cpu/compilation_units/reduce3_float_7.cpp | 28 ----- .../cpu/compilation_units/reduce3_float_8.cpp | 28 ----- .../cpu/compilation_units/reduce3_float_9.cpp | 28 ----- ...reduce_float_0.cpp => reduce_float.cpp.in} | 6 +- .../cpu/compilation_units/reduce_float_2.cpp | 28 ----- .../cpu/compilation_units/reduce_float_3.cpp | 28 ----- .../{scalar_p0.cpp => scalar_p.cpp.in} | 6 +- .../loops/cpu/compilation_units/scalar_p1.cpp | 27 ----- .../cpu/compilation_units/scalar_p10.cpp | 27 ----- .../cpu/compilation_units/scalar_p11.cpp | 27 ----- .../cpu/compilation_units/scalar_p12.cpp | 27 ----- .../loops/cpu/compilation_units/scalar_p2.cpp | 27 ----- .../loops/cpu/compilation_units/scalar_p3.cpp | 27 ----- .../loops/cpu/compilation_units/scalar_p4.cpp | 27 ----- .../loops/cpu/compilation_units/scalar_p5.cpp | 27 ----- .../loops/cpu/compilation_units/scalar_p6.cpp | 27 ----- .../loops/cpu/compilation_units/scalar_p7.cpp | 27 ----- .../loops/cpu/compilation_units/scalar_p8.cpp | 27 ----- .../loops/cpu/compilation_units/scalar_p9.cpp | 27 ----- .../broadcasting_0.cu => broadcasting.cu.in} | 6 +- .../broadcasting/broadcasting_1.cu | 27 ----- .../broadcasting/broadcasting_10.cu | 27 ----- .../broadcasting/broadcasting_11.cu | 27 ----- .../broadcasting/broadcasting_12.cu | 27 ----- .../broadcasting/broadcasting_2.cu | 27 ----- .../broadcasting/broadcasting_3.cu | 27 ----- .../broadcasting/broadcasting_4.cu | 27 ----- .../broadcasting/broadcasting_5.cu | 27 ----- .../broadcasting/broadcasting_6.cu | 27 ----- .../broadcasting/broadcasting_7.cu | 27 ----- 
.../broadcasting/broadcasting_8.cu | 27 ----- .../broadcasting/broadcasting_9.cu | 27 ----- .../pairwise_0.cu => pairwise.cu.in} | 6 +- .../compilation_units/pairwise/pairwise_1.cu | 27 ----- .../compilation_units/pairwise/pairwise_10.cu | 27 ----- .../compilation_units/pairwise/pairwise_11.cu | 27 ----- .../compilation_units/pairwise/pairwise_12.cu | 27 ----- .../compilation_units/pairwise/pairwise_2.cu | 27 ----- .../compilation_units/pairwise/pairwise_3.cu | 27 ----- .../compilation_units/pairwise/pairwise_4.cu | 27 ----- .../compilation_units/pairwise/pairwise_5.cu | 27 ----- .../compilation_units/pairwise/pairwise_6.cu | 27 ----- .../compilation_units/pairwise/pairwise_7.cu | 27 ----- .../compilation_units/pairwise/pairwise_8.cu | 27 ----- .../compilation_units/pairwise/pairwise_9.cu | 27 ----- .../cuda/compilation_units/reduce3.cu.in | 27 +++++ .../compilation_units/reduce3/reduce3_0.cu | 27 ----- .../compilation_units/reduce3/reduce3_1.cu | 27 ----- .../compilation_units/reduce3/reduce3_2.cu | 27 ----- .../compilation_units/reduce3/reduce3_3.cu | 27 ----- .../compilation_units/reduce_float.cu.in} | 9 +- .../reduce_float/reduce_float_0.cu | 27 ----- .../reduce_float/reduce_float_1.cu | 27 ----- .../reduce_float/reduce_float_2.cu | 27 ----- .../reduce_float/reduce_float_3.cu | 27 ----- .../{scalar/scalar_0.cu => scalar.cu.in} | 6 +- .../cuda/compilation_units/scalar/scalar_1.cu | 27 ----- .../compilation_units/scalar/scalar_10.cu | 27 ----- .../compilation_units/scalar/scalar_11.cu | 27 ----- .../compilation_units/scalar/scalar_12.cu | 27 ----- .../cuda/compilation_units/scalar/scalar_2.cu | 27 ----- .../cuda/compilation_units/scalar/scalar_3.cu | 27 ----- .../cuda/compilation_units/scalar/scalar_4.cu | 27 ----- .../cuda/compilation_units/scalar/scalar_5.cu | 27 ----- .../cuda/compilation_units/scalar/scalar_6.cu | 27 ----- .../cuda/compilation_units/scalar/scalar_7.cu | 27 ----- .../cuda/compilation_units/scalar/scalar_8.cu | 27 ----- .../cuda/compilation_units/scalar/scalar_9.cu | 27 ----- .../cpu/compilation_units/argamax.cpp.in | 10 +- .../cpu/compilation_units/argamin.cpp.in | 1 + .../cpu/compilation_units/argmax.cpp.in | 1 + .../cpu/compilation_units/argmin.cpp.in | 2 + ...nd_resize_3.cpp => crop_and_resize.cpp.in} | 7 +- .../crop_and_resize/crop_and_resize_0.cpp | 30 ----- .../crop_and_resize/crop_and_resize_1.cpp | 30 ----- .../crop_and_resize/crop_and_resize_2.cpp | 30 ----- .../crop_and_resize/crop_and_resize_4.cpp | 30 ----- .../crop_and_resize/crop_and_resize_5.cpp | 30 ----- .../crop_and_resize/crop_and_resize_6.cpp | 30 ----- .../crop_and_resize/crop_and_resize_7.cpp | 30 ----- .../crop_and_resize/crop_and_resize_8.cpp | 30 ----- .../crop_and_resize/crop_and_resize_9.cpp | 30 ----- ...ls_double_0.cpp => specials_double.cpp.in} | 8 +- .../compilation_units/specials_double_3.cpp | 26 ----- .../compilation_units/specials_double_4.cpp | 26 ----- .../compilation_units/specials_double_5.cpp | 26 ----- .../compilation_units/specials_double_6.cpp | 26 ----- .../compilation_units/specials_double_7.cpp | 26 ----- .../compilation_units/specials_double_8.cpp | 26 ----- .../compilation_units/specials_double_9.cpp | 26 ----- ...ls_single_0.cpp => specials_single.cpp.in} | 6 +- .../compilation_units/specials_single_1.cpp | 26 ----- .../compilation_units/specials_single_2.cpp | 26 ----- .../compilation_units/specials_single_3.cpp | 26 ----- .../compilation_units/specials_single_4.cpp | 26 ----- .../compilation_units/specials_single_5.cpp | 26 ----- 
.../compilation_units/specials_single_6.cpp | 26 ----- .../compilation_units/specials_single_7.cpp | 26 ----- .../compilation_units/specials_single_8.cpp | 26 ----- .../compilation_units/specials_single_9.cpp | 26 ----- .../tests_cpu/libnd4j_tests/CMakeLists.txt | 15 +-- 239 files changed, 253 insertions(+), 5791 deletions(-) create mode 100644 libnd4j/cmake/GenCompilation.cmake rename libnd4j/include/helpers/cpu/loops/{IndexReductionLoops_int32_0.cpp => IndexReductionLoops_int32.cpp.in} (88%) delete mode 100644 libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_2.cpp delete mode 100644 libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_3.cpp delete mode 100644 libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_4.cpp delete mode 100644 libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_5.cpp delete mode 100644 libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_6.cpp delete mode 100644 libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_7.cpp delete mode 100644 libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_8.cpp delete mode 100644 libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_9.cpp rename libnd4j/include/helpers/cpu/loops/{IndexReductionLoops_int32_1.cpp => IndexReductionLoops_int64.cpp.in} (88%) delete mode 100644 libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_0.cpp delete mode 100644 libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_1.cpp delete mode 100644 libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_2.cpp delete mode 100644 libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_3.cpp delete mode 100644 libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_4.cpp delete mode 100644 libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_5.cpp delete mode 100644 libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_6.cpp delete mode 100644 libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_7.cpp delete mode 100644 libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_8.cpp delete mode 100644 libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_9.cpp rename libnd4j/include/{ops/impl/compilation_units/specials_double_1.cpp => helpers/cpu/loops/Reduction3Loops.cpp.in} (80%) rename libnd4j/include/helpers/cpu/loops/{Reduction3Loops_0.cpp => Reduction3Loops.hpp} (96%) delete mode 100644 libnd4j/include/helpers/cpu/loops/Reduction3Loops_1.cpp delete mode 100644 libnd4j/include/helpers/cpu/loops/Reduction3Loops_2.cpp delete mode 100644 libnd4j/include/helpers/cpu/loops/Reduction3Loops_3.cpp rename libnd4j/include/{ops/impl/compilation_units/specials_double_2.cpp => helpers/cpu/loops/ReductionLoops_float.cpp.in} (79%) rename libnd4j/include/helpers/cpu/loops/{ReductionLoops_float_0.cpp => ReductionLoops_float.hpp} (95%) delete mode 100644 libnd4j/include/helpers/cpu/loops/ReductionLoops_float_1.cpp delete mode 100644 libnd4j/include/helpers/cpu/loops/ReductionLoops_float_2.cpp delete mode 100644 libnd4j/include/helpers/cpu/loops/ReductionLoops_float_3.cpp rename libnd4j/include/loops/cpu/compilation_units/{broadcast_bool_p0.cpp => broadcast_bool_p.cpp.in} (87%) delete mode 100644 libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p1.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p2.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p3.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p4.cpp delete mode 100644 
libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p5.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p6.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p7.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p8.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p9.cpp rename libnd4j/include/loops/cpu/compilation_units/{broadcast_int_p0.cpp => broadcast_int_p.cpp.in} (89%) delete mode 100644 libnd4j/include/loops/cpu/compilation_units/broadcast_int_p1.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/broadcast_int_p2.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/broadcast_int_p3.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/broadcast_int_p4.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/broadcast_int_p5.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/broadcast_int_p6.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/broadcast_int_p7.cpp rename libnd4j/include/loops/cpu/compilation_units/{broadcast_p0.cpp => broadcast_p.cpp.in} (89%) delete mode 100644 libnd4j/include/loops/cpu/compilation_units/broadcast_p1.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/broadcast_p10.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/broadcast_p11.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/broadcast_p12.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/broadcast_p2.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/broadcast_p3.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/broadcast_p4.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/broadcast_p5.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/broadcast_p6.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/broadcast_p7.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/broadcast_p8.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/broadcast_p9.cpp rename libnd4j/include/loops/cpu/compilation_units/{indexreduce_int32_0.cpp => indexreduce_int32.cpp.in} (87%) delete mode 100644 libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_2.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_3.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_4.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_5.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_6.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_7.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_8.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_9.cpp rename libnd4j/include/loops/cpu/compilation_units/{indexreduce_int32_1.cpp => indexreduce_int64.cpp.in} (87%) delete mode 100644 libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_0.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_1.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_2.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_3.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_4.cpp delete mode 100644 
libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_5.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_6.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_7.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_8.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_9.cpp rename libnd4j/include/loops/cpu/compilation_units/{pairwise_p0.cpp => pairwise_p.cpp.in} (89%) delete mode 100644 libnd4j/include/loops/cpu/compilation_units/pairwise_p1.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/pairwise_p10.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/pairwise_p11.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/pairwise_p12.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/pairwise_p2.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/pairwise_p3.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/pairwise_p4.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/pairwise_p5.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/pairwise_p6.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/pairwise_p7.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/pairwise_p8.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/pairwise_p9.cpp rename libnd4j/include/loops/cpu/compilation_units/{random_0.cpp => random.cpp.in} (89%) delete mode 100644 libnd4j/include/loops/cpu/compilation_units/random_1.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/random_2.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/random_3.cpp rename libnd4j/include/loops/cpu/compilation_units/{reduce3_bfloat16_0.cpp => reduce3_bfloat16.cpp.in} (89%) delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_4.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_5.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_6.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_7.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_8.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_9.cpp rename libnd4j/include/loops/cpu/compilation_units/{reduce3_bfloat16_1.cpp => reduce3_double.cpp.in} (89%) delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_double_0.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_double_1.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_double_2.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_double_3.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_double_4.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_double_5.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_double_6.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_double_7.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_double_8.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_double_9.cpp rename libnd4j/include/loops/cpu/compilation_units/{reduce3_bfloat16_2.cpp => reduce3_float.cpp.in} (89%) rename libnd4j/include/loops/cpu/compilation_units/{reduce3_bfloat16_3.cpp => 
reduce3_float16.cpp.in} (89%) delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float16_0.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float16_1.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float16_2.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float16_3.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float16_4.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float16_5.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float16_6.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float16_7.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float16_8.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float16_9.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float_0.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float_1.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float_2.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float_3.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float_4.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float_5.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float_6.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float_7.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float_8.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float_9.cpp rename libnd4j/include/loops/cpu/compilation_units/{reduce_float_0.cpp => reduce_float.cpp.in} (87%) delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce_float_2.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce_float_3.cpp rename libnd4j/include/loops/cpu/compilation_units/{scalar_p0.cpp => scalar_p.cpp.in} (89%) delete mode 100644 libnd4j/include/loops/cpu/compilation_units/scalar_p1.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/scalar_p10.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/scalar_p11.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/scalar_p12.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/scalar_p2.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/scalar_p3.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/scalar_p4.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/scalar_p5.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/scalar_p6.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/scalar_p7.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/scalar_p8.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/scalar_p9.cpp rename libnd4j/include/loops/cuda/compilation_units/{broadcasting/broadcasting_0.cu => broadcasting.cu.in} (89%) delete mode 100644 libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_1.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_10.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_11.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_12.cu delete mode 100644 
libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_2.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_3.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_4.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_5.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_6.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_7.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_8.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_9.cu rename libnd4j/include/loops/cuda/compilation_units/{pairwise/pairwise_0.cu => pairwise.cu.in} (88%) delete mode 100644 libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_1.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_10.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_11.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_12.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_2.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_3.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_4.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_5.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_6.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_7.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_8.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_9.cu create mode 100644 libnd4j/include/loops/cuda/compilation_units/reduce3.cu.in delete mode 100644 libnd4j/include/loops/cuda/compilation_units/reduce3/reduce3_0.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/reduce3/reduce3_1.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/reduce3/reduce3_2.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/reduce3/reduce3_3.cu rename libnd4j/include/loops/{cpu/compilation_units/reduce_float_1.cpp => cuda/compilation_units/reduce_float.cu.in} (86%) delete mode 100644 libnd4j/include/loops/cuda/compilation_units/reduce_float/reduce_float_0.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/reduce_float/reduce_float_1.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/reduce_float/reduce_float_2.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/reduce_float/reduce_float_3.cu rename libnd4j/include/loops/cuda/compilation_units/{scalar/scalar_0.cu => scalar.cu.in} (89%) delete mode 100644 libnd4j/include/loops/cuda/compilation_units/scalar/scalar_1.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/scalar/scalar_10.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/scalar/scalar_11.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/scalar/scalar_12.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/scalar/scalar_2.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/scalar/scalar_3.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/scalar/scalar_4.cu delete mode 100644 
libnd4j/include/loops/cuda/compilation_units/scalar/scalar_5.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/scalar/scalar_6.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/scalar/scalar_7.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/scalar/scalar_8.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/scalar/scalar_9.cu rename libnd4j/include/ops/declarable/helpers/cpu/compilation_units/{crop_and_resize/crop_and_resize_3.cpp => crop_and_resize.cpp.in} (84%) delete mode 100644 libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_0.cpp delete mode 100644 libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_1.cpp delete mode 100644 libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_2.cpp delete mode 100644 libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_4.cpp delete mode 100644 libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_5.cpp delete mode 100644 libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_6.cpp delete mode 100644 libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_7.cpp delete mode 100644 libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_8.cpp delete mode 100644 libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_9.cpp rename libnd4j/include/ops/impl/compilation_units/{specials_double_0.cpp => specials_double.cpp.in} (86%) delete mode 100644 libnd4j/include/ops/impl/compilation_units/specials_double_3.cpp delete mode 100644 libnd4j/include/ops/impl/compilation_units/specials_double_4.cpp delete mode 100644 libnd4j/include/ops/impl/compilation_units/specials_double_5.cpp delete mode 100644 libnd4j/include/ops/impl/compilation_units/specials_double_6.cpp delete mode 100644 libnd4j/include/ops/impl/compilation_units/specials_double_7.cpp delete mode 100644 libnd4j/include/ops/impl/compilation_units/specials_double_8.cpp delete mode 100644 libnd4j/include/ops/impl/compilation_units/specials_double_9.cpp rename libnd4j/include/ops/impl/compilation_units/{specials_single_0.cpp => specials_single.cpp.in} (90%) delete mode 100644 libnd4j/include/ops/impl/compilation_units/specials_single_1.cpp delete mode 100644 libnd4j/include/ops/impl/compilation_units/specials_single_2.cpp delete mode 100644 libnd4j/include/ops/impl/compilation_units/specials_single_3.cpp delete mode 100644 libnd4j/include/ops/impl/compilation_units/specials_single_4.cpp delete mode 100644 libnd4j/include/ops/impl/compilation_units/specials_single_5.cpp delete mode 100644 libnd4j/include/ops/impl/compilation_units/specials_single_6.cpp delete mode 100644 libnd4j/include/ops/impl/compilation_units/specials_single_7.cpp delete mode 100644 libnd4j/include/ops/impl/compilation_units/specials_single_8.cpp delete mode 100644 libnd4j/include/ops/impl/compilation_units/specials_single_9.cpp diff --git a/libnd4j/CMakeLists.txt b/libnd4j/CMakeLists.txt index 3376bd6b6..0c78b3409 100755 --- a/libnd4j/CMakeLists.txt +++ b/libnd4j/CMakeLists.txt @@ -17,6 +17,10 @@ option(FLATBUFFERS_BUILD_FLATC "Enable the build of the flatbuffers compiler" OF set(FLATBUFFERS_BUILD_FLATC "OFF" CACHE STRING "Hack to disable flatc build" FORCE) set(CMAKE_CXX_STANDARD 11) + + +include(GenCompilation) + if 
(SD_CUDA) enable_language(CUDA) set(CMAKE_CUDA_STANDARD 11) diff --git a/libnd4j/blas/CMakeLists.txt b/libnd4j/blas/CMakeLists.txt index 8419cdd4c..fb1dc066e 100755 --- a/libnd4j/blas/CMakeLists.txt +++ b/libnd4j/blas/CMakeLists.txt @@ -226,6 +226,14 @@ if(SD_CUDA) file(GLOB_RECURSE LEGACY_SOURCES false ../include/legacy/impl/*.cpp ../include/legacy/*.cu ../include/legacy/*.h) file(GLOB_RECURSE LOOPS_SOURCES_CUDA false ../include/loops/*.cu) + + file(GLOB_RECURSE COMPILATION_UNITS false ../include/loops/cuda/compilation_units/*.cu.in + ../include/ops/impl/compilation_units/*.cpp.in) + + foreach(FL_ITEM ${COMPILATION_UNITS}) + genCompilation(FL_ITEM) + endforeach() + if (HAVE_CUDNN) message("cuDNN included") file(GLOB_RECURSE CUSTOMOPS_CUDNN_SOURCES false ../include/ops/declarable/platform/cudnn/*.cu) @@ -234,7 +242,9 @@ if(SD_CUDA) add_library(samediff_obj OBJECT ${LOOPS_SOURCES_CUDA} ${LEGACY_SOURCES} ${CUSTOMOPS_HELPERS_SOURCES} ${HELPERS_SOURCES} ${EXEC_SOURCES} ${LOOPS_SOURCES} ${ARRAY_SOURCES} ${TYPES_SOURCES} - ${MEMORY_SOURCES} ${GRAPH_SOURCES} ${CUSTOMOPS_SOURCES} ${INDEXING_SOURCES} ${EXCEPTIONS_SOURCES} ${OPS_SOURCES} ${PERF_SOURCES} ${CUSTOMOPS_CUDNN_SOURCES} ${CUSTOMOPS_MKLDNN_SOURCES}) + ${MEMORY_SOURCES} ${GRAPH_SOURCES} ${CUSTOMOPS_SOURCES} ${INDEXING_SOURCES} ${EXCEPTIONS_SOURCES} ${OPS_SOURCES} ${PERF_SOURCES} ${CUSTOMOPS_CUDNN_SOURCES} ${CUSTOMOPS_MKLDNN_SOURCES} + ${CUSTOMOPS_GENERIC_SOURCES} + ) if (WIN32) message("MSVC runtime for library: ${MSVC_RT_LIB}") @@ -295,15 +305,12 @@ elseif(SD_CPU) file(GLOB_RECURSE LOOPS_SOURCES false ../include/loops/*.cpp ../include/loops/*.h) - file(GLOB_RECURSE COMPILATION_UNITS false ../include/ops/declarable/helpers/cpu/compilation_units/*.cpp.in) - foreach(FL_ITEM ${COMPILATION_UNITS}) - string(REGEX MATCH "^(.*)\\.cpp\.in$" dummy ${FL_ITEM}) - set(FL_ITEM_WLE ${CMAKE_MATCH_1}) - foreach(FL_TYPE_INDEX RANGE 0 9) - #message( "${FL_ITEM_WLE}_${FL_TYPE_INDEX}.cpp") - configure_file( "${FL_ITEM}" "${FL_ITEM_WLE}_${FL_TYPE_INDEX}.cpp" @ONLY) - LIST(APPEND CUSTOMOPS_GENERIC_SOURCES ${FL_ITEM_WLE}_${FL_TYPE_INDEX}.cpp ) - endforeach() + file(GLOB_RECURSE COMPILATION_UNITS false ../include/ops/declarable/helpers/cpu/compilation_units/*.cpp.in + ../include/loops/cpu/compilation_units/*.cpp.in ../include/helpers/cpu/loops/*.cpp.in + ../include/ops/impl/compilation_units/*.cpp.in) + + foreach(FL_ITEM ${COMPILATION_UNITS}) + genCompilation(FL_ITEM) endforeach() if (SD_X86_BUILD) diff --git a/libnd4j/cmake/GenCompilation.cmake b/libnd4j/cmake/GenCompilation.cmake new file mode 100644 index 000000000..9f977633d --- /dev/null +++ b/libnd4j/cmake/GenCompilation.cmake @@ -0,0 +1,103 @@ +################################################################################ +# Copyright (c) 2020 Konduit K.K. +# +# This program and the accompanying materials are made available under the +# terms of the Apache License, Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0. +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+# +# SPDX-License-Identifier: Apache-2.0 +################################################################################ + +#/////////////////////////////////////////////////////////////////////////////// +# genCompilation: Generates cpp, cu files +# INPUT: +# $FILE_ITEM template-configuration that utilizes libnd4j type, macros helpers +# defined inside { include/types/types.h, include/system/type_boilerplate.h} +# OUTPUT: +# $CUSTOMOPS_GENERIC_SOURCES generated files will be added into this List +#//////////////////////////////////////////////////////////////////////////////// +# A simple template-configuration file example: +# // hints and defines what types will be generated +# #cmakedefine LIBND4J_TYPE_GEN +# #cmakedefine FLOAT_TYPE_GEN +# // below if defines blocks are needed for correctly handling multiple types +# #if defined(LIBND4J_TYPE_GEN) +# BUILD_DOUBLE_TEMPLATE(template void someFunc, (arg_list,..), +# LIBND4J_TYPES_@FL_TYPE_INDEX@, INDEXING_TYPES); +# #endif +# #if defined(FLOAT_TYPE_GEN) +# BUILD_SINGLE_TEMPLATE(template class SomeClass,, FLOAT_TYPES_@FL_TYPE_INDEX@); +# #endif +#//////////////////////////////////////////////////////////////////////////////// + +function(genCompilation FILE_ITEM) + get_filename_component(FILE_ITEM_WE ${FL_ITEM} NAME_WE) + + set(EXTENSION "cpp") + + if(FL_ITEM MATCHES "cu.in$") + set(EXTENSION "cu") + endif() + + file(READ ${FL_ITEM} CONTENT_FL) + #check content for types + + #set all to false + set (FLOAT_TYPE_GEN 0) + set (INT_TYPE_GEN 0) + set (LIBND4J_TYPE_GEN 0) + set (PAIRWISE_TYPE_GEN 0) + set (RANGE_STOP -1) + + string(REGEX MATCHALL "#cmakedefine[ \t]+[^_]+_TYPE_GEN" TYPE_MATCHES ${CONTENT_FL}) + + foreach(TYPEX ${TYPE_MATCHES}) + set(STOP -1) + if(TYPEX MATCHES "INT_TYPE_GEN$") + set (INT_TYPE_GEN 1) + set(STOP 7) + endif() + if(TYPEX MATCHES "LIBND4J_TYPE_GEN$") + set (LIBND4J_TYPE_GEN 1) + set(STOP 9) + endif() + if(TYPEX MATCHES "FLOAT_TYPE_GEN$") + set (FLOAT_TYPE_GEN 1) + set(STOP 3) + endif() + if(TYPEX MATCHES "PAIRWISE_TYPE_GEN$") + set (PAIRWISE_TYPE_GEN 1) + set(STOP 12) + endif() + if(STOP GREATER RANGE_STOP) + set(RANGE_STOP ${STOP}) + endif() + + endforeach() + + if(RANGE_STOP GREATER -1) + foreach(FL_TYPE_INDEX RANGE 0 ${RANGE_STOP}) + # set OFF if the index is above + if(FL_TYPE_INDEX GREATER 3) + set (FLOAT_TYPE_GEN 0) + endif() + if(FL_TYPE_INDEX GREATER 7) + set (INT_TYPE_GEN 0) + endif() + if(FL_TYPE_INDEX GREATER 9) + set (LIBND4J_TYPE_GEN 0) + endif() + set(GENERATED_SOURCE "${CMAKE_BINARY_DIR}/compilation_units/${FILE_ITEM_WE}_${FL_TYPE_INDEX}.${EXTENSION}") + configure_file( "${FL_ITEM}" "${GENERATED_SOURCE}" @ONLY) + LIST(APPEND CUSTOMOPS_GENERIC_SOURCES ${GENERATED_SOURCE} ) + endforeach() + endif() + + set(CUSTOMOPS_GENERIC_SOURCES ${CUSTOMOPS_GENERIC_SOURCES} PARENT_SCOPE) +endfunction() \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_0.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32.cpp.in similarity index 88% rename from libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_0.cpp rename to libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32.cpp.in index 97318dae8..2030c8017 100644 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_0.cpp +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32.cpp.in @@ -18,7 +18,6 @@ // // @author Yurii Shyrma (iuriish@yahoo.com) // - -#include "./IndexReductionLoops.hpp" - -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const 
int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_0, (sd::DataType::INT32, int32_t)); \ No newline at end of file +#include <helpers/cpu/loops/IndexReductionLoops.hpp> +#cmakedefine LIBND4J_TYPE_GEN +BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_@FL_TYPE_INDEX@, (sd::DataType::INT32, int32_t)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_2.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_2.cpp deleted file mode 100644 index e22635b85..000000000 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_2.cpp +++ /dev/null @@ -1,24 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author Yurii Shyrma (iuriish@yahoo.com) -// - -#include "./IndexReductionLoops.hpp" - -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_2, (sd::DataType::INT32, int32_t)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_3.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_3.cpp deleted file mode 100644 index f85096f0a..000000000 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_3.cpp +++ /dev/null @@ -1,24 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License.
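For context on how the generation above plays out: genCompilation() scans each template for #cmakedefine *_TYPE_GEN hints, takes the largest matching partition range (FLOAT_TYPES spans indexes 0-3, INT_TYPES 0-7, LIBND4J_TYPES 0-9, PAIRWISE_TYPES 0-12), and calls configure_file() once per FL_TYPE_INDEX, writing the results to ${CMAKE_BINARY_DIR}/compilation_units. Because @ONLY is used, only @VAR@ references are substituted, and each #cmakedefine line becomes a #define when its variable is set, or a commented #undef when it is not; indexes past a shorter list's range switch that list's flag off, which is what the GREATER checks in the loop do. (One CMake subtlety: the function declares its parameter as FILE_ITEM but the body reads ${FL_ITEM}; this still resolves, since CMake functions can read variables from the calling scope, where the foreach() loop variable is named FL_ITEM.) As an illustrative sketch, not actual build output, the IndexReductionLoops_int32 template above expands for FL_TYPE_INDEX = 0 to roughly:

    // compilation_units/IndexReductionLoops_int32_0.cpp as produced by
    // configure_file() (illustrative)
    #include <helpers/cpu/loops/IndexReductionLoops.hpp>
    #define LIBND4J_TYPE_GEN
    BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops,
                          ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo,
                                            void* z, const Nd4jLong* zShapeInfo,
                                            const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets,
                                            void* vextraParams),
                          LIBND4J_TYPES_0, (sd::DataType::INT32, int32_t));

which matches the hand-written IndexReductionLoops_int32_0.cpp that this patch deletes.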
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author Yurii Shyrma (iuriish@yahoo.com) -// - -#include "./IndexReductionLoops.hpp" - -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_3, (sd::DataType::INT32, int32_t)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_4.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_4.cpp deleted file mode 100644 index 5272eba7e..000000000 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_4.cpp +++ /dev/null @@ -1,24 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author Yurii Shyrma (iuriish@yahoo.com) -// - -#include "./IndexReductionLoops.hpp" - -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_4, (sd::DataType::INT32, int32_t)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_5.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_5.cpp deleted file mode 100644 index 683d6d0c0..000000000 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_5.cpp +++ /dev/null @@ -1,24 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author Yurii Shyrma (iuriish@yahoo.com) -// - -#include "./IndexReductionLoops.hpp" - -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_5, (sd::DataType::INT32, int32_t)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_6.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_6.cpp deleted file mode 100644 index 0ff70b7b5..000000000 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_6.cpp +++ /dev/null @@ -1,24 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author Yurii Shyrma (iuriish@yahoo.com) -// - -#include "./IndexReductionLoops.hpp" - -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_6, (sd::DataType::INT32, int32_t)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_7.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_7.cpp deleted file mode 100644 index 64d93c5e3..000000000 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_7.cpp +++ /dev/null @@ -1,24 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author Yurii Shyrma (iuriish@yahoo.com) -// - -#include "./IndexReductionLoops.hpp" - -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_7, (sd::DataType::INT32, int32_t)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_8.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_8.cpp deleted file mode 100644 index dd586ab26..000000000 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_8.cpp +++ /dev/null @@ -1,24 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author Yurii Shyrma (iuriish@yahoo.com) -// - -#include "./IndexReductionLoops.hpp" - -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_8, (sd::DataType::INT32, int32_t)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_9.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_9.cpp deleted file mode 100644 index bb7ef80f7..000000000 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_9.cpp +++ /dev/null @@ -1,24 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author Yurii Shyrma (iuriish@yahoo.com) -// - -#include "./IndexReductionLoops.hpp" - -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_9, (sd::DataType::INT32, int32_t)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_1.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64.cpp.in similarity index 88% rename from libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_1.cpp rename to libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64.cpp.in index 680bf7a64..0647ce17d 100644 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_1.cpp +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64.cpp.in @@ -19,6 +19,6 @@ // @author Yurii Shyrma (iuriish@yahoo.com) // -#include "./IndexReductionLoops.hpp" - -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_1, (sd::DataType::INT32, int32_t)); \ No newline at end of file +#include <helpers/cpu/loops/IndexReductionLoops.hpp> +#cmakedefine LIBND4J_TYPE_GEN +BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_@FL_TYPE_INDEX@, (sd::DataType::INT64, Nd4jLong)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_0.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_0.cpp deleted file mode 100644 index 8d0c55ce1..000000000 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_0.cpp +++ /dev/null @@ -1,24 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License.
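The int64 template above is the same generator with the output pair pinned to (sd::DataType::INT64, Nd4jLong), so index-reduction results can be written as 64-bit values. Loosely, and assuming the usual BUILD_DOUBLE_TEMPLATE semantics of crossing every type in the first list with every (dtype, ctype) pair in the second, each generated unit reduces to explicit instantiations of the form:

    // Hypothetical expansion for a single input type (float); the macro emits
    // one such instantiation per entry in LIBND4J_TYPES_N
    template void sd::IndexReductionLoops<float, Nd4jLong>::wrapIndexReduce(
            const int opNum, const void* vx, const Nd4jLong* xShapeInfo,
            void* z, const Nd4jLong* zShapeInfo,
            const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets,
            void* vextraParams);

Splitting LIBND4J_TYPES into ten partitions keeps each translation unit small, which bounds compiler memory use and lets the units build in parallel.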
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author Yurii Shyrma (iuriish@yahoo.com) -// - -#include "./IndexReductionLoops.hpp" - -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_0, (sd::DataType::INT64, Nd4jLong)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_1.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_1.cpp deleted file mode 100644 index 7c5824559..000000000 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_1.cpp +++ /dev/null @@ -1,24 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author Yurii Shyrma (iuriish@yahoo.com) -// - -#include "./IndexReductionLoops.hpp" - -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_1, (sd::DataType::INT64, Nd4jLong)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_2.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_2.cpp deleted file mode 100644 index 3bb6e6b7c..000000000 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_2.cpp +++ /dev/null @@ -1,24 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author Yurii Shyrma (iuriish@yahoo.com) -// - -#include "./IndexReductionLoops.hpp" - -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_2, (sd::DataType::INT64, Nd4jLong)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_3.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_3.cpp deleted file mode 100644 index 49f977901..000000000 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_3.cpp +++ /dev/null @@ -1,24 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author Yurii Shyrma (iuriish@yahoo.com) -// - -#include "./IndexReductionLoops.hpp" - -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_3, (sd::DataType::INT64, Nd4jLong)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_4.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_4.cpp deleted file mode 100644 index 73f0e9872..000000000 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_4.cpp +++ /dev/null @@ -1,24 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author Yurii Shyrma (iuriish@yahoo.com)
-//
-
-#include "./IndexReductionLoops.hpp"
-
-BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_4, (sd::DataType::INT64, Nd4jLong));
\ No newline at end of file
diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_5.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_5.cpp
deleted file mode 100644
index b27aaf341..000000000
--- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_5.cpp
+++ /dev/null
@@ -1,24 +0,0 @@
-BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_5, (sd::DataType::INT64, Nd4jLong));
\ No newline at end of file
diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_6.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_6.cpp
deleted file mode 100644
index 452184acd..000000000
--- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_6.cpp
+++ /dev/null
@@ -1,24 +0,0 @@
-BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_6, (sd::DataType::INT64, Nd4jLong));
\ No newline at end of file
diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_7.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_7.cpp
deleted file mode 100644
index 59cbc51cf..000000000
--- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_7.cpp
+++ /dev/null
@@ -1,24 +0,0 @@
-BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_7, (sd::DataType::INT64, Nd4jLong));
\ No newline at end of file
diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_8.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_8.cpp
deleted file mode 100644
index 51fc49cea..000000000
--- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_8.cpp
+++ /dev/null
@@ -1,24 +0,0 @@
-BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_8, (sd::DataType::INT64, Nd4jLong));
\ No newline at end of file
diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_9.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_9.cpp
deleted file mode 100644
index b774dde52..000000000
--- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_9.cpp
+++ /dev/null
@@ -1,24 +0,0 @@
-BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_9, (sd::DataType::INT64, Nd4jLong));
\ No newline at end of file
diff --git a/libnd4j/include/ops/impl/compilation_units/specials_double_1.cpp b/libnd4j/include/helpers/cpu/loops/Reduction3Loops.cpp.in
similarity index 80%
rename from libnd4j/include/ops/impl/compilation_units/specials_double_1.cpp
rename to libnd4j/include/helpers/cpu/loops/Reduction3Loops.cpp.in
index a61a98870..4f38b4d8f 100644
--- a/libnd4j/include/ops/impl/compilation_units/specials_double_1.cpp
+++ b/libnd4j/include/helpers/cpu/loops/Reduction3Loops.cpp.in
@@ -1,6 +1,5 @@
 /*******************************************************************************
  * Copyright (c) 2015-2018 Skymind, Inc.
- * Copyright (c) 2019-2020 Konduit K.K.
  *
  * This program and the accompanying materials are made available under the
  * terms of the Apache License, Version 2.0 which is available at
@@ -19,8 +18,10 @@
 // @author raver119@gmail.com
 //
 
-#include "../specials_double.hpp"
+#include
+#cmakedefine FLOAT_TYPE_GEN
 
 namespace sd {
-    BUILD_DOUBLE_TEMPLATE(template class DoubleMethods, , LIBND4J_TYPES, LIBND4J_TYPES_1);
-}
\ No newline at end of file
+
+    BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduction3Loops, , LIBND4J_TYPES, FLOAT_TYPES_@FL_TYPE_INDEX@);
+}
diff --git a/libnd4j/include/helpers/cpu/loops/Reduction3Loops_0.cpp b/libnd4j/include/helpers/cpu/loops/Reduction3Loops.hpp
similarity index 96%
rename from libnd4j/include/helpers/cpu/loops/Reduction3Loops_0.cpp
rename to libnd4j/include/helpers/cpu/loops/Reduction3Loops.hpp
index 00b15673b..241dc7e8c 100644
--- a/libnd4j/include/helpers/cpu/loops/Reduction3Loops_0.cpp
+++ b/libnd4j/include/helpers/cpu/loops/Reduction3Loops.hpp
@@ -55,6 +55,5 @@ namespace sd {
             DISPATCH_BY_OPNUM_TT(innerloopReduce3All, PARAMS(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, xTadShapeInfo, xTadOffsets, yTadShapeInfo, yTadOffsets, extraParams, start, stop), REDUCE3_OPS);
 #endif
     }
-
-    BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduction3Loops, , LIBND4J_TYPES, FLOAT_TYPES_0);
+
 }
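The two renames above replace the hand-numbered Reduction3Loops_N.cpp units with a single Reduction3Loops.cpp.in template plus a shared Reduction3Loops.hpp; CMake's configure_file() then stamps out one numbered unit per FLOAT_TYPES_N partition, substituting @FL_TYPE_INDEX@. The CMakeLists.txt side of this change is not part of this section, so the loop below is only a minimal sketch under assumed names: the output directory, the ALL_SOURCES list, and the 0..3 range (inferred from the FLOAT_TYPES_0..3 units this patch renames or deletes) are illustrative, not the real build code.

    # Sketch of the generation loop; FL_TYPE_INDEX is the placeholder the
    # .cpp.in templates reference, everything else here is hypothetical.
    set(LOOPS_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include/helpers/cpu/loops)
    foreach(FL_TYPE_INDEX RANGE 0 3)
        configure_file(${LOOPS_DIR}/Reduction3Loops.cpp.in
                       ${CMAKE_BINARY_DIR}/compilation_units/Reduction3Loops_${FL_TYPE_INDEX}.cpp
                       @ONLY)
        # Compile the generated unit along with the rest of the backend.
        list(APPEND ALL_SOURCES
             ${CMAKE_BINARY_DIR}/compilation_units/Reduction3Loops_${FL_TYPE_INDEX}.cpp)
    endforeach()

With @ONLY, configure_file() substitutes only @VAR@ references and leaves ${...} occurrences in the template untouched, which keeps any literal ${} in the C++ source safe.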
diff --git a/libnd4j/include/helpers/cpu/loops/Reduction3Loops_1.cpp b/libnd4j/include/helpers/cpu/loops/Reduction3Loops_1.cpp
deleted file mode 100644
index da8d3db7e..000000000
--- a/libnd4j/include/helpers/cpu/loops/Reduction3Loops_1.cpp
+++ /dev/null
@@ -1,60 +0,0 @@
-    BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduction3Loops, , LIBND4J_TYPES, FLOAT_TYPES_1);
-}
diff --git a/libnd4j/include/helpers/cpu/loops/Reduction3Loops_2.cpp b/libnd4j/include/helpers/cpu/loops/Reduction3Loops_2.cpp
deleted file mode 100644
index 06588a2fb..000000000
--- a/libnd4j/include/helpers/cpu/loops/Reduction3Loops_2.cpp
+++ /dev/null
@@ -1,60 +0,0 @@
-    BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduction3Loops, , LIBND4J_TYPES, FLOAT_TYPES_2);
-}
diff --git a/libnd4j/include/helpers/cpu/loops/Reduction3Loops_3.cpp b/libnd4j/include/helpers/cpu/loops/Reduction3Loops_3.cpp
deleted file mode 100644
index 405b0275b..000000000
--- a/libnd4j/include/helpers/cpu/loops/Reduction3Loops_3.cpp
+++ /dev/null
@@ -1,60 +0,0 @@
-    BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduction3Loops, , LIBND4J_TYPES, FLOAT_TYPES_3);
-}
diff --git a/libnd4j/include/ops/impl/compilation_units/specials_double_2.cpp b/libnd4j/include/helpers/cpu/loops/ReductionLoops_float.cpp.in
similarity index 79%
rename from libnd4j/include/ops/impl/compilation_units/specials_double_2.cpp
rename to libnd4j/include/helpers/cpu/loops/ReductionLoops_float.cpp.in
index 89deb3d9c..5c1bb227d 100644
--- a/libnd4j/include/ops/impl/compilation_units/specials_double_2.cpp
+++ b/libnd4j/include/helpers/cpu/loops/ReductionLoops_float.cpp.in
@@ -1,6 +1,5 @@
 /*******************************************************************************
  * Copyright (c) 2015-2018 Skymind, Inc.
- * Copyright (c) 2019-2020 Konduit K.K.
  *
  * This program and the accompanying materials are made available under the
  * terms of the Apache License, Version 2.0 which is available at
@@ -19,8 +18,11 @@
 // @author raver119@gmail.com
 //
 
-#include "../specials_double.hpp"
-
+#include
+#cmakedefine FLOAT_TYPE_GEN
 namespace sd {
-    BUILD_DOUBLE_TEMPLATE(template class DoubleMethods, , LIBND4J_TYPES, LIBND4J_TYPES_2);
-}
\ No newline at end of file
+
+    BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT ReductionFloatLoops, , LIBND4J_TYPES, FLOAT_TYPES_@FL_TYPE_INDEX@);
+}
+
+
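The #cmakedefine FLOAT_TYPE_GEN line is also resolved by configure_file(): it becomes "#define FLOAT_TYPE_GEN" when the matching CMake variable is set at configure time, and "/* #undef FLOAT_TYPE_GEN */" otherwise. Note that the "+#include" lines in these hunks lost their angle-bracketed argument in this listing; the header produced by the adjacent rename (here ReductionLoops_float.hpp) is the natural candidate, but that is an inference, not something the hunk shows. Under those assumptions, the generated unit for index 2 would look roughly like:

    // Hypothetical ReductionLoops_float_2.cpp as emitted by configure_file()
    // with FL_TYPE_INDEX=2 and FLOAT_TYPE_GEN enabled. The include target is
    // assumed (see above); the BUILD line is taken verbatim from the diff.
    #include <helpers/cpu/loops/ReductionLoops_float.hpp>
    #define FLOAT_TYPE_GEN

    namespace sd {
        BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT ReductionFloatLoops, , LIBND4J_TYPES, FLOAT_TYPES_2);
    }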
diff --git a/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_0.cpp b/libnd4j/include/helpers/cpu/loops/ReductionLoops_float.hpp
similarity index 95%
rename from libnd4j/include/helpers/cpu/loops/ReductionLoops_float_0.cpp
rename to libnd4j/include/helpers/cpu/loops/ReductionLoops_float.hpp
index a3879bee3..c7ed544b2 100644
--- a/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_0.cpp
+++ b/libnd4j/include/helpers/cpu/loops/ReductionLoops_float.hpp
@@ -44,8 +44,7 @@ namespace sd {
             DISPATCH_BY_OPNUM_TT(innerloopReduce, PARAMS(x, xShapeInfo, z, zShapeInfo, tadShapeInfo, tadOffsets, extraParams, start, stop), REDUCE_FLOAT_OPS);
 #endif
     }
-
-    BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT ReductionFloatLoops, , LIBND4J_TYPES, FLOAT_TYPES_0);
+
 }
diff --git a/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_1.cpp b/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_1.cpp
deleted file mode 100644
index 6dd555037..000000000
--- a/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_1.cpp
+++ /dev/null
@@ -1,52 +0,0 @@
-#include "ReductionLoops.hpp"
-    BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT ReductionFloatLoops, , LIBND4J_TYPES, FLOAT_TYPES_1);
-}
diff --git a/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_2.cpp b/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_2.cpp
deleted file mode 100644
index ce1042b88..000000000
--- a/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_2.cpp
+++ /dev/null
@@ -1,49 +0,0 @@
-#include "ReductionLoops.hpp"
-    BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT ReductionFloatLoops, , LIBND4J_TYPES, FLOAT_TYPES_2);
-}
diff --git a/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_3.cpp b/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_3.cpp
deleted file mode 100644
index 6cfac93bc..000000000
--- a/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_3.cpp
+++ /dev/null
@@ -1,49 +0,0 @@
-#include "ReductionLoops.hpp"
-    BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT ReductionFloatLoops, , LIBND4J_TYPES, FLOAT_TYPES_3);
-}
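Each BUILD_DOUBLE_TEMPLATE(..., LIBND4J_TYPES, FLOAT_TYPES_N) line pins the explicit instantiations for one slice of the (input, output) type product, which is what lets these heavy loop templates build as independent, parallel compilation units. Hand-expanded, one such line amounts to a short list of ordinary explicit instantiations; the pairs below are assumptions, since the real partitions are defined in the type-list headers rather than in this diff:

    // Illustrative expansion of one BUILD_DOUBLE_TEMPLATE line for an assumed
    // FLOAT_TYPES_1 slice; the actual (X, Z) pairs are defined elsewhere.
    template class ND4J_EXPORT sd::ReductionFloatLoops<float, float>;
    template class ND4J_EXPORT sd::ReductionFloatLoops<double, double>;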
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p0.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p.cpp.in
similarity index 87%
rename from libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p0.cpp
rename to libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p.cpp.in
index 08ebd92f7..b3c60462b 100644
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p0.cpp
+++ b/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p.cpp.in
@@ -18,10 +18,11 @@
 // Created by raver119 on 23/09/18.
 //
 
-#include "../broadcasting_bool.hpp"
+#include
+#cmakedefine LIBND4J_TYPE_GEN
 
 namespace functions {
     namespace broadcast {
-        BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT BroadcastBool, , LIBND4J_TYPES_0, BOOL_TYPES);
+        BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT BroadcastBool, , LIBND4J_TYPES_@FL_TYPE_INDEX@, BOOL_TYPES);
     }
 }
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p1.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p1.cpp
deleted file mode 100644
index 16e4c817a..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p1.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-        BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT BroadcastBool, , LIBND4J_TYPES_1, BOOL_TYPES);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p2.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p2.cpp
deleted file mode 100644
index 10b32ca41..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p2.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-        BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT BroadcastBool, , LIBND4J_TYPES_2, BOOL_TYPES);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p3.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p3.cpp
deleted file mode 100644
index 547ddd371..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p3.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-        BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT BroadcastBool, , LIBND4J_TYPES_3, BOOL_TYPES);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p4.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p4.cpp
deleted file mode 100644
index 3c7dee0a0..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p4.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-        BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT BroadcastBool, , LIBND4J_TYPES_4, BOOL_TYPES);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p5.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p5.cpp
deleted file mode 100644
index b71925dab..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p5.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-        BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT BroadcastBool, , LIBND4J_TYPES_5, BOOL_TYPES);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p6.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p6.cpp
deleted file mode 100644
index 23eedd289..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p6.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-        BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT BroadcastBool, , LIBND4J_TYPES_6, BOOL_TYPES);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p7.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p7.cpp
deleted file mode 100644
index c18e7641e..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p7.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-        BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT BroadcastBool, , LIBND4J_TYPES_7, BOOL_TYPES);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p8.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p8.cpp
deleted file mode 100644
index efee34519..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p8.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-        BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT BroadcastBool, , LIBND4J_TYPES_8, BOOL_TYPES);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p9.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p9.cpp
deleted file mode 100644
index 2ab193285..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p9.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-        BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT BroadcastBool, , LIBND4J_TYPES_9, BOOL_TYPES);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_int_p0.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_int_p.cpp.in
similarity index 89%
rename from libnd4j/include/loops/cpu/compilation_units/broadcast_int_p0.cpp
rename to libnd4j/include/loops/cpu/compilation_units/broadcast_int_p.cpp.in
index d3f5ada43..a36c1a0b2 100644
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_int_p0.cpp
+++ b/libnd4j/include/loops/cpu/compilation_units/broadcast_int_p.cpp.in
@@ -18,10 +18,11 @@
 // Created by raver119 on 23/09/18.
 //
 
-#include "../broadcasting_int.hpp"
+#include
+#cmakedefine INT_TYPE_GEN
 
 namespace functions {
     namespace broadcast {
-        BUILD_SINGLE_TEMPLATE(template class ND4J_EXPORT BroadcastInt, , INTEGER_TYPES_0);
+        BUILD_SINGLE_TEMPLATE(template class ND4J_EXPORT BroadcastInt, , INTEGER_TYPES_@FL_TYPE_INDEX@);
     }
 }
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_int_p1.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_int_p1.cpp
deleted file mode 100644
index 82969bdb0..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_int_p1.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-        BUILD_SINGLE_TEMPLATE(template class ND4J_EXPORT BroadcastInt, , INTEGER_TYPES_1);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_int_p2.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_int_p2.cpp
deleted file mode 100644
index 53d928111..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_int_p2.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-        BUILD_SINGLE_TEMPLATE(template class ND4J_EXPORT BroadcastInt, , INTEGER_TYPES_2);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_int_p3.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_int_p3.cpp
deleted file mode 100644
index eba7b78d1..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_int_p3.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-        BUILD_SINGLE_TEMPLATE(template class ND4J_EXPORT BroadcastInt, , INTEGER_TYPES_3);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_int_p4.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_int_p4.cpp
deleted file mode 100644
index 47b7350f2..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_int_p4.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-        BUILD_SINGLE_TEMPLATE(template class ND4J_EXPORT BroadcastInt, , INTEGER_TYPES_4);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_int_p5.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_int_p5.cpp
deleted file mode 100644
index 3afad08f6..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_int_p5.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-        BUILD_SINGLE_TEMPLATE(template class ND4J_EXPORT BroadcastInt, , INTEGER_TYPES_5);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_int_p6.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_int_p6.cpp
deleted file mode 100644
index 286c2680f..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_int_p6.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-        BUILD_SINGLE_TEMPLATE(template class ND4J_EXPORT BroadcastInt, , INTEGER_TYPES_6);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_int_p7.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_int_p7.cpp
deleted file mode 100644
index 242441561..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_int_p7.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-        BUILD_SINGLE_TEMPLATE(template class ND4J_EXPORT BroadcastInt, , INTEGER_TYPES_7);
-    }
-}
\ No newline at end of file
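BroadcastInt is instantiated over a single integer-type list, so its template uses BUILD_SINGLE_TEMPLATE and its own guard symbol, INT_TYPE_GEN. The guard's consumer is not visible in this section, but what configure_file() does to a #cmakedefine line is fixed; both possible renderings are shown below:

    // How configure_file() rewrites "#cmakedefine INT_TYPE_GEN":
    #define INT_TYPE_GEN        // when INT_TYPE_GEN is set at configure time
    /* #undef INT_TYPE_GEN */   // when it is not set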
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_p0.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_p.cpp.in
similarity index 89%
rename from libnd4j/include/loops/cpu/compilation_units/broadcast_p0.cpp
rename to libnd4j/include/loops/cpu/compilation_units/broadcast_p.cpp.in
index 943186a8a..1dbb4aac4 100644
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_p0.cpp
+++ b/libnd4j/include/loops/cpu/compilation_units/broadcast_p.cpp.in
@@ -18,10 +18,10 @@
 // Created by raver119 on 23/09/18.
 //
 
-#include "../broadcasting.hpp"
-
+#include
+#cmakedefine PAIRWISE_TYPE_GEN
 namespace functions {
     namespace broadcast {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_0);
+        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_@FL_TYPE_INDEX@);
     }
 }
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_p1.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_p1.cpp
deleted file mode 100644
index b38a1c801..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_p1.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_1);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_p10.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_p10.cpp
deleted file mode 100644
index 983305007..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_p10.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_10);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_p11.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_p11.cpp
deleted file mode 100644
index 206b14763..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_p11.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_11);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_p12.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_p12.cpp
deleted file mode 100644
index 825c07adf..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_p12.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_12);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_p2.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_p2.cpp
deleted file mode 100644
index 341f1afb4..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_p2.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_2);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_p3.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_p3.cpp
deleted file mode 100644
index 9aa4c227b..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_p3.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_3);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_p4.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_p4.cpp
deleted file mode 100644
index 7f68bb1f8..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_p4.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_4);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_p5.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_p5.cpp
deleted file mode 100644
index d2e586bf8..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_p5.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_5);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_p6.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_p6.cpp
deleted file mode 100644
index a9db2f7f8..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_p6.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_6);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_p7.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_p7.cpp
deleted file mode 100644
index 9a2111ee5..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_p7.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_7);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_p8.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_p8.cpp
deleted file mode 100644
index 4bbd88ba6..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_p8.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_8);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_p9.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_p9.cpp
deleted file mode 100644
index 406a8f8e2..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_p9.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_9);
-    }
-}
\ No newline at end of file
- * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// Created by raver119 on 23/09/18. -// - -#include "../broadcasting.hpp" - -namespace functions { - namespace broadcast { - BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_9); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_0.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32.cpp.in similarity index 87% rename from libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_0.cpp rename to libnd4j/include/loops/cpu/compilation_units/indexreduce_int32.cpp.in index 89b85485a..97402d38e 100644 --- a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_0.cpp +++ b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32.cpp.in @@ -19,10 +19,10 @@ // @author raver119@gmail.com // -#include "../indexreduce.hpp" - +#include <loops/cpu/indexreduce.hpp> +#cmakedefine LIBND4J_TYPE_GEN namespace functions { namespace indexreduce { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_0, (sd::DataType::INT32, int32_t)); + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_@FL_TYPE_INDEX@, (sd::DataType::INT32, int32_t)); } } \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_2.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_2.cpp deleted file mode 100644 index 47dce2d5a..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_2.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../indexreduce.hpp" - -namespace functions { - namespace indexreduce { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_2, (sd::DataType::INT32, int32_t)); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_3.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_3.cpp deleted file mode 100644 index c3d33e7f1..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_3.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K.
- * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../indexreduce.hpp" - -namespace functions { - namespace indexreduce { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_3, (sd::DataType::INT32, int32_t)); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_4.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_4.cpp deleted file mode 100644 index 37a81e441..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_4.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../indexreduce.hpp" - -namespace functions { - namespace indexreduce { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_4, (sd::DataType::INT32, int32_t)); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_5.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_5.cpp deleted file mode 100644 index 1d6555ddf..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_5.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../indexreduce.hpp" - -namespace functions { - namespace indexreduce { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_5, (sd::DataType::INT32, int32_t)); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_6.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_6.cpp deleted file mode 100644 index 0bb8aef4d..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_6.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../indexreduce.hpp" - -namespace functions { - namespace indexreduce { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_6, (sd::DataType::INT32, int32_t)); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_7.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_7.cpp deleted file mode 100644 index a7d3c733f..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_7.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../indexreduce.hpp" - -namespace functions { - namespace indexreduce { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_7, (sd::DataType::INT32, int32_t)); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_8.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_8.cpp deleted file mode 100644 index 8c5de9653..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_8.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../indexreduce.hpp" - -namespace functions { - namespace indexreduce { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_8, (sd::DataType::INT32, int32_t)); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_9.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_9.cpp deleted file mode 100644 index f61d604e2..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_9.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../indexreduce.hpp" - -namespace functions { - namespace indexreduce { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_9, (sd::DataType::INT32, int32_t)); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_1.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64.cpp.in similarity index 87% rename from libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_1.cpp rename to libnd4j/include/loops/cpu/compilation_units/indexreduce_int64.cpp.in index ada7844cb..30fa30749 100644 --- a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_1.cpp +++ b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64.cpp.in @@ -19,10 +19,10 @@ // @author raver119@gmail.com // -#include "../indexreduce.hpp" - +#include <loops/cpu/indexreduce.hpp> +#cmakedefine LIBND4J_TYPE_GEN namespace functions { namespace indexreduce { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_1, (sd::DataType::INT32, int32_t)); + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_@FL_TYPE_INDEX@, (sd::DataType::INT64, Nd4jLong)); } } \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_0.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_0.cpp deleted file mode 100644 index d399f5e0e..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_0.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../indexreduce.hpp" - -namespace functions { - namespace indexreduce { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_0, (sd::DataType::INT64, Nd4jLong)); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_1.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_1.cpp deleted file mode 100644 index c4df4d2e4..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_1.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0.
- * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../indexreduce.hpp" - -namespace functions { - namespace indexreduce { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_1, (sd::DataType::INT64, Nd4jLong)); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_2.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_2.cpp deleted file mode 100644 index 538e369eb..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_2.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../indexreduce.hpp" - -namespace functions { - namespace indexreduce { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_2, (sd::DataType::INT64, Nd4jLong)); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_3.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_3.cpp deleted file mode 100644 index b0d082bce..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_3.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../indexreduce.hpp" - -namespace functions { - namespace indexreduce { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_3, (sd::DataType::INT64, Nd4jLong)); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_4.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_4.cpp deleted file mode 100644 index 98e13bb63..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_4.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../indexreduce.hpp" - -namespace functions { - namespace indexreduce { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_4, (sd::DataType::INT64, Nd4jLong)); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_5.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_5.cpp deleted file mode 100644 index 4b7f599d9..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_5.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../indexreduce.hpp" - -namespace functions { - namespace indexreduce { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_5, (sd::DataType::INT64, Nd4jLong)); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_6.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_6.cpp deleted file mode 100644 index 8d7de9822..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_6.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../indexreduce.hpp" - -namespace functions { - namespace indexreduce { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_6, (sd::DataType::INT64, Nd4jLong)); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_7.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_7.cpp deleted file mode 100644 index 8f9befddb..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_7.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../indexreduce.hpp" - -namespace functions { - namespace indexreduce { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_7, (sd::DataType::INT64, Nd4jLong)); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_8.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_8.cpp deleted file mode 100644 index b38112631..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_8.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../indexreduce.hpp" - -namespace functions { - namespace indexreduce { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_8, (sd::DataType::INT64, Nd4jLong)); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_9.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_9.cpp deleted file mode 100644 index baacdc432..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_9.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../indexreduce.hpp" - -namespace functions { - namespace indexreduce { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_9, (sd::DataType::INT64, Nd4jLong)); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/pairwise_p0.cpp b/libnd4j/include/loops/cpu/compilation_units/pairwise_p.cpp.in similarity index 89% rename from libnd4j/include/loops/cpu/compilation_units/pairwise_p0.cpp rename to libnd4j/include/loops/cpu/compilation_units/pairwise_p.cpp.in index d498a4400..bbf809de8 100644 --- a/libnd4j/include/loops/cpu/compilation_units/pairwise_p0.cpp +++ b/libnd4j/include/loops/cpu/compilation_units/pairwise_p.cpp.in @@ -18,10 +18,10 @@ // Created by raver119 on 23/09/18. // -#include "loops/cpu/pairwise.hpp" - +#include <loops/cpu/pairwise.hpp> +#cmakedefine PAIRWISE_TYPE_GEN namespace functions { namespace pairwise_transforms { - BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_0); + BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_@FL_TYPE_INDEX@); } } \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/pairwise_p1.cpp b/libnd4j/include/loops/cpu/compilation_units/pairwise_p1.cpp deleted file mode 100644 index 2a665d9d2..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/pairwise_p1.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// Created by raver119 on 23/09/18. -// - -#include "loops/cpu/pairwise.hpp" - -namespace functions { - namespace pairwise_transforms { - BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_1); - } - -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/pairwise_p10.cpp b/libnd4j/include/loops/cpu/compilation_units/pairwise_p10.cpp deleted file mode 100644 index 4a8aaf94a..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/pairwise_p10.cpp +++ /dev/null @@ -1,27 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// Created by raver119 on 23/09/18. -// - -#include "loops/cpu/pairwise.hpp" - -namespace functions { - namespace pairwise_transforms { - BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_10); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/pairwise_p11.cpp b/libnd4j/include/loops/cpu/compilation_units/pairwise_p11.cpp deleted file mode 100644 index 1f4eb1389..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/pairwise_p11.cpp +++ /dev/null @@ -1,27 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// Created by raver119 on 23/09/18. -// - -#include "loops/cpu/pairwise.hpp" - -namespace functions { - namespace pairwise_transforms { - BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_11); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/pairwise_p12.cpp b/libnd4j/include/loops/cpu/compilation_units/pairwise_p12.cpp deleted file mode 100644 index 3c0984db9..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/pairwise_p12.cpp +++ /dev/null @@ -1,27 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// Created by raver119 on 23/09/18. 
-// - -#include "loops/cpu/pairwise.hpp" - -namespace functions { - namespace pairwise_transforms { - BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_12); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/pairwise_p2.cpp b/libnd4j/include/loops/cpu/compilation_units/pairwise_p2.cpp deleted file mode 100644 index 0725ae862..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/pairwise_p2.cpp +++ /dev/null @@ -1,27 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// Created by raver119 on 23/09/18. -// - -#include "loops/cpu/pairwise.hpp" - -namespace functions { - namespace pairwise_transforms { - BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_2); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/pairwise_p3.cpp b/libnd4j/include/loops/cpu/compilation_units/pairwise_p3.cpp deleted file mode 100644 index f9dcf3519..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/pairwise_p3.cpp +++ /dev/null @@ -1,27 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// Created by raver119 on 23/09/18. -// - -#include "loops/cpu/pairwise.hpp" - -namespace functions { - namespace pairwise_transforms { - BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_3); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/pairwise_p4.cpp b/libnd4j/include/loops/cpu/compilation_units/pairwise_p4.cpp deleted file mode 100644 index a7b63427d..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/pairwise_p4.cpp +++ /dev/null @@ -1,27 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. 
- * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// Created by raver119 on 23/09/18. -// - -#include "loops/cpu/pairwise.hpp" - -namespace functions { - namespace pairwise_transforms { - BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_4); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/pairwise_p5.cpp b/libnd4j/include/loops/cpu/compilation_units/pairwise_p5.cpp deleted file mode 100644 index 3f8557ea9..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/pairwise_p5.cpp +++ /dev/null @@ -1,27 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// Created by raver119 on 23/09/18. -// - -#include "loops/cpu/pairwise.hpp" - -namespace functions { - namespace pairwise_transforms { - BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_5); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/pairwise_p6.cpp b/libnd4j/include/loops/cpu/compilation_units/pairwise_p6.cpp deleted file mode 100644 index 2b5dc9ed4..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/pairwise_p6.cpp +++ /dev/null @@ -1,27 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// Created by raver119 on 23/09/18. 
-// - -#include "loops/cpu/pairwise.hpp" - -namespace functions { - namespace pairwise_transforms { - BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_6); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/pairwise_p7.cpp b/libnd4j/include/loops/cpu/compilation_units/pairwise_p7.cpp deleted file mode 100644 index f5deef719..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/pairwise_p7.cpp +++ /dev/null @@ -1,27 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// Created by raver119 on 23/09/18. -// - -#include "loops/cpu/pairwise.hpp" - -namespace functions { - namespace pairwise_transforms { - BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_7); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/pairwise_p8.cpp b/libnd4j/include/loops/cpu/compilation_units/pairwise_p8.cpp deleted file mode 100644 index e2fa75bbb..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/pairwise_p8.cpp +++ /dev/null @@ -1,27 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// Created by raver119 on 23/09/18. -// - -#include "loops/cpu/pairwise.hpp" - -namespace functions { - namespace pairwise_transforms { - BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_8); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/pairwise_p9.cpp b/libnd4j/include/loops/cpu/compilation_units/pairwise_p9.cpp deleted file mode 100644 index eb3da276e..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/pairwise_p9.cpp +++ /dev/null @@ -1,27 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. 
- * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// Created by raver119 on 23/09/18. -// - -#include "loops/cpu/pairwise.hpp" - -namespace functions { - namespace pairwise_transforms { - BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_9); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/random_0.cpp b/libnd4j/include/loops/cpu/compilation_units/random.cpp.in similarity index 89% rename from libnd4j/include/loops/cpu/compilation_units/random_0.cpp rename to libnd4j/include/loops/cpu/compilation_units/random.cpp.in index 6424ccb6e..921532ac8 100644 --- a/libnd4j/include/loops/cpu/compilation_units/random_0.cpp +++ b/libnd4j/include/loops/cpu/compilation_units/random.cpp.in @@ -18,10 +18,10 @@ // @author raver119@gmail.com // -#include "../random.hpp" - +#include <loops/cpu/random.hpp> +#cmakedefine FLOAT_TYPE_GEN namespace functions { namespace random { - BUILD_SINGLE_TEMPLATE(template class ND4J_EXPORT RandomFunction, , FLOAT_TYPES_0); + BUILD_SINGLE_TEMPLATE(template class ND4J_EXPORT RandomFunction, , FLOAT_TYPES_@FL_TYPE_INDEX@); } } \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/random_1.cpp b/libnd4j/include/loops/cpu/compilation_units/random_1.cpp deleted file mode 100644 index 316d55bf6..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/random_1.cpp +++ /dev/null @@ -1,27 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../random.hpp" - -namespace functions { - namespace random { - BUILD_SINGLE_TEMPLATE(template class ND4J_EXPORT RandomFunction, , FLOAT_TYPES_1); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/random_2.cpp b/libnd4j/include/loops/cpu/compilation_units/random_2.cpp deleted file mode 100644 index 90d080b63..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/random_2.cpp +++ /dev/null @@ -1,27 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0.
- * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../random.hpp" - -namespace functions { - namespace random { - BUILD_SINGLE_TEMPLATE(template class ND4J_EXPORT RandomFunction, , FLOAT_TYPES_2); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/random_3.cpp b/libnd4j/include/loops/cpu/compilation_units/random_3.cpp deleted file mode 100644 index 97e5211e8..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/random_3.cpp +++ /dev/null @@ -1,27 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../random.hpp" - -namespace functions { - namespace random { - BUILD_SINGLE_TEMPLATE(template class ND4J_EXPORT RandomFunction, , FLOAT_TYPES_3); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_0.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16.cpp.in similarity index 89% rename from libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_0.cpp rename to libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16.cpp.in index 19483c1df..68616c3f9 100644 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_0.cpp +++ b/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16.cpp.in @@ -19,10 +19,10 @@ // @author raver119@gmail.com // -#include "../reduce3.hpp" - +#include <loops/cpu/reduce3.hpp> +#cmakedefine LIBND4J_TYPE_GEN namespace functions { namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_0, FLOAT_TYPES_3); + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_@FL_TYPE_INDEX@, FLOAT_TYPES_3); } } \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_4.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_4.cpp deleted file mode 100644 index 0802e11f4..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_4.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0.
- * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_4, FLOAT_TYPES_3); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_5.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_5.cpp deleted file mode 100644 index 87ec2d3f8..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_5.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_5, FLOAT_TYPES_3); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_6.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_6.cpp deleted file mode 100644 index 10dc7d69b..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_6.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_6, FLOAT_TYPES_3); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_7.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_7.cpp deleted file mode 100644 index 28ba56376..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_7.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_7, FLOAT_TYPES_3); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_8.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_8.cpp deleted file mode 100644 index 8087f6a07..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_8.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_8, FLOAT_TYPES_3); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_9.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_9.cpp deleted file mode 100644 index 4a5186cf0..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_9.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. 
- * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_9, FLOAT_TYPES_3); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_1.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_double.cpp.in similarity index 89% rename from libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_1.cpp rename to libnd4j/include/loops/cpu/compilation_units/reduce3_double.cpp.in index 88225bd85..5c722838d 100644 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_1.cpp +++ b/libnd4j/include/loops/cpu/compilation_units/reduce3_double.cpp.in @@ -19,10 +19,10 @@ // @author raver119@gmail.com // -#include "../reduce3.hpp" - +#include +#cmakedefine LIBND4J_TYPE_GEN namespace functions { namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_1, FLOAT_TYPES_3); + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_@FL_TYPE_INDEX@, FLOAT_TYPES_2); } } \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_double_0.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_double_0.cpp deleted file mode 100644 index 34172b4b3..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_double_0.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_0, FLOAT_TYPES_2); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_double_1.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_double_1.cpp deleted file mode 100644 index c2f7c7e9c..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_double_1.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_1, FLOAT_TYPES_2); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_double_2.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_double_2.cpp deleted file mode 100644 index 41c1dd679..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_double_2.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_2, FLOAT_TYPES_2); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_double_3.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_double_3.cpp deleted file mode 100644 index a44085232..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_double_3.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. 
- * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_3, FLOAT_TYPES_2); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_double_4.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_double_4.cpp deleted file mode 100644 index d346d175b..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_double_4.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_4, FLOAT_TYPES_2); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_double_5.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_double_5.cpp deleted file mode 100644 index 86cf48ff7..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_double_5.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_5, FLOAT_TYPES_2); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_double_6.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_double_6.cpp deleted file mode 100644 index 92f7ac39e..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_double_6.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_6, FLOAT_TYPES_2); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_double_7.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_double_7.cpp deleted file mode 100644 index eb216f89f..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_double_7.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_7, FLOAT_TYPES_2); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_double_8.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_double_8.cpp deleted file mode 100644 index d1e9f8c96..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_double_8.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. 
- * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_8, FLOAT_TYPES_2); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_double_9.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_double_9.cpp deleted file mode 100644 index fa00bde19..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_double_9.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_9, FLOAT_TYPES_2); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_2.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float.cpp.in similarity index 89% rename from libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_2.cpp rename to libnd4j/include/loops/cpu/compilation_units/reduce3_float.cpp.in index 7bed85c5d..ee127c2d9 100644 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_2.cpp +++ b/libnd4j/include/loops/cpu/compilation_units/reduce3_float.cpp.in @@ -19,10 +19,10 @@ // @author raver119@gmail.com // -#include "../reduce3.hpp" - +#include +#cmakedefine LIBND4J_TYPE_GEN namespace functions { namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_2, FLOAT_TYPES_3); + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_@FL_TYPE_INDEX@, FLOAT_TYPES_1); } } \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_3.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float16.cpp.in similarity index 89% rename from libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_3.cpp rename to libnd4j/include/loops/cpu/compilation_units/reduce3_float16.cpp.in index 87042d342..65c2b563a 100644 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_3.cpp +++ b/libnd4j/include/loops/cpu/compilation_units/reduce3_float16.cpp.in @@ -19,10 +19,10 @@ // @author raver119@gmail.com // -#include "../reduce3.hpp" - +#include +#cmakedefine LIBND4J_TYPE_GEN namespace functions { namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_3, FLOAT_TYPES_3); + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_@FL_TYPE_INDEX@, FLOAT_TYPES_0); } } \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_0.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_0.cpp deleted file mode 100644 index cb212b06b..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_0.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
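Within each template, two substitutions do the work: @FL_TYPE_INDEX@ is replaced by the loop index, and #cmakedefine LIBND4J_TYPE_GEN becomes #define LIBND4J_TYPE_GEN when the variable is set (or a commented-out #undef when it is not), which marks the unit as configure-time generated. A one-unit illustration, with values chosen only for the example:

    # Illustration: configure a single unit by hand. Afterwards the
    # template line 'LIBND4J_TYPES_@FL_TYPE_INDEX@, FLOAT_TYPES_0' reads
    # 'LIBND4J_TYPES_3, FLOAT_TYPES_0' in the generated .cpp file.
    set(FL_TYPE_INDEX 3)
    set(LIBND4J_TYPE_GEN ON)
    configure_file(reduce3_float16.cpp.in reduce3_float16_3.cpp @ONLY)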
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_0, FLOAT_TYPES_0); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_1.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_1.cpp deleted file mode 100644 index 4a7fdee8a..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_1.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_1, FLOAT_TYPES_0); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_2.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_2.cpp deleted file mode 100644 index aaafe1bae..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_2.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_2, FLOAT_TYPES_0); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_3.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_3.cpp deleted file mode 100644 index 9b8cf0c6a..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_3.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. 
- * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_3, FLOAT_TYPES_0); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_4.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_4.cpp deleted file mode 100644 index 4d02ffe53..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_4.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_4, FLOAT_TYPES_0); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_5.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_5.cpp deleted file mode 100644 index 88ce3e5e2..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_5.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_5, FLOAT_TYPES_0); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_6.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_6.cpp deleted file mode 100644 index 26d4df1dd..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_6.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_6, FLOAT_TYPES_0); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_7.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_7.cpp deleted file mode 100644 index 3b04f47aa..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_7.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_7, FLOAT_TYPES_0); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_8.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_8.cpp deleted file mode 100644 index c87090229..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_8.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. 
- * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_8, FLOAT_TYPES_0); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_9.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_9.cpp deleted file mode 100644 index d5acb3935..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_9.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_9, FLOAT_TYPES_0); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float_0.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float_0.cpp deleted file mode 100644 index e7e1fab61..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_float_0.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_0, FLOAT_TYPES_1); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float_1.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float_1.cpp deleted file mode 100644 index 98ccf8b35..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_float_1.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_1, FLOAT_TYPES_1); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float_2.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float_2.cpp deleted file mode 100644 index 6782d74ed..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_float_2.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_2, FLOAT_TYPES_1); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float_3.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float_3.cpp deleted file mode 100644 index 915b0ac0e..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_float_3.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. 
- * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_3, FLOAT_TYPES_1); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float_4.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float_4.cpp deleted file mode 100644 index d34e61181..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_float_4.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_4, FLOAT_TYPES_1); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float_5.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float_5.cpp deleted file mode 100644 index 89a8f164f..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_float_5.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_5, FLOAT_TYPES_1); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float_6.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float_6.cpp deleted file mode 100644 index 70e482b8b..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_float_6.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_6, FLOAT_TYPES_1); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float_7.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float_7.cpp deleted file mode 100644 index 88663cd7d..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_float_7.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_7, FLOAT_TYPES_1); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float_8.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float_8.cpp deleted file mode 100644 index d5399a4d8..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_float_8.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. 
- * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_8, FLOAT_TYPES_1); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float_9.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float_9.cpp deleted file mode 100644 index e27e7ab12..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_float_9.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_9, FLOAT_TYPES_1); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce_float_0.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce_float.cpp.in similarity index 87% rename from libnd4j/include/loops/cpu/compilation_units/reduce_float_0.cpp rename to libnd4j/include/loops/cpu/compilation_units/reduce_float.cpp.in index de4619f29..3837c7810 100644 --- a/libnd4j/include/loops/cpu/compilation_units/reduce_float_0.cpp +++ b/libnd4j/include/loops/cpu/compilation_units/reduce_float.cpp.in @@ -19,10 +19,10 @@ // @author raver119@gmail.com // -#include "../reduce/reduce_float.hpp" - +#include +#cmakedefine FLOAT_TYPE_GEN namespace functions { namespace reduce { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT ReduceFloatFunction, , LIBND4J_TYPES, FLOAT_TYPES_0); + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT ReduceFloatFunction, , LIBND4J_TYPES, FLOAT_TYPES_@FL_TYPE_INDEX@); } } diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce_float_2.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce_float_2.cpp deleted file mode 100644 index 8cc2795a4..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce_float_2.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce/reduce_float.hpp" - -namespace functions { - namespace reduce { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT ReduceFloatFunction, , LIBND4J_TYPES, FLOAT_TYPES_2); - } -} diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce_float_3.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce_float_3.cpp deleted file mode 100644 index 0b94831c3..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce_float_3.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce/reduce_float.hpp" - -namespace functions { - namespace reduce { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT ReduceFloatFunction, , LIBND4J_TYPES, FLOAT_TYPES_3); - } -} diff --git a/libnd4j/include/loops/cpu/compilation_units/scalar_p0.cpp b/libnd4j/include/loops/cpu/compilation_units/scalar_p.cpp.in similarity index 89% rename from libnd4j/include/loops/cpu/compilation_units/scalar_p0.cpp rename to libnd4j/include/loops/cpu/compilation_units/scalar_p.cpp.in index 32f670f46..dc024170d 100644 --- a/libnd4j/include/loops/cpu/compilation_units/scalar_p0.cpp +++ b/libnd4j/include/loops/cpu/compilation_units/scalar_p.cpp.in @@ -18,10 +18,10 @@ // Created by raver on 9/28/2018. // -#include "../scalar.hpp" - +#include +#cmakedefine PAIRWISE_TYPE_GEN namespace functions { namespace scalar { - BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_0); + BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_@FL_TYPE_INDEX@); } } \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/scalar_p1.cpp b/libnd4j/include/loops/cpu/compilation_units/scalar_p1.cpp deleted file mode 100644 index 5146d70bd..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/scalar_p1.cpp +++ /dev/null @@ -1,27 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// Created by raver on 9/28/2018. -// - -#include "../scalar.hpp" - -namespace functions { - namespace scalar { - BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_1); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/scalar_p10.cpp b/libnd4j/include/loops/cpu/compilation_units/scalar_p10.cpp deleted file mode 100644 index 7175a8603..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/scalar_p10.cpp +++ /dev/null @@ -1,27 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
diff --git a/libnd4j/include/loops/cpu/compilation_units/scalar_p10.cpp b/libnd4j/include/loops/cpu/compilation_units/scalar_p10.cpp
deleted file mode 100644
index 7175a8603..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/scalar_p10.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// Created by raver on 9/28/2018.
-//
-
-#include "../scalar.hpp"
-
-namespace functions {
-    namespace scalar {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_10);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/scalar_p11.cpp b/libnd4j/include/loops/cpu/compilation_units/scalar_p11.cpp
deleted file mode 100644
index a6b7bafac..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/scalar_p11.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// Created by raver on 9/28/2018.
-//
-
-#include "../scalar.hpp"
-
-namespace functions {
-    namespace scalar {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_11);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/scalar_p12.cpp b/libnd4j/include/loops/cpu/compilation_units/scalar_p12.cpp
deleted file mode 100644
index 69cbeb7ff..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/scalar_p12.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// Created by raver on 9/28/2018.
-//
-
-#include "../scalar.hpp"
-
-namespace functions {
-    namespace scalar {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_12);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/scalar_p2.cpp b/libnd4j/include/loops/cpu/compilation_units/scalar_p2.cpp
deleted file mode 100644
index 1e0f25909..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/scalar_p2.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// Created by raver on 9/28/2018.
-//
-
-#include "../scalar.hpp"
-
-namespace functions {
-    namespace scalar {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_2);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/scalar_p3.cpp b/libnd4j/include/loops/cpu/compilation_units/scalar_p3.cpp
deleted file mode 100644
index e4f2c6457..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/scalar_p3.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// Created by raver on 9/28/2018.
-//
-
-#include "../scalar.hpp"
-
-namespace functions {
-    namespace scalar {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_3);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/scalar_p4.cpp b/libnd4j/include/loops/cpu/compilation_units/scalar_p4.cpp
deleted file mode 100644
index daabf9325..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/scalar_p4.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// Created by raver on 9/28/2018.
-//
-
-#include "../scalar.hpp"
-
-namespace functions {
-    namespace scalar {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_4);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/scalar_p5.cpp b/libnd4j/include/loops/cpu/compilation_units/scalar_p5.cpp
deleted file mode 100644
index cadad858e..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/scalar_p5.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// Created by raver on 9/28/2018.
-//
-
-#include "../scalar.hpp"
-
-namespace functions {
-    namespace scalar {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_5);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/scalar_p6.cpp b/libnd4j/include/loops/cpu/compilation_units/scalar_p6.cpp
deleted file mode 100644
index 7e56f65c7..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/scalar_p6.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// Created by raver on 9/28/2018.
-//
-
-#include "../scalar.hpp"
-
-namespace functions {
-    namespace scalar {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_6);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/scalar_p7.cpp b/libnd4j/include/loops/cpu/compilation_units/scalar_p7.cpp
deleted file mode 100644
index 85cedcecd..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/scalar_p7.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// Created by raver on 9/28/2018.
-//
-
-#include "../scalar.hpp"
-
-namespace functions {
-    namespace scalar {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_7);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/scalar_p8.cpp b/libnd4j/include/loops/cpu/compilation_units/scalar_p8.cpp
deleted file mode 100644
index d593889b8..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/scalar_p8.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// Created by raver on 9/28/2018.
-//
-
-#include "../scalar.hpp"
-
-namespace functions {
-    namespace scalar {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_8);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/scalar_p9.cpp b/libnd4j/include/loops/cpu/compilation_units/scalar_p9.cpp
deleted file mode 100644
index 14eb788d7..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/scalar_p9.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// Created by raver on 9/28/2018.
-//
-
-#include "../scalar.hpp"
-
-namespace functions {
-    namespace scalar {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_9);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_0.cu b/libnd4j/include/loops/cuda/compilation_units/broadcasting.cu.in
similarity index 89%
rename from libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_0.cu
rename to libnd4j/include/loops/cuda/compilation_units/broadcasting.cu.in
index d7902af87..6349dcfc9 100644
--- a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_0.cu
+++ b/libnd4j/include/loops/cuda/compilation_units/broadcasting.cu.in
@@ -18,10 +18,10 @@
 // @author raver119@gmail.com
 //
 
-#include "../../broadcasting.chpp"
-
+#include
+#cmakedefine PAIRWISE_TYPE_GEN
 namespace functions {
     namespace broadcast {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_0);
+        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_@FL_TYPE_INDEX@);
     }
 }
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_1.cu b/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_1.cu
deleted file mode 100644
index b24ebdb6c..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_1.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../broadcasting.chpp"
-
-namespace functions {
-    namespace broadcast {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_1);
-    }
-}
\ No newline at end of file
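Splitting the instantiations across PAIRWISE_TYPES_0 through PAIRWISE_TYPES_12 is a compile-time tactic: each translation unit stays small enough to bound per-unit compiler memory, and the thirteen units can build in parallel. The C++ mechanism underneath is pairing extern template declarations (suppress instantiation here) with explicit instantiation definitions (emit it here). A minimal sketch, with ToyBroadcast as a hypothetical stand-in rather than libnd4j's actual class:

```cpp
// split_instantiations.cpp -- why instantiations can be sharded across units.
#include <iostream>

template <typename X>
struct ToyBroadcast {
    static X apply(X a, X b) { return a > b ? a : b; }
};

// In a shared header, this tells every other unit NOT to instantiate the class:
extern template struct ToyBroadcast<double>;

// Exactly one unit (e.g. the one generated for chunk 0) provides the definition:
template struct ToyBroadcast<double>;

int main() {
    std::cout << ToyBroadcast<double>::apply(1.5, 2.5) << "\n"; // prints 2.5
    return 0;
}
```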
diff --git a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_10.cu b/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_10.cu
deleted file mode 100644
index 4d19a893c..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_10.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../broadcasting.chpp"
-
-namespace functions {
-    namespace broadcast {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_10);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_11.cu b/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_11.cu
deleted file mode 100644
index 8b643965b..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_11.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../broadcasting.chpp"
-
-namespace functions {
-    namespace broadcast {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_11);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_12.cu b/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_12.cu
deleted file mode 100644
index 935297a53..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_12.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../broadcasting.chpp"
-
-namespace functions {
-    namespace broadcast {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_12);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_2.cu b/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_2.cu
deleted file mode 100644
index 7d7fdc1b6..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_2.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../broadcasting.chpp"
-
-namespace functions {
-    namespace broadcast {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_2);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_3.cu b/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_3.cu
deleted file mode 100644
index d5c09f114..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_3.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../broadcasting.chpp"
-
-namespace functions {
-    namespace broadcast {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_3);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_4.cu b/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_4.cu
deleted file mode 100644
index f3c64a91a..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_4.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../broadcasting.chpp"
-
-namespace functions {
-    namespace broadcast {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_4);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_5.cu b/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_5.cu
deleted file mode 100644
index 5ca557a30..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_5.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../broadcasting.chpp"
-
-namespace functions {
-    namespace broadcast {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_5);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_6.cu b/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_6.cu
deleted file mode 100644
index 9c53e8b36..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_6.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../broadcasting.chpp"
-
-namespace functions {
-    namespace broadcast {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_6);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_7.cu b/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_7.cu
deleted file mode 100644
index a64b6f0d3..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_7.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../broadcasting.chpp"
-
-namespace functions {
-    namespace broadcast {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_7);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_8.cu b/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_8.cu
deleted file mode 100644
index 4404fed7c..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_8.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../broadcasting.chpp"
-
-namespace functions {
-    namespace broadcast {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_8);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_9.cu b/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_9.cu
deleted file mode 100644
index dbb560f5c..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_9.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../broadcasting.chpp"
-
-namespace functions {
-    namespace broadcast {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_9);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_0.cu b/libnd4j/include/loops/cuda/compilation_units/pairwise.cu.in
similarity index 88%
rename from libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_0.cu
rename to libnd4j/include/loops/cuda/compilation_units/pairwise.cu.in
index e57433ae2..312ed7416 100644
--- a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_0.cu
+++ b/libnd4j/include/loops/cuda/compilation_units/pairwise.cu.in
@@ -18,10 +18,10 @@
 // @author raver119@gmail.com
 //
 
-#include "../../pairwise.chpp"
-
+#include
+#cmakedefine PAIRWISE_TYPE_GEN
 namespace functions {
     namespace pairwise_transforms {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_0);
+        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_@FL_TYPE_INDEX@);
     }
 }
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_1.cu b/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_1.cu
deleted file mode 100644
index 513a2c056..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_1.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../pairwise.chpp"
-
-namespace functions {
-    namespace pairwise_transforms {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_1);
-    }
-}
\ No newline at end of file
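A dispatch layer still has to route a runtime dtype to the matching instantiation; libnd4j does this with selector macros built over the same type lists, so the chunked units and the dispatcher always agree on which specializations exist. The sketch below shows the idea with a plain switch over a hypothetical dtype enum; the names are illustrative, not libnd4j's API.

```cpp
// dtype_dispatch.cpp -- generic sketch of enum-to-template routing.
#include <iostream>

enum class ToyDType { FLOAT32, INT32 };

template <typename X>
struct ToyPairWiseTransform {
    // Doubles the input value; stands in for a real pairwise kernel.
    static void exec(const void* in, void* out) {
        *static_cast<X*>(out) = *static_cast<const X*>(in) * X(2);
    }
};

void execByDType(ToyDType t, const void* in, void* out) {
    switch (t) {
        case ToyDType::FLOAT32: ToyPairWiseTransform<float>::exec(in, out); break;
        case ToyDType::INT32:   ToyPairWiseTransform<int>::exec(in, out);   break;
    }
}

int main() {
    float in = 3.5f, out = 0.0f;
    execByDType(ToyDType::FLOAT32, &in, &out);
    std::cout << out << "\n"; // prints 7
    return 0;
}
```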
diff --git a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_10.cu b/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_10.cu
deleted file mode 100644
index fac835b18..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_10.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../pairwise.chpp"
-
-namespace functions {
-    namespace pairwise_transforms {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_10);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_11.cu b/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_11.cu
deleted file mode 100644
index f01ef7eb3..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_11.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../pairwise.chpp"
-
-namespace functions {
-    namespace pairwise_transforms {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_11);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_12.cu b/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_12.cu
deleted file mode 100644
index 8cf8c367f..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_12.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../pairwise.chpp"
-
-namespace functions {
-    namespace pairwise_transforms {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_12);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_2.cu b/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_2.cu
deleted file mode 100644
index 8e0261d14..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_2.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../pairwise.chpp"
-
-namespace functions {
-    namespace pairwise_transforms {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_2);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_3.cu b/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_3.cu
deleted file mode 100644
index 86c23344a..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_3.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../pairwise.chpp"
-
-namespace functions {
-    namespace pairwise_transforms {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_3);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_4.cu b/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_4.cu
deleted file mode 100644
index 1ac28891f..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_4.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../pairwise.chpp"
-
-namespace functions {
-    namespace pairwise_transforms {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_4);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_5.cu b/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_5.cu
deleted file mode 100644
index 713fe344c..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_5.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../pairwise.chpp"
-
-namespace functions {
-    namespace pairwise_transforms {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_5);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_6.cu b/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_6.cu
deleted file mode 100644
index 0983be1e9..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_6.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../pairwise.chpp"
-
-namespace functions {
-    namespace pairwise_transforms {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_6);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_7.cu b/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_7.cu
deleted file mode 100644
index b12d82eac..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_7.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../pairwise.chpp"
-
-namespace functions {
-    namespace pairwise_transforms {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_7);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_8.cu b/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_8.cu
deleted file mode 100644
index fc1876f3d..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_8.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../pairwise.chpp"
-
-namespace functions {
-    namespace pairwise_transforms {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_8);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_9.cu b/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_9.cu
deleted file mode 100644
index f13c28e85..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_9.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../pairwise.chpp"
-
-namespace functions {
-    namespace pairwise_transforms {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_9);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/reduce3.cu.in b/libnd4j/include/loops/cuda/compilation_units/reduce3.cu.in
new file mode 100644
index 000000000..dd7472836
--- /dev/null
+++ b/libnd4j/include/loops/cuda/compilation_units/reduce3.cu.in
@@ -0,0 +1,27 @@
+/*******************************************************************************
+ * Copyright (c) 2015-2018 Skymind, Inc.
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Apache License, Version 2.0 which is available at
+ * https://www.apache.org/licenses/LICENSE-2.0.
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ ******************************************************************************/
+
+//
+// @author raver119@gmail.com
+//
+
+#include
+#cmakedefine FLOAT_TYPE_GEN
+namespace functions {
+    namespace reduce3 {
+        BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES, FLOAT_TYPES_@FL_TYPE_INDEX@);
+    }
+}
\ No newline at end of file
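The reduce3 template differs from the pairwise ones in one respect: BUILD_DOUBLE_TEMPLATE instantiates Reduce3<X, Y> over a cross product, every X in LIBND4J_TYPES against every Y in the FLOAT_TYPES_N chunk, so each generated unit covers a slice of a two-dimensional grid rather than a flat list. A self-contained model of that expansion, with hypothetical two-entry lists standing in for the real ones:

```cpp
// toy_double_template.cpp -- two-axis instantiation, illustrative only.
#include <cstdio>

// Inner axis: every X type, with the Y type threaded through.
#define TOY_LIBND4J_TYPES(CB, Y) CB(float, Y) CB(int, Y)
// Outer axis: a FLOAT_TYPES-style chunk of Y types.
#define TOY_FLOAT_TYPES_0(CB) TOY_LIBND4J_TYPES(CB, float) TOY_LIBND4J_TYPES(CB, double)

template <typename X, typename Y>
struct ToyReduce3 {
    static Y dot(const X* a, const X* b, int n) {
        Y s = Y(0);
        for (int i = 0; i < n; ++i) s += static_cast<Y>(a[i]) * static_cast<Y>(b[i]);
        return s;
    }
};

#define TOY_INSTANTIATE(X, Y) template struct ToyReduce3<X, Y>;
TOY_FLOAT_TYPES_0(TOY_INSTANTIATE) // emits the 2x2 cross product of instantiations

int main() {
    float a[] = {1.0f, 2.0f}, b[] = {3.0f, 4.0f};
    std::printf("%f\n", ToyReduce3<float, double>::dot(a, b, 2)); // prints 11.000000
    return 0;
}
```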
diff --git a/libnd4j/include/loops/cuda/compilation_units/reduce3/reduce3_0.cu b/libnd4j/include/loops/cuda/compilation_units/reduce3/reduce3_0.cu
deleted file mode 100644
index d3aeadb5f..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/reduce3/reduce3_0.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../reduce3.chpp"
-
-namespace functions {
-    namespace reduce3 {
-        BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES, FLOAT_TYPES_0);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/reduce3/reduce3_1.cu b/libnd4j/include/loops/cuda/compilation_units/reduce3/reduce3_1.cu
deleted file mode 100644
index cfc7cb5f3..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/reduce3/reduce3_1.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../reduce3.chpp"
-
-namespace functions {
-    namespace reduce3 {
-        BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES, FLOAT_TYPES_1);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/reduce3/reduce3_2.cu b/libnd4j/include/loops/cuda/compilation_units/reduce3/reduce3_2.cu
deleted file mode 100644
index 754ac9f52..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/reduce3/reduce3_2.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../reduce3.chpp"
-
-namespace functions {
-    namespace reduce3 {
-        BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES, FLOAT_TYPES_2);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/reduce3/reduce3_3.cu b/libnd4j/include/loops/cuda/compilation_units/reduce3/reduce3_3.cu
deleted file mode 100644
index 340698b34..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/reduce3/reduce3_3.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../reduce3.chpp"
-
-namespace functions {
-    namespace reduce3 {
-        BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES, FLOAT_TYPES_3);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce_float_1.cpp b/libnd4j/include/loops/cuda/compilation_units/reduce_float.cu.in
similarity index 86%
rename from libnd4j/include/loops/cpu/compilation_units/reduce_float_1.cpp
rename to libnd4j/include/loops/cuda/compilation_units/reduce_float.cu.in
index bfa88bc3b..34c2bf8ca 100644
--- a/libnd4j/include/loops/cpu/compilation_units/reduce_float_1.cpp
+++ b/libnd4j/include/loops/cuda/compilation_units/reduce_float.cu.in
@@ -1,6 +1,5 @@
 /*******************************************************************************
  * Copyright (c) 2015-2018 Skymind, Inc.
- * Copyright (c) 2019 Konduit K.K.
  *
  * This program and the accompanying materials are made available under the
  * terms of the Apache License, Version 2.0 which is available at
@@ -19,10 +18,10 @@
 // @author raver119@gmail.com
 //
 
-#include "../reduce/reduce_float.hpp"
-
+#include
+#cmakedefine FLOAT_TYPE_GEN
 namespace functions {
     namespace reduce {
-        BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT ReduceFloatFunction, , LIBND4J_TYPES, FLOAT_TYPES_1);
+        BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT ReduceFloatFunction, , LIBND4J_TYPES, FLOAT_TYPES_@FL_TYPE_INDEX@);
     }
-}
+}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/reduce_float/reduce_float_0.cu b/libnd4j/include/loops/cuda/compilation_units/reduce_float/reduce_float_0.cu
deleted file mode 100644
index dd893939d..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/reduce_float/reduce_float_0.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../reduce/reduce_float.chpp"
-
-namespace functions {
-    namespace reduce {
-        BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT ReduceFloatFunction, , LIBND4J_TYPES, FLOAT_TYPES_0);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/reduce_float/reduce_float_1.cu b/libnd4j/include/loops/cuda/compilation_units/reduce_float/reduce_float_1.cu
deleted file mode 100644
index 4d98cb61c..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/reduce_float/reduce_float_1.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../reduce/reduce_float.chpp"
-
-namespace functions {
-    namespace reduce {
-        BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT ReduceFloatFunction, , LIBND4J_TYPES, FLOAT_TYPES_1);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/reduce_float/reduce_float_2.cu b/libnd4j/include/loops/cuda/compilation_units/reduce_float/reduce_float_2.cu
deleted file mode 100644
index 346627563..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/reduce_float/reduce_float_2.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../reduce/reduce_float.chpp"
-
-namespace functions {
-    namespace reduce {
-        BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT ReduceFloatFunction, , LIBND4J_TYPES, FLOAT_TYPES_2);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/reduce_float/reduce_float_3.cu b/libnd4j/include/loops/cuda/compilation_units/reduce_float/reduce_float_3.cu
deleted file mode 100644
index 2852063ad..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/reduce_float/reduce_float_3.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../reduce/reduce_float.chpp"
-
-namespace functions {
-    namespace reduce {
-        BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT ReduceFloatFunction, , LIBND4J_TYPES, FLOAT_TYPES_3);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_0.cu b/libnd4j/include/loops/cuda/compilation_units/scalar.cu.in
similarity index 89%
rename from libnd4j/include/loops/cuda/compilation_units/scalar/scalar_0.cu
rename to libnd4j/include/loops/cuda/compilation_units/scalar.cu.in
index 28f754b14..15608bdd1 100644
--- a/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_0.cu
+++ b/libnd4j/include/loops/cuda/compilation_units/scalar.cu.in
@@ -18,10 +18,10 @@
 // @author raver119@gmail.com
 //
 
-#include "../../scalar.chpp"
-
+#include
+#cmakedefine PAIRWISE_TYPE_GEN
 namespace functions {
     namespace scalar {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_0);
+        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_@FL_TYPE_INDEX@);
     }
 }
\ No newline at end of file
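A note on the #cmakedefine lines in these templates: when CMake configures the file, "#cmakedefine PAIRWISE_TYPE_GEN" becomes "#define PAIRWISE_TYPE_GEN" if the variable is set and "/* #undef PAIRWISE_TYPE_GEN */" otherwise; that much is standard configure_file() behavior. The hunks shown here do not reveal what consumes the macro, so the guard below is only a guess at how such a flag could compile a disabled chunk down to nothing.

```cpp
// cmakedefine_demo.cpp -- simulating the configured output of '#cmakedefine'.
#include <cstdio>

#define PAIRWISE_TYPE_GEN // stands in for the configured result when the chunk is enabled

int main() {
#ifdef PAIRWISE_TYPE_GEN
    // Hypothetical: with the flag defined, a generated unit would emit its instantiations.
    std::puts("chunk enabled: instantiations emitted");
#else
    std::puts("chunk disabled: unit compiles to nothing");
#endif
    return 0;
}
```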
See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../../scalar.chpp" - -namespace functions { - namespace scalar { - BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_1); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_10.cu b/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_10.cu deleted file mode 100644 index e06cad235..000000000 --- a/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_10.cu +++ /dev/null @@ -1,27 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../../scalar.chpp" - -namespace functions { - namespace scalar { - BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_10); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_11.cu b/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_11.cu deleted file mode 100644 index 3c5549339..000000000 --- a/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_11.cu +++ /dev/null @@ -1,27 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../../scalar.chpp" - -namespace functions { - namespace scalar { - BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_11); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_12.cu b/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_12.cu deleted file mode 100644 index 7f7f74156..000000000 --- a/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_12.cu +++ /dev/null @@ -1,27 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. 
- * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../../scalar.chpp" - -namespace functions { - namespace scalar { - BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_12); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_2.cu b/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_2.cu deleted file mode 100644 index af2de5b0e..000000000 --- a/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_2.cu +++ /dev/null @@ -1,27 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../../scalar.chpp" - -namespace functions { - namespace scalar { - BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_2); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_3.cu b/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_3.cu deleted file mode 100644 index a50cee507..000000000 --- a/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_3.cu +++ /dev/null @@ -1,27 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../../scalar.chpp" - -namespace functions { - namespace scalar { - BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_3); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_4.cu b/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_4.cu deleted file mode 100644 index 7f99764d8..000000000 --- a/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_4.cu +++ /dev/null @@ -1,27 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../../scalar.chpp" - -namespace functions { - namespace scalar { - BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_4); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_5.cu b/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_5.cu deleted file mode 100644 index 10e93e14c..000000000 --- a/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_5.cu +++ /dev/null @@ -1,27 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../../scalar.chpp" - -namespace functions { - namespace scalar { - BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_5); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_6.cu b/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_6.cu deleted file mode 100644 index a1a98cf41..000000000 --- a/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_6.cu +++ /dev/null @@ -1,27 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. 
- * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../../scalar.chpp" - -namespace functions { - namespace scalar { - BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_6); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_7.cu b/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_7.cu deleted file mode 100644 index f29d26c44..000000000 --- a/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_7.cu +++ /dev/null @@ -1,27 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../../scalar.chpp" - -namespace functions { - namespace scalar { - BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_7); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_8.cu b/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_8.cu deleted file mode 100644 index 38d275b6f..000000000 --- a/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_8.cu +++ /dev/null @@ -1,27 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../../scalar.chpp" - -namespace functions { - namespace scalar { - BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_8); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_9.cu b/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_9.cu deleted file mode 100644 index be7c66956..000000000 --- a/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_9.cu +++ /dev/null @@ -1,27 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../../scalar.chpp" - -namespace functions { - namespace scalar { - BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_9); - } -} \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argamax.cpp.in b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argamax.cpp.in index 533a94aab..3cefacb37 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argamax.cpp.in +++ b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argamax.cpp.in @@ -19,10 +19,18 @@ #include + +#cmakedefine LIBND4J_TYPE_GEN + +#if defined(PAIRWISE_TYPE_GEN) || defined(INT_TYPE_GEN) || defined(FLOAT_TYPE_GEN) || defined(LIBND4J_TYPE_GEN) namespace sd { namespace ops { namespace helpers { +#if defined(LIBND4J_TYPE_GEN) BUILD_DOUBLE_TEMPLATE(template void argAbsMax_, (const NDArray& input, NDArray& output, const std::vector& dimensions), LIBND4J_TYPES_@FL_TYPE_INDEX@, INDEXING_TYPES); +#endif + } } -} \ No newline at end of file +} +#endif diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argamin.cpp.in b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argamin.cpp.in index 4f7c78505..9de76d1de 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argamin.cpp.in +++ b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argamin.cpp.in @@ -16,6 +16,7 @@ // // @author AbdelRauf // +#cmakedefine LIBND4J_TYPE_GEN #include diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argmax.cpp.in b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argmax.cpp.in index 770f155f4..112a91f9f 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argmax.cpp.in +++ b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argmax.cpp.in @@ -16,6 +16,7 @@ // // @author AbdelRauf // +#cmakedefine LIBND4J_TYPE_GEN #include diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argmin.cpp.in 
b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argmin.cpp.in index 0149b890e..ff8ba1bf2 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argmin.cpp.in +++ b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argmin.cpp.in @@ -17,6 +17,8 @@ // @author AbdelRauf // +#cmakedefine LIBND4J_TYPE_GEN + #include namespace sd { diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_3.cpp b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize.cpp.in similarity index 84% rename from libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_3.cpp rename to libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize.cpp.in index 11175a02d..b0cdafebd 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_3.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize.cpp.in @@ -1,5 +1,6 @@ /******************************************************************************* * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2020 Konduit K.K. * * This program and the accompanying materials are made available under the * terms of the Apache License, Version 2.0 which is available at @@ -19,12 +20,14 @@ // #include -#include "ops/declarable/helpers/cpu/crop_and_resize.hpp" +#include + +#cmakedefine LIBND4J_TYPE_GEN namespace sd { namespace ops { namespace helpers { - BUILD_TRIPLE_TEMPLATE(template void cropAndResizeFunctor_, (NDArray const *images, NDArray const *boxes, NDArray const *indices, NDArray const *cropSize, int method, double extrapolationVal, NDArray *crops), NUMERIC_TYPES_3, FLOAT_TYPES, INTEGER_TYPES); + BUILD_TRIPLE_TEMPLATE(template void cropAndResizeFunctor_, (NDArray const *images, NDArray const *boxes, NDArray const *indices, NDArray const *cropSize, int method, double extrapolationVal, NDArray *crops), LIBND4J_TYPES_@FL_TYPE_INDEX@, FLOAT_TYPES, INTEGER_TYPES); } } } \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_0.cpp b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_0.cpp deleted file mode 100644 index 22258266b..000000000 --- a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_0.cpp +++ /dev/null @@ -1,30 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
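The argamax/argamin/argmax/argmin units above pair the #cmakedefine with a preprocessor guard, so a generated file compiles to an empty translation unit unless its *_TYPE_GEN flag was defined for that unit. A hypothetical generated argamax unit for index 5 is sketched below; the include target and the std::vector element type (int) are assumptions, since both are elided in the hunks above, while the guard and the BUILD_DOUBLE_TEMPLATE line are copied from the argamax hunk.

// Hypothetical argamax_5.cpp as emitted by configure_file() with
// FL_TYPE_INDEX = 5 and LIBND4J_TYPE_GEN enabled. The include path and the
// vector element type are assumptions; the guard and the template line
// mirror the argamax.cpp.in hunk above.
#include <ops/declarable/helpers/reductions.h>   // assumed target

#define LIBND4J_TYPE_GEN

#if defined(PAIRWISE_TYPE_GEN) || defined(INT_TYPE_GEN) || defined(FLOAT_TYPE_GEN) || defined(LIBND4J_TYPE_GEN)
namespace sd {
    namespace ops {
        namespace helpers {
#if defined(LIBND4J_TYPE_GEN)
            BUILD_DOUBLE_TEMPLATE(template void argAbsMax_, (const NDArray& input, NDArray& output, const std::vector<int>& dimensions), LIBND4J_TYPES_5, INDEXING_TYPES);
#endif
        }
    }
}
#endif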
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author sgazeos@gmail.com -// - -#include -#include "ops/declarable/helpers/cpu/crop_and_resize.hpp" - -namespace sd { - namespace ops { - namespace helpers { - BUILD_TRIPLE_TEMPLATE(template void cropAndResizeFunctor_, (NDArray const *images, NDArray const *boxes, NDArray const *indices, NDArray const *cropSize, int method, double extrapolationVal, NDArray *crops), NUMERIC_TYPES_0, FLOAT_TYPES, INTEGER_TYPES); - } - } -} \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_1.cpp b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_1.cpp deleted file mode 100644 index f2b891d5e..000000000 --- a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_1.cpp +++ /dev/null @@ -1,30 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author sgazeos@gmail.com -// - -#include -#include "ops/declarable/helpers/cpu/crop_and_resize.hpp" - -namespace sd { - namespace ops { - namespace helpers { - BUILD_TRIPLE_TEMPLATE(template void cropAndResizeFunctor_, (NDArray const *images, NDArray const *boxes, NDArray const *indices, NDArray const *cropSize, int method, double extrapolationVal, NDArray *crops), NUMERIC_TYPES_1, FLOAT_TYPES, INTEGER_TYPES); - } - } -} \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_2.cpp b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_2.cpp deleted file mode 100644 index c475d994c..000000000 --- a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_2.cpp +++ /dev/null @@ -1,30 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author sgazeos@gmail.com -// - -#include -#include "ops/declarable/helpers/cpu/crop_and_resize.hpp" - -namespace sd { - namespace ops { - namespace helpers { - BUILD_TRIPLE_TEMPLATE(template void cropAndResizeFunctor_, (NDArray const *images, NDArray const *boxes, NDArray const *indices, NDArray const *cropSize, int method, double extrapolationVal, NDArray *crops), NUMERIC_TYPES_2, FLOAT_TYPES, INTEGER_TYPES); - } - } -} \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_4.cpp b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_4.cpp deleted file mode 100644 index cea328084..000000000 --- a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_4.cpp +++ /dev/null @@ -1,30 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author sgazeos@gmail.com -// - -#include -#include "ops/declarable/helpers/cpu/crop_and_resize.hpp" - -namespace sd { - namespace ops { - namespace helpers { - BUILD_TRIPLE_TEMPLATE(template void cropAndResizeFunctor_, (NDArray const *images, NDArray const *boxes, NDArray const *indices, NDArray const *cropSize, int method, double extrapolationVal, NDArray *crops), NUMERIC_TYPES_4, FLOAT_TYPES, INTEGER_TYPES); - } - } -} \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_5.cpp b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_5.cpp deleted file mode 100644 index 81bb8e897..000000000 --- a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_5.cpp +++ /dev/null @@ -1,30 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author sgazeos@gmail.com -// - -#include -#include "ops/declarable/helpers/cpu/crop_and_resize.hpp" - -namespace sd { - namespace ops { - namespace helpers { - BUILD_TRIPLE_TEMPLATE(template void cropAndResizeFunctor_, (NDArray const *images, NDArray const *boxes, NDArray const *indices, NDArray const *cropSize, int method, double extrapolationVal, NDArray *crops), NUMERIC_TYPES_5, FLOAT_TYPES, INTEGER_TYPES); - } - } -} \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_6.cpp b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_6.cpp deleted file mode 100644 index 415ab39e2..000000000 --- a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_6.cpp +++ /dev/null @@ -1,30 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author sgazeos@gmail.com -// - -#include -#include "ops/declarable/helpers/cpu/crop_and_resize.hpp" - -namespace sd { - namespace ops { - namespace helpers { - BUILD_TRIPLE_TEMPLATE(template void cropAndResizeFunctor_, (NDArray const *images, NDArray const *boxes, NDArray const *indices, NDArray const *cropSize, int method, double extrapolationVal, NDArray *crops), NUMERIC_TYPES_6, FLOAT_TYPES, INTEGER_TYPES); - } - } -} \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_7.cpp b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_7.cpp deleted file mode 100644 index 47d16e6db..000000000 --- a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_7.cpp +++ /dev/null @@ -1,30 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author sgazeos@gmail.com -// - -#include -#include "ops/declarable/helpers/cpu/crop_and_resize.hpp" - -namespace sd { - namespace ops { - namespace helpers { - BUILD_TRIPLE_TEMPLATE(template void cropAndResizeFunctor_, (NDArray const *images, NDArray const *boxes, NDArray const *indices, NDArray const *cropSize, int method, double extrapolationVal, NDArray *crops), NUMERIC_TYPES_7, FLOAT_TYPES, INTEGER_TYPES); - } - } -} \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_8.cpp b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_8.cpp deleted file mode 100644 index 902ade68c..000000000 --- a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_8.cpp +++ /dev/null @@ -1,30 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author sgazeos@gmail.com -// - -#include -#include "ops/declarable/helpers/cpu/crop_and_resize.hpp" - -namespace sd { - namespace ops { - namespace helpers { - BUILD_TRIPLE_TEMPLATE(template void cropAndResizeFunctor_, (NDArray const *images, NDArray const *boxes, NDArray const *indices, NDArray const *cropSize, int method, double extrapolationVal, NDArray *crops), NUMERIC_TYPES_8, FLOAT_TYPES, INTEGER_TYPES); - } - } -} \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_9.cpp b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_9.cpp deleted file mode 100644 index 559564903..000000000 --- a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_9.cpp +++ /dev/null @@ -1,30 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author sgazeos@gmail.com -// - -#include -#include "ops/declarable/helpers/cpu/crop_and_resize.hpp" - -namespace sd { - namespace ops { - namespace helpers { - BUILD_TRIPLE_TEMPLATE(template void cropAndResizeFunctor_, (NDArray const *images, NDArray const *boxes, NDArray const *indices, NDArray const *cropSize, int method, double extrapolationVal, NDArray *crops), NUMERIC_TYPES_9, FLOAT_TYPES, INTEGER_TYPES); - } - } -} \ No newline at end of file diff --git a/libnd4j/include/ops/impl/compilation_units/specials_double_0.cpp b/libnd4j/include/ops/impl/compilation_units/specials_double.cpp.in similarity index 86% rename from libnd4j/include/ops/impl/compilation_units/specials_double_0.cpp rename to libnd4j/include/ops/impl/compilation_units/specials_double.cpp.in index e9d262f58..00e0883f7 100644 --- a/libnd4j/include/ops/impl/compilation_units/specials_double_0.cpp +++ b/libnd4j/include/ops/impl/compilation_units/specials_double.cpp.in @@ -19,10 +19,10 @@ // @author raver119@gmail.com // -#include "../specials_double.hpp" - +#include +#cmakedefine LIBND4J_TYPE_GEN namespace sd { - BUILD_DOUBLE_TEMPLATE(template class DoubleMethods, , LIBND4J_TYPES, LIBND4J_TYPES_0); + BUILD_DOUBLE_TEMPLATE(template class DoubleMethods, , LIBND4J_TYPES, LIBND4J_TYPES_@FL_TYPE_INDEX@); - BUILD_DOUBLE_TEMPLATE(template void SpecialTypeConverter::convertGeneric, (Nd4jPointer * extras, void *dx, Nd4jLong N, void *dz), LIBND4J_TYPES, LIBND4J_TYPES); + BUILD_DOUBLE_TEMPLATE(template void SpecialTypeConverter::convertGeneric, (Nd4jPointer * extras, void *dx, Nd4jLong N, void *dz), LIBND4J_TYPES, LIBND4J_TYPES_@FL_TYPE_INDEX@); } \ No newline at end of file diff --git a/libnd4j/include/ops/impl/compilation_units/specials_double_3.cpp b/libnd4j/include/ops/impl/compilation_units/specials_double_3.cpp deleted file mode 100644 index 7690749bf..000000000 --- a/libnd4j/include/ops/impl/compilation_units/specials_double_3.cpp +++ /dev/null @@ -1,26 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019-2020 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../specials_double.hpp" - -namespace sd { - BUILD_DOUBLE_TEMPLATE(template class DoubleMethods, , LIBND4J_TYPES, LIBND4J_TYPES_3); -} \ No newline at end of file diff --git a/libnd4j/include/ops/impl/compilation_units/specials_double_4.cpp b/libnd4j/include/ops/impl/compilation_units/specials_double_4.cpp deleted file mode 100644 index 505ea9921..000000000 --- a/libnd4j/include/ops/impl/compilation_units/specials_double_4.cpp +++ /dev/null @@ -1,26 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. 
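Note what the specials_double template changes besides the file count: the convertGeneric instantiation previously paired LIBND4J_TYPES with the full LIBND4J_TYPES list inside a single unit, and now pairs it with the per-index shard, so the type-pair cartesian product is spread evenly across the ten generated units. Roughly what the macro stamps out for one type pair is shown below; the pair float/int is chosen purely for illustration, and each BUILD_DOUBLE_TEMPLATE call above emits one such line per combination drawn from its two type lists.

// Illustrative expansions for a single (float, int) combination; the real
// macro iterates every combination of the two lists it is given.
template class DoubleMethods<float, int>;
template void SpecialTypeConverter::convertGeneric<float, int>(Nd4jPointer *extras, void *dx, Nd4jLong N, void *dz);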
- * Copyright (c) 2019-2020 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../specials_double.hpp" - -namespace sd { - BUILD_DOUBLE_TEMPLATE(template class DoubleMethods, , LIBND4J_TYPES, LIBND4J_TYPES_4); -} \ No newline at end of file diff --git a/libnd4j/include/ops/impl/compilation_units/specials_double_5.cpp b/libnd4j/include/ops/impl/compilation_units/specials_double_5.cpp deleted file mode 100644 index caa9d2dfa..000000000 --- a/libnd4j/include/ops/impl/compilation_units/specials_double_5.cpp +++ /dev/null @@ -1,26 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019-2020 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../specials_double.hpp" - -namespace sd { - BUILD_DOUBLE_TEMPLATE(template class DoubleMethods, , LIBND4J_TYPES, LIBND4J_TYPES_5); -} \ No newline at end of file diff --git a/libnd4j/include/ops/impl/compilation_units/specials_double_6.cpp b/libnd4j/include/ops/impl/compilation_units/specials_double_6.cpp deleted file mode 100644 index 9646534a9..000000000 --- a/libnd4j/include/ops/impl/compilation_units/specials_double_6.cpp +++ /dev/null @@ -1,26 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019-2020 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../specials_double.hpp" - -namespace sd { - BUILD_DOUBLE_TEMPLATE(template class DoubleMethods, , LIBND4J_TYPES, LIBND4J_TYPES_6); -} \ No newline at end of file diff --git a/libnd4j/include/ops/impl/compilation_units/specials_double_7.cpp b/libnd4j/include/ops/impl/compilation_units/specials_double_7.cpp deleted file mode 100644 index 3230c1fbc..000000000 --- a/libnd4j/include/ops/impl/compilation_units/specials_double_7.cpp +++ /dev/null @@ -1,26 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019-2020 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../specials_double.hpp" - -namespace sd { - BUILD_DOUBLE_TEMPLATE(template class DoubleMethods, , LIBND4J_TYPES, LIBND4J_TYPES_7); -} \ No newline at end of file diff --git a/libnd4j/include/ops/impl/compilation_units/specials_double_8.cpp b/libnd4j/include/ops/impl/compilation_units/specials_double_8.cpp deleted file mode 100644 index a56b335b6..000000000 --- a/libnd4j/include/ops/impl/compilation_units/specials_double_8.cpp +++ /dev/null @@ -1,26 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019-2020 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../specials_double.hpp" - -namespace sd { - BUILD_DOUBLE_TEMPLATE(template class DoubleMethods, , LIBND4J_TYPES, LIBND4J_TYPES_8); -} \ No newline at end of file diff --git a/libnd4j/include/ops/impl/compilation_units/specials_double_9.cpp b/libnd4j/include/ops/impl/compilation_units/specials_double_9.cpp deleted file mode 100644 index bb13c0415..000000000 --- a/libnd4j/include/ops/impl/compilation_units/specials_double_9.cpp +++ /dev/null @@ -1,26 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019-2020 Konduit K.K. 
- * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../specials_double.hpp" - -namespace sd { - BUILD_DOUBLE_TEMPLATE(template class DoubleMethods, , LIBND4J_TYPES, LIBND4J_TYPES_9); -} \ No newline at end of file diff --git a/libnd4j/include/ops/impl/compilation_units/specials_single_0.cpp b/libnd4j/include/ops/impl/compilation_units/specials_single.cpp.in similarity index 90% rename from libnd4j/include/ops/impl/compilation_units/specials_single_0.cpp rename to libnd4j/include/ops/impl/compilation_units/specials_single.cpp.in index f74717f05..49110d829 100644 --- a/libnd4j/include/ops/impl/compilation_units/specials_single_0.cpp +++ b/libnd4j/include/ops/impl/compilation_units/specials_single.cpp.in @@ -19,8 +19,8 @@ // @author raver119@gmail.com // -#include "../specials_single.hpp" - +#include +#cmakedefine LIBND4J_TYPE_GEN namespace sd { - BUILD_SINGLE_TEMPLATE(template class SpecialMethods, , LIBND4J_TYPES_0); + BUILD_SINGLE_TEMPLATE(template class SpecialMethods, , LIBND4J_TYPES_@FL_TYPE_INDEX@); } \ No newline at end of file diff --git a/libnd4j/include/ops/impl/compilation_units/specials_single_1.cpp b/libnd4j/include/ops/impl/compilation_units/specials_single_1.cpp deleted file mode 100644 index cbacbb60e..000000000 --- a/libnd4j/include/ops/impl/compilation_units/specials_single_1.cpp +++ /dev/null @@ -1,26 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019-2020 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../specials_single.hpp" - -namespace sd { - BUILD_SINGLE_TEMPLATE(template class SpecialMethods, , LIBND4J_TYPES_1); -} \ No newline at end of file diff --git a/libnd4j/include/ops/impl/compilation_units/specials_single_2.cpp b/libnd4j/include/ops/impl/compilation_units/specials_single_2.cpp deleted file mode 100644 index b1c7c0db6..000000000 --- a/libnd4j/include/ops/impl/compilation_units/specials_single_2.cpp +++ /dev/null @@ -1,26 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019-2020 Konduit K.K. 
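The single-type variant is simpler: BUILD_SINGLE_TEMPLATE iterates one sharded list, so each generated specials_single_N.cpp instantiates only the few types in LIBND4J_TYPES_N. If, purely as an assumption for illustration, LIBND4J_TYPES_4 held float and double, the generated unit would reduce to the explicit instantiations below.

// Assumed shard contents, for illustration only; the real LIBND4J_TYPES_4
// list is defined elsewhere in the build.
template class SpecialMethods<float>;
template class SpecialMethods<double>;

Keeping a handful of types per translation unit appears to be the motivation for the auto-generation change as a whole: each unit stays small and cheap to compile, while the full set of instantiations across all generated units is unchanged.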
- * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../specials_single.hpp" - -namespace sd { - BUILD_SINGLE_TEMPLATE(template class SpecialMethods, , LIBND4J_TYPES_2); -} \ No newline at end of file diff --git a/libnd4j/include/ops/impl/compilation_units/specials_single_3.cpp b/libnd4j/include/ops/impl/compilation_units/specials_single_3.cpp deleted file mode 100644 index d340500e5..000000000 --- a/libnd4j/include/ops/impl/compilation_units/specials_single_3.cpp +++ /dev/null @@ -1,26 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019-2020 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../specials_single.hpp" - -namespace sd { - BUILD_SINGLE_TEMPLATE(template class SpecialMethods, , LIBND4J_TYPES_3); -} \ No newline at end of file diff --git a/libnd4j/include/ops/impl/compilation_units/specials_single_4.cpp b/libnd4j/include/ops/impl/compilation_units/specials_single_4.cpp deleted file mode 100644 index b8ea2a933..000000000 --- a/libnd4j/include/ops/impl/compilation_units/specials_single_4.cpp +++ /dev/null @@ -1,26 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019-2020 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../specials_single.hpp" - -namespace sd { - BUILD_SINGLE_TEMPLATE(template class SpecialMethods, , LIBND4J_TYPES_4); -} \ No newline at end of file diff --git a/libnd4j/include/ops/impl/compilation_units/specials_single_5.cpp b/libnd4j/include/ops/impl/compilation_units/specials_single_5.cpp deleted file mode 100644 index cc3fe3f0b..000000000 --- a/libnd4j/include/ops/impl/compilation_units/specials_single_5.cpp +++ /dev/null @@ -1,26 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019-2020 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../specials_single.hpp" - -namespace sd { - BUILD_SINGLE_TEMPLATE(template class SpecialMethods, , LIBND4J_TYPES_5); -} \ No newline at end of file diff --git a/libnd4j/include/ops/impl/compilation_units/specials_single_6.cpp b/libnd4j/include/ops/impl/compilation_units/specials_single_6.cpp deleted file mode 100644 index 4e0b96a82..000000000 --- a/libnd4j/include/ops/impl/compilation_units/specials_single_6.cpp +++ /dev/null @@ -1,26 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019-2020 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../specials_single.hpp" - -namespace sd { - BUILD_SINGLE_TEMPLATE(template class SpecialMethods, , LIBND4J_TYPES_6); -} \ No newline at end of file diff --git a/libnd4j/include/ops/impl/compilation_units/specials_single_7.cpp b/libnd4j/include/ops/impl/compilation_units/specials_single_7.cpp deleted file mode 100644 index e8bd8d950..000000000 --- a/libnd4j/include/ops/impl/compilation_units/specials_single_7.cpp +++ /dev/null @@ -1,26 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019-2020 Konduit K.K. 
- * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../specials_single.hpp" - -namespace sd { - BUILD_SINGLE_TEMPLATE(template class SpecialMethods, , LIBND4J_TYPES_7); -} \ No newline at end of file diff --git a/libnd4j/include/ops/impl/compilation_units/specials_single_8.cpp b/libnd4j/include/ops/impl/compilation_units/specials_single_8.cpp deleted file mode 100644 index b2581352e..000000000 --- a/libnd4j/include/ops/impl/compilation_units/specials_single_8.cpp +++ /dev/null @@ -1,26 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019-2020 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../specials_single.hpp" - -namespace sd { - BUILD_SINGLE_TEMPLATE(template class SpecialMethods, , LIBND4J_TYPES_8); -} \ No newline at end of file diff --git a/libnd4j/include/ops/impl/compilation_units/specials_single_9.cpp b/libnd4j/include/ops/impl/compilation_units/specials_single_9.cpp deleted file mode 100644 index 5105affa8..000000000 --- a/libnd4j/include/ops/impl/compilation_units/specials_single_9.cpp +++ /dev/null @@ -1,26 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019-2020 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../specials_single.hpp" - -namespace sd { - BUILD_SINGLE_TEMPLATE(template class SpecialMethods, , LIBND4J_TYPES_9); -} \ No newline at end of file diff --git a/libnd4j/tests_cpu/libnd4j_tests/CMakeLists.txt b/libnd4j/tests_cpu/libnd4j_tests/CMakeLists.txt index 92084ef74..7e01e2847 100644 --- a/libnd4j/tests_cpu/libnd4j_tests/CMakeLists.txt +++ b/libnd4j/tests_cpu/libnd4j_tests/CMakeLists.txt @@ -225,17 +225,12 @@ if (CMAKE_BUILD_TYPE STREQUAL "Debug" AND NOT(MINGW) AND NOT(APPLE)) SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -export-dynamic") endif() -file(GLOB_RECURSE COMPILATION_UNITS false ../../include/ops/declarable/helpers/cpu/compilation_units/*.cpp.in) -foreach(FL_ITEM ${COMPILATION_UNITS}) - string(REGEX MATCH "^(.*)\\.cpp\.in$" dummy ${FL_ITEM}) - set(FL_ITEM_WLE ${CMAKE_MATCH_1}) - foreach(FL_TYPE_INDEX RANGE 0 9) - #message( "${FL_ITEM_WLE}_${FL_TYPE_INDEX}.cpp") - configure_file( "${FL_ITEM}" "${FL_ITEM_WLE}_${FL_TYPE_INDEX}.cpp" @ONLY) - LIST(APPEND CUSTOMOPS_GENERIC_SOURCES ${FL_ITEM_WLE}_${FL_TYPE_INDEX}.cpp ) - endforeach() -endforeach() + file(GLOB_RECURSE COMPILATION_UNITS false ../include/ops/declarable/helpers/cpu/compilation_units/*.cpp.in + ../include/loops/cpu/compilation_units/*.cpp.in ../include/helpers/cpu/loops/*.cpp.in) + foreach(FL_ITEM ${COMPILATION_UNITS}) + genCompilation(FL_ITEM) + endforeach() # this function strips path from file name, basically making up short file name, i.e. file.cpp function(SHORTNAME LONG_NAME OUTPUT) From 45ebd4899c009cf7776abe275c2ef5269819245a Mon Sep 17 00:00:00 2001 From: raver119 Date: Tue, 2 Jun 2020 10:43:12 +0300 Subject: [PATCH 19/21] CUDA small sort tests (#482) * couple of C++ sort tests Signed-off-by: raver119@gmail.com * Java sort test Signed-off-by: raver119@gmail.com --- libnd4j/include/array/NDArray.h | 8 +-- .../layers_tests/LegacyOpsCudaTests.cu | 52 +++++++++++++++++++ .../java/org/nd4j/nativeblas/Nd4jCuda.java | 15 +++--- .../java/org/nd4j/nativeblas/Nd4jCpu.java | 15 +++--- .../test/java/org/nd4j/linalg/Nd4jTestsC.java | 8 +++ 5 files changed, 82 insertions(+), 16 deletions(-) diff --git a/libnd4j/include/array/NDArray.h b/libnd4j/include/array/NDArray.h index 04500a987..c314d25b6 100644 --- a/libnd4j/include/array/NDArray.h +++ b/libnd4j/include/array/NDArray.h @@ -354,11 +354,11 @@ namespace sd { * @param writeList * @param readList */ - static void registerSpecialUse(const std::vector& writeList, const std::vector& readList); - static void prepareSpecialUse(const std::vector& writeList, const std::vector& readList, bool synchronizeWritables = false); + static void registerSpecialUse(const std::vector& writeList, const std::vector& readList = {}); + static void prepareSpecialUse(const std::vector& writeList, const std::vector& readList = {}, bool synchronizeWritables = false); - static void registerPrimaryUse(const std::vector& writeList, const std::vector& readList); - static void preparePrimaryUse(const std::vector& writeList, const std::vector& readList, bool synchronizeWritables = false); + static void registerPrimaryUse(const std::vector& writeList, const std::vector& readList = {}); + static void preparePrimaryUse(const std::vector& writeList, const std::vector& readList = {}, bool synchronizeWritables = false); /** * This method returns buffer pointer offset by given number of elements, wrt own data type diff --git 
a/libnd4j/tests_cpu/layers_tests/LegacyOpsCudaTests.cu b/libnd4j/tests_cpu/layers_tests/LegacyOpsCudaTests.cu index 53179cd68..622ce9fbb 100644 --- a/libnd4j/tests_cpu/layers_tests/LegacyOpsCudaTests.cu +++ b/libnd4j/tests_cpu/layers_tests/LegacyOpsCudaTests.cu @@ -58,3 +58,55 @@ TEST_F(LegacyOpsCudaTests, test_sortTad_1) { ASSERT_EQ(e, x); } + +TEST_F(LegacyOpsCudaTests, test_sort_1) { + auto x = NDArrayFactory::create<float>('c', {4}, {4.f, 2.f, 1.f, 3.f}); + auto e = NDArrayFactory::create<float>('c', {4}, {1.f, 2.f, 3.f, 4.f}); + + Nd4jPointer extras[2] = {nullptr, LaunchContext::defaultContext()->getCudaStream()}; + + NDArray::prepareSpecialUse({&x}, {&x}); + ::sort(extras, x.buffer(), x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), false); + NDArray::registerSpecialUse({&x}); + + ASSERT_EQ(e, x); +} + +TEST_F(LegacyOpsCudaTests, test_sort_2) { + auto x = NDArrayFactory::create<float>('c', {4}, {4.f, 2.f, 1.f, 3.f}); + auto e = NDArrayFactory::create<float>('c', {4}, {4.f, 3.f, 2.f, 1.f}); + + Nd4jPointer extras[2] = {nullptr, LaunchContext::defaultContext()->getCudaStream()}; + + NDArray::prepareSpecialUse({&x}, {&x}); + ::sort(extras, x.buffer(), x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), true); + NDArray::registerSpecialUse({&x}); + + ASSERT_EQ(e, x); +} + +TEST_F(LegacyOpsCudaTests, test_sort_3) { + auto x = NDArrayFactory::create<double>('c', {4}, {0.5, 0.4, 0.1, 0.2}); + auto e = NDArrayFactory::create<double>('c', {4}, {0.1, 0.2, 0.4, 0.5}); + + Nd4jPointer extras[2] = {nullptr, LaunchContext::defaultContext()->getCudaStream()}; + + NDArray::prepareSpecialUse({&x}, {&x}); + ::sort(extras, x.buffer(), x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), false); + NDArray::registerSpecialUse({&x}); + + ASSERT_EQ(e, x); +} + +TEST_F(LegacyOpsCudaTests, test_sort_4) { + auto x = NDArrayFactory::create<int>('c', {4}, {7, 4, 9, 2}); + auto e = NDArrayFactory::create<int>('c', {4}, {2, 4, 7, 9}); + + Nd4jPointer extras[2] = {nullptr, LaunchContext::defaultContext()->getCudaStream()}; + + NDArray::prepareSpecialUse({&x}, {&x}); + ::sort(extras, x.buffer(), x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), false); + NDArray::registerSpecialUse({&x}); + + ASSERT_EQ(e, x); +} \ No newline at end of file diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCuda.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCuda.java index ad9503849..cc6ffc19a 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCuda.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCuda.java @@ -3849,13 +3849,15 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); * @param writeList * @param readList */ - public static native void registerSpecialUse(@Const @ByRef ConstNDArrayVector writeList, @Const @ByRef ConstNDArrayVector readList); - public static native void prepareSpecialUse(@Const @ByRef ConstNDArrayVector writeList, @Const @ByRef ConstNDArrayVector readList, @Cast("bool") boolean synchronizeWritables/*=false*/); - public static native void prepareSpecialUse(@Const @ByRef ConstNDArrayVector writeList, @Const @ByRef ConstNDArrayVector readList); + public static native void registerSpecialUse(@Const @ByRef ConstNDArrayVector writeList, @Const @ByRef(nullValue = "std::vector({})") ConstNDArrayVector readList); + public static native void registerSpecialUse(@Const @ByRef ConstNDArrayVector writeList); + public static native void 
prepareSpecialUse(@Const @ByRef ConstNDArrayVector writeList, @Const @ByRef(nullValue = "std::vector({})") ConstNDArrayVector readList, @Cast("bool") boolean synchronizeWritables/*=false*/); + public static native void prepareSpecialUse(@Const @ByRef ConstNDArrayVector writeList); - public static native void registerPrimaryUse(@Const @ByRef ConstNDArrayVector writeList, @Const @ByRef ConstNDArrayVector readList); - public static native void preparePrimaryUse(@Const @ByRef ConstNDArrayVector writeList, @Const @ByRef ConstNDArrayVector readList, @Cast("bool") boolean synchronizeWritables/*=false*/); - public static native void preparePrimaryUse(@Const @ByRef ConstNDArrayVector writeList, @Const @ByRef ConstNDArrayVector readList); + public static native void registerPrimaryUse(@Const @ByRef ConstNDArrayVector writeList, @Const @ByRef(nullValue = "std::vector({})") ConstNDArrayVector readList); + public static native void registerPrimaryUse(@Const @ByRef ConstNDArrayVector writeList); + public static native void preparePrimaryUse(@Const @ByRef ConstNDArrayVector writeList, @Const @ByRef(nullValue = "std::vector({})") ConstNDArrayVector readList, @Cast("bool") boolean synchronizeWritables/*=false*/); + public static native void preparePrimaryUse(@Const @ByRef ConstNDArrayVector writeList); /** * This method returns buffer pointer offset by given number of elements, wrt own data type @@ -5043,6 +5045,7 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); // #define LIBND4J_GRAPH_RNG_H // #include +// #include // #include // #include // #include diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java index 402b096c6..f17f11093 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java @@ -3853,13 +3853,15 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); * @param writeList * @param readList */ - public static native void registerSpecialUse(@Const @ByRef ConstNDArrayVector writeList, @Const @ByRef ConstNDArrayVector readList); - public static native void prepareSpecialUse(@Const @ByRef ConstNDArrayVector writeList, @Const @ByRef ConstNDArrayVector readList, @Cast("bool") boolean synchronizeWritables/*=false*/); - public static native void prepareSpecialUse(@Const @ByRef ConstNDArrayVector writeList, @Const @ByRef ConstNDArrayVector readList); + public static native void registerSpecialUse(@Const @ByRef ConstNDArrayVector writeList, @Const @ByRef(nullValue = "std::vector({})") ConstNDArrayVector readList); + public static native void registerSpecialUse(@Const @ByRef ConstNDArrayVector writeList); + public static native void prepareSpecialUse(@Const @ByRef ConstNDArrayVector writeList, @Const @ByRef(nullValue = "std::vector({})") ConstNDArrayVector readList, @Cast("bool") boolean synchronizeWritables/*=false*/); + public static native void prepareSpecialUse(@Const @ByRef ConstNDArrayVector writeList); - public static native void registerPrimaryUse(@Const @ByRef ConstNDArrayVector writeList, @Const @ByRef ConstNDArrayVector readList); - public static native void preparePrimaryUse(@Const @ByRef ConstNDArrayVector writeList, @Const @ByRef ConstNDArrayVector readList, @Cast("bool") boolean synchronizeWritables/*=false*/); - public static native void 
preparePrimaryUse(@Const @ByRef ConstNDArrayVector writeList, @Const @ByRef ConstNDArrayVector readList); + public static native void registerPrimaryUse(@Const @ByRef ConstNDArrayVector writeList, @Const @ByRef(nullValue = "std::vector({})") ConstNDArrayVector readList); + public static native void registerPrimaryUse(@Const @ByRef ConstNDArrayVector writeList); + public static native void preparePrimaryUse(@Const @ByRef ConstNDArrayVector writeList, @Const @ByRef(nullValue = "std::vector({})") ConstNDArrayVector readList, @Cast("bool") boolean synchronizeWritables/*=false*/); + public static native void preparePrimaryUse(@Const @ByRef ConstNDArrayVector writeList); /** * This method returns buffer pointer offset by given number of elements, wrt own data type @@ -5047,6 +5049,7 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); // #define LIBND4J_GRAPH_RNG_H // #include +// #include // #include // #include // #include diff --git a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/Nd4jTestsC.java b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/Nd4jTestsC.java index c9f5cef6f..e6c380b31 100644 --- a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/Nd4jTestsC.java +++ b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/Nd4jTestsC.java @@ -8484,6 +8484,14 @@ public class Nd4jTestsC extends BaseNd4jTest { } } + @Test + public void testSmallSort(){ + INDArray arr = Nd4j.createFromArray(0.5, 0.4, 0.1, 0.2); + INDArray expected = Nd4j.createFromArray(0.1, 0.2, 0.4, 0.5); + INDArray sorted = Nd4j.sort(arr, true); + assertEquals(expected, sorted); + } + @Override public char ordering() { return 'c'; From ee3e059b12ac4994289f17319792f557982391c9 Mon Sep 17 00:00:00 2001 From: Alex Black Date: Fri, 5 Jun 2020 11:49:02 +1000 Subject: [PATCH 20/21] DL4J/DataVec: Fix Yolo2OutputLayer and ObjectDetectionRecordReader support for NHWC data format (#483) * Fix Yolo2OutputLayer for NHWC data format Signed-off-by: Alex Black * ObjectDetectionRecordReader NHWC support Signed-off-by: Alex Black --- .../ObjectDetectionRecordReader.java | 57 +++- .../TestObjectDetectionRecordReader.java | 281 +++++++++--------- .../gradientcheck/YoloGradientCheckTests.java | 39 ++- .../layers/objdetect/Yolo2OutputLayer.java | 6 +- .../nn/layers/objdetect/Yolo2OutputLayer.java | 15 +- .../nn/layers/objdetect/YoloUtils.java | 20 +- 6 files changed, 260 insertions(+), 158 deletions(-) diff --git a/datavec/datavec-data/datavec-data-image/src/main/java/org/datavec/image/recordreader/objdetect/ObjectDetectionRecordReader.java b/datavec/datavec-data/datavec-data-image/src/main/java/org/datavec/image/recordreader/objdetect/ObjectDetectionRecordReader.java index 1a53a05ac..38afd6adf 100644 --- a/datavec/datavec-data/datavec-data-image/src/main/java/org/datavec/image/recordreader/objdetect/ObjectDetectionRecordReader.java +++ b/datavec/datavec-data/datavec-data-image/src/main/java/org/datavec/image/recordreader/objdetect/ObjectDetectionRecordReader.java @@ -49,7 +49,7 @@ import static org.nd4j.linalg.indexing.NDArrayIndex.point; /** * An image record reader for object detection. *

- * Format of returned values: 4d array, with dimensions [minibatch, 4+C, h, w] + * Format of returned values: 4d array, with dimensions [minibatch, 4+C, h, w] (nchw) or [minibatch, h, w, 4+C] (nhwc) * Where the image is quantized into h x w grid locations. *
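For example (editor's illustration, not part of the patch): with C = 2 classes, label channels
* 0..3 hold an object's x1, y1, x2, y2 box coordinates scaled to grid units, and channels 4..5
* hold the one-hot class indicator - exactly the layout built by the putScalar calls in the updated test below.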

* Note that this matches the format required for Deeplearning4j's Yolo2OutputLayer @@ -61,42 +61,67 @@ public class ObjectDetectionRecordReader extends BaseImageRecordReader { private final int gridW; private final int gridH; private final ImageObjectLabelProvider labelProvider; + private final boolean nchw; protected Image currentImage; /** + * As per {@link #ObjectDetectionRecordReader(int, int, int, int, int, boolean, ImageObjectLabelProvider)} but hardcoded + * to NCHW format + */ + public ObjectDetectionRecordReader(int height, int width, int channels, int gridH, int gridW, ImageObjectLabelProvider labelProvider) { + this(height, width, channels, gridH, gridW, true, labelProvider); + } + + /** + * Create an ObjectDetectionRecordReader with a configurable (NCHW or NHWC) label format. * * @param height Height of the output images * @param width Width of the output images * @param channels Number of channels for the output images * @param gridH Grid/quantization size (along height dimension) - Y axis * @param gridW Grid/quantization size (along height dimension) - X axis + * @param nchw If true: return NCHW format labels with array shape [minibatch, 4+C, h, w]; if false, return + * NHWC format labels with array shape [minibatch, h, w, 4+C] * @param labelProvider ImageObjectLabelProvider - used to look up which objects are in each image */ - public ObjectDetectionRecordReader(int height, int width, int channels, int gridH, int gridW, ImageObjectLabelProvider labelProvider) { + public ObjectDetectionRecordReader(int height, int width, int channels, int gridH, int gridW, boolean nchw, ImageObjectLabelProvider labelProvider) { super(height, width, channels, null, null); this.gridW = gridW; this.gridH = gridH; + this.nchw = nchw; this.labelProvider = labelProvider; this.appendLabel = labelProvider != null; } /** - * When imageTransform != null, object is removed if new center is outside of transformed image bounds. - * - * @param height Height of the output images - * @param width Width of the output images - * @param channels Number of channels for the output images - * @param gridH Grid/quantization size (along height dimension) - Y axis - * @param gridW Grid/quantization size (along height dimension) - X axis - * @param labelProvider ImageObjectLabelProvider - used to look up which objects are in each image - * @param imageTransform ImageTransform - used to transform image and coordinates + * As per {@link #ObjectDetectionRecordReader(int, int, int, int, int, boolean, ImageObjectLabelProvider, ImageTransform)} + * but hardcoded to NCHW format */ public ObjectDetectionRecordReader(int height, int width, int channels, int gridH, int gridW, - ImageObjectLabelProvider labelProvider, ImageTransform imageTransform) { + ImageObjectLabelProvider labelProvider, ImageTransform imageTransform) { + this(height, width, channels, gridH, gridW, true, labelProvider, imageTransform); + } + + /** + * When imageTransform != null, object is removed if new center is outside of transformed image bounds. 
+ * + * @param height Height of the output images + * @param width Width of the output images + * @param channels Number of channels for the output images + * @param gridH Grid/quantization size (along height dimension) - Y axis + * @param gridW Grid/quantization size (along width dimension) - X axis + * @param nchw If true: return NCHW format labels with array shape [minibatch, 4+C, h, w]; if false, return + * NHWC format labels with array shape [minibatch, h, w, 4+C] + * @param labelProvider ImageObjectLabelProvider - used to look up which objects are in each image + * @param imageTransform ImageTransform - used to transform image and coordinates + */ + public ObjectDetectionRecordReader(int height, int width, int channels, int gridH, int gridW, boolean nchw, + ImageObjectLabelProvider labelProvider, ImageTransform imageTransform) { super(height, width, channels, null, null); this.gridW = gridW; this.gridH = gridH; + this.nchw = nchw; this.labelProvider = labelProvider; this.appendLabel = labelProvider != null; this.imageTransform = imageTransform; @@ -182,6 +207,10 @@ public class ObjectDetectionRecordReader extends BaseImageRecordReader { exampleNum++; } + if(!nchw) { + outImg = outImg.permute(0, 2, 3, 1); //NCHW to NHWC + outLabel = outLabel.permute(0, 2, 3, 1); + } return new NDArrayRecordBatch(Arrays.asList(outImg, outLabel)); } @@ -256,6 +285,8 @@ public class ObjectDetectionRecordReader extends BaseImageRecordReader { imageLoader = new NativeImageLoader(height, width, channels, imageTransform); } Image image = this.imageLoader.asImageMatrix(dataInputStream); + if(!nchw) + image.setImage(image.getImage().permute(0,2,3,1)); Nd4j.getAffinityManager().ensureLocation(image.getImage(), AffinityManager.Location.DEVICE); List<Writable> ret = RecordConverter.toRecord(image.getImage()); @@ -264,6 +295,8 @@ int nClasses = labels.size(); INDArray outLabel = Nd4j.create(1, 4 + nClasses, gridH, gridW); label(image, imageObjectsForPath, outLabel, 0); + if(!nchw) + outLabel = outLabel.permute(0,2,3,1); //NCHW to NHWC ret.add(new NDArrayWritable(outLabel)); } return ret; diff --git a/datavec/datavec-data/datavec-data-image/src/test/java/org/datavec/image/recordreader/TestObjectDetectionRecordReader.java b/datavec/datavec-data/datavec-data-image/src/test/java/org/datavec/image/recordreader/TestObjectDetectionRecordReader.java index d8620096a..5e4598005 100644 --- a/datavec/datavec-data/datavec-data-image/src/test/java/org/datavec/image/recordreader/TestObjectDetectionRecordReader.java +++ b/datavec/datavec-data/datavec-data-image/src/test/java/org/datavec/image/recordreader/TestObjectDetectionRecordReader.java @@ -56,168 +56,179 @@ public class TestObjectDetectionRecordReader { @Test public void test() throws Exception { - ImageObjectLabelProvider lp = new TestImageObjectDetectionLabelProvider(); + for(boolean nchw : new boolean[]{true, false}) { + ImageObjectLabelProvider lp = new TestImageObjectDetectionLabelProvider(); - File f = testDir.newFolder(); - new ClassPathResource("datavec-data-image/objdetect/").copyDirectory(f); + File f = testDir.newFolder(); + new ClassPathResource("datavec-data-image/objdetect/").copyDirectory(f); - String path = new File(f, "000012.jpg").getParent(); + String path = new File(f, "000012.jpg").getParent(); - int h = 32; - int w = 32; - int c = 3; - int gW = 13; - int gH = 10; + int h = 32; + int w = 32; + int c = 3; + int gW = 13; + int gH = 10; - //Enforce consistent iteration order for tests - URI[] 
u = new FileSplit(new File(path)).locations(); - Arrays.sort(u); + //Enforce consistent iteration order for tests + URI[] u = new FileSplit(new File(path)).locations(); + Arrays.sort(u); - RecordReader rr = new ObjectDetectionRecordReader(h, w, c, gH, gW, lp); - rr.initialize(new CollectionInputSplit(u)); + RecordReader rr = new ObjectDetectionRecordReader(h, w, c, gH, gW, nchw, lp); + rr.initialize(new CollectionInputSplit(u)); - RecordReader imgRR = new ImageRecordReader(h, w, c); - imgRR.initialize(new CollectionInputSplit(u)); + RecordReader imgRR = new ImageRecordReader(h, w, c, nchw); + imgRR.initialize(new CollectionInputSplit(u)); - List labels = rr.getLabels(); - assertEquals(Arrays.asList("car", "cat"), labels); + List labels = rr.getLabels(); + assertEquals(Arrays.asList("car", "cat"), labels); - //000012.jpg - originally 500x333 - //000019.jpg - originally 500x375 - double[] origW = new double[]{500, 500}; - double[] origH = new double[]{333, 375}; - List> l = Arrays.asList( - Collections.singletonList(new ImageObject(156, 97, 351, 270, "car")), - Arrays.asList(new ImageObject(11, 113, 266, 259, "cat"), new ImageObject(231, 88, 483, 256, "cat")) - ); + //000012.jpg - originally 500x333 + //000019.jpg - originally 500x375 + double[] origW = new double[]{500, 500}; + double[] origH = new double[]{333, 375}; + List> l = Arrays.asList( + Collections.singletonList(new ImageObject(156, 97, 351, 270, "car")), + Arrays.asList(new ImageObject(11, 113, 266, 259, "cat"), new ImageObject(231, 88, 483, 256, "cat")) + ); - for (int idx = 0; idx < 2; idx++) { - assertTrue(rr.hasNext()); - List next = rr.next(); - List nextImgRR = imgRR.next(); + for (int idx = 0; idx < 2; idx++) { + assertTrue(rr.hasNext()); + List next = rr.next(); + List nextImgRR = imgRR.next(); - //Check features: - assertEquals(next.get(0), nextImgRR.get(0)); + //Check features: + assertEquals(next.get(0), nextImgRR.get(0)); - //Check labels - assertEquals(2, next.size()); - assertTrue(next.get(0) instanceof NDArrayWritable); - assertTrue(next.get(1) instanceof NDArrayWritable); + //Check labels + assertEquals(2, next.size()); + assertTrue(next.get(0) instanceof NDArrayWritable); + assertTrue(next.get(1) instanceof NDArrayWritable); - List objects = l.get(idx); + List objects = l.get(idx); - INDArray expLabels = Nd4j.create(1, 4 + 2, gH, gW); - for (ImageObject io : objects) { - double fracImageX1 = io.getX1() / origW[idx]; - double fracImageY1 = io.getY1() / origH[idx]; - double fracImageX2 = io.getX2() / origW[idx]; - double fracImageY2 = io.getY2() / origH[idx]; + INDArray expLabels = Nd4j.create(1, 4 + 2, gH, gW); + for (ImageObject io : objects) { + double fracImageX1 = io.getX1() / origW[idx]; + double fracImageY1 = io.getY1() / origH[idx]; + double fracImageX2 = io.getX2() / origW[idx]; + double fracImageY2 = io.getY2() / origH[idx]; - double x1C = (fracImageX1 + fracImageX2) / 2.0; - double y1C = (fracImageY1 + fracImageY2) / 2.0; + double x1C = (fracImageX1 + fracImageX2) / 2.0; + double y1C = (fracImageY1 + fracImageY2) / 2.0; - int labelGridX = (int) (x1C * gW); - int labelGridY = (int) (y1C * gH); + int labelGridX = (int) (x1C * gW); + int labelGridY = (int) (y1C * gH); - int labelIdx; - if (io.getLabel().equals("car")) { - labelIdx = 4; - } else { - labelIdx = 5; + int labelIdx; + if (io.getLabel().equals("car")) { + labelIdx = 4; + } else { + labelIdx = 5; + } + expLabels.putScalar(0, labelIdx, labelGridY, labelGridX, 1.0); + + expLabels.putScalar(0, 0, labelGridY, labelGridX, fracImageX1 * gW); + 
expLabels.putScalar(0, 1, labelGridY, labelGridX, fracImageY1 * gH); + expLabels.putScalar(0, 2, labelGridY, labelGridX, fracImageX2 * gW); + expLabels.putScalar(0, 3, labelGridY, labelGridX, fracImageY2 * gH); } - expLabels.putScalar(0, labelIdx, labelGridY, labelGridX, 1.0); - expLabels.putScalar(0, 0, labelGridY, labelGridX, fracImageX1 * gW); - expLabels.putScalar(0, 1, labelGridY, labelGridX, fracImageY1 * gH); - expLabels.putScalar(0, 2, labelGridY, labelGridX, fracImageX2 * gW); - expLabels.putScalar(0, 3, labelGridY, labelGridX, fracImageY2 * gH); + INDArray lArr = ((NDArrayWritable) next.get(1)).get(); + if(nchw) { + assertArrayEquals(new long[]{1, 4 + 2, gH, gW}, lArr.shape()); + } else { + assertArrayEquals(new long[]{1, gH, gW, 4 + 2}, lArr.shape()); + } + + if(!nchw) + expLabels = expLabels.permute(0,2,3,1); //NCHW to NHWC + + assertEquals(expLabels, lArr); } - INDArray lArr = ((NDArrayWritable) next.get(1)).get(); - assertArrayEquals(new long[]{1, 4 + 2, gH, gW}, lArr.shape()); - assertEquals(expLabels, lArr); - } + rr.reset(); + Record record = rr.nextRecord(); + RecordMetaDataImageURI metadata = (RecordMetaDataImageURI) record.getMetaData(); + assertEquals(new File(path, "000012.jpg"), new File(metadata.getURI())); + assertEquals(3, metadata.getOrigC()); + assertEquals((int) origH[0], metadata.getOrigH()); + assertEquals((int) origW[0], metadata.getOrigW()); - rr.reset(); - Record record = rr.nextRecord(); - RecordMetaDataImageURI metadata = (RecordMetaDataImageURI)record.getMetaData(); - assertEquals(new File(path, "000012.jpg"), new File(metadata.getURI())); - assertEquals(3, metadata.getOrigC()); - assertEquals((int)origH[0], metadata.getOrigH()); - assertEquals((int)origW[0], metadata.getOrigW()); + List out = new ArrayList<>(); + List meta = new ArrayList<>(); + out.add(record); + meta.add(metadata); + record = rr.nextRecord(); + metadata = (RecordMetaDataImageURI) record.getMetaData(); + out.add(record); + meta.add(metadata); - List out = new ArrayList<>(); - List meta = new ArrayList<>(); - out.add(record); - meta.add(metadata); - record = rr.nextRecord(); - metadata = (RecordMetaDataImageURI)record.getMetaData(); - out.add(record); - meta.add(metadata); + List fromMeta = rr.loadFromMetaData(meta); + assertEquals(out, fromMeta); - List fromMeta = rr.loadFromMetaData(meta); - assertEquals(out, fromMeta); + // make sure we don't lose objects just by explicitly resizing + int i = 0; + int[] nonzeroCount = {5, 10}; - // make sure we don't lose objects just by explicitly resizing - int i = 0; - int[] nonzeroCount = {5, 10}; + ImageTransform transform = new ResizeImageTransform(37, 42); + RecordReader rrTransform = new ObjectDetectionRecordReader(42, 37, c, gH, gW, nchw, lp, transform); + rrTransform.initialize(new CollectionInputSplit(u)); + i = 0; + while (rrTransform.hasNext()) { + List next = rrTransform.next(); + assertEquals(37, transform.getCurrentImage().getWidth()); + assertEquals(42, transform.getCurrentImage().getHeight()); + INDArray labelArray = ((NDArrayWritable) next.get(1)).get(); + BooleanIndexing.replaceWhere(labelArray, 1, Conditions.notEquals(0)); + assertEquals(nonzeroCount[i++], labelArray.sum().getInt(0)); + } - ImageTransform transform = new ResizeImageTransform(37, 42); - RecordReader rrTransform = new ObjectDetectionRecordReader(42, 37, c, gH, gW, lp, transform); - rrTransform.initialize(new CollectionInputSplit(u)); - i = 0; - while (rrTransform.hasNext()) { - List next = rrTransform.next(); - assertEquals(37, 
transform.getCurrentImage().getWidth()); - assertEquals(42, transform.getCurrentImage().getHeight()); - INDArray labelArray = ((NDArrayWritable)next.get(1)).get(); - BooleanIndexing.replaceWhere(labelArray, 1, Conditions.notEquals(0)); - assertEquals(nonzeroCount[i++], labelArray.sum().getInt(0)); - } + ImageTransform transform2 = new ResizeImageTransform(1024, 2048); + RecordReader rrTransform2 = new ObjectDetectionRecordReader(2048, 1024, c, gH, gW, nchw, lp, transform2); + rrTransform2.initialize(new CollectionInputSplit(u)); + i = 0; + while (rrTransform2.hasNext()) { + List next = rrTransform2.next(); + assertEquals(1024, transform2.getCurrentImage().getWidth()); + assertEquals(2048, transform2.getCurrentImage().getHeight()); + INDArray labelArray = ((NDArrayWritable) next.get(1)).get(); + BooleanIndexing.replaceWhere(labelArray, 1, Conditions.notEquals(0)); + assertEquals(nonzeroCount[i++], labelArray.sum().getInt(0)); + } + + //Make sure image flip does not break labels and are correct for new image size dimensions: + ImageTransform transform3 = new PipelineImageTransform( + new ResizeImageTransform(2048, 4096), + new FlipImageTransform(-1) + ); + RecordReader rrTransform3 = new ObjectDetectionRecordReader(2048, 1024, c, gH, gW, nchw, lp, transform3); + rrTransform3.initialize(new CollectionInputSplit(u)); + i = 0; + while (rrTransform3.hasNext()) { + List next = rrTransform3.next(); + INDArray labelArray = ((NDArrayWritable) next.get(1)).get(); + BooleanIndexing.replaceWhere(labelArray, 1, Conditions.notEquals(0)); + assertEquals(nonzeroCount[i++], labelArray.sum().getInt(0)); + } + + //Test that doing a downscale with the native image loader directly instead of a transform does not cause an exception: + ImageTransform transform4 = new FlipImageTransform(-1); + RecordReader rrTransform4 = new ObjectDetectionRecordReader(128, 128, c, gH, gW, nchw, lp, transform4); + rrTransform4.initialize(new CollectionInputSplit(u)); + i = 0; + while (rrTransform4.hasNext()) { + List next = rrTransform4.next(); + + assertEquals((int) origW[i], transform4.getCurrentImage().getWidth()); + assertEquals((int) origH[i], transform4.getCurrentImage().getHeight()); + + INDArray labelArray = ((NDArrayWritable) next.get(1)).get(); + BooleanIndexing.replaceWhere(labelArray, 1, Conditions.notEquals(0)); + assertEquals(nonzeroCount[i++], labelArray.sum().getInt(0)); + } - ImageTransform transform2 = new ResizeImageTransform(1024, 2048); - RecordReader rrTransform2 = new ObjectDetectionRecordReader(2048, 1024, c, gH, gW, lp, transform2); - rrTransform2.initialize(new CollectionInputSplit(u)); - i = 0; - while (rrTransform2.hasNext()) { - List next = rrTransform2.next(); - assertEquals(1024, transform2.getCurrentImage().getWidth()); - assertEquals(2048, transform2.getCurrentImage().getHeight()); - INDArray labelArray = ((NDArrayWritable)next.get(1)).get(); - BooleanIndexing.replaceWhere(labelArray, 1, Conditions.notEquals(0)); - assertEquals(nonzeroCount[i++], labelArray.sum().getInt(0)); - } - - //Make sure image flip does not break labels and are correct for new image size dimensions: - ImageTransform transform3 = new PipelineImageTransform( - new ResizeImageTransform(2048, 4096), - new FlipImageTransform(-1) - ); - RecordReader rrTransform3 = new ObjectDetectionRecordReader(2048, 1024, c, gH, gW, lp, transform3); - rrTransform3.initialize(new CollectionInputSplit(u)); - i = 0; - while (rrTransform3.hasNext()) { - List next = rrTransform3.next(); - INDArray labelArray = ((NDArrayWritable)next.get(1)).get(); - 
BooleanIndexing.replaceWhere(labelArray, 1, Conditions.notEquals(0)); - assertEquals(nonzeroCount[i++], labelArray.sum().getInt(0)); - } - - //Test that doing a downscale with the native image loader directly instead of a transform does not cause an exception: - ImageTransform transform4 = new FlipImageTransform(-1); - RecordReader rrTransform4 = new ObjectDetectionRecordReader(128, 128, c, gH, gW, lp, transform4); - rrTransform4.initialize(new CollectionInputSplit(u)); - i = 0; - while (rrTransform4.hasNext()) { - List next = rrTransform4.next(); - - assertEquals((int) origW[i], transform4.getCurrentImage().getWidth()); - assertEquals((int) origH[i], transform4.getCurrentImage().getHeight()); - - INDArray labelArray = ((NDArrayWritable)next.get(1)).get(); - BooleanIndexing.replaceWhere(labelArray, 1, Conditions.notEquals(0)); - assertEquals(nonzeroCount[i++], labelArray.sum().getInt(0)); } } diff --git a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/YoloGradientCheckTests.java b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/YoloGradientCheckTests.java index 5646b6519..47c040c12 100644 --- a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/YoloGradientCheckTests.java +++ b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/YoloGradientCheckTests.java @@ -24,9 +24,7 @@ import org.datavec.image.recordreader.objdetect.impl.VocLabelProvider; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.TestUtils; import org.deeplearning4j.datasets.datavec.RecordReaderDataSetIterator; -import org.deeplearning4j.nn.conf.ConvolutionMode; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.*; import org.deeplearning4j.nn.conf.distribution.GaussianDistribution; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; @@ -36,6 +34,8 @@ import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; @@ -50,17 +50,28 @@ import java.io.File; import java.io.FileOutputStream; import java.io.InputStream; +import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertTrue; /** * @author Alex Black */ +@RunWith(Parameterized.class) public class YoloGradientCheckTests extends BaseDL4JTest { static { Nd4j.setDataType(DataType.DOUBLE); } + private CNN2DFormat format; + public YoloGradientCheckTests(CNN2DFormat format){ + this.format = format; + } + @Parameterized.Parameters(name = "{0}") + public static Object[] params(){ + return CNN2DFormat.values(); + } + @Rule public TemporaryFolder testDir = new TemporaryFolder(); @@ -97,8 +108,14 @@ public class YoloGradientCheckTests extends BaseDL4JTest { Nd4j.getRandom().setSeed(12345); - INDArray input = Nd4j.rand(new int[]{mb, depthIn, h, w}); - INDArray labels = yoloLabels(mb, c, h, w); + INDArray input, labels; + if(format == CNN2DFormat.NCHW){ + input = Nd4j.rand(DataType.DOUBLE, mb, depthIn, h, w); + labels = yoloLabels(mb, c, h, w); + } else { + input = Nd4j.rand(DataType.DOUBLE, mb, h, w, depthIn); + labels = yoloLabels(mb, c, h, w).permute(0,2,3,1); + } 
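Note (editorial, not part of the patch): both the record reader changes above and this gradient check rely on the same axis permutation, (0, 2, 3, 1), to move the channel dimension last. A minimal C++ sketch of that transform in libnd4j's NDArray API follows; the include path and free-standing function are assumptions for illustration only:

```c++
// Illustrative only: NCHW -> NHWC by permuting axes (0, 2, 3, 1).
#include <array/NDArrayFactory.h>   // assumed include path

void nchwToNhwcExample() {
    // [minibatch, channels, h, w] = {2, 9, 4, 5}
    auto nchw = sd::NDArrayFactory::create<float>('c', {2, 9, 4, 5});
    // the channel axis (1) moves to the end: [minibatch, h, w, channels]
    auto nhwc = nchw.permute({0, 2, 3, 1});   // shape is now {2, 4, 5, 9}
}
```

The Java side in the hunks above does the same with INDArray.permute(0, 2, 3, 1).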
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) .dataType(DataType.DOUBLE) @@ -112,6 +129,7 @@ public class YoloGradientCheckTests extends BaseDL4JTest { .layer(new Yolo2OutputLayer.Builder() .boundingBoxPriors(bbPrior) .build()) + .setInputType(InputType.convolutional(h, w, depthIn, format)) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -120,7 +138,18 @@ public class YoloGradientCheckTests extends BaseDL4JTest { String msg = "testYoloOutputLayer() - minibatch = " + mb + ", w=" + w + ", h=" + h + ", l1=" + l1[i] + ", l2=" + l2[i]; System.out.println(msg); + INDArray out = net.output(input); + if(format == CNN2DFormat.NCHW){ + assertArrayEquals(new long[]{mb, yoloDepth, h, w}, out.shape()); + } else { + assertArrayEquals(new long[]{mb, h, w, yoloDepth}, out.shape()); + } + + net.fit(input, labels); + + boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.MLNConfig().net(net).input(input) + .minAbsoluteError(1e-6) .labels(labels).subset(true).maxPerParam(100)); assertTrue(msg, gradOK); diff --git a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/objdetect/Yolo2OutputLayer.java b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/objdetect/Yolo2OutputLayer.java index 24bda07f6..6ffb92978 100644 --- a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/objdetect/Yolo2OutputLayer.java +++ b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/objdetect/Yolo2OutputLayer.java @@ -21,6 +21,7 @@ import lombok.Getter; import lombok.Setter; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; +import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.GradientNormalization; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -80,6 +81,8 @@ public class Yolo2OutputLayer extends org.deeplearning4j.nn.conf.layers.Layer { @JsonDeserialize(using = BoundingBoxesDeserializer.class) private INDArray boundingBoxes; + private CNN2DFormat format = CNN2DFormat.NCHW; //Default for serialization of old formats + private Yolo2OutputLayer() { //No-arg constructor for Jackson JSON } @@ -119,7 +122,8 @@ public class Yolo2OutputLayer extends org.deeplearning4j.nn.conf.layers.Layer { @Override public void setNIn(InputType inputType, boolean override) { - //No op + InputType.InputTypeConvolutional c = (InputType.InputTypeConvolutional) inputType; + this.format = c.getFormat(); } @Override diff --git a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/objdetect/Yolo2OutputLayer.java b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/objdetect/Yolo2OutputLayer.java index eb5a4d19e..4d118c62b 100644 --- a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/objdetect/Yolo2OutputLayer.java +++ b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/objdetect/Yolo2OutputLayer.java @@ -19,6 +19,7 @@ package org.deeplearning4j.nn.layers.objdetect; import lombok.*; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.layers.IOutputLayer; +import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; @@ -110,6 +111,12 @@ public class Yolo2OutputLayer extends 
AbstractLayer Date: Sat, 6 Jun 2020 15:26:55 +0300 Subject: [PATCH 21/21] C++ rearrangements (#485) * initial commit Signed-off-by: raver119@gmail.com * some minor singleton changes Signed-off-by: raver119@gmail.com * more iterations Signed-off-by: raver119 * more singletons updated Signed-off-by: raver119 * more singletons updated Signed-off-by: raver119 * more changes Signed-off-by: raver119@gmail.com * CUDA updates Signed-off-by: raver119@gmail.com * Java side update Signed-off-by: raver119@gmail.com * one commented out test Signed-off-by: raver119@gmail.com --- libnd4j/UnderstandingGraph.md | 2 +- libnd4j/include/array/ConstantDataBuffer.h | 29 +- libnd4j/include/array/ConstantOffsetsBuffer.h | 49 ++++ libnd4j/include/array/ConstantShapeBuffer.h | 49 ++++ .../include/array/CudaPointerDeallocator.h | 38 +++ libnd4j/include/array/DataTypeUtils.h | 6 +- libnd4j/include/array/NDArray.h | 21 +- libnd4j/include/array/NDArray.hXX | 169 ++++++------ libnd4j/include/array/PointerDeallocator.h | 39 +++ libnd4j/include/array/PointerWrapper.h | 49 ++++ .../include/array/PrimaryPointerDeallocator.h | 38 +++ libnd4j/include/array/TadPack.h | 9 +- libnd4j/include/array/cpu/NDArray.cpp | 2 +- .../array/cuda/CudaPointerDeallocator.cu | 29 ++ libnd4j/include/array/cuda/DataBuffer.cu | 12 +- libnd4j/include/array/cuda/NDArray.cu | 2 +- .../include/array/impl/ConstantDataBuffer.cpp | 57 ++-- .../array/impl/ConstantOffsetsBuffer.cpp | 51 ++++ .../array/impl/ConstantShapeBuffer.cpp | 51 ++++ libnd4j/include/array/impl/DataBuffer.cpp | 22 +- .../include/array/impl/PointerDeallocator.cpp | 29 ++ libnd4j/include/array/impl/PointerWrapper.cpp | 37 +++ .../array/impl/PrimaryPointerDeallocator.cpp | 29 ++ libnd4j/include/array/impl/TadPack.cpp | 16 +- libnd4j/include/execution/ThreadPool.h | 6 +- libnd4j/include/execution/Threads.h | 16 +- .../include/execution/cpu/LaunchContext.cpp | 19 +- .../include/execution/cuda/LaunchContext.cu | 52 ++-- libnd4j/include/execution/impl/ThreadPool.cpp | 30 +- libnd4j/include/execution/impl/Threads.cpp | 14 +- libnd4j/include/execution/impl/Ticket.cpp | 6 +- libnd4j/include/graph/ContextPrototype.h | 2 +- libnd4j/include/graph/GraphHolder.h | 3 +- .../graph/execution/impl/LogicReturn.cpp | 4 +- .../graph/execution/impl/LogicWhile.cpp | 2 +- libnd4j/include/graph/impl/Context.cpp | 2 +- libnd4j/include/graph/impl/Graph.cpp | 14 +- .../include/graph/impl/GraphExecutioner.cpp | 26 +- libnd4j/include/graph/impl/GraphHolder.cpp | 10 +- libnd4j/include/graph/impl/Node.cpp | 2 +- libnd4j/include/helpers/BlasHelper.h | 4 +- libnd4j/include/helpers/ConstantHelper.h | 5 +- libnd4j/include/helpers/ConstantShapeHelper.h | 18 +- libnd4j/include/helpers/ConstantTadHelper.h | 4 +- libnd4j/include/helpers/DebugHelper.h | 2 +- libnd4j/include/helpers/LoopKind.h | 2 +- libnd4j/include/helpers/Loops.h | 8 +- libnd4j/include/helpers/OpTracker.h | 4 +- .../helpers/benchmark/BroadcastBenchmark.h | 12 +- .../helpers/benchmark/DeclarableBenchmark.h | 2 +- .../helpers/benchmark/ReductionBenchmark.h | 6 +- .../include/helpers/cpu/ConstantHelper.cpp | 25 +- .../helpers/cpu/ConstantShapeHelper.cpp | 63 ++--- .../include/helpers/cpu/ConstantTadHelper.cpp | 59 +--- libnd4j/include/helpers/cpu/MmulHelper.cpp | 16 +- libnd4j/include/helpers/cpu/cublasHelper.cpp | 11 +- libnd4j/include/helpers/cublasHelper.h | 5 +- .../include/helpers/cuda/ConstantHelper.cu | 29 +- .../helpers/cuda/ConstantShapeHelper.cu | 51 ++-- .../include/helpers/cuda/ConstantTadHelper.cu | 29 +- .../include/helpers/cuda_off/MmulHelper.cu 
| 8 +- .../include/helpers/cuda_off/cublasHelper.cu | 13 +- libnd4j/include/helpers/helper_hash.h | 4 +- libnd4j/include/helpers/impl/BlasHelper.cpp | 29 +- .../include/helpers/impl/OmpLaunchHelper.cpp | 12 +- libnd4j/include/helpers/impl/OpTracker.cpp | 10 +- libnd4j/include/helpers/impl/ShapeUtils.cpp | 30 +- libnd4j/include/helpers/impl/helper_hash.cpp | 10 +- libnd4j/include/helpers/logger.h | 6 +- libnd4j/include/helpers/shape.h | 8 +- libnd4j/include/legacy/NativeOps.h | 10 +- .../legacy/cpu/NativeOpExecutioner.cpp | 54 ++-- libnd4j/include/legacy/cpu/NativeOps.cpp | 80 +++--- .../legacy/cuda/NativeOpExecutioner.cu | 22 +- libnd4j/include/legacy/cuda/NativeOps.cu | 246 +++++++++-------- libnd4j/include/legacy/impl/Environment.cpp | 23 +- libnd4j/include/loops/cpu/broadcasting.hpp | 6 +- .../include/loops/cpu/broadcasting_bool.hpp | 8 +- .../include/loops/cpu/broadcasting_int.hpp | 8 +- libnd4j/include/loops/cpu/indexreduce.hpp | 4 +- .../include/loops/cpu/reduce/reduce_bool.cpp | 4 +- .../include/loops/cpu/reduce/reduce_float.hpp | 6 +- .../include/loops/cpu/reduce/reduce_long.cpp | 6 +- .../include/loops/cpu/reduce/reduce_same.cpp | 6 +- libnd4j/include/loops/cpu/reduce3.hpp | 2 +- libnd4j/include/loops/cpu/scalar.hpp | 2 +- libnd4j/include/loops/cpu/scalar_bool.cpp | 2 +- libnd4j/include/loops/cpu/scalar_int.cpp | 2 +- .../include/loops/cpu/summarystatsreduce.cpp | 2 +- .../loops/cuda/legacy/transform.legacy | 2 +- libnd4j/include/loops/cuda/scalar.chpp | 2 +- libnd4j/include/loops/cuda/scalar_bool.cu | 2 +- libnd4j/include/loops/cuda/scalar_int.cu | 2 +- .../include/loops/cuda/summarystatsreduce.cu | 6 +- libnd4j/include/memory/MemoryCounter.h | 4 +- libnd4j/include/memory/MemoryRegistrator.h | 3 +- libnd4j/include/memory/MemoryTracker.h | 3 +- libnd4j/include/memory/impl/MemoryCounter.cpp | 14 +- .../include/memory/impl/MemoryRegistrator.cpp | 11 +- libnd4j/include/memory/impl/MemoryTracker.cpp | 14 +- .../include/ops/declarable/OpRegistrator.h | 2 +- .../generic/bitwise/bits_hamming_distance.cpp | 2 +- .../declarable/generic/blas/batched_gemm.cpp | 4 +- .../ops/declarable/generic/blas/matmul.cpp | 2 +- .../declarable/generic/blas/tensormmul.cpp | 2 +- .../ops/declarable/generic/boolean/choose.cpp | 4 +- .../ops/declarable/generic/boolean/where.cpp | 2 +- .../declarable/generic/boolean/where_np.cpp | 4 +- .../generic/compat/compat_sparse_to_dense.cpp | 2 +- .../generic/compat/compat_string_split.cpp | 4 +- .../declarable/generic/compression/bitmap.cpp | 4 +- .../generic/compression/threshold.cpp | 2 +- .../declarable/generic/datatypes/bitcast.cpp | 8 +- .../ops/declarable/generic/datatypes/cast.cpp | 2 +- .../generic/helpers/BroadcastHelper.h | 2 +- .../generic/images/crop_and_resize.cpp | 2 +- .../generic/images/image_resize.cpp | 2 +- .../generic/images/resize_images.cpp | 2 +- .../declarable/generic/images/rgbToGrs.cpp | 2 +- .../generic/kernels/knn_mindistance.cpp | 2 +- .../declarable/generic/linalg/diagPart.cpp | 2 +- .../ops/declarable/generic/linalg/eye.cpp | 2 +- .../ops/declarable/generic/linalg/lstsq.cpp | 8 +- .../generic/linalg/matrixDiagPart.cpp | 2 +- .../generic/linalg/matrix_determinant.cpp | 18 +- .../ops/declarable/generic/linalg/qr.cpp | 8 +- .../generic/linalg/sufficient_statistics.cpp | 4 +- .../ops/declarable/generic/linalg/svd.cpp | 4 +- .../ops/declarable/generic/linalg/trace.cpp | 2 +- .../ops/declarable/generic/linalg/tri.cpp | 2 +- .../generic/loss/absoluteDifference.cpp | 4 +- .../generic/loss/cosineDistance.cpp | 2 +- 
.../ops/declarable/generic/loss/hingeLoss.cpp | 4 +- .../ops/declarable/generic/loss/huberLoss.cpp | 4 +- .../ops/declarable/generic/loss/l2_loss.cpp | 2 +- .../ops/declarable/generic/loss/logLoss.cpp | 4 +- .../generic/loss/log_poisson_loss.cpp | 4 +- .../generic/loss/meanPairWsSqErr.cpp | 2 +- .../ops/declarable/generic/loss/meanSqErr.cpp | 4 +- .../generic/loss/sigmCrossEntropy.cpp | 4 +- .../generic/loss/softmaxCrossEntropy.cpp | 8 +- .../loss/softmaxCrossEntropyWithLogits.cpp | 4 +- .../generic/nn/activations/crelu.cpp | 4 +- .../ops/declarable/generic/nn/batchnorm.cpp | 4 +- .../ops/declarable/generic/nn/bias_add.cpp | 2 +- .../declarable/generic/nn/convo/deconv2d.cpp | 2 +- .../generic/nn/convo/deconv2d_tf.cpp | 2 +- .../generic/nn/convo/dilation2d.cpp | 4 +- .../generic/nn/dot_product_attention.cpp | 4 +- .../generic/nn/embedding_lookup.cpp | 4 +- .../nn/multi_head_dot_product_attention.cpp | 4 +- .../generic/nn/pooling/avgpool2d.cpp | 4 +- .../generic/nn/pooling/avgpool3d.cpp | 4 +- .../generic/nn/pooling/maxpool2d.cpp | 4 +- .../generic/nn/pooling/maxpool3d.cpp | 4 +- .../nn/pooling/maxpool_with_argmax.cpp | 4 +- .../generic/nn/pooling/pnormpool2d.cpp | 4 +- .../declarable/generic/nn/recurrent/gru.cpp | 12 +- .../generic/nn/recurrent/gruCell.cpp | 2 +- .../generic/nn/recurrent/lstmCell.cpp | 2 +- .../generic/nn/recurrent/lstmLayer.cpp | 6 +- .../declarable/generic/nn/recurrent/sru.cpp | 12 +- .../generic/nn/recurrent/sruCell.cpp | 2 +- .../generic/parity_ops/bincount.cpp | 2 +- .../parity_ops/broadcast_dynamic_shape.cpp | 2 +- .../generic/parity_ops/check_numerics.cpp | 2 +- .../parity_ops/compare_and_bitpack.cpp | 2 +- .../generic/parity_ops/confusion_matrix.cpp | 2 +- .../declarable/generic/parity_ops/expose.cpp | 2 +- .../generic/parity_ops/in_top_k.cpp | 2 +- .../generic/parity_ops/listdiff.cpp | 4 +- .../parity_ops/non_max_suppression.cpp | 4 +- .../non_max_suppression_overlaps.cpp | 2 +- .../generic/parity_ops/nth_element.cpp | 4 +- .../declarable/generic/parity_ops/onehot.cpp | 2 +- .../declarable/generic/parity_ops/top_k.cpp | 2 +- .../declarable/generic/parity_ops/unique.cpp | 12 +- .../generic/parity_ops/zero_fraction.cpp | 2 +- .../declarable/generic/random/bernoulli.cpp | 2 +- .../declarable/generic/random/exponential.cpp | 2 +- .../ops/declarable/generic/random/gamma.cpp | 2 +- .../declarable/generic/random/get_seed.cpp | 2 +- .../declarable/generic/random/multinomial.cpp | 2 +- .../ops/declarable/generic/random/normal.cpp | 2 +- .../ops/declarable/generic/random/poisson.cpp | 2 +- .../declarable/generic/random/random_crop.cpp | 2 +- .../declarable/generic/random/set_seed.cpp | 2 +- .../ops/declarable/generic/random/uniform.cpp | 2 +- .../ops/declarable/generic/reduce/argamax.cpp | 2 +- .../ops/declarable/generic/reduce/argamin.cpp | 2 +- .../ops/declarable/generic/reduce/argmax.cpp | 2 +- .../ops/declarable/generic/reduce/argmin.cpp | 2 +- .../declarable/generic/shape/broadcast_to.cpp | 2 +- .../shape/evaluate_reduction_shape.cpp | 4 +- .../declarable/generic/shape/expand_dims.cpp | 6 +- .../ops/declarable/generic/shape/flatten.cpp | 2 +- .../ops/declarable/generic/shape/order.cpp | 2 +- .../ops/declarable/generic/shape/rank.cpp | 2 +- .../ops/declarable/generic/shape/reshape.cpp | 4 +- .../ops/declarable/generic/shape/shape.cpp | 2 +- .../ops/declarable/generic/shape/shapes.cpp | 2 +- .../ops/declarable/generic/shape/size.cpp | 2 +- .../ops/declarable/generic/shape/size_at.cpp | 2 +- .../ops/declarable/generic/shape/squeeze.cpp | 6 +- 
.../generic/shape/tile_to_shape.cpp | 2 +- .../ops/declarable/generic/tensor/create.cpp | 2 +- .../ops/declarable/generic/tensor/fill.cpp | 2 +- .../declarable/generic/tensor/lin_space.cpp | 2 +- .../ops/declarable/generic/tensor/ones_as.cpp | 2 +- .../ops/declarable/generic/tensor/range.cpp | 14 +- .../generic/tensor/strided_slice.cpp | 14 +- .../declarable/generic/tensor/zeros_as.cpp | 2 +- .../declarable/generic/tests/test_scalar.cpp | 2 +- .../declarable/generic/tests/testcustom.cpp | 2 +- .../generic/thrid_party/firas_sparse.cpp | 2 +- .../generic/transforms/batch_to_space.cpp | 2 +- .../generic/transforms/batch_to_space_nd.cpp | 2 +- .../transforms/clip_by_global_norm.cpp | 2 +- .../declarable/generic/transforms/concat.cpp | 16 +- .../generic/transforms/depth_to_space.cpp | 2 +- .../generic/transforms/dynamic_stitch.cpp | 2 +- .../declarable/generic/transforms/gather.cpp | 2 +- .../generic/transforms/hashcode.cpp | 2 +- .../generic/transforms/histogram.cpp | 2 +- .../transforms/histogram_fixed_width.cpp | 2 +- .../generic/transforms/merge_add.cpp | 2 +- .../generic/transforms/merge_avg.cpp | 2 +- .../generic/transforms/merge_max.cpp | 2 +- .../generic/transforms/mirrorPad.cpp | 2 +- .../ops/declarable/generic/transforms/pad.cpp | 2 +- .../declarable/generic/transforms/repeat.cpp | 2 +- .../declarable/generic/transforms/slice.cpp | 8 +- .../generic/transforms/space_to_batch.cpp | 2 +- .../generic/transforms/space_to_batch_nd.cpp | 2 +- .../generic/transforms/space_to_depth.cpp | 2 +- .../declarable/generic/transforms/split.cpp | 4 +- .../declarable/generic/transforms/split_v.cpp | 2 +- .../declarable/generic/transforms/stack.cpp | 8 +- .../declarable/generic/transforms/tear.cpp | 4 +- .../declarable/generic/transforms/tile.cpp | 2 +- .../declarable/generic/transforms/unstack.cpp | 6 +- .../generic/util/print_affinity.cpp | 2 +- .../generic/util/print_variable.cpp | 4 +- .../ops/declarable/helpers/cpu/addBias.cpp | 4 +- .../ops/declarable/helpers/cpu/adjust_hue.cpp | 4 +- .../helpers/cpu/adjust_saturation.cpp | 4 +- .../declarable/helpers/cpu/batched_gemm.cpp | 6 +- .../ops/declarable/helpers/cpu/dynamic.cpp | 4 +- .../ops/declarable/helpers/cpu/gather.cpp | 16 +- .../helpers/cpu/gatherTransforms.cpp | 2 +- .../declarable/helpers/cpu/imagesHelpers.cpp | 12 +- .../helpers/cpu/indexReductions.hpp | 4 +- .../ops/declarable/helpers/cpu/ismax.cpp | 4 +- .../ops/declarable/helpers/cpu/lrn.cpp | 8 +- .../ops/declarable/helpers/cpu/lup.cpp | 2 +- .../declarable/helpers/cpu/nth_element.cpp | 2 +- .../ops/declarable/helpers/cpu/one_hot.cpp | 2 +- .../declarable/helpers/cpu/randomShuffle.cpp | 6 +- .../ops/declarable/helpers/cpu/roll.cpp | 4 +- .../ops/declarable/helpers/cpu/scatter.cpp | 8 +- .../ops/declarable/helpers/cpu/softmax.cpp | 2 +- .../ops/declarable/helpers/cpu/stack.cpp | 12 +- .../declarable/helpers/cuda/activations.cu | 4 +- .../ops/declarable/helpers/cuda/adjust_hue.cu | 18 +- .../helpers/cuda/adjust_saturation.cu | 18 +- .../ops/declarable/helpers/cuda/batchnorm.cu | 6 +- .../ops/declarable/helpers/cuda/confusion.cu | 2 +- .../ops/declarable/helpers/cuda/dynamic.cu | 8 +- .../helpers/cuda/extract_patches.cu | 4 +- .../helpers/cuda/histogramFixedWidth.cu | 6 +- .../declarable/helpers/cuda/imagesHelpers.cu | 24 +- .../helpers/cuda/indexReductions.cu | 8 +- .../ops/declarable/helpers/cuda/ismax.cu | 2 +- .../ops/declarable/helpers/cuda/lrn.cu | 8 +- .../ops/declarable/helpers/cuda/lstsq.cu | 2 +- .../ops/declarable/helpers/cuda/lup.cu | 24 +- 
.../declarable/helpers/cuda/matrix_band.cu | 4 +- .../helpers/cuda/matrix_diag_part.cu | 4 +- .../ops/declarable/helpers/cuda/meshgrid.cu | 2 +- .../declarable/helpers/cuda/nth_element.cu | 2 +- .../ops/declarable/helpers/cuda/percentile.cu | 2 +- .../ops/declarable/helpers/cuda/prefix.cu | 4 +- .../include/ops/declarable/helpers/cuda/qr.cu | 6 +- .../ops/declarable/helpers/cuda/reverse.cu | 4 +- .../ops/declarable/helpers/cuda/roll.cu | 2 +- .../ops/declarable/helpers/cuda/scatter.cu | 40 +-- .../declarable/helpers/cuda/scatter_simple.cu | 2 +- .../declarable/helpers/cuda/scatter_update.cu | 4 +- .../declarable/helpers/cuda/segment_max.cu | 24 +- .../declarable/helpers/cuda/segment_mean.cu | 24 +- .../declarable/helpers/cuda/segment_min.cu | 24 +- .../declarable/helpers/cuda/segment_prod.cu | 24 +- .../declarable/helpers/cuda/segment_sqrtn.cu | 12 +- .../declarable/helpers/cuda/segment_sum.cu | 20 +- .../ops/declarable/helpers/cuda/solve.cu | 10 +- .../ops/declarable/helpers/cuda/stack.cu | 8 +- .../ops/declarable/helpers/cuda/top_k.cu | 8 +- .../ops/declarable/helpers/cuda/transforms.cu | 2 +- .../helpers/cuda/triangular_solve.cu | 10 +- .../include/ops/declarable/impl/BooleanOp.cpp | 2 +- .../declarable/impl/BroadcastableBoolOp.cpp | 18 +- .../ops/declarable/impl/BroadcastableOp.cpp | 20 +- .../ops/declarable/impl/DeclarableListOp.cpp | 2 +- .../ops/declarable/impl/DeclarableOp.cpp | 30 +- .../declarable/impl/DeclarableReductionOp.cpp | 2 +- .../declarable/impl/LegacyBroadcastBoolOp.cpp | 14 +- .../ops/declarable/impl/LegacyBroadcastOp.cpp | 12 +- .../declarable/impl/LegacyIndexReduceOp.cpp | 14 +- .../impl/LegacyPairwiseTransformBoolOp.cpp | 2 +- .../ops/declarable/impl/LegacyRandomOp.cpp | 2 +- .../ops/declarable/impl/LegacyReduce3Op.cpp | 12 +- .../declarable/impl/LegacyReduceBoolOp.cpp | 12 +- .../declarable/impl/LegacyReduceFloatOp.cpp | 12 +- .../declarable/impl/LegacyReduceLongOp.cpp | 12 +- .../declarable/impl/LegacyReduceSameOp.cpp | 12 +- .../ops/declarable/impl/LegacyScalarOp.cpp | 2 +- .../ops/declarable/impl/LegacyStatsOp.cpp | 6 +- .../declarable/impl/LegacyTransformBoolOp.cpp | 2 +- .../ops/declarable/impl/OpDescriptor.cpp | 6 +- .../ops/declarable/impl/OpRegistrator.cpp | 32 +-- .../ops/declarable/impl/PlatformHelper.cpp | 2 +- libnd4j/include/ops/impl/gemm.cpp | 2 +- libnd4j/include/ops/impl/specials_double.hpp | 8 +- libnd4j/include/ops/special_random_ops.h | 12 +- libnd4j/include/system/Environment.h | 6 +- libnd4j/include/system/op_boilerplate.h | 34 +-- libnd4j/include/system/platform_boilerplate.h | 2 +- libnd4j/minifier/minifier.cpp | 2 +- libnd4j/server/GraphServer.cpp | 10 +- .../layers_tests/ConditionalTests.cpp | 12 +- .../layers_tests/ConstantShapeHelperTests.cpp | 40 +-- .../layers_tests/ConvolutionTests1.cpp | 1 - .../layers_tests/CudaBasicsTests1.cu | 26 +- .../layers_tests/CudaBasicsTests2.cu | 20 +- .../layers_tests/DataBufferTests.cpp | 26 +- .../layers_tests/DataBufferTestsCuda.cu | 34 +-- .../layers_tests/DeclarableOpsTests1.cpp | 24 +- .../layers_tests/DeclarableOpsTests12.cpp | 8 +- .../layers_tests/DeclarableOpsTests13.cpp | 6 +- .../layers_tests/DeclarableOpsTests14.cpp | 2 +- .../layers_tests/DeclarableOpsTests3.cpp | 20 +- .../layers_tests/DeclarableOpsTests4.cpp | 2 +- libnd4j/tests_cpu/layers_tests/EmptyTests.cpp | 2 +- .../layers_tests/ExtraArgumentsTests.cpp | 2 +- .../layers_tests/FlatBuffersTests.cpp | 24 +- .../layers_tests/GraphHolderTests.cpp | 28 +- .../layers_tests/GraphStateTests.cpp | 8 +- 
libnd4j/tests_cpu/layers_tests/GraphTests.cpp | 8 +- .../tests_cpu/layers_tests/HashUtilsTests.cpp | 4 +- .../layers_tests/JavaInteropTests.cpp | 48 ++-- .../layers_tests/LegacyOpsCudaTests.cu | 2 +- .../tests_cpu/layers_tests/LegacyOpsTests.cpp | 28 +- libnd4j/tests_cpu/layers_tests/MmapTests.cpp | 2 +- .../layers_tests/MultiDataTypeTests.cpp | 80 +++--- .../layers_tests/NDArrayCudaBasicsTests.cu | 6 +- .../tests_cpu/layers_tests/NativeOpsTests.cpp | 28 +- .../layers_tests/OmpLaunchHelperTests.cpp | 12 +- .../tests_cpu/layers_tests/OpTrackerTests.cpp | 12 +- .../layers_tests/PlaygroundTests.cpp | 16 +- .../layers_tests/ServerRelatedTests.cpp | 26 +- .../tests_cpu/layers_tests/SortCpuTests.cpp | 8 +- libnd4j/tests_cpu/layers_tests/TadTests.cpp | 4 +- .../tests_cpu/layers_tests/ThreadsTests.cpp | 24 +- .../tests_cpu/layers_tests/WorkspaceTests.cpp | 24 +- .../java/org/nd4j/nativeblas/NativeOps.java | 9 +- .../nativeblas/OpaqueConstantShapeBuffer.java | 27 ++ .../ops/executioner/CudaExecutioner.java | 10 +- .../java/org/nd4j/nativeblas/Nd4jCuda.java | 227 +++++++++++++-- .../org/nd4j/nativeblas/Nd4jCudaPresets.java | 8 +- .../nativecpu/ops/NativeOpExecutioner.java | 6 +- .../java/org/nd4j/nativeblas/Nd4jCpu.java | 259 +++++++++++++++--- .../org/nd4j/nativeblas/Nd4jCpuPresets.java | 8 +- .../profiling/PerformanceTrackerTests.java | 1 + 373 files changed, 2698 insertions(+), 1836 deletions(-) create mode 100644 libnd4j/include/array/ConstantOffsetsBuffer.h create mode 100644 libnd4j/include/array/ConstantShapeBuffer.h create mode 100644 libnd4j/include/array/CudaPointerDeallocator.h create mode 100644 libnd4j/include/array/PointerDeallocator.h create mode 100644 libnd4j/include/array/PointerWrapper.h create mode 100644 libnd4j/include/array/PrimaryPointerDeallocator.h create mode 100644 libnd4j/include/array/cuda/CudaPointerDeallocator.cu create mode 100644 libnd4j/include/array/impl/ConstantOffsetsBuffer.cpp create mode 100644 libnd4j/include/array/impl/ConstantShapeBuffer.cpp create mode 100644 libnd4j/include/array/impl/PointerDeallocator.cpp create mode 100644 libnd4j/include/array/impl/PointerWrapper.cpp create mode 100644 libnd4j/include/array/impl/PrimaryPointerDeallocator.cpp create mode 100644 nd4j/nd4j-backends/nd4j-api-parent/nd4j-native-api/src/main/java/org/nd4j/nativeblas/OpaqueConstantShapeBuffer.java diff --git a/libnd4j/UnderstandingGraph.md b/libnd4j/UnderstandingGraph.md index 7e2231c08..d1c51b428 100644 --- a/libnd4j/UnderstandingGraph.md +++ b/libnd4j/UnderstandingGraph.md @@ -77,7 +77,7 @@ If you're adding new ops, and want to make sure they run ok on your specific dev Despite being simple - it still provides you with time spent in various parts of Graph. 
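Note (editorial): among the "singleton changes" this patch lists, Environment::getInstance() now returns a reference rather than a pointer, which is why call sites switch from "->" to "." in the hunk below. A minimal sketch of the assumed shape of that refactor, not the actual implementation:

```c++
// Assumed pattern: a function-local static ("Meyers singleton") lets
// getInstance() return a reference, so callers write getInstance().foo()
// instead of getInstance()->foo(); initialization is thread-safe in C++11.
class Environment {
 public:
  static Environment& getInstance() {
    static Environment instance;   // constructed once, on first use
    return instance;
  }
  void setProfiling(bool enabled) { _profiling = enabled; }
 private:
  Environment() = default;
  bool _profiling = false;
};
```

The UnderstandingGraph.md hunk that follows shows the corresponding call-site update.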
```c++ -Environment::getInstance()->setProfiling(true); +Environment::getInstance().setProfiling(true); auto graph = GraphExecutioner::importFromFlatBuffers("./resources/ae_00.fb"); auto profile = GraphProfilingHelper::profile(graph, 1000); diff --git a/libnd4j/include/array/ConstantDataBuffer.h b/libnd4j/include/array/ConstantDataBuffer.h index e8bafe114..197b93307 100644 --- a/libnd4j/include/array/ConstantDataBuffer.h +++ b/libnd4j/include/array/ConstantDataBuffer.h @@ -22,37 +22,40 @@ #include #include +#include +#include +#include namespace sd { class ND4J_EXPORT ConstantDataBuffer { private: - Nd4jPointer _primaryBuffer = nullptr; - Nd4jPointer _specialBuffer = nullptr; - Nd4jLong _length = 0; - Nd4jLong _sizeOf = 0; + std::shared_ptr<PointerWrapper> _primaryBuffer; + std::shared_ptr<PointerWrapper> _specialBuffer = nullptr; + uint64_t _length = 0; + uint8_t _sizeOf = 0; public: - ConstantDataBuffer(Nd4jPointer primary, Nd4jPointer special, Nd4jLong numEelements, Nd4jLong sizeOf); + ConstantDataBuffer(const std::shared_ptr<PointerWrapper>& primary, uint64_t numElements, DataType dtype); + ConstantDataBuffer(const std::shared_ptr<PointerWrapper>& primary, const std::shared_ptr<PointerWrapper>& special, uint64_t numElements, DataType dtype); ConstantDataBuffer(const ConstantDataBuffer &other); ConstantDataBuffer() = default; ~ConstantDataBuffer() = default; - Nd4jLong sizeOf() const; - Nd4jLong length() const; + uint8_t sizeOf() const; + uint64_t length() const; - Nd4jPointer primary() const; - Nd4jPointer special() const; + void* primary() const; + void* special() const; ConstantDataBuffer& operator=(const ConstantDataBuffer& other) = default; ConstantDataBuffer& operator=(ConstantDataBuffer&& other) noexcept = default; + template <typename T> + T* primaryAsT() const; template <typename T> - T* primaryAsT(); - - template <typename T> - T* specialAsT(); + T* specialAsT() const; }; } diff --git a/libnd4j/include/array/ConstantOffsetsBuffer.h b/libnd4j/include/array/ConstantOffsetsBuffer.h new file mode 100644 index 000000000..61c1e381f --- /dev/null +++ b/libnd4j/include/array/ConstantOffsetsBuffer.h @@ -0,0 +1,49 @@ +/******************************************************************************* + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
diff --git a/libnd4j/include/array/ConstantOffsetsBuffer.h b/libnd4j/include/array/ConstantOffsetsBuffer.h new file mode 100644 index 000000000..61c1e381f --- /dev/null +++ b/libnd4j/include/array/ConstantOffsetsBuffer.h @@ -0,0 +1,49 @@ +/******************************************************************************* + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#ifndef SD_ARRAY_CONSTANTOFFSETSBUFFER_H_ +#define SD_ARRAY_CONSTANTOFFSETSBUFFER_H_ + +#include +#include +#include +#include + +namespace sd { + +class ND4J_EXPORT ConstantOffsetsBuffer { + private: + std::shared_ptr<PointerWrapper> _primaryOffsets; + std::shared_ptr<PointerWrapper> _specialOffsets; + + public: + ConstantOffsetsBuffer(const std::shared_ptr<PointerWrapper> &primary); + ConstantOffsetsBuffer(const std::shared_ptr<PointerWrapper> &primary, const std::shared_ptr<PointerWrapper> &special); + ConstantOffsetsBuffer() = default; + ~ConstantOffsetsBuffer() = default; + + const Nd4jLong* primary() const; + const Nd4jLong* special() const; + const Nd4jLong* platform() const; +}; + +} // namespace sd + +#endif //SD_ARRAY_CONSTANTOFFSETSBUFFER_H_
diff --git a/libnd4j/include/array/ConstantShapeBuffer.h b/libnd4j/include/array/ConstantShapeBuffer.h new file mode 100644 index 000000000..299653271 --- /dev/null +++ b/libnd4j/include/array/ConstantShapeBuffer.h @@ -0,0 +1,49 @@ +/******************************************************************************* + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#ifndef SD_ARRAY_CONSTANTSHAPEBUFFER_H_ +#define SD_ARRAY_CONSTANTSHAPEBUFFER_H_ + +#include +#include +#include +#include + +namespace sd { + +class ND4J_EXPORT ConstantShapeBuffer { + private: + std::shared_ptr<PointerWrapper> _primaryShapeInfo; + std::shared_ptr<PointerWrapper> _specialShapeInfo; + + public: + ConstantShapeBuffer(const std::shared_ptr<PointerWrapper> &primary); + ConstantShapeBuffer(const std::shared_ptr<PointerWrapper> &primary, const std::shared_ptr<PointerWrapper> &special); + ConstantShapeBuffer() = default; + ~ConstantShapeBuffer() = default; + + const Nd4jLong* primary() const; + const Nd4jLong* special() const; + const Nd4jLong* platform() const; +}; + +} // namespace sd + +#endif //SD_ARRAY_CONSTANTSHAPEBUFFER_H_
diff --git a/libnd4j/include/array/CudaPointerDeallocator.h b/libnd4j/include/array/CudaPointerDeallocator.h new file mode 100644 index 000000000..c5c817aeb --- /dev/null +++ b/libnd4j/include/array/CudaPointerDeallocator.h @@ -0,0 +1,38 @@ +/******************************************************************************* + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License.
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#ifndef SD_CUDAYPOINTERDEALLOCATOR_H_ +#define SD_CUDAYPOINTERDEALLOCATOR_H_ + +#include +#include +#include + +namespace sd { +class ND4J_EXPORT CudaPointerDeallocator : public PointerDeallocator { + public: + CudaPointerDeallocator() = default; + ~CudaPointerDeallocator() = default; + + void release(void* ptr) override; +}; +} + +#endif //SD_CUDAYPOINTERDEALLOCATOR_H_ diff --git a/libnd4j/include/array/DataTypeUtils.h b/libnd4j/include/array/DataTypeUtils.h index bd89605d1..686b5bc97 100644 --- a/libnd4j/include/array/DataTypeUtils.h +++ b/libnd4j/include/array/DataTypeUtils.h @@ -110,7 +110,7 @@ namespace sd { // if proposed dataType is already floating point - return it if (isR(typeX)) return typeX; - return Environment::getInstance()->defaultFloatDataType(); + return Environment::getInstance().defaultFloatDataType(); } FORCEINLINE bool DataTypeUtils::isR(sd::DataType dataType) { @@ -154,7 +154,7 @@ namespace sd { // if both data types are float - return biggest one if (rX && rY) { // if we allow precision boost, then we pick bigger data type - if (sd::Environment::getInstance()->precisionBoostAllowed()) { + if (sd::Environment::getInstance().precisionBoostAllowed()) { return nd4j_max(typeX, typeY); } else { // and we return first operand otherwise @@ -165,7 +165,7 @@ namespace sd { // if that's not real type, we apply same rules if (!rX && !rY) { - if (sd::Environment::getInstance()->precisionBoostAllowed()) { + if (sd::Environment::getInstance().precisionBoostAllowed()) { return nd4j_max(typeX, typeY); } else { // and we return first operand otherwise diff --git a/libnd4j/include/array/NDArray.h b/libnd4j/include/array/NDArray.h index c314d25b6..7b32b7d49 100644 --- a/libnd4j/include/array/NDArray.h +++ b/libnd4j/include/array/NDArray.h @@ -45,6 +45,7 @@ #include #include #include +#include namespace sd { @@ -155,8 +156,8 @@ namespace sd { /** * contains shape info: matrix rank, numbers of elements per each dimension, dimensions strides, element-wise-stride, c-like or fortan-like order */ - Nd4jLong *_shapeInfo = nullptr; - Nd4jLong *_shapeInfoD = nullptr; + const Nd4jLong *_shapeInfo = nullptr; + const Nd4jLong *_shapeInfoD = nullptr; /** * pointer on device launch context (with all data needed there). 
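With ConstantShapeBuffer in place, NDArray stops owning its shape info: the `_shapeInfo`/`_shapeInfoD` members above become const pointers into a process-wide cache. An illustrative fragment of why the constness matters (libnd4j context assumed; the calls mirror the hunks in this patch):

```c++
// Shape infos now come from ConstantShapeHelper's cache and are shared by
// every array with the same shape, so NDArray must treat them as read-only.
void inspectShape(Nd4jLong* shapeInfo) {
  auto buffer = sd::ConstantShapeHelper::getInstance().bufferForShapeInfo(shapeInfo);
  const Nd4jLong* host = buffer.primary();    // shared, read-only
  const Nd4jLong* device = buffer.special();  // nullptr on CPU-only builds
  // host[0] = 3;  // never: every array sharing this cached shape would see it
  (void)host; (void)device;
}
```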
@@ -1219,7 +1220,7 @@ namespace sd { void setShapeInfo(const Nd4jLong *shapeInfo); void setShapeInfo(const Nd4jLong *shapeInfo, const sd::DataType dtype); void setShapeInfo(const ShapeDescriptor& descriptor); - void setShapeInfo(const ConstantDataBuffer& shapeBuffer); + void setShapeInfo(const ConstantShapeBuffer& shapeBuffer); /** * returns absolute offset which corresponds to given sequential index @@ -1516,9 +1517,9 @@ FORCEINLINE R NDArray::templatedGet(void const* buffer, Nd4jLong index) const { ////////////////////////////////////////////////////////////////////////// void NDArray::setShapeInfo(Nd4jLong *shapeInfo) { - auto buffer = ConstantShapeHelper::getInstance()->bufferForShapeInfo(shapeInfo); - _shapeInfo = buffer.primaryAsT(); - _shapeInfoD = buffer.specialAsT(); + auto buffer = ConstantShapeHelper::getInstance().bufferForShapeInfo(shapeInfo); + _shapeInfo = buffer.primary(); + _shapeInfoD = buffer.special(); if (shapeInfo != nullptr) { _dataType = ArrayOptions::dataType(_shapeInfo); @@ -1535,9 +1536,9 @@ void NDArray::setShapeInfo(Nd4jLong *shapeInfo) { ////////////////////////////////////////////////////////////////////////// void NDArray::setShapeInfo(Nd4jLong *shapeInfo, const sd::DataType dtype) { - auto buffer = ConstantShapeHelper::getInstance()->bufferForShapeInfo(shapeInfo); - _shapeInfo = buffer.primaryAsT(); - _shapeInfoD = buffer.specialAsT(); + auto buffer = ConstantShapeHelper::getInstance().bufferForShapeInfo(shapeInfo); + _shapeInfo = buffer.primary(); + _shapeInfoD = buffer.special(); if (shapeInfo != nullptr) { _dataType = dtype; @@ -1623,7 +1624,7 @@ bool NDArray::nonNull() const { if (isEmpty()) return true; - if(!Environment::getInstance()->isCPU()) + if(!Environment::getInstance().isCPU()) return getDataBuffer()->special() != nullptr && specialShapeInfo() != nullptr; return getDataBuffer()->primary() != nullptr && shapeInfo() != nullptr; diff --git a/libnd4j/include/array/NDArray.hXX b/libnd4j/include/array/NDArray.hXX index 9e48b05de..eefe169cf 100644 --- a/libnd4j/include/array/NDArray.hXX +++ b/libnd4j/include/array/NDArray.hXX @@ -181,7 +181,7 @@ NDArray::NDArray(sd::DataType dtype, sd::LaunchContext* context, const bool isSc _buffer->setToZeroBuffers(); } else - setShapeInfo(ConstantShapeHelper::getInstance()->emptyShapeInfo(dtype)); + setShapeInfo(ConstantShapeHelper::getInstance().emptyShapeInfo(dtype)); } ////////////////////////////////////////////////////////////////////////// @@ -1088,9 +1088,11 @@ void NDArray::streamline(char o) { char order = o == 'a' ? 
this->ordering() : o; syncToDevice(); std::shared_ptr newBuffer = std::make_shared(this->lengthOf() * sizeOfT(), dataType(), getContext()->getWorkspace()); - auto shapeBuffer = ConstantShapeHelper::getInstance()->bufferForShapeInfo(dataType(), order, rankOf(), shapeOf()); - NativeOpExecutioner::execTransformSame(getContext(), transform::Copy, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), newBuffer->primary(), static_cast(shapeBuffer.primary()), newBuffer->special(), static_cast(shapeBuffer.special()), nullptr, nullptr, nullptr); - setShapeInfo(static_cast(shapeBuffer.primary())); + auto shapeBuffer = ConstantShapeHelper::getInstance().bufferForShapeInfo(dataType(), order, rankOf(), shapeOf()); + NativeOpExecutioner::execTransformSame(getContext(), transform::Copy, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), newBuffer->primary(), + shapeBuffer.primary(), newBuffer->special(), + shapeBuffer.special(), nullptr, nullptr, nullptr); + setShapeInfo(shapeBuffer); _buffer = newBuffer; _offset = 0; tickWriteDevice(); @@ -1355,7 +1357,7 @@ NDArray NDArray::reduceAlongDimension(sd::reduce::FloatOps op, const std::vector std::vector copy(dimensions); - auto newShape = ShapeUtils::evalReduceShapeInfo('c', copy, *this, isR() ? dataType() : Environment::getInstance()->defaultFloatDataType(), keepDims, supportOldShapes, getContext()->getWorkspace()); + auto newShape = ShapeUtils::evalReduceShapeInfo('c', copy, *this, isR() ? dataType() : Environment::getInstance().defaultFloatDataType(), keepDims, supportOldShapes, getContext()->getWorkspace()); NDArray result(newShape, true, getContext()); @@ -1432,7 +1434,7 @@ NDArray NDArray::reduceNumber(sd::reduce::FloatOps op, void *extraParams) const if (isS()) throw std::runtime_error("NDArray::reduceNumber FloatOps: you can't use this method on String array!"); - auto shape = ConstantShapeHelper::getInstance()->scalarShapeInfo(DataTypeUtils::pickFloatingType(dataType())); + auto shape = ConstantShapeHelper::getInstance().scalarShapeInfo(DataTypeUtils::pickFloatingType(dataType())); NDArray result(shape, true, this->getContext()); NDArray::prepareSpecialUse({&result}, {this}); @@ -1461,7 +1463,7 @@ NDArray NDArray::reduceNumber(sd::reduce::BoolOps op, void *extraParams) const { if (isS()) throw std::runtime_error("NDArray::reduceNumber BoolOps: you can't use this method on String array!"); - auto shape = ConstantShapeHelper::getInstance()->scalarShapeInfo(DataType::BOOL); + auto shape = ConstantShapeHelper::getInstance().scalarShapeInfo(DataType::BOOL); NDArray result(shape, true, this->getContext()); NDArray::prepareSpecialUse({&result}, {this}); @@ -1476,7 +1478,7 @@ NDArray NDArray::reduceNumber(sd::reduce::LongOps op, void *extraParams) const { if (isS()) throw std::runtime_error("NDArray::reduceNumber LongOps: you can't use this method on String array!"); - auto shape = ConstantShapeHelper::getInstance()->scalarShapeInfo(DataType::INT64); + auto shape = ConstantShapeHelper::getInstance().scalarShapeInfo(DataType::INT64); NDArray result(shape, true, this->getContext()); NDArray::prepareSpecialUse({&result}, {this}); @@ -1854,8 +1856,7 @@ void NDArray::setAttached(bool reallyAttached) { ////////////////////////////////////////////////////////////////////////// // calculate strides void NDArray::updateStrides(const char order) { - shape::updateStrides(_shapeInfo, order); - syncShape(); + throw std::runtime_error("Forbidden method"); } ////////////////////////////////////////////////////////////////////////// @@ -2456,7 +2457,7 @@ 
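The `reduceNumber()` overloads above all share one idiom: fetch a cached scalar shapeInfo of the result type, then build the output array from it before the scalar reduction kernel runs. Sketched in isolation, with `ctx` standing in for `this->getContext()` and the constructor flags mirroring the hunks above:

```c++
// Scalar-result idiom from the reduceNumber() overloads (fragment,
// libnd4j context assumed).
auto shape = sd::ConstantShapeHelper::getInstance().scalarShapeInfo(sd::DataType::INT64);
sd::NDArray result(shape, true, ctx);  // flags as in the calls above
// ...the matching NativeOpExecutioner::execReduce*Scalar() then fills result
```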
void NDArray::operator+=(const NDArray& other) { if (isS()) throw std::runtime_error("NDArray::operator+=: you can't use this method on String array!"); - if (!Environment::getInstance()->isExperimentalBuild() && this->dataType() != other.dataType() && (this->dataType() != DataType::BOOL || other.dataType() != BOOL)) + if (!Environment::getInstance().isExperimentalBuild() && this->dataType() != other.dataType() && (this->dataType() != DataType::BOOL || other.dataType() != BOOL)) throw sd::datatype_exception::build("NDArray operator+=: Cannot add different types", this->dataType(), other.dataType()); if (this->lengthOf() != 1 && other.lengthOf() == 1) { @@ -2490,7 +2491,7 @@ void NDArray::operator-=(const NDArray& other) { if (isS()) throw std::runtime_error("NDArray::operator-=: you can't use this method on String array!"); - if (!Environment::getInstance()->isExperimentalBuild() && this->dataType() != other.dataType() && (this->dataType() != DataType::BOOL || other.dataType() != BOOL)) + if (!Environment::getInstance().isExperimentalBuild() && this->dataType() != other.dataType() && (this->dataType() != DataType::BOOL || other.dataType() != BOOL)) throw sd::datatype_exception::build("NDArray operator-=: Cannot subtract different types", this->dataType(), other.dataType()); if (lengthOf() != 1 && other.lengthOf() == 1) { @@ -2523,7 +2524,7 @@ void NDArray::operator-=(const NDArray& other) { void NDArray::operator*=(const NDArray& other) { if (isS()) throw std::runtime_error("NDArray::operator*=: you can't use this method on String array!"); - if (!Environment::getInstance()->isExperimentalBuild() && this->dataType() != other.dataType() && (this->dataType() != DataType::BOOL || other.dataType() != BOOL)) + if (!Environment::getInstance().isExperimentalBuild() && this->dataType() != other.dataType() && (this->dataType() != DataType::BOOL || other.dataType() != BOOL)) throw sd::datatype_exception::build("NDArray operator*=: Cannot multiply different types", this->dataType(), other.dataType()); if (lengthOf() != 1 && other.lengthOf() == 1) { @@ -2559,7 +2560,7 @@ void NDArray::operator/=(const NDArray& other) { if (other.isB()) throw std::runtime_error("NDArray::operator/=: you can't divide by bool array!"); - if (!Environment::getInstance()->isExperimentalBuild() && this->dataType() != other.dataType()) { + if (!Environment::getInstance().isExperimentalBuild() && this->dataType() != other.dataType()) { throw sd::datatype_exception::build("NDArray operator/=: Cannot divide different types", this->dataType(), other.dataType()); } @@ -2832,14 +2833,14 @@ void NDArray::applyTrueBroadcast(sd::BroadcastOpsTuple op, const NDArray& other, Nd4jLong const* yShapeInfoD = other.specialShapeInfo(); if(!isSameShape(target)) { - auto xPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), shapeInfo(), getContext()->getWorkspace()); - xShapeInfoH = reinterpret_cast(xPack.primary()); - xShapeInfoD = reinterpret_cast(xPack.special()); + auto xPack = ConstantShapeHelper::getInstance().createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), shapeInfo(), getContext()->getWorkspace()); + xShapeInfoH = xPack.primary(); + xShapeInfoD = xPack.special(); } if(!other.isSameShape(target)) { - auto yPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), other.shapeInfo(), other.getContext()->getWorkspace()); - yShapeInfoH = reinterpret_cast(yPack.primary()); - yShapeInfoD = reinterpret_cast(yPack.special()); + auto yPack = 
ConstantShapeHelper::getInstance().createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), other.shapeInfo(), other.getContext()->getWorkspace()); + yShapeInfoH = yPack.primary(); + yShapeInfoD = yPack.special(); } NDArray::prepareSpecialUse({&target}, {this, &other}); @@ -2883,14 +2884,14 @@ void NDArray::applyTrueBroadcast(sd::BroadcastBoolOpsTuple op, const NDArray& ot Nd4jLong const* yShapeInfoD = other.specialShapeInfo(); if(!isSameShape(target)) { - auto xPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), shapeInfo(), getContext()->getWorkspace()); - xShapeInfoH = reinterpret_cast(xPack.primary()); - xShapeInfoD = reinterpret_cast(xPack.special()); + auto xPack = ConstantShapeHelper::getInstance().createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), shapeInfo(), getContext()->getWorkspace()); + xShapeInfoH = xPack.primary(); + xShapeInfoD = xPack.special(); } if(!other.isSameShape(target)) { - auto yPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), other.shapeInfo(), other.getContext()->getWorkspace()); - yShapeInfoH = reinterpret_cast(yPack.primary()); - yShapeInfoD = reinterpret_cast(yPack.special()); + auto yPack = ConstantShapeHelper::getInstance().createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), other.shapeInfo(), other.getContext()->getWorkspace()); + yShapeInfoH = yPack.primary(); + yShapeInfoD = yPack.special(); } NDArray::prepareSpecialUse({&target}, {this, &other}); @@ -2934,12 +2935,12 @@ void NDArray::applyTrueBroadcast(sd::BroadcastIntOpsTuple op, const NDArray& oth Nd4jLong const* yShapeInfoD = other.specialShapeInfo(); if(!isSameShape(target)) { - auto xPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), shapeInfo(), getContext()->getWorkspace()); + auto xPack = ConstantShapeHelper::getInstance().createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), shapeInfo(), getContext()->getWorkspace()); xShapeInfoH = reinterpret_cast(xPack.primary()); xShapeInfoD = reinterpret_cast(xPack.special()); } if(!other.isSameShape(target)) { - auto yPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), other.shapeInfo(), other.getContext()->getWorkspace()); + auto yPack = ConstantShapeHelper::getInstance().createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), other.shapeInfo(), other.getContext()->getWorkspace()); yShapeInfoH = reinterpret_cast(yPack.primary()); yShapeInfoD = reinterpret_cast(yPack.special()); } @@ -3067,7 +3068,7 @@ void NDArray::applyBroadcast(sd::broadcast::Ops op, const std::vector& dime // if (other.lengthOf() == lengthOf() && this->rankOf() == other.rankOf()) { // NDArray::prepareSpecialUse({&target}, {this, &other}); - // NativeOpExecutioner::execPairwiseTransform(getContext(), fromBroadcastToPairwise(op), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), nullptr); + // NativeOpExecutioner::execPairwiseTransform(getContext(), fromBroadcastToPairwise(op), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.special(), nullptr); // NDArray::registerSpecialUse({&target}, {this, &other}); // return; // } @@ 
-3088,12 +3089,12 @@ void NDArray::applyBroadcast(sd::broadcast::Ops op, const std::vector& dime Nd4jLong const* yShapeInfoD = other.specialShapeInfo(); if(!isSameShape(target)) { - auto xPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), shapeInfo(), getContext()->getWorkspace(), copy); + auto xPack = ConstantShapeHelper::getInstance().createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), shapeInfo(), getContext()->getWorkspace(), copy); xShapeInfoH = reinterpret_cast(xPack.primary()); xShapeInfoD = reinterpret_cast(xPack.special()); } if(!other.isSameShape(target)) { - auto yPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), other.shapeInfo(), other.getContext()->getWorkspace(), copy); + auto yPack = ConstantShapeHelper::getInstance().createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), other.shapeInfo(), other.getContext()->getWorkspace(), copy); yShapeInfoH = reinterpret_cast(yPack.primary()); yShapeInfoD = reinterpret_cast(yPack.special()); } @@ -3119,7 +3120,7 @@ void NDArray::applyBroadcast(sd::broadcast::BoolOps op, const std::vector& // if (other.lengthOf() == lengthOf() && this->rankOf() == other.rankOf()) { // NDArray::prepareSpecialUse({&target}, {this, &other}); - // NativeOpExecutioner::execPairwiseBoolTransform(getContext(), fromBroadcastToPairwiseBool(op), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), nullptr); + // NativeOpExecutioner::execPairwiseBoolTransform(getContext(), fromBroadcastToPairwiseBool(op), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.special(), nullptr); // NDArray::registerSpecialUse({&target}, {this, &other}); // return; // } @@ -3142,12 +3143,12 @@ void NDArray::applyBroadcast(sd::broadcast::BoolOps op, const std::vector& Nd4jLong const* yShapeInfoD = other.specialShapeInfo(); if(!isSameShape(target)) { - auto xPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), shapeInfo(), getContext()->getWorkspace(), copy); + auto xPack = ConstantShapeHelper::getInstance().createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), shapeInfo(), getContext()->getWorkspace(), copy); xShapeInfoH = reinterpret_cast(xPack.primary()); xShapeInfoD = reinterpret_cast(xPack.special()); } if(!other.isSameShape(target)) { - auto yPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), other.shapeInfo(), other.getContext()->getWorkspace(), copy); + auto yPack = ConstantShapeHelper::getInstance().createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), other.shapeInfo(), other.getContext()->getWorkspace(), copy); yShapeInfoH = reinterpret_cast(yPack.primary()); yShapeInfoD = reinterpret_cast(yPack.special()); } @@ -3174,7 +3175,7 @@ void NDArray::applyBroadcast(sd::broadcast::IntOps op, const std::vector& d // if (other.lengthOf() == lengthOf() && this->rankOf() == other.rankOf()) { // NDArray::prepareSpecialUse({&target}, {this, &other}); - // NativeOpExecutioner::execPairwiseIntTransform(getContext(), fromBroadcastToPairwiseInt(op), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), 
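`createShapeInfoWithUnitiesForBroadcast()`, used by all of the `applyTrueBroadcast()`/`applyBroadcast()` variants in this file, aligns the smaller operand to the target's rank. A toy version of the shape part only (the real helper also builds full shapeInfo buffers with strides and honors the dimension list passed as `copy`):

```c++
#include <cstdint>
#include <vector>

// Align 'operand' to the rank of 'target' by inserting size-1 axes, so a
// broadcast kernel can walk both shapes with a single rank.
std::vector<int64_t> withUnities(const std::vector<int64_t>& target,
                                 const std::vector<int64_t>& operand) {
  std::vector<int64_t> padded(target.size(), 1);         // all unities
  const std::size_t shift = target.size() - operand.size();  // right-align
  for (std::size_t i = 0; i < operand.size(); ++i)
    padded[shift + i] = operand[i];
  return padded;  // withUnities({2,3,4}, {3,4}) -> {1,3,4}
}
```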
other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), nullptr); + // NativeOpExecutioner::execPairwiseIntTransform(getContext(), fromBroadcastToPairwiseInt(op), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.special(), nullptr); // NDArray::registerSpecialUse({&target}, {this, &other}); // return; // } @@ -3197,12 +3198,12 @@ void NDArray::applyBroadcast(sd::broadcast::IntOps op, const std::vector& d Nd4jLong const* yShapeInfoD = other.specialShapeInfo(); if(!isSameShape(target)) { - auto xPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), shapeInfo(), getContext()->getWorkspace(), copy); + auto xPack = ConstantShapeHelper::getInstance().createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), shapeInfo(), getContext()->getWorkspace(), copy); xShapeInfoH = reinterpret_cast(xPack.primary()); xShapeInfoD = reinterpret_cast(xPack.special()); } if(!other.isSameShape(target)) { - auto yPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), other.shapeInfo(), other.getContext()->getWorkspace(), copy); + auto yPack = ConstantShapeHelper::getInstance().createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), other.shapeInfo(), other.getContext()->getWorkspace(), copy); yShapeInfoH = reinterpret_cast(yPack.primary()); yShapeInfoD = reinterpret_cast(yPack.special()); } @@ -3220,8 +3221,8 @@ void NDArray::applyBroadcast(sd::broadcast::Ops op, const std::initializer_list< //////////////////////////////////////////////////////////////////////// void* NDArray::operator new(size_t i) { - if (sd::memory::MemoryRegistrator::getInstance()->hasWorkspaceAttached()) { - sd::memory::Workspace* ws = sd::memory::MemoryRegistrator::getInstance()->getWorkspace(); + if (sd::memory::MemoryRegistrator::getInstance().hasWorkspaceAttached()) { + sd::memory::Workspace* ws = sd::memory::MemoryRegistrator::getInstance().getWorkspace(); return ws->allocateBytes((Nd4jLong) i); } else { @@ -3233,7 +3234,7 @@ void* NDArray::operator new(size_t i) { //////////////////////////////////////////////////////////////////////// void NDArray::operator delete(void* p) { - if (!sd::memory::MemoryRegistrator::getInstance()->hasWorkspaceAttached()) + if (!sd::memory::MemoryRegistrator::getInstance().hasWorkspaceAttached()) free(p); } @@ -3439,8 +3440,8 @@ void NDArray::varianceAlongDimension(sd::variance::Ops op, NDArray& target, cons NativeOpExecutioner::execSummaryStatsScalar(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), nullptr, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), biasCorrected); else { std::vector copy(dimensions); - auto pDims = sd::Environment::getInstance()->isCPU() ? copy.data() : nullptr; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->shapeInfo(), dimensions); + auto pDims = sd::Environment::getInstance().isCPU() ? 
copy.data() : nullptr; + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(this->shapeInfo(), dimensions); NativeOpExecutioner::execSummaryStats(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), nullptr, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), pDims, dimensions.size(), packX.platformShapeInfo(), packX.platformOffsets(), biasCorrected); synchronize("NDArray::varianceAlongDimension"); } @@ -4109,8 +4110,8 @@ void NDArray::applyIndexReduce(sd::indexreduce::Ops op, NDArray& target, const s else { std::vector copy = dimensions; shape::checkDimensions(rankOf(), copy); - auto pDims = sd::Environment::getInstance()->isCPU() ? copy.data() : nullptr; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(shapeInfo(), copy); + auto pDims = sd::Environment::getInstance().isCPU() ? copy.data() : nullptr; + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(shapeInfo(), copy); NativeOpExecutioner::execIndexReduce(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), params, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), pDims, copy.size(), packX.platformShapeInfo(), packX.platformOffsets()); synchronize("NDArray::applyIndexReduce"); } @@ -4183,10 +4184,10 @@ NDArray NDArray::applyReduce3(sd::reduce3::Ops op, const NDArray& other, const s } else { - auto pDims = sd::Environment::getInstance()->isCPU() ? copy.data() : nullptr; + auto pDims = sd::Environment::getInstance().isCPU() ? copy.data() : nullptr; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(shapeInfo(), copy); - auto packY = sd::ConstantTadHelper::getInstance()->tadForDimensions(other.shapeInfo(), copy); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(shapeInfo(), copy); + auto packY = sd::ConstantTadHelper::getInstance().tadForDimensions(other.shapeInfo(), copy); if(!shape::equalsSoft(packX.primaryShapeInfo(), packY.primaryShapeInfo()) || (packX.numberOfTads() != packY.numberOfTads() && packX.numberOfTads() != 1 && packY.numberOfTads() != 1)) throw std::runtime_error("NDArray::applyReduce3 cuda method: arrays tads are inconsistent !"); @@ -4212,15 +4213,15 @@ NDArray NDArray::applyAllReduce3(sd::reduce3::Ops op, const NDArray& other, cons shape::checkDimensions(rankOf(), copy); shape::checkDimensions(other.rankOf(), copy); - auto packX = ConstantTadHelper::getInstance()->tadForDimensions(shapeInfo(), copy); - auto packY = ConstantTadHelper::getInstance()->tadForDimensions(other.shapeInfo(), copy); + auto packX = ConstantTadHelper::getInstance().tadForDimensions(shapeInfo(), copy); + auto packY = ConstantTadHelper::getInstance().tadForDimensions(other.shapeInfo(), copy); // check tads shapes if(!shape::equalsSoft(packX.primaryShapeInfo(), packY.primaryShapeInfo())) throw std::runtime_error("NDArray::applyAllReduce3 method: the shapes of array tads are different !"); // set newShape for output array - auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(DataTypeUtils::pickFloatingType(dataType()), 'c', {packX.numberOfTads(), packY.numberOfTads()}); + auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(DataTypeUtils::pickFloatingType(dataType()), 'c', {packX.numberOfTads(), packY.numberOfTads()}); // create output array NDArray result(newShape, true, getContext()); @@ -4228,7 +4229,7 @@ NDArray NDArray::applyAllReduce3(sd::reduce3::Ops op, const NDArray& other, cons // create dynamic array 
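These reduce and index-reduce methods all lean on ConstantTadHelper, which caches, per (shapeInfo, dimensions) pair, one sub-array (TAD, tensor-along-dimensions) shapeInfo plus an offset per sub-array. A hedged fragment, with `array` standing in for an NDArray; for a [4, 5] input reduced along dimension 1 there are 4 TADs of length 5:

```c++
// TAD plumbing shared by the reduction methods (fragment, libnd4j context
// assumed; accessor names as in the hunks above).
auto pack = sd::ConstantTadHelper::getInstance().tadForDimensions(array.shapeInfo(), {1});
for (Nd4jLong t = 0; t < pack.numberOfTads(); ++t) {
  // the t-th sub-array starts at buffer + pack.platformOffsets()[t]
  // and is described by pack.platformShapeInfo()
}
```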
of extra parameters if array extraParams is empty (==nullptr) void* params = extraParams != nullptr ? const_cast(extraParams)->argumentsAsT(dataType()) : nullptr; - auto pDims = sd::Environment::getInstance()->isCPU() ? copy.data() : nullptr; + auto pDims = sd::Environment::getInstance().isCPU() ? copy.data() : nullptr; NDArray::prepareSpecialUse({&result}, {this, &other}); NativeOpExecutioner::execReduce3All(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), params, other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo(), pDims, copy.size(), packX.platformShapeInfo(), packX.platformOffsets(), packY.platformShapeInfo(), packY.platformOffsets()); @@ -4260,7 +4261,7 @@ void NDArray::reduceAlongDimension(sd::reduce::FloatOps op, NDArray& target, con NativeOpExecutioner::execReduceFloatScalar(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(),nullptr, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo()); } else { - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(shapeInfo(), copy); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(shapeInfo(), copy); NativeOpExecutioner::execReduceFloat(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), nullptr, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), copy.data(), copy.size(), packX.platformShapeInfo(), packX.platformOffsets()); } synchronize("NDArray::reduceAlongDimension FloatOps"); @@ -4291,8 +4292,8 @@ void NDArray::reduceAlongDimension(sd::reduce::SameOps op, NDArray& target, cons NativeOpExecutioner::execReduceSameScalar(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), nullptr, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo()); } else { //if (!isEmpty()) { - auto pDims = sd::Environment::getInstance()->isCPU() ? copy.data() : nullptr; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->shapeInfo(), copy); + auto pDims = sd::Environment::getInstance().isCPU() ? copy.data() : nullptr; + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(this->shapeInfo(), copy); NativeOpExecutioner::execReduceSame(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), nullptr, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), pDims, copy.size(), packX.platformShapeInfo(), packX.platformOffsets()); } synchronize("NDArray::reduceAlongDimension SameOps"); @@ -4323,8 +4324,8 @@ void NDArray::reduceAlongDimension(sd::reduce::LongOps op, NDArray& target, cons NativeOpExecutioner::execReduceLongScalar(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), nullptr, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo()); } else { - auto pDims = sd::Environment::getInstance()->isCPU() ? copy.data() : nullptr; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->shapeInfo(), copy); + auto pDims = sd::Environment::getInstance().isCPU() ? 
copy.data() : nullptr; + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(this->shapeInfo(), copy); NativeOpExecutioner::execReduceLong(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), nullptr, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), pDims, copy.size(), packX.platformShapeInfo(), packX.platformOffsets()); } synchronize("NDArray::reduceAlongDimension LongOps"); @@ -4355,8 +4356,8 @@ void NDArray::reduceAlongDimension(sd::reduce::BoolOps op, NDArray& target, cons NativeOpExecutioner::execReduceBoolScalar(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), nullptr, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo()); } else { - auto pDims = sd::Environment::getInstance()->isCPU() ? copy.data() : nullptr; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->shapeInfo(), copy); + auto pDims = sd::Environment::getInstance().isCPU() ? copy.data() : nullptr; + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(this->shapeInfo(), copy); NativeOpExecutioner::execReduceBool(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), nullptr, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), pDims, copy.size(), packX.platformShapeInfo(), packX.platformOffsets()); } synchronize("NDArray::reduceAlongDimension LongOps"); @@ -4524,7 +4525,7 @@ void NDArray::addRowVector(const NDArray& row, NDArray& target) const { int dimension = 1; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->shapeInfo(), dimension); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(this->shapeInfo(), dimension); NDArray::prepareSpecialUse({&target}, {this, &row}); NativeOpExecutioner::execBroadcast(getContext(), sd::broadcast::Ops::Add, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), row.buffer(), row.shapeInfo(), row.specialBuffer(), row.specialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), nullptr, 1, packX.platformShapeInfo(), packX.platformOffsets(), nullptr, nullptr); @@ -4543,7 +4544,7 @@ void NDArray::subRowVector(const NDArray& row, NDArray& target) const { int dimension = 1; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->shapeInfo(), dimension); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(this->shapeInfo(), dimension); NDArray::prepareSpecialUse({&target}, {this, &row}); NativeOpExecutioner::execBroadcast(getContext(), sd::broadcast::Ops::Subtract, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), row.buffer(), row.shapeInfo(), row.specialBuffer(), row.specialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), &dimension, 1, packX.platformShapeInfo(), packX.platformOffsets(), nullptr, nullptr); @@ -4563,7 +4564,7 @@ void NDArray::mulRowVector(const NDArray &row, NDArray &target) const { int dimension = 1; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->shapeInfo(), dimension); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(this->shapeInfo(), dimension); NDArray::prepareSpecialUse({&target}, {this, &row}); NativeOpExecutioner::execBroadcast(getContext(), sd::broadcast::Ops::Multiply, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), row.buffer(), row.shapeInfo(), row.specialBuffer(), 
row.specialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), nullptr, 1, packX.platformShapeInfo(), packX.platformOffsets(), nullptr, nullptr); @@ -4584,7 +4585,7 @@ void NDArray::divRowVector(const NDArray &row, NDArray &target) const { int dimension = 1; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->shapeInfo(), dimension); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(this->shapeInfo(), dimension); NDArray::prepareSpecialUse({&target}, {this, &row}); NativeOpExecutioner::execBroadcast(getContext(), sd::broadcast::Divide, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), row.buffer(), row.shapeInfo(), row.specialBuffer(), row.specialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), nullptr, 1, packX.platformShapeInfo(), packX.platformOffsets(), nullptr, nullptr); @@ -4602,7 +4603,7 @@ void NDArray::addiRowVector(const NDArray& row) { int dimension = 1; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->shapeInfo(), dimension); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(this->shapeInfo(), dimension); NDArray::prepareSpecialUse({this}, {&row}); NativeOpExecutioner::execBroadcast(getContext(), sd::broadcast::Ops::Add, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), row.buffer(), row.shapeInfo(), row.specialBuffer(), row.specialShapeInfo(), this->buffer(), this->shapeInfo(), this->specialBuffer(), this->specialShapeInfo(), nullptr, 1, packX.platformShapeInfo(), packX.platformOffsets(), nullptr, nullptr); @@ -4620,7 +4621,7 @@ void NDArray::addColumnVector(const NDArray &column, NDArray &target) const { int dimension = 0; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->shapeInfo(), dimension); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(this->shapeInfo(), dimension); NDArray::prepareSpecialUse({&target}, {this, &column}); NativeOpExecutioner::execBroadcast(getContext(), sd::broadcast::Ops::Add, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), column.buffer(), column.shapeInfo(), column.specialBuffer(), column.specialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), nullptr, 1, packX.platformShapeInfo(), packX.platformOffsets(), nullptr, nullptr); @@ -4637,7 +4638,7 @@ void NDArray::addiColumnVector(const NDArray &column) { int dimension = 0; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->shapeInfo(), dimension); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(this->shapeInfo(), dimension); NDArray::prepareSpecialUse({this}, {&column}); NativeOpExecutioner::execBroadcast(getContext(), sd::broadcast::Ops::Add, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), column.buffer(), column.shapeInfo(), column.specialBuffer(), column.specialShapeInfo(), this->buffer(), this->shapeInfo(), this->specialBuffer(), this->specialShapeInfo(), nullptr, 1, packX.platformShapeInfo(), packX.platformOffsets(), nullptr, nullptr); @@ -4654,7 +4655,7 @@ void NDArray::muliColumnVector(const NDArray& column) { int dimension = 0; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->shapeInfo(), dimension); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(this->shapeInfo(), dimension); NDArray::prepareSpecialUse({this}, {&column}); NativeOpExecutioner::execBroadcast(getContext(), 
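The row- and column-vector helpers in this stretch are thin wrappers over `execBroadcast` with a one-dimensional TAD pack. For reference, the computation `addRowVector()` dispatches, written out as a standalone plain-loop sketch:

```c++
#include <cstddef>
#include <vector>

// out[i][j] = m[i][j] + row[j]; each row of the matrix is one TAD along
// dimension 1, combined with the same row vector.
void addRowVector(const std::vector<float>& m, const std::vector<float>& row,
                  std::vector<float>& out, std::size_t rows, std::size_t cols) {
  for (std::size_t i = 0; i < rows; ++i)    // one TAD per row
    for (std::size_t j = 0; j < cols; ++j)
      out[i * cols + j] = m[i * cols + j] + row[j];
}
```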
sd::broadcast::Ops::Multiply, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), column.buffer(), column.shapeInfo(), column.specialBuffer(), column.specialShapeInfo(), this->buffer(), this->shapeInfo(), this->specialBuffer(), this->specialShapeInfo(), nullptr, 1, packX.platformShapeInfo(), packX.platformOffsets(), nullptr, nullptr); @@ -4695,7 +4696,7 @@ ResultSet NDArray::multipleTensorsAlongDimension(const std::vector &indices if (indices.size() == 0) return result; - auto pack = ConstantTadHelper::getInstance()->tadForDimensions(shapeInfo(), const_cast(dimensions.data()), dimensions.size()); + auto pack = ConstantTadHelper::getInstance().tadForDimensions(shapeInfo(), const_cast(dimensions.data()), dimensions.size()); auto tadLength = shape::length(pack.primaryShapeInfo()); auto numTads = lengthOf() / tadLength; @@ -4816,7 +4817,7 @@ ResultSet NDArray::allTensorsAlongDimension(const std::vector &dimensions) throw std::runtime_error("NDArray::allTensorsAlongDimension static function: all input dimensions must be smaller than rank of input array !"); - auto pack = ConstantTadHelper::getInstance()->tadForDimensions(_shapeInfo, const_cast(dimensions.data()), dimensions.size()); + auto pack = ConstantTadHelper::getInstance().tadForDimensions(_shapeInfo, const_cast(dimensions.data()), dimensions.size()); auto numTads = pack.numberOfTads(); for (Nd4jLong idx = 0; idx < numTads; idx++ ) { @@ -4929,11 +4930,11 @@ void NDArray::setShapeInfo(const Nd4jLong *shapeInfo) { if (shapeInfo != nullptr) { ShapeDescriptor descriptor(shapeInfo); - auto shapeBuffer = ConstantShapeHelper::getInstance()->bufferForShapeInfo(descriptor); + auto shapeBuffer = ConstantShapeHelper::getInstance().bufferForShapeInfo(descriptor); - _shapeInfo = reinterpret_cast(shapeBuffer.primary()); + _shapeInfo = shapeBuffer.primary(); #ifdef __CUDABLAS__ - _shapeInfoD = reinterpret_cast(shapeBuffer.special()); + _shapeInfoD = shapeBuffer.special(); #endif if(ArrayOptions::arrayType(_shapeInfo) == ArrayType::EMPTY) @@ -4956,11 +4957,11 @@ void NDArray::setShapeInfo(const Nd4jLong *shapeInfo, const sd::DataType dtype) Nd4jLong* shapeInfoTemp = ShapeBuilders::copyShapeInfoAndType(shapeInfo, dtype, true, getContext()->getWorkspace()); ShapeDescriptor descriptor(shapeInfoTemp); - auto shapeBuffer = ConstantShapeHelper::getInstance()->bufferForShapeInfo(descriptor); + auto shapeBuffer = ConstantShapeHelper::getInstance().bufferForShapeInfo(descriptor); - _shapeInfo = reinterpret_cast(shapeBuffer.primary()); + _shapeInfo = shapeBuffer.primary(); #ifdef __CUDABLAS__ - _shapeInfoD = reinterpret_cast(shapeBuffer.special()); + _shapeInfoD = shapeBuffer.special(); #endif if(ArrayOptions::arrayType(_shapeInfo) == ArrayType::EMPTY) @@ -4979,11 +4980,11 @@ void NDArray::setShapeInfo(const Nd4jLong *shapeInfo, const sd::DataType dtype) ////////////////////////////////////////////////////////////////////////// void NDArray::setShapeInfo(const ShapeDescriptor& descriptor) { - auto shapeBuffer = ConstantShapeHelper::getInstance()->bufferForShapeInfo(const_cast(descriptor)); + auto shapeBuffer = ConstantShapeHelper::getInstance().bufferForShapeInfo(const_cast(descriptor)); - _shapeInfo = reinterpret_cast(shapeBuffer.primary()); + _shapeInfo = shapeBuffer.primary(); #ifdef __CUDABLAS__ - _shapeInfoD = reinterpret_cast(shapeBuffer.special()); + _shapeInfoD = shapeBuffer.special(); #endif if(ArrayOptions::arrayType(_shapeInfo) == ArrayType::EMPTY) @@ -4995,11 +4996,11 @@ void NDArray::setShapeInfo(const ShapeDescriptor& descriptor) { } 
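After this patch, all of the `setShapeInfo()` overloads reduce to the same assignment, because the cached ConstantShapeBuffer hands back typed const pointers and the old `reinterpret_cast`/`const_cast` dance disappears. Condensed into one hypothetical helper:

```c++
// Condensed form of the shared pattern (illustrative; not a real libnd4j
// function). The device copy is owned by the shape cache, not the array.
void assignFrom(const sd::ConstantShapeBuffer& shapeBuffer,
                const Nd4jLong*& hostShape, const Nd4jLong*& deviceShape) {
  hostShape = shapeBuffer.primary();
#ifdef __CUDABLAS__
  deviceShape = shapeBuffer.special();
#endif
}
```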
////////////////////////////////////////////////////////////////////////// -void NDArray::setShapeInfo(const ConstantDataBuffer& shapeBuffer) { +void NDArray::setShapeInfo(const ConstantShapeBuffer& shapeBuffer) { - _shapeInfo = reinterpret_cast(const_cast(shapeBuffer).primary()); + _shapeInfo = shapeBuffer.primary(); #ifdef __CUDABLAS__ - _shapeInfoD = reinterpret_cast(const_cast(shapeBuffer).special()); + _shapeInfoD = shapeBuffer.special(); #endif if(ArrayOptions::arrayType(_shapeInfo) == ArrayType::EMPTY) @@ -5350,7 +5351,7 @@ NDArray operator+(T1&& arr1, T2&& arr2) { if (arr1.isS() || arr2.isS()) throw std::runtime_error("operator+(T&& arr1, T&& arr2): you can't use this method on String arrays!"); - if (!Environment::getInstance()->isExperimentalBuild() && arr1.dataType() != arr2.dataType() && (arr1.dataType() != DataType::BOOL || arr2.dataType() != BOOL)) + if (!Environment::getInstance().isExperimentalBuild() && arr1.dataType() != arr2.dataType() && (arr1.dataType() != DataType::BOOL || arr2.dataType() != BOOL)) throw sd::datatype_exception::build("operator+(T&& arr1, T&& arr2): Cannot multiply different types", arr1.dataType(), arr2.dataType()); PointersManager pointersManager(arr1.getContext(), "operator+(T&& arr1, T&& arr2)"); @@ -5400,7 +5401,7 @@ NDArray operator-(T1&& arr1, T2&& arr2) { if (arr1.isS() || arr2.isS()) throw std::runtime_error("operator-(T&& arr1, T&& arr2): you can't use this method on String arrays!"); - if (!Environment::getInstance()->isExperimentalBuild() && arr1.dataType() != arr2.dataType() && (arr1.dataType() != DataType::BOOL || arr2.dataType() != BOOL)) + if (!Environment::getInstance().isExperimentalBuild() && arr1.dataType() != arr2.dataType() && (arr1.dataType() != DataType::BOOL || arr2.dataType() != BOOL)) throw sd::datatype_exception::build("operator-(T&& arr1, T&& arr2): Cannot multiply different types", arr1.dataType(), arr2.dataType()); PointersManager pointersManager(arr1.getContext(), "operator-(T&& arr1, T&& arr2)"); @@ -5450,7 +5451,7 @@ NDArray operator*(T1&& arr1, T2&& arr2) { if (arr1.isS() || arr2.isS()) throw std::runtime_error("operator*(T&& arr1, T&& arr2): you can't use this method on String arrays!"); - if (!Environment::getInstance()->isExperimentalBuild() && arr1.dataType() != arr2.dataType() && (arr1.dataType() != DataType::BOOL || arr2.dataType() != BOOL)) + if (!Environment::getInstance().isExperimentalBuild() && arr1.dataType() != arr2.dataType() && (arr1.dataType() != DataType::BOOL || arr2.dataType() != BOOL)) throw sd::datatype_exception::build("operator*(T&& arr1, T&& arr2): Cannot multiply different types", arr1.dataType(), arr2.dataType()); PointersManager pointersManager(arr1.getContext(), "operator*(T&& arr1, T&& arr2)"); @@ -5500,7 +5501,7 @@ NDArray operator/(T1&& arr1, T2&& arr2) { if (arr1.isS() || arr2.isS()) throw std::runtime_error("operator/(T&& arr1, T&& arr2): you can't use this method on String arrays!"); - if (!Environment::getInstance()->isExperimentalBuild() && arr1.dataType() != arr2.dataType() && (arr1.dataType() != DataType::BOOL || arr2.dataType() != BOOL)) + if (!Environment::getInstance().isExperimentalBuild() && arr1.dataType() != arr2.dataType() && (arr1.dataType() != DataType::BOOL || arr2.dataType() != BOOL)) throw sd::datatype_exception::build("operator/(T&& arr1, T&& arr2): Cannot multiply different types", arr1.dataType(), arr2.dataType()); PointersManager pointersManager(arr1.getContext(), "operator/(T&& arr1, T&& arr2)"); diff --git a/libnd4j/include/array/PointerDeallocator.h 
b/libnd4j/include/array/PointerDeallocator.h new file mode 100644 index 000000000..5bf820421 --- /dev/null +++ b/libnd4j/include/array/PointerDeallocator.h @@ -0,0 +1,39 @@ +/******************************************************************************* + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#ifndef SD_POINTERDEALLOCATOR_H_ +#define SD_POINTERDEALLOCATOR_H_ + +#include +#include + +namespace sd { + +class ND4J_EXPORT PointerDeallocator { + public: + PointerDeallocator() = default; + ~PointerDeallocator() = default; + + virtual void release(void* ptr); +}; + +} + +#endif //SD_POINTERDEALLOCATOR_H_
diff --git a/libnd4j/include/array/PointerWrapper.h b/libnd4j/include/array/PointerWrapper.h new file mode 100644 index 000000000..9e15aaaa3 --- /dev/null +++ b/libnd4j/include/array/PointerWrapper.h @@ -0,0 +1,49 @@ +/******************************************************************************* + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#ifndef SD_ARRAY_POINTER_H_ +#define SD_ARRAY_POINTER_H_ + +#include +#include +#include +#include + +namespace sd { +class ND4J_EXPORT PointerWrapper { + private: + void* _pointer = nullptr; + std::shared_ptr<PointerDeallocator> _deallocator; + + public: + PointerWrapper(void* ptr, const std::shared_ptr<PointerDeallocator> &deallocator = {}); + PointerWrapper() = default; + ~PointerWrapper(); + + void* pointer() const; + + template <typename T> + T* pointerAsT() const { + return reinterpret_cast<T*>(pointer()); + } +}; +} // namespace sd + +#endif //SD_ARRAY_POINTER_H_
diff --git a/libnd4j/include/array/PrimaryPointerDeallocator.h b/libnd4j/include/array/PrimaryPointerDeallocator.h new file mode 100644 index 000000000..b4fe34764 --- /dev/null +++ b/libnd4j/include/array/PrimaryPointerDeallocator.h @@ -0,0 +1,38 @@ +/******************************************************************************* + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0.
+ * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#ifndef SD_PRIMARYPOINTERDEALLOCATOR_H_ +#define SD_PRIMARYPOINTERDEALLOCATOR_H_ + +#include +#include +#include + +namespace sd { +class ND4J_EXPORT PrimaryPointerDeallocator : public PointerDeallocator { + public: + PrimaryPointerDeallocator() = default; + ~PrimaryPointerDeallocator() = default; + + void release(void* ptr) override; +}; +} + +#endif //SD_PRIMARYPOINTERDEALLOCATOR_H_ diff --git a/libnd4j/include/array/TadPack.h b/libnd4j/include/array/TadPack.h index 3cd95fa59..f7ca15fd9 100644 --- a/libnd4j/include/array/TadPack.h +++ b/libnd4j/include/array/TadPack.h @@ -21,17 +21,18 @@ #ifndef DEV_TESTS_TADPACK_H #define DEV_TESTS_TADPACK_H -#include "ConstantDataBuffer.h" +#include +#include namespace sd { class ND4J_EXPORT TadPack { private: - ConstantDataBuffer _tadShape; - ConstantDataBuffer _tadOffsets; + ConstantShapeBuffer _tadShape; + ConstantOffsetsBuffer _tadOffsets; Nd4jLong _numTads = 0 ; int _shapeInfoLength = 0; public: - explicit TadPack(ConstantDataBuffer &shapes, ConstantDataBuffer &offets, Nd4jLong numTads); + explicit TadPack(const ConstantShapeBuffer &shapes, const ConstantOffsetsBuffer &offets, Nd4jLong numTads); TadPack() = default; ~TadPack() = default; diff --git a/libnd4j/include/array/cpu/NDArray.cpp b/libnd4j/include/array/cpu/NDArray.cpp index 873b3fec9..398ebe5e8 100644 --- a/libnd4j/include/array/cpu/NDArray.cpp +++ b/libnd4j/include/array/cpu/NDArray.cpp @@ -338,7 +338,7 @@ void NDArray::tile(const std::vector& reps, NDArray& target) const { const int ews = target.ews(); const auto targetLen = target.lengthOf(); if(target.ordering() == 'c' && ews == 1) { // ews == 1 always here -//#pragma omp parallel for simd if(targetLen > Environment::getInstance()->elementwiseThreshold()) schedule(guided) +//#pragma omp parallel for simd if(targetLen > Environment::getInstance().elementwiseThreshold()) schedule(guided) for(Nd4jLong i=0; i + +namespace sd { + +void CudaPointerDeallocator::release(void *ptr) { + cudaFree(ptr); +} + +} // namespace sd diff --git a/libnd4j/include/array/cuda/DataBuffer.cu b/libnd4j/include/array/cuda/DataBuffer.cu index 922b6967b..7e88e06ba 100644 --- a/libnd4j/include/array/cuda/DataBuffer.cu +++ b/libnd4j/include/array/cuda/DataBuffer.cu @@ -70,16 +70,16 @@ void DataBuffer::allocateSpecial() { auto deviceId = sd::AffinityManager::currentDeviceId(); if (_workspace == nullptr) - if (!sd::memory::MemoryCounter::getInstance()->validate(getLenInBytes())) - throw sd::allocation_exception::build("Requested amount exceeds device limits", sd::memory::MemoryCounter::getInstance()->deviceLimit(deviceId), getLenInBytes()); + if (!sd::memory::MemoryCounter::getInstance().validate(getLenInBytes())) + throw sd::allocation_exception::build("Requested amount exceeds device limits", sd::memory::MemoryCounter::getInstance().deviceLimit(deviceId), getLenInBytes()); ALLOCATE_SPECIAL(_specialBuffer, _workspace, getLenInBytes(), int8_t); _isOwnerSpecial = true; if (_workspace == nullptr) { - sd::memory::MemoryCounter::getInstance()->countIn(deviceId, getLenInBytes()); 
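The pieces above complete the deallocation story: PointerWrapper owns a raw pointer plus a polymorphic PointerDeallocator, so cached buffers can be released with `free()`, `cudaFree()`, or not at all, without the cache knowing which backend allocated them. A self-contained sketch with assumed semantics (the real `release()` bodies live in the `.cpp`/`.cu` files of this patch):

```c++
#include <cstdlib>
#include <memory>

// Simplified stand-ins for the sd:: classes above, not the real ones.
struct PointerDeallocator {
  virtual ~PointerDeallocator() = default;
  virtual void release(void*) { /* base: assumed no-op for non-owned memory */ }
};
struct PrimaryPointerDeallocator : PointerDeallocator {
  void release(void* ptr) override { std::free(ptr); }  // host memory stand-in
};
// The CUDA backend's override calls cudaFree(ptr) instead, as the .cu shows.

class PointerWrapper {
  void* _ptr = nullptr;
  std::shared_ptr<PointerDeallocator> _deallocator;
 public:
  PointerWrapper(void* p, std::shared_ptr<PointerDeallocator> d)
      : _ptr(p), _deallocator(std::move(d)) {}
  ~PointerWrapper() { if (_deallocator) _deallocator->release(_ptr); }
  void* pointer() const { return _ptr; }
};
```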
- sd::memory::MemoryCounter::getInstance()->countIn(sd::memory::MemoryType::DEVICE, getLenInBytes()); + sd::memory::MemoryCounter::getInstance().countIn(deviceId, getLenInBytes()); + sd::memory::MemoryCounter::getInstance().countIn(sd::memory::MemoryType::DEVICE, getLenInBytes()); } } } @@ -135,8 +135,8 @@ void DataBuffer::deleteSpecial() { // count out towards DataBuffer device, only if we're not in workspace if (_workspace == nullptr) { - sd::memory::MemoryCounter::getInstance()->countOut(_deviceId, getLenInBytes()); - sd::memory::MemoryCounter::getInstance()->countOut(sd::memory::MemoryType::DEVICE, getLenInBytes()); + sd::memory::MemoryCounter::getInstance().countOut(_deviceId, getLenInBytes()); + sd::memory::MemoryCounter::getInstance().countOut(sd::memory::MemoryType::DEVICE, getLenInBytes()); } } }
diff --git a/libnd4j/include/array/cuda/NDArray.cu b/libnd4j/include/array/cuda/NDArray.cu index 8ed3eceeb..f28e2ba22 100644 --- a/libnd4j/include/array/cuda/NDArray.cu +++ b/libnd4j/include/array/cuda/NDArray.cu @@ -53,7 +53,7 @@ void* NDArray::platformBuffer() { return specialBuffer(); } void const* NDArray::platformBuffer() const { return specialBuffer(); } Nd4jLong const* NDArray::platformShapeInfo() const { return specialShapeInfo(); } -//Nd4jLong const* NDArray::platformShapeInfo() { return specialShapeInfo(); } +//Nd4jLong const* NDArray::platform() { return special(); } void NDArray::syncToDevice() const { auto currentDeviceId = AffinityManager::currentDeviceId();
diff --git a/libnd4j/include/array/impl/ConstantDataBuffer.cpp b/libnd4j/include/array/impl/ConstantDataBuffer.cpp index 20c842266..2aeda3b6d 100644 --- a/libnd4j/include/array/impl/ConstantDataBuffer.cpp +++ b/libnd4j/include/array/impl/ConstantDataBuffer.cpp @@ -18,29 +18,38 @@ // @author raver119@gmail.com // -#include "../ConstantDataBuffer.h" +#include +#include namespace sd { - ConstantDataBuffer::ConstantDataBuffer(Nd4jPointer primary, Nd4jPointer special, Nd4jLong numEelements, Nd4jLong sizeOf) { - _primaryBuffer = primary; - _specialBuffer = special; - _length = numEelements; - _sizeOf = sizeOf; +ConstantDataBuffer::ConstantDataBuffer( + const std::shared_ptr<PointerWrapper>& primary, + uint64_t numEelements, + DataType dtype) : ConstantDataBuffer(primary, {}, numEelements, dtype) { + // +} + +ConstantDataBuffer::ConstantDataBuffer( + const std::shared_ptr<PointerWrapper>& primary, + const std::shared_ptr<PointerWrapper>& special, + uint64_t numEelements, + DataType dtype) : _primaryBuffer(primary), _specialBuffer(special), _length(numEelements) { + _sizeOf = DataTypeUtils::sizeOf(dtype); } - Nd4jPointer ConstantDataBuffer::primary() const { - return _primaryBuffer; + void* ConstantDataBuffer::primary() const { + return _primaryBuffer->pointer(); } - Nd4jPointer ConstantDataBuffer::special() const { - return _specialBuffer; + void* ConstantDataBuffer::special() const { + return _specialBuffer ? _specialBuffer->pointer() : nullptr; } - Nd4jLong ConstantDataBuffer::sizeOf() const { + uint8_t ConstantDataBuffer::sizeOf() const { return _sizeOf; } - Nd4jLong ConstantDataBuffer::length() const { + uint64_t ConstantDataBuffer::length() const { return _length; } @@ -52,21 +61,21 @@ namespace sd { } template <typename T> - T* ConstantDataBuffer::primaryAsT() { - return reinterpret_cast<T*>(_primaryBuffer); + T* ConstantDataBuffer::primaryAsT() const { + return reinterpret_cast<T*>(_primaryBuffer->pointer()); } - template ND4J_EXPORT float* ConstantDataBuffer::primaryAsT<float>(); - template ND4J_EXPORT double* ConstantDataBuffer::primaryAsT<double>(); - template ND4J_EXPORT int* ConstantDataBuffer::primaryAsT<int>(); - template ND4J_EXPORT Nd4jLong* ConstantDataBuffer::primaryAsT<Nd4jLong>(); + template ND4J_EXPORT float* ConstantDataBuffer::primaryAsT<float>() const; + template ND4J_EXPORT double* ConstantDataBuffer::primaryAsT<double>() const; + template ND4J_EXPORT int* ConstantDataBuffer::primaryAsT<int>() const; + template ND4J_EXPORT Nd4jLong* ConstantDataBuffer::primaryAsT<Nd4jLong>() const; template <typename T> - T* ConstantDataBuffer::specialAsT() { - return reinterpret_cast<T*>(_specialBuffer); + T* ConstantDataBuffer::specialAsT() const { + return reinterpret_cast<T*>(special()); } - template ND4J_EXPORT float* ConstantDataBuffer::specialAsT<float>(); - template ND4J_EXPORT double* ConstantDataBuffer::specialAsT<double>(); - template ND4J_EXPORT int* ConstantDataBuffer::specialAsT<int>(); - template ND4J_EXPORT Nd4jLong* ConstantDataBuffer::specialAsT<Nd4jLong>(); + template ND4J_EXPORT float* ConstantDataBuffer::specialAsT<float>() const; + template ND4J_EXPORT double* ConstantDataBuffer::specialAsT<double>() const; + template ND4J_EXPORT int* ConstantDataBuffer::specialAsT<int>() const; + template ND4J_EXPORT Nd4jLong* ConstantDataBuffer::specialAsT<Nd4jLong>() const; }
diff --git a/libnd4j/include/array/impl/ConstantOffsetsBuffer.cpp b/libnd4j/include/array/impl/ConstantOffsetsBuffer.cpp new file mode 100644 index 000000000..38b516a84 --- /dev/null +++ b/libnd4j/include/array/impl/ConstantOffsetsBuffer.cpp @@ -0,0 +1,51 @@ +/******************************************************************************* + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include + +namespace sd { +ConstantOffsetsBuffer::ConstantOffsetsBuffer(const std::shared_ptr<PointerWrapper> &primary) : + ConstantOffsetsBuffer(primary, std::shared_ptr<PointerWrapper>(nullptr)) { + // +} + +ConstantOffsetsBuffer::ConstantOffsetsBuffer(const std::shared_ptr<PointerWrapper> &primary, + const std::shared_ptr<PointerWrapper> &special) { + _primaryOffsets = primary; + _specialOffsets = special; +} + +const Nd4jLong *ConstantOffsetsBuffer::primary() const { + return reinterpret_cast<const Nd4jLong*>(_primaryOffsets->pointer()); +} + +const Nd4jLong *ConstantOffsetsBuffer::special() const { + return _specialOffsets ? reinterpret_cast<const Nd4jLong*>(_specialOffsets->pointer()) : nullptr; +}
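`primaryAsT()`/`specialAsT()` above are member templates defined in a `.cpp` file, which is why the diff carries one explicit instantiation per exported type. The same pattern in miniature:

```c++
#include <cstdint>

// A template defined outside the header must be explicitly instantiated
// for every type other translation units will link against.
struct Holder {
  void* _ptr = nullptr;
  template <typename T>
  T* asT() const { return reinterpret_cast<T*>(_ptr); }
};

// Without these lines, callers of asT<float>() elsewhere fail at link time.
template float* Holder::asT<float>() const;
template int64_t* Holder::asT<int64_t>() const;
```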
diff --git a/libnd4j/include/array/impl/ConstantOffsetsBuffer.cpp b/libnd4j/include/array/impl/ConstantOffsetsBuffer.cpp new file mode 100644 index 000000000..38b516a84 --- /dev/null +++ b/libnd4j/include/array/impl/ConstantOffsetsBuffer.cpp @@ -0,0 +1,51 @@ +/******************************************************************************* + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include <array/ConstantOffsetsBuffer.h> + +namespace sd { +ConstantOffsetsBuffer::ConstantOffsetsBuffer(const std::shared_ptr<PointerWrapper> &primary) : + ConstantOffsetsBuffer(primary, std::shared_ptr<PointerWrapper>(nullptr)) { + // +} + +ConstantOffsetsBuffer::ConstantOffsetsBuffer(const std::shared_ptr<PointerWrapper> &primary, + const std::shared_ptr<PointerWrapper> &special) { + _primaryOffsets = primary; + _specialOffsets = special; +} + +const Nd4jLong *ConstantOffsetsBuffer::primary() const { + return reinterpret_cast<Nd4jLong *>(_primaryOffsets->pointer()); +} + +const Nd4jLong *ConstantOffsetsBuffer::special() const { + return _specialOffsets ? reinterpret_cast<Nd4jLong *>(_specialOffsets->pointer()) : nullptr; +} + +const Nd4jLong *ConstantOffsetsBuffer::platform() const { +#ifdef __CUDABLAS__ + return special(); +#else + return primary(); +#endif // CUDABLAS +} + +} // namespace sd
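ConstantOffsetsBuffer above, and ConstantShapeBuffer right after it, expose the same primary/special/platform triple: platform() picks the device-side view in CUDA builds and the host-side view otherwise, decided at compile time by the __CUDABLAS__ guard. A small sketch of the idiom, with BACKEND_CUDA standing in for the real define:

#include <cstdio>

struct ShapeViews {
    const long long *host;    // always populated
    const long long *device;  // only meaningful in a CUDA build

    const long long *platform() const {
#ifdef BACKEND_CUDA
        return device;  // kernels consume the device copy
#else
        return host;    // CPU build: there is no device copy
#endif
    }
};

int main() {
    long long shapeInfo[4] = {2, 3, 4, 99};
    ShapeViews v{shapeInfo, nullptr};
    std::printf("leading value: %lld\n", v.platform()[0]);
    return 0;
}

Callers get one accessor that is always correct for the backend they were compiled against, instead of repeating the #ifdef at every call site.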
diff --git a/libnd4j/include/array/impl/ConstantShapeBuffer.cpp b/libnd4j/include/array/impl/ConstantShapeBuffer.cpp new file mode 100644 index 000000000..528101100 --- /dev/null +++ b/libnd4j/include/array/impl/ConstantShapeBuffer.cpp @@ -0,0 +1,51 @@ +/******************************************************************************* + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include <array/ConstantShapeBuffer.h> + +namespace sd { +ConstantShapeBuffer::ConstantShapeBuffer(const std::shared_ptr<PointerWrapper> &primary) : + ConstantShapeBuffer(primary, std::shared_ptr<PointerWrapper>(nullptr)) { + // +} + +ConstantShapeBuffer::ConstantShapeBuffer(const std::shared_ptr<PointerWrapper> &primary, + const std::shared_ptr<PointerWrapper> &special) { + _primaryShapeInfo = primary; + _specialShapeInfo = special; +} + +const Nd4jLong *ConstantShapeBuffer::primary() const { + return reinterpret_cast<Nd4jLong *>(_primaryShapeInfo->pointer()); +} + +const Nd4jLong *ConstantShapeBuffer::special() const { + return _specialShapeInfo ? reinterpret_cast<Nd4jLong *>(_specialShapeInfo->pointer()) : nullptr; +} + +const Nd4jLong *ConstantShapeBuffer::platform() const { +#ifdef __CUDABLAS__ + return special(); +#else + return primary(); +#endif // CUDABLAS +} + +} // namespace sd diff --git a/libnd4j/include/array/impl/DataBuffer.cpp b/libnd4j/include/array/impl/DataBuffer.cpp index 262460e8c..89c386c3d 100644 --- a/libnd4j/include/array/impl/DataBuffer.cpp +++ b/libnd4j/include/array/impl/DataBuffer.cpp @@ -237,14 +237,14 @@ namespace sd { auto deviceId = sd::AffinityManager::currentDeviceId(); // check if this allocation won't bring us above limit if (_workspace == nullptr) { - if (Environment::getInstance()->isCPU()) { + if (Environment::getInstance().isCPU()) { // on cpu backend we validate against device 0 for now - if (!sd::memory::MemoryCounter::getInstance()->validate(getLenInBytes())) - throw sd::allocation_exception::build("Requested amount exceeds HOST device limits", sd::memory::MemoryCounter::getInstance()->deviceLimit(deviceId), getLenInBytes()); + if (!sd::memory::MemoryCounter::getInstance().validate(getLenInBytes())) + throw sd::allocation_exception::build("Requested amount exceeds HOST device limits", sd::memory::MemoryCounter::getInstance().deviceLimit(deviceId), getLenInBytes()); } else { // in heterogeneous mode we validate against device group - if (!sd::memory::MemoryCounter::getInstance()->validateGroup(sd::memory::MemoryType::HOST, getLenInBytes())) - throw sd::allocation_exception::build("Requested amount exceeds HOST group limits", sd::memory::MemoryCounter::getInstance()->groupLimit(sd::memory::MemoryType::HOST), getLenInBytes()); + if (!sd::memory::MemoryCounter::getInstance().validateGroup(sd::memory::MemoryType::HOST, getLenInBytes())) + throw sd::allocation_exception::build("Requested amount exceeds HOST group limits", sd::memory::MemoryCounter::getInstance().groupLimit(sd::memory::MemoryType::HOST), getLenInBytes()); } } @@ -253,10 +253,10 @@ namespace sd { // count in towards current deviceId if we're not in workspace mode if (_workspace == nullptr) { - if (Environment::getInstance()->isCPU()) // we don't want this counter to be added to CUDA device - sd::memory::MemoryCounter::getInstance()->countIn(deviceId, getLenInBytes()); + if (Environment::getInstance().isCPU()) // we don't want this counter to be added to CUDA device + sd::memory::MemoryCounter::getInstance().countIn(deviceId, getLenInBytes()); - sd::memory::MemoryCounter::getInstance()->countIn(sd::memory::MemoryType::HOST, getLenInBytes()); + sd::memory::MemoryCounter::getInstance().countIn(sd::memory::MemoryType::HOST, getLenInBytes()); } } } @@ -279,10 +279,10 @@ namespace sd { // count out towards DataBuffer device, only if we're not in workspace if (_workspace == nullptr) { - if (Environment::getInstance()->isCPU()) - sd::memory::MemoryCounter::getInstance()->countOut(_deviceId, getLenInBytes()); + if (Environment::getInstance().isCPU()) + sd::memory::MemoryCounter::getInstance().countOut(_deviceId, getLenInBytes()); - sd::memory::MemoryCounter::getInstance()->countOut(sd::memory::MemoryType::HOST, getLenInBytes()); + sd::memory::MemoryCounter::getInstance().countOut(sd::memory::MemoryType::HOST, getLenInBytes()); } } }
diff --git a/libnd4j/include/array/impl/PointerDeallocator.cpp b/libnd4j/include/array/impl/PointerDeallocator.cpp new file mode 100644 index 000000000..2cd41cdda --- /dev/null +++ b/libnd4j/include/array/impl/PointerDeallocator.cpp @@ -0,0 +1,29 @@ +/******************************************************************************* + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include <array/PointerDeallocator.h> + +namespace sd { + +void PointerDeallocator::release(void *ptr) { + // noop +} + +} // namespace sd diff --git a/libnd4j/include/array/impl/PointerWrapper.cpp b/libnd4j/include/array/impl/PointerWrapper.cpp new file mode 100644 index 000000000..b39cb54aa --- /dev/null +++ b/libnd4j/include/array/impl/PointerWrapper.cpp @@ -0,0 +1,37 @@ +/******************************************************************************* + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include <array/PointerWrapper.h> + +namespace sd { +PointerWrapper::PointerWrapper(void *ptr, const std::shared_ptr<PointerDeallocator> &deallocator): _pointer(ptr), _deallocator(deallocator) { + // +} + +PointerWrapper::~PointerWrapper() { + if (_deallocator.get() != nullptr) + _deallocator->release(_pointer); +} + +void *PointerWrapper::pointer() const { + return _pointer; +} + +} // namespace sd
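PointerWrapper above couples a raw pointer with a pluggable deallocator: the base PointerDeallocator::release is deliberately a no-op, for memory the wrapper merely references, while subclasses such as PrimaryPointerDeallocator (next file) actually free host arrays. A usage sketch under those assumptions, with hypothetical Wrapped/HostDeallocator names:

#include <cstdint>
#include <cstdio>
#include <memory>

struct Deallocator {
    virtual ~Deallocator() = default;
    virtual void release(void *) {}  // no-op: wrapper does not own the memory
};

struct HostDeallocator : Deallocator {
    void release(void *ptr) override { delete[] static_cast<int8_t *>(ptr); }
};

class Wrapped {
public:
    Wrapped(void *ptr, std::shared_ptr<Deallocator> d)
            : _ptr(ptr), _dealloc(std::move(d)) {}
    ~Wrapped() { if (_dealloc) _dealloc->release(_ptr); }
    void *pointer() const { return _ptr; }
private:
    void *_ptr;
    std::shared_ptr<Deallocator> _dealloc;
};

int main() {
    // the heap block is reclaimed by HostDeallocator when w leaves scope;
    // passing a plain Deallocator instead would leave cleanup to someone else
    Wrapped w(new int8_t[128], std::make_shared<HostDeallocator>());
    std::printf("wrapped %p\n", w.pointer());
    return 0;
}

The same shape lets a CUDA build plug in a cudaFree-backed deallocator without touching the wrapper itself.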
diff --git a/libnd4j/include/array/impl/PrimaryPointerDeallocator.cpp b/libnd4j/include/array/impl/PrimaryPointerDeallocator.cpp new file mode 100644 index 000000000..edd58d610 --- /dev/null +++ b/libnd4j/include/array/impl/PrimaryPointerDeallocator.cpp @@ -0,0 +1,29 @@ +/******************************************************************************* + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include <array/PrimaryPointerDeallocator.h> + +namespace sd { + +void PrimaryPointerDeallocator::release(void *ptr) { + delete[] reinterpret_cast<int8_t *>(ptr); +} + +} // namespace sd diff --git a/libnd4j/include/array/impl/TadPack.cpp b/libnd4j/include/array/impl/TadPack.cpp index 7a3bdbe36..e489d0e83 100644 --- a/libnd4j/include/array/impl/TadPack.cpp +++ b/libnd4j/include/array/impl/TadPack.cpp @@ -23,26 +23,24 @@ #include <helpers/shape.h> namespace sd { - TadPack::TadPack(ConstantDataBuffer &shapes, ConstantDataBuffer &offets, Nd4jLong numTads) { - _tadShape = shapes; - _tadOffsets = offets; + TadPack::TadPack(const ConstantShapeBuffer &shapes, const ConstantOffsetsBuffer &offsets, Nd4jLong numTads) : _tadShape(shapes), _tadOffsets(offsets) { _numTads = numTads; } const Nd4jLong* TadPack::primaryShapeInfo() const { - return reinterpret_cast<Nd4jLong *>(_tadShape.primary()); + return _tadShape.primary(); } const Nd4jLong* TadPack::primaryOffsets() const { - return reinterpret_cast<Nd4jLong *>(_tadOffsets.primary()); + return _tadOffsets.primary(); } const Nd4jLong* TadPack::specialShapeInfo() const { - return reinterpret_cast<Nd4jLong *>(_tadShape.special()); + return _tadShape.special(); } const Nd4jLong* TadPack::specialOffsets() const { - return reinterpret_cast<Nd4jLong *>(_tadOffsets.special()); + return _tadOffsets.special(); } Nd4jLong TadPack::numberOfTads() const { @@ -50,11 +48,11 @@ namespace sd { } const Nd4jLong* TadPack::platformShapeInfo() const { - return sd::Environment::getInstance()->isCPU() ? primaryShapeInfo() : specialShapeInfo(); + return sd::Environment::getInstance().isCPU() ? primaryShapeInfo() : specialShapeInfo(); } const Nd4jLong* TadPack::platformOffsets() const { - return sd::Environment::getInstance()->isCPU() ? primaryOffsets() : specialOffsets(); + return sd::Environment::getInstance().isCPU() ? primaryOffsets() : specialOffsets(); } int TadPack::shapeInfoLength() const {
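The getInstance() conversions that run through the remainder of this patch (the Environment calls above, then ThreadPool, GraphHolder, BlasHelper and the Constant*Helper classes) all follow one recipe: delete the static _INSTANCE pointer and return a reference to a function-local static, which C++11 guarantees is initialized exactly once, thread-safely. A minimal sketch of the pattern:

#include <cstdio>

class Registry {
public:
    static Registry &getInstance() {
        static Registry instance;  // constructed once, on first use, under an implicit lock
        return instance;
    }
    Registry(const Registry &) = delete;
    Registry &operator=(const Registry &) = delete;
    void bump() { ++_count; }
    int count() const { return _count; }
private:
    Registry() = default;   // no public construction
    ~Registry() = default;
    int _count = 0;
};

int main() {
    Registry::getInstance().bump();
    std::printf("%d\n", Registry::getInstance().count());  // prints 1
    return 0;
}

Unlike the leaked heap instance, the local static is destroyed at process exit, which is presumably why the ThreadPool destructor later in this patch drains its ticket queue and detaches its worker threads.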
diff --git a/libnd4j/include/execution/ThreadPool.h b/libnd4j/include/execution/ThreadPool.h index 6811f1b1c..ce44d5ae2 100644 --- a/libnd4j/include/execution/ThreadPool.h +++ b/libnd4j/include/execution/ThreadPool.h @@ -35,9 +35,7 @@ namespace samediff { class ND4J_EXPORT ThreadPool { private: - static ThreadPool* _INSTANCE; - - std::vector<std::thread*> _threads; + std::vector<std::thread> _threads; std::vector<BlockingQueue<CallableWithArguments*>*> _queues; std::vector<CallableInterface*> _interfaces; @@ -48,7 +46,7 @@ ThreadPool(); ~ThreadPool(); public: - static ThreadPool* getInstance(); + static ThreadPool& getInstance(); /** * This method returns list of pointers to threads ONLY if num_threads of threads were available upon request, returning empty list otherwise diff --git a/libnd4j/include/execution/Threads.h b/libnd4j/include/execution/Threads.h index 2ea8295a8..bf35de089 100644 --- a/libnd4j/include/execution/Threads.h +++ b/libnd4j/include/execution/Threads.h @@ -107,7 +107,7 @@ namespace samediff { * @param increment * @return */ - static int parallel_for(FUNC_1D function, int64_t start, int64_t stop, int64_t increment = 1, uint32_t numThreads = sd::Environment::getInstance()->maxMasterThreads()); + static int parallel_for(FUNC_1D function, int64_t start, int64_t stop, int64_t increment = 1, uint32_t numThreads = sd::Environment::getInstance().maxMasterThreads()); /** * This function executes 1 dimensional loop for a given number of threads * @param function * @param start * @param stop * @param increment * @param numThreads * @return */ - static int parallel_tad(FUNC_1D function, int64_t start, int64_t stop, int64_t increment = 1, uint32_t numThreads = sd::Environment::getInstance()->maxMasterThreads()); + static int parallel_tad(FUNC_1D function, int64_t start, int64_t stop, int64_t increment = 1, uint32_t numThreads = sd::Environment::getInstance().maxMasterThreads()); /** * This method will execute function splitting 2 nested loops space with multiple threads * @param function * @param start_x * @param stop_x * @param inc_x * @param start_y * @param stop_y * @param inc_y * @return */ - static int parallel_for(FUNC_2D function, int64_t start_x, int64_t stop_x, int64_t inc_x, int64_t start_y, int64_t stop_y, int64_t inc_y, uint64_t numThreads = sd::Environment::getInstance()->maxMasterThreads(), bool debug = false); + static int parallel_for(FUNC_2D function, int64_t start_x, int64_t stop_x, int64_t inc_x, int64_t start_y, int64_t stop_y, int64_t inc_y, uint64_t numThreads = sd::Environment::getInstance().maxMasterThreads(), bool debug = false); /** * This method will execute function splitting 3 nested loops space with multiple threads * @param function * @param start_x * @param stop_x * @param inc_x * @param start_y * @param stop_y * @param inc_y * @param start_z * @param stop_z * @param inc_z * @return */ - static int parallel_for(FUNC_3D function, int64_t start_x, int64_t stop_x, int64_t inc_x, int64_t start_y, int64_t stop_y, int64_t inc_y, int64_t start_z, int64_t stop_z, int64_t inc_z, uint64_t numThreads = sd::Environment::getInstance()->maxMasterThreads()); + static int parallel_for(FUNC_3D function, int64_t start_x, int64_t stop_x, int64_t inc_x, int64_t start_y, int64_t stop_y, int64_t inc_y, int64_t start_z, int64_t stop_z, int64_t inc_z, uint64_t numThreads = sd::Environment::getInstance().maxMasterThreads()); /** * * @param function * @param numThreads * @return */ - static int parallel_do(FUNC_DO function, uint64_t numThreads = sd::Environment::getInstance()->maxMasterThreads()); + static int parallel_do(FUNC_DO function, uint64_t numThreads = sd::Environment::getInstance().maxMasterThreads()); - static
int64_t parallel_long(FUNC_RL function, FUNC_AL aggregator, int64_t start, int64_t stop, int64_t increment = 1, uint64_t numThreads = sd::Environment::getInstance()->maxMasterThreads()); + static int64_t parallel_long(FUNC_RL function, FUNC_AL aggregator, int64_t start, int64_t stop, int64_t increment = 1, uint64_t numThreads = sd::Environment::getInstance().maxMasterThreads()); - static double parallel_double(FUNC_RD function, FUNC_AD aggregator, int64_t start, int64_t stop, int64_t increment = 1, uint64_t numThreads = sd::Environment::getInstance()->maxMasterThreads()); + static double parallel_double(FUNC_RD function, FUNC_AD aggregator, int64_t start, int64_t stop, int64_t increment = 1, uint64_t numThreads = sd::Environment::getInstance().maxMasterThreads()); /** * This method will execute function in parallel preserving the parts to be aligned increment size * PLEASE NOTE: this function can use smaller number of threads than requested. * */ - static int parallel_aligned_increment(FUNC_1D function, int64_t start, int64_t stop, int64_t increment, size_t type_size = sizeof(float), uint32_t req_numThreads = sd::Environment::getInstance()->maxMasterThreads()); + static int parallel_aligned_increment(FUNC_1D function, int64_t start, int64_t stop, int64_t increment, size_t type_size = sizeof(float), uint32_t req_numThreads = sd::Environment::getInstance().maxMasterThreads()); }; } diff --git a/libnd4j/include/execution/cpu/LaunchContext.cpp b/libnd4j/include/execution/cpu/LaunchContext.cpp index 23e78c350..31cb6889d 100644 --- a/libnd4j/include/execution/cpu/LaunchContext.cpp +++ b/libnd4j/include/execution/cpu/LaunchContext.cpp @@ -61,14 +61,19 @@ namespace sd { } - LaunchContext* LaunchContext::defaultContext() { - // TODO: we need it to be device-aware, but only once we add NUMA support for cpu - if (LaunchContext::_contexts.empty()) { - LaunchContext::_contexts.emplace_back(std::make_shared()); - } + static std::mutex _lock; - // return context for current device - return LaunchContext::_contexts[0].get(); + LaunchContext* LaunchContext::defaultContext() { + { + // synchronous block goes here + std::lock_guard lock(_lock); + // TODO: we need it to be device-aware, but only once we add NUMA support for cpu + if (LaunchContext::_contexts.empty()) + LaunchContext::_contexts.emplace_back(std::make_shared()); + } + + // return context for current device + return LaunchContext::_contexts[0].get(); } std::mutex* LaunchContext::deviceMutex() { diff --git a/libnd4j/include/execution/cuda/LaunchContext.cu b/libnd4j/include/execution/cuda/LaunchContext.cu index 8380e50bf..bd51c3504 100644 --- a/libnd4j/include/execution/cuda/LaunchContext.cu +++ b/libnd4j/include/execution/cuda/LaunchContext.cu @@ -1,5 +1,6 @@ /******************************************************************************* * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2020 Konduit K.K. * * This program and the accompanying materials are made available under the * terms of the Apache License, Version 2.0 which is available at @@ -15,7 +16,7 @@ ******************************************************************************/ // -// Created by raver119 on 30.11.17. +// @author raver119@gmail.com // #include @@ -75,36 +76,37 @@ LaunchContext::LaunchContext() { } LaunchContext* LaunchContext::defaultContext() { - /** - * This method returns LaunchContext, that has multiple entities within: - * 1) temporary buffers. they must be per-thread - * 2) CUDA stream. it must be either per-thread or per-device - * 3) cuBLAS handle. 
it must be per-device - */ - auto deviceId = AffinityManager::currentDeviceId(); + /** + * This method returns LaunchContext, that has multiple entities within: + * 1) temporary buffers. they must be per-thread + * 2) CUDA stream. it must be either per-thread or per-device + * 3) cuBLAS handle. it must be per-device + */ + auto deviceId = AffinityManager::currentDeviceId(); + { // we need this block synchronous, to avoid double initialization etc - _mutex.lock(); + std::lock_guard lock(_mutex); if (LaunchContext::_contexts.empty()) { - // create one context per device - auto numDevices = AffinityManager::numberOfDevices(); + // create one context per device + auto numDevices = AffinityManager::numberOfDevices(); - _contexts.resize(numDevices); - for (int e = 0; e < numDevices; e++) { - _deviceMutexes[e] = new std::mutex(); + _contexts.resize(numDevices); + for (int e = 0; e < numDevices; e++) { + _deviceMutexes[e] = new std::mutex(); - AffinityManager::setCurrentNativeDevice(e); + AffinityManager::setCurrentNativeDevice(e); - LaunchContext::_contexts[e] = std::make_shared(); - } + LaunchContext::_contexts[e] = std::make_shared(); + } - // don't forget to restore device back again - AffinityManager::setCurrentNativeDevice(deviceId); + // don't forget to restore device back again + AffinityManager::setCurrentNativeDevice(deviceId); } - _mutex.unlock(); + } - // return context for current device - return LaunchContext::_contexts[deviceId].get(); + // return context for current device + return LaunchContext::_contexts[deviceId].get(); } @@ -121,11 +123,11 @@ LaunchContext::LaunchContext() { }; void* LaunchContext::getCublasHandle() const { - return CublasHelper::getInstance()->handle(); + return CublasHelper::getInstance().handle(); }; void* LaunchContext::getCusolverHandle() const { - return CublasHelper::getInstance()->solver(); + return CublasHelper::getInstance().solver(); }; cudaStream_t* LaunchContext::getCudaStream() const { @@ -175,7 +177,7 @@ LaunchContext::LaunchContext() { } void* LaunchContext::getCuDnnHandle() const { - return CublasHelper::getInstance()->cudnn(); + return CublasHelper::getInstance().cudnn(); } sd::ErrorReference* LaunchContext::errorReference() { diff --git a/libnd4j/include/execution/impl/ThreadPool.cpp b/libnd4j/include/execution/impl/ThreadPool.cpp index b02c4c4d5..f6c3fdaca 100644 --- a/libnd4j/include/execution/impl/ThreadPool.cpp +++ b/libnd4j/include/execution/impl/ThreadPool.cpp @@ -78,7 +78,7 @@ namespace samediff { ThreadPool::ThreadPool() { // TODO: number of threads must reflect number of cores for UMA system. 
In case of NUMA it should be per-device pool // FIXME: on mobile phones this feature must NOT be used - _available = sd::Environment::getInstance()->maxThreads(); + _available = sd::Environment::getInstance().maxThreads(); _queues.resize(_available.load()); _threads.resize(_available.load()); @@ -88,7 +88,7 @@ namespace samediff { for (int e = 0; e < _available.load(); e++) { _queues[e] = new BlockingQueue(2); _interfaces[e] = new CallableInterface(); - _threads[e] = new std::thread(executionLoopWithInterface_, e, _interfaces[e]); + _threads[e] = std::thread(executionLoopWithInterface_, e, _interfaces[e]); _tickets.push(new Ticket()); // _threads[e] = new std::thread(executionLoop_, e, _queues[e]); @@ -125,19 +125,22 @@ namespace samediff { // stop each and every thread // release queue and thread - //delete _queues[e]; - //delete _threads[e]; + delete _queues[e]; + _threads[e].detach(); + //delete _interfaces[e]; } + + while (!_tickets.empty()) { + auto t = _tickets.front(); + _tickets.pop(); + delete t; + } + } - static std::mutex _lmutex; - - ThreadPool* ThreadPool::getInstance() { - std::unique_lock lock(_lmutex); - if (!_INSTANCE) - _INSTANCE = new ThreadPool(); - - return _INSTANCE; + ThreadPool& ThreadPool::getInstance() { + static ThreadPool instance; + return instance; } void ThreadPool::release(int numThreads) { @@ -188,7 +191,4 @@ namespace samediff { std::unique_lock lock(_lock); _tickets.push(ticket); } - - - ThreadPool* ThreadPool::_INSTANCE = 0; } diff --git a/libnd4j/include/execution/impl/Threads.cpp b/libnd4j/include/execution/impl/Threads.cpp index 51339abf1..90dd519b1 100644 --- a/libnd4j/include/execution/impl/Threads.cpp +++ b/libnd4j/include/execution/impl/Threads.cpp @@ -357,7 +357,7 @@ namespace samediff { return 1; } - auto ticket = ThreadPool::getInstance()->tryAcquire(numThreads); + auto ticket = ThreadPool::getInstance().tryAcquire(numThreads); if (ticket != nullptr) { // if we got our threads - we'll run our jobs here auto span = delta / numThreads; @@ -449,7 +449,7 @@ namespace samediff { // but we still mimic multithreaded execution return numThreads; } else { - auto ticket = ThreadPool::getInstance()->tryAcquire(numThreads); + auto ticket = ThreadPool::getInstance().tryAcquire(numThreads); if (ticket != nullptr) { for (int e = 0; e < numThreads; e++) { @@ -499,7 +499,7 @@ namespace samediff { return 1; } - auto ticket = ThreadPool::getInstance()->tryAcquire(numThreads); + auto ticket = ThreadPool::getInstance().tryAcquire(numThreads); if (ticket != nullptr) { auto splitLoop = ThreadsHelper::pickLoop3d(numThreads, itersX, itersY, itersZ); @@ -526,7 +526,7 @@ namespace samediff { } int Threads::parallel_do(FUNC_DO function, uint64_t numThreads) { - auto ticket = ThreadPool::getInstance()->tryAcquire(numThreads - 1); + auto ticket = ThreadPool::getInstance().tryAcquire(numThreads - 1); if (ticket != nullptr) { // submit tasks one by one @@ -565,7 +565,7 @@ namespace samediff { if (numThreads == 1) return function(0, start, stop, increment); - auto ticket = ThreadPool::getInstance()->tryAcquire(numThreads - 1); + auto ticket = ThreadPool::getInstance().tryAcquire(numThreads - 1); if (ticket == nullptr) return function(0, start, stop, increment); @@ -609,7 +609,7 @@ namespace samediff { if (numThreads == 1) return function(0, start, stop, increment); - auto ticket = ThreadPool::getInstance()->tryAcquire(numThreads - 1); + auto ticket = ThreadPool::getInstance().tryAcquire(numThreads - 1); if (ticket == nullptr) return function(0, start, stop, increment); @@ 
-668,7 +668,7 @@ namespace samediff { numThreads = static_cast(std::ceil((double)delta / spand)); auto span = static_cast(spand); - auto ticket = samediff::ThreadPool::getInstance()->tryAcquire(numThreads); + auto ticket = samediff::ThreadPool::getInstance().tryAcquire(numThreads); if (ticket != nullptr) { //tail_add is additional value of the last part //it could be negative or positive diff --git a/libnd4j/include/execution/impl/Ticket.cpp b/libnd4j/include/execution/impl/Ticket.cpp index 98cb05376..b50b8f771 100644 --- a/libnd4j/include/execution/impl/Ticket.cpp +++ b/libnd4j/include/execution/impl/Ticket.cpp @@ -31,7 +31,7 @@ namespace samediff { Ticket::Ticket() { _acquired = true; - _interfaces.resize(sd::Environment::getInstance()->maxThreads()); + _interfaces.resize(sd::Environment::getInstance().maxThreads()); } bool Ticket::acquired() { @@ -80,11 +80,11 @@ namespace samediff { _interfaces[e]->markAvailable(); // increment availability counter - ThreadPool::getInstance()->release(); + ThreadPool::getInstance().release(); } // return this ticket back to the pool - ThreadPool::getInstance()->release(this); + ThreadPool::getInstance().release(this); } diff --git a/libnd4j/include/graph/ContextPrototype.h b/libnd4j/include/graph/ContextPrototype.h index 57d773dbb..e61831fa7 100644 --- a/libnd4j/include/graph/ContextPrototype.h +++ b/libnd4j/include/graph/ContextPrototype.h @@ -61,7 +61,7 @@ namespace sd { std::vector _dataTypes; sd::ops::OpDescriptor* _opDescriptor; - bool _useMKLDNN = sd::Environment::getInstance()->isUseMKLDNN(); + bool _useMKLDNN = sd::Environment::getInstance().isUseMKLDNN(); // target engine for execution samediff::Engine _engine = DEFAULT_ENGINE; diff --git a/libnd4j/include/graph/GraphHolder.h b/libnd4j/include/graph/GraphHolder.h index 07e091f42..84aebd694 100644 --- a/libnd4j/include/graph/GraphHolder.h +++ b/libnd4j/include/graph/GraphHolder.h @@ -30,7 +30,6 @@ namespace sd { namespace graph { class ND4J_EXPORT GraphHolder { private: - static GraphHolder *_INSTANCE; MAP_IMPL _graphF; MAP_IMPL _locks; @@ -38,7 +37,7 @@ namespace sd { GraphHolder() = default; ~GraphHolder() = default; public: - static GraphHolder* getInstance(); + static GraphHolder& getInstance(); void registerGraph(Nd4jLong graphId, Graph *graph); diff --git a/libnd4j/include/graph/execution/impl/LogicReturn.cpp b/libnd4j/include/graph/execution/impl/LogicReturn.cpp index c9dbafd6d..0ee62e945 100644 --- a/libnd4j/include/graph/execution/impl/LogicReturn.cpp +++ b/libnd4j/include/graph/execution/impl/LogicReturn.cpp @@ -34,7 +34,7 @@ namespace sd { // FIXME!! 
outputAddr.second = e; - if (Environment::getInstance()->isDebugAndVerbose()) + if (Environment::getInstance().isDebugAndVerbose()) nd4j_debug("Return input: <%i, %i>; Return output: <%i, %i>\n", inputAddr.first, inputAddr.second, outputAddr.first, outputAddr.second); auto varIn = __variableSpace->getVariable(inputAddr); @@ -45,7 +45,7 @@ namespace sd { // FIXME: this is obviously wrong, we should keep depth track for backprop here varOut->getNDArray()->assign(varIn->getNDArray()); - if (Environment::getInstance()->isDebugAndVerbose()) + if (Environment::getInstance().isDebugAndVerbose()) nd4j_debug("In after: [%f]; Out after: [%f]\n", varIn->getNDArray()->meanNumber().e(0), varOut->getNDArray()->meanNumber().e(0)); } diff --git a/libnd4j/include/graph/execution/impl/LogicWhile.cpp b/libnd4j/include/graph/execution/impl/LogicWhile.cpp index 1dfd3aaf2..fec9a0d30 100644 --- a/libnd4j/include/graph/execution/impl/LogicWhile.cpp +++ b/libnd4j/include/graph/execution/impl/LogicWhile.cpp @@ -96,7 +96,7 @@ namespace sd { // now we should take result of the Scope run, and evaluate it auto result = __variableSpace->getVariable(lastNode)->getNDArray(); - if (Environment::getInstance()->isDebugAndVerbose()) + if (Environment::getInstance().isDebugAndVerbose()) result->printBuffer("Result of the last node:"); // if result evaluates to 0.0 - condition returned FALSE diff --git a/libnd4j/include/graph/impl/Context.cpp b/libnd4j/include/graph/impl/Context.cpp index ae5bc59a0..f76f66bbe 100644 --- a/libnd4j/include/graph/impl/Context.cpp +++ b/libnd4j/include/graph/impl/Context.cpp @@ -236,7 +236,7 @@ namespace sd { auto v = variable(p); - if (Environment::getInstance()->isDebugAndVerbose() && v != nullptr && v->getNDArray() != nullptr) { + if (Environment::getInstance().isDebugAndVerbose() && v != nullptr && v->getNDArray() != nullptr) { auto array = v->getNDArray(); std::string shape_ = ShapeUtils::shapeAsString(array); auto type = DataTypeUtils::asString(array->dataType()); diff --git a/libnd4j/include/graph/impl/Graph.cpp b/libnd4j/include/graph/impl/Graph.cpp index 177adbe07..a50d1f4b6 100644 --- a/libnd4j/include/graph/impl/Graph.cpp +++ b/libnd4j/include/graph/impl/Graph.cpp @@ -166,7 +166,7 @@ namespace sd { // aNewShape[5] = 8192; // set type as FLOAT32 by default // aNewShape[6] = 1; // aNewShape[7] = 99; - newShape = ConstantShapeHelper::getInstance()->createShapeInfo(DataType::FLOAT32, 'c', {1,1}); + newShape = ConstantShapeHelper::getInstance().createShapeInfo(DataType::FLOAT32, 'c', {1,1}); } else { auto in = node->input()->at(0); @@ -184,7 +184,7 @@ namespace sd { //shape::TAD tad(oldShape, node->getDimensions()->data(), node->getDimensions()->size()); auto numTads = shape::tadLength(oldShape, node->getDimensions()->data(), node->getDimensions()->size()); Nd4jLong shape[2] = {1, (int) numTads}; - newShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(oldShape), 'c', 2, shape); + newShape = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(oldShape), 'c', 2, shape); } std::pair pairAddr(node->id(), 0); @@ -805,7 +805,7 @@ namespace sd { // we're adding final nodes of the graph. those, not used as input anywhere nd4j_debug("Paring nodes... 
\n", ""); - if (Environment::getInstance()->isDebugAndVerbose()) { + if (Environment::getInstance().isDebugAndVerbose()) { // nd4j_printv("current _output", _output); } //_output.clear(); @@ -852,7 +852,7 @@ namespace sd { if (std::find(_output.begin(), _output.end(), node->id()) == _output.end()) _output.emplace_back(node->id()); - } else if (Environment::getInstance()->isDebugAndVerbose()) { + } else if (Environment::getInstance().isDebugAndVerbose()) { nd4j_debug("Node [%i:<%s>] has %i outputs announced:\n", v, node->name()->c_str(), node->output()->size()); printf("{"); for (auto s : *node->output()) { @@ -1202,7 +1202,7 @@ namespace sd { } break; default: { - opNameStr = std::string(EnumUtils::_OpTypeToString(node->opType()))+"{" + ops::OpRegistrator::getInstance()->local_to_string((int) node->opNum()) + "}"; + opNameStr = std::string(EnumUtils::_OpTypeToString(node->opType()))+"{" + ops::OpRegistrator::getInstance().local_to_string((int) node->opNum()) + "}"; } } @@ -1250,7 +1250,7 @@ namespace sd { } break; default: { - opNameStr = std::string(EnumUtils::_OpTypeToString(node->opType()))+"{" + ops::OpRegistrator::getInstance()->local_to_string((int) node->opNum()) + "}"; + opNameStr = std::string(EnumUtils::_OpTypeToString(node->opType()))+"{" + ops::OpRegistrator::getInstance().local_to_string((int) node->opNum()) + "}"; } } @@ -1447,7 +1447,7 @@ namespace sd { } - hash = ops::HashHelper::getInstance()->getLongHash(localStamp); + hash = ops::HashHelper::getInstance().getLongHash(localStamp); nd4j_debug("Graph hash: %lld\n", hash); diff --git a/libnd4j/include/graph/impl/GraphExecutioner.cpp b/libnd4j/include/graph/impl/GraphExecutioner.cpp index c673d2b31..abc3b2e0c 100644 --- a/libnd4j/include/graph/impl/GraphExecutioner.cpp +++ b/libnd4j/include/graph/impl/GraphExecutioner.cpp @@ -88,7 +88,7 @@ namespace graph { Context context(node->getContextPrototype(), variableSpace); - if (sd::Environment::getInstance()->isDebugAndVerbose()) { + if (sd::Environment::getInstance().isDebugAndVerbose()) { //nd4j_debug("Input variables: %i\n", node->input()->size()); printf(" Inputs: {"); for (int e = 0; e < node->input()->size(); e++) { @@ -215,10 +215,10 @@ Nd4jStatus GraphExecutioner::execute(Graph *graph, VariableSpace* variableSpace) } auto flowPath = __variableSpace->flowPath(); - Nd4jLong tb0 = Environment::getInstance()->isProfiling() ? GraphProfile::currentTime() : 0L; + Nd4jLong tb0 = Environment::getInstance().isProfiling() ? GraphProfile::currentTime() : 0L; graph->buildGraph(); - auto footprintForward = sd::memory::MemoryRegistrator::getInstance()->getGraphMemoryFootprint(graph->hashCode()); + auto footprintForward = sd::memory::MemoryRegistrator::getInstance().getGraphMemoryFootprint(graph->hashCode()); if (footprintForward > 0) { if (__variableSpace->launchContext()->getWorkspace() != nullptr) { // this method will work only if current workspace size is smaller then proposed value @@ -228,10 +228,10 @@ Nd4jStatus GraphExecutioner::execute(Graph *graph, VariableSpace* variableSpace) } // optionally saving graph build time - if (Environment::getInstance()->isProfiling()) + if (Environment::getInstance().isProfiling()) flowPath->profile()->setBuildTime(GraphProfile::relativeTime(tb0)); - Nd4jLong timeStart = Environment::getInstance()->isProfiling() ? GraphProfile::currentTime() : 0L; + Nd4jLong timeStart = Environment::getInstance().isProfiling() ? 
GraphProfile::currentTime() : 0L; bool pe = graph->getExecutorConfiguration()->_executionMode == ExecutionMode_AUTO; @@ -259,10 +259,10 @@ Nd4jStatus GraphExecutioner::execute(Graph *graph, VariableSpace* variableSpace) Node* node = graph->getOnion()->at(l)->at(n); - if (Environment::getInstance()->isProfiling()) + if (Environment::getInstance().isProfiling()) flowPath->profile()->nodeById(node->id(), node->name()->c_str()); - if (lastId != node->id() && Environment::getInstance()->isProfiling()) { + if (lastId != node->id() && Environment::getInstance().isProfiling()) { if (lastId != -10000000) flowPath->profile()->nodeById(lastId)->setTotalTime(GraphProfile::relativeTime(nodeTime)); @@ -458,7 +458,7 @@ Nd4jStatus GraphExecutioner::execute(Graph *graph, VariableSpace* variableSpace) // now we skip all branches except of this active one } - if (sd::Environment::getInstance()->isDebugAndVerbose()) { + if (sd::Environment::getInstance().isDebugAndVerbose()) { if (__variableSpace->getVariable(node->id())->hasNDArray()) { auto array = __variableSpace->getVariable(node->id())->getNDArray(); @@ -481,7 +481,7 @@ Nd4jStatus GraphExecutioner::execute(Graph *graph, VariableSpace* variableSpace) } // optionally saving execution time - if (Environment::getInstance()->isProfiling()) { + if (Environment::getInstance().isProfiling()) { flowPath->profile()->nodeById(lastId)->setTotalTime(GraphProfile::relativeTime(nodeTime)); flowPath->profile()->setExecutionTime(GraphProfile::relativeTime(timeStart)); //flowPath->profile().printOut(); @@ -491,7 +491,7 @@ Nd4jStatus GraphExecutioner::execute(Graph *graph, VariableSpace* variableSpace) if (__variableSpace->launchContext()->getWorkspace() != nullptr) { auto m = __variableSpace->launchContext()->getWorkspace()->getAllocatedSize(); auto h = graph->hashCode(); - sd::memory::MemoryRegistrator::getInstance()->setGraphMemoryFootprintIfGreater(h, m); + sd::memory::MemoryRegistrator::getInstance().setGraphMemoryFootprintIfGreater(h, m); } if (tempFlow) { @@ -523,7 +523,7 @@ Nd4jStatus GraphExecutioner::execute(Graph *graph, VariableSpace* variableSpace) // converting FlatGraph to internal representation auto nativeGraph = new Graph(restoredGraph); - if (Environment::getInstance()->isDebugAndVerbose()) { + if (Environment::getInstance().isDebugAndVerbose()) { nativeGraph->printOut(); } @@ -742,7 +742,7 @@ Graph* GraphExecutioner::importFromTensorFlow(const char *fileName) { nd4j_verbose("Node id: [%i]; name: [%s]; opName: [%s]\n", n + 1, node.name().c_str(), node.op().c_str()); - sd::ops::DeclarableOp *op = sd::ops::OpRegistrator::getInstance()->getOperationFloat(node.op().c_str()); + sd::ops::DeclarableOp *op = sd::ops::OpRegistrator::getInstance().getOperationFloat(node.op().c_str()); if (op == nullptr) { nd4j_verbose("Op wasn't found: %s\n", node.op().c_str()); @@ -859,7 +859,7 @@ flatbuffers::Offset GraphExecutioner::execute(Graph *graph, flatbuff } } - if (Environment::getInstance()->isDebugAndVerbose()) + if (Environment::getInstance().isDebugAndVerbose()) graph->printOut(); auto status = GraphExecutioner::execute(graph); diff --git a/libnd4j/include/graph/impl/GraphHolder.cpp b/libnd4j/include/graph/impl/GraphHolder.cpp index c480508f5..13c4e3896 100644 --- a/libnd4j/include/graph/impl/GraphHolder.cpp +++ b/libnd4j/include/graph/impl/GraphHolder.cpp @@ -25,11 +25,9 @@ namespace sd { namespace graph { - GraphHolder* GraphHolder::getInstance() { - if (_INSTANCE == 0) - _INSTANCE = new GraphHolder(); - - return _INSTANCE; + GraphHolder& 
GraphHolder::getInstance() { + static GraphHolder instance; + return instance; }; void GraphHolder::registerGraph(Nd4jLong graphId, Graph* graph) { @@ -126,7 +124,5 @@ namespace sd { return res; } - - GraphHolder* GraphHolder::_INSTANCE = 0; } } diff --git a/libnd4j/include/graph/impl/Node.cpp b/libnd4j/include/graph/impl/Node.cpp index e3ea75ef9..a3baf1a9b 100644 --- a/libnd4j/include/graph/impl/Node.cpp +++ b/libnd4j/include/graph/impl/Node.cpp @@ -636,7 +636,7 @@ namespace sd { block->setOpDescriptor(this->getCustomOp()->getOpDescriptor()); } } else if (this->_opType == OpType_CUSTOM) { - auto op = sd::ops::OpRegistrator::getInstance()->getOperation(this->opNum()); + auto op = sd::ops::OpRegistrator::getInstance().getOperation(this->opNum()); if (op == nullptr) { nd4j_verbose("Can't find operation: %lld\n", this->opNum()); throw std::runtime_error("Can't find requested operation"); diff --git a/libnd4j/include/helpers/BlasHelper.h b/libnd4j/include/helpers/BlasHelper.h index b2fe7b60c..038df67b5 100644 --- a/libnd4j/include/helpers/BlasHelper.h +++ b/libnd4j/include/helpers/BlasHelper.h @@ -364,8 +364,6 @@ namespace sd { class BlasHelper { private: - static BlasHelper* _instance; - bool _hasHgemv = false; bool _hasHgemm = false; bool _hasHgemmBatch = false; @@ -404,7 +402,7 @@ namespace sd { CusolverDnDgesvd cusolverDnDgesvd; public: - static BlasHelper* getInstance(); + static BlasHelper& getInstance(); void initializeFunctions(Nd4jPointer *functions); void initializeDeviceFunctions(Nd4jPointer *functions); diff --git a/libnd4j/include/helpers/ConstantHelper.h b/libnd4j/include/helpers/ConstantHelper.h index 3e5681fb6..7d4446d34 100644 --- a/libnd4j/include/helpers/ConstantHelper.h +++ b/libnd4j/include/helpers/ConstantHelper.h @@ -35,7 +35,6 @@ namespace sd { class ND4J_EXPORT ConstantHelper { private: - static ConstantHelper* _INSTANCE; ConstantHelper(); std::vector> _cache; @@ -48,9 +47,9 @@ namespace sd { std::vector _counters; public: - ~ConstantHelper() = default; + ~ConstantHelper(); - static ConstantHelper* getInstance(); + static ConstantHelper& getInstance(); static int getCurrentDevice(); static int getNumberOfDevices(); void* replicatePointer(void *src, size_t numBytes, memory::Workspace *workspace = nullptr); diff --git a/libnd4j/include/helpers/ConstantShapeHelper.h b/libnd4j/include/helpers/ConstantShapeHelper.h index 73281c507..25440e05c 100644 --- a/libnd4j/include/helpers/ConstantShapeHelper.h +++ b/libnd4j/include/helpers/ConstantShapeHelper.h @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include #include @@ -35,24 +35,22 @@ namespace sd { class ND4J_EXPORT ConstantShapeHelper { private: - static ConstantShapeHelper *_INSTANCE; - std::mutex _mutex; - std::vector> _cache; + std::vector> _cache; ConstantShapeHelper(); public: ~ConstantShapeHelper() = default; - static ConstantShapeHelper* getInstance(); + static ConstantShapeHelper & getInstance(); - ConstantDataBuffer bufferForShapeInfo(sd::DataType dataType, char order, const std::vector &shape); - ConstantDataBuffer bufferForShapeInfo(const ShapeDescriptor &descriptor); - ConstantDataBuffer bufferForShapeInfo(const Nd4jLong *shapeInfo); - ConstantDataBuffer bufferForShapeInfo(sd::DataType dataType, char order, int rank, const Nd4jLong* shape); - ConstantDataBuffer createShapeInfoWithUnitiesForBroadcast(const Nd4jLong* maxShapeInfo, const Nd4jLong* minShapeInfo, sd::memory::Workspace* workspace = nullptr, const std::vector &dimensions = {}); + ConstantShapeBuffer& bufferForShapeInfo(sd::DataType 
dataType, char order, const std::vector &shape); + ConstantShapeBuffer& bufferForShapeInfo(const ShapeDescriptor &descriptor); + ConstantShapeBuffer& bufferForShapeInfo(const Nd4jLong *shapeInfo); + ConstantShapeBuffer& bufferForShapeInfo(sd::DataType dataType, char order, int rank, const Nd4jLong* shape); + ConstantShapeBuffer& createShapeInfoWithUnitiesForBroadcast(const Nd4jLong* maxShapeInfo, const Nd4jLong* minShapeInfo, sd::memory::Workspace* workspace = nullptr, const std::vector &dimensions = {}); const Nd4jLong* emptyShapeInfo(sd::DataType dataType); diff --git a/libnd4j/include/helpers/ConstantTadHelper.h b/libnd4j/include/helpers/ConstantTadHelper.h index 80efaa86f..10bdd108d 100644 --- a/libnd4j/include/helpers/ConstantTadHelper.h +++ b/libnd4j/include/helpers/ConstantTadHelper.h @@ -35,8 +35,6 @@ namespace sd { class ND4J_EXPORT ConstantTadHelper { private: - static ConstantTadHelper *_INSTANCE; - std::mutex _mutex; std::vector> _cache; @@ -44,7 +42,7 @@ namespace sd { public: ~ConstantTadHelper() = default; - static ConstantTadHelper* getInstance(); + static ConstantTadHelper & getInstance(); /** * These methods calculate Tensor-Along-Dimension(s) shape and offsets diff --git a/libnd4j/include/helpers/DebugHelper.h b/libnd4j/include/helpers/DebugHelper.h index b0387dd8c..10bb1dc90 100644 --- a/libnd4j/include/helpers/DebugHelper.h +++ b/libnd4j/include/helpers/DebugHelper.h @@ -44,7 +44,7 @@ namespace sd { // cuda-specific debug functions #ifdef __CUDACC__ static FORCEINLINE void checkErrorCode(cudaStream_t *stream, int opNum = 0) { - if (Environment::getInstance()->isDebug()) { + if (Environment::getInstance().isDebug()) { cudaError_t res = cudaStreamSynchronize(*stream); if (res != 0) { diff --git a/libnd4j/include/helpers/LoopKind.h b/libnd4j/include/helpers/LoopKind.h index e3ca932b3..4efbea43a 100644 --- a/libnd4j/include/helpers/LoopKind.h +++ b/libnd4j/include/helpers/LoopKind.h @@ -206,7 +206,7 @@ LoopKind::Kind LoopKind::deduceKindOfLoopTadXZ(const Nd4jLong* xShapeInfo, const const bool tVectorOrC = shape::isCommonVector(tadShapeInfo, temp) || tOrder == 'c'; const bool zVectorOrC = shape::isCommonVector(zShapeInfo, temp) || zOrder == 'c';; - if(shape::length(tadShapeInfo) * shape::length(zShapeInfo) <= Environment::getInstance()->elementwiseThreshold() && xEws == 1 && xOrder == 'c' && xRank == 2 && + if(shape::length(tadShapeInfo) * shape::length(zShapeInfo) <= Environment::getInstance().elementwiseThreshold() && xEws == 1 && xOrder == 'c' && xRank == 2 && tEws > 1 && zEws == 1 && (allC || (tVectorOrC && zVectorOrC))) return SMALLARR2DX; if(tEws == 1 && zEws == 1 && (allC || (tVectorOrC && zVectorOrC))) diff --git a/libnd4j/include/helpers/Loops.h b/libnd4j/include/helpers/Loops.h index f18bcc63d..9bf3daede 100644 --- a/libnd4j/include/helpers/Loops.h +++ b/libnd4j/include/helpers/Loops.h @@ -702,21 +702,21 @@ namespace sd { std::vector zeroOffsets; if (xLen == yLen) { - tadPackX = sd::ConstantTadHelper::getInstance()->tadForDimensions(xShapeInfo, dims, dimsLen); - tadPackY = sd::ConstantTadHelper::getInstance()->tadForDimensions(yShapeInfo, dims, dimsLen); + tadPackX = sd::ConstantTadHelper::getInstance().tadForDimensions(xShapeInfo, dims, dimsLen); + tadPackY = sd::ConstantTadHelper::getInstance().tadForDimensions(yShapeInfo, dims, dimsLen); xTadShapeInfo = tadPackX.primaryShapeInfo(); yTadShapeInfo = tadPackY.primaryShapeInfo(); xTadOffsets = tadPackX.primaryOffsets(); yTadOffsets = tadPackY.primaryOffsets(); } else if (yLen > xLen) { - tadPackY = 
sd::ConstantTadHelper::getInstance()->tadForDimensions(yShapeInfo, dims, dimsLen); + tadPackY = sd::ConstantTadHelper::getInstance().tadForDimensions(yShapeInfo, dims, dimsLen); xTadShapeInfo = xShapeInfo; yTadShapeInfo = tadPackY.primaryShapeInfo(); yTadOffsets = tadPackY.primaryOffsets(); } else { - tadPackX = sd::ConstantTadHelper::getInstance()->tadForDimensions(xShapeInfo, dims, dimsLen); + tadPackX = sd::ConstantTadHelper::getInstance().tadForDimensions(xShapeInfo, dims, dimsLen); yTadShapeInfo = yShapeInfo; xTadShapeInfo = tadPackX.primaryShapeInfo(); xTadOffsets = tadPackX.primaryOffsets(); diff --git a/libnd4j/include/helpers/OpTracker.h b/libnd4j/include/helpers/OpTracker.h index 122f4f32b..dfccf5e5d 100644 --- a/libnd4j/include/helpers/OpTracker.h +++ b/libnd4j/include/helpers/OpTracker.h @@ -32,8 +32,6 @@ namespace sd { class ND4J_EXPORT OpTracker { private: - static OpTracker* _INSTANCE; - std::string _export; int _operations = 0; @@ -45,7 +43,7 @@ namespace sd { template std::string local_to_string(T value); public: - static OpTracker* getInstance(); + static OpTracker& getInstance(); int totalGroups(); int totalOperations(); diff --git a/libnd4j/include/helpers/benchmark/BroadcastBenchmark.h b/libnd4j/include/helpers/benchmark/BroadcastBenchmark.h index 3a043be59..8c61bda23 100644 --- a/libnd4j/include/helpers/benchmark/BroadcastBenchmark.h +++ b/libnd4j/include/helpers/benchmark/BroadcastBenchmark.h @@ -69,14 +69,14 @@ namespace sd { void executeOnce() override { PointersManager manager(LaunchContext::defaultContext(), "BroadcastBM"); - auto packX = ConstantTadHelper::getInstance()->tadForDimensions(_x->shapeInfo(), _axis); - auto packZ = ConstantTadHelper::getInstance()->tadForDimensions(_z->shapeInfo(), _axis); + auto packX = ConstantTadHelper::getInstance().tadForDimensions(_x->shapeInfo(), _axis); + auto packZ = ConstantTadHelper::getInstance().tadForDimensions(_z->shapeInfo(), _axis); - auto tadOnlyShapeInfo = Environment::getInstance()->isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo(); - auto tadOffsets = Environment::getInstance()->isCPU() ? packX.primaryOffsets() : packX.specialOffsets(); + auto tadOnlyShapeInfo = Environment::getInstance().isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo(); + auto tadOffsets = Environment::getInstance().isCPU() ? packX.primaryOffsets() : packX.specialOffsets(); - auto tadOnlyShapeInfoZ = Environment::getInstance()->isCPU() ? packZ.primaryShapeInfo() : packZ.specialShapeInfo(); - auto tadOffsetsZ = Environment::getInstance()->isCPU() ? packZ.primaryOffsets() : packZ.specialOffsets(); + auto tadOnlyShapeInfoZ = Environment::getInstance().isCPU() ? packZ.primaryShapeInfo() : packZ.specialShapeInfo(); + auto tadOffsetsZ = Environment::getInstance().isCPU() ? 
packZ.primaryOffsets() : packZ.specialOffsets(); NativeOpExecutioner::execBroadcast(LaunchContext::defaultContext(), _opNum, _x->buffer(), _x->shapeInfo(), _x->specialBuffer(), _x->specialShapeInfo(), _y->buffer(), _y->shapeInfo(), _y->specialBuffer(), _y->specialShapeInfo(), _z->buffer(), _z->shapeInfo(), _z->specialBuffer(), _z->specialShapeInfo(), nullptr, _axis.size(), /*Nd4jLong **/ tadOnlyShapeInfo, /*Nd4jLong */ tadOffsets, /*Nd4jLong */ tadOnlyShapeInfoZ, /*Nd4jLong */ tadOffsetsZ); diff --git a/libnd4j/include/helpers/benchmark/DeclarableBenchmark.h b/libnd4j/include/helpers/benchmark/DeclarableBenchmark.h index f9347eb05..58c018a5b 100644 --- a/libnd4j/include/helpers/benchmark/DeclarableBenchmark.h +++ b/libnd4j/include/helpers/benchmark/DeclarableBenchmark.h @@ -36,7 +36,7 @@ namespace sd { sd::graph::Context *_context = nullptr; public: DeclarableBenchmark(sd::ops::DeclarableOp &op, std::string name = 0) : OpBenchmark() { - _op = &op; //ops::OpRegistrator::getInstance()->getOperation(op.getOpHash()); + _op = &op; //ops::OpRegistrator::getInstance().getOperation(op.getOpHash()); _testName = name; } diff --git a/libnd4j/include/helpers/benchmark/ReductionBenchmark.h b/libnd4j/include/helpers/benchmark/ReductionBenchmark.h index a1dc0126f..d87c20d3c 100644 --- a/libnd4j/include/helpers/benchmark/ReductionBenchmark.h +++ b/libnd4j/include/helpers/benchmark/ReductionBenchmark.h @@ -88,10 +88,10 @@ namespace sd { else NativeOpExecutioner::execReduceSameScalar(LaunchContext::defaultContext(), _opNum, _x->buffer(), _x->shapeInfo(), _x->specialBuffer(), _x->specialShapeInfo(), nullptr, _z->buffer(), _z->shapeInfo(), _z->specialBuffer(), _z->specialShapeInfo()); else { - auto pack = ConstantTadHelper::getInstance()->tadForDimensions(_x->shapeInfo(), _axis); + auto pack = ConstantTadHelper::getInstance().tadForDimensions(_x->shapeInfo(), _axis); - auto tadOnlyShapeInfo = Environment::getInstance()->isCPU() ? pack.primaryShapeInfo() : pack.specialShapeInfo(); - auto tadOffsets = Environment::getInstance()->isCPU() ? pack.primaryOffsets() : pack.specialOffsets(); + auto tadOnlyShapeInfo = Environment::getInstance().isCPU() ? pack.primaryShapeInfo() : pack.specialShapeInfo(); + auto tadOffsets = Environment::getInstance().isCPU() ? 
pack.primaryOffsets() : pack.specialOffsets(); if (_opType == 0) NativeOpExecutioner::execReduceFloat(LaunchContext::defaultContext(), _opNum, _x->buffer(), _x->shapeInfo(), _x->specialBuffer(), _x->specialShapeInfo(), nullptr, _z->buffer(), _z->shapeInfo(), _z->specialBuffer(), _z->specialShapeInfo(), nullptr, _axis.size(), tadOnlyShapeInfo, tadOffsets); diff --git a/libnd4j/include/helpers/cpu/ConstantHelper.cpp b/libnd4j/include/helpers/cpu/ConstantHelper.cpp index 10b8a52c3..be6eff65c 100644 --- a/libnd4j/include/helpers/cpu/ConstantHelper.cpp +++ b/libnd4j/include/helpers/cpu/ConstantHelper.cpp @@ -27,6 +27,7 @@ #include #include #include +#include namespace sd { @@ -42,11 +43,17 @@ namespace sd { } } - ConstantHelper* ConstantHelper::getInstance() { - if (!_INSTANCE) - _INSTANCE = new sd::ConstantHelper(); +ConstantHelper::~ConstantHelper() { + for (const auto &v:_cache) { + for (const auto &c:v) { + delete c.second; + } + } +} - return _INSTANCE; +ConstantHelper& ConstantHelper::getInstance() { + static ConstantHelper instance; + return instance; } void* ConstantHelper::replicatePointer(void *src, size_t numBytes, memory::Workspace *workspace) { @@ -95,17 +102,17 @@ namespace sd { result = holder->getConstantDataBuffer(dataType); else { auto size = descriptor.length() * DataTypeUtils::sizeOf(dataType); - auto cbuff = new int8_t[size]; + auto cbuff = std::make_shared(new int8_t[size], std::make_shared()); _counters[deviceId] += size; // create buffer with this dtype if (descriptor.isFloat()) { - BUILD_DOUBLE_SELECTOR(sd::DataType::DOUBLE, dataType, sd::TypeCast::convertGeneric, (nullptr, const_cast(descriptor.floatValues().data()), descriptor.length(), cbuff), (sd::DataType::DOUBLE, double), LIBND4J_TYPES); + BUILD_DOUBLE_SELECTOR(sd::DataType::DOUBLE, dataType, sd::TypeCast::convertGeneric, (nullptr, const_cast(descriptor.floatValues().data()), descriptor.length(), cbuff->pointer()), (sd::DataType::DOUBLE, double), LIBND4J_TYPES); } else if (descriptor.isInteger()) { - BUILD_DOUBLE_SELECTOR(sd::DataType::INT64, dataType, sd::TypeCast::convertGeneric, (nullptr, const_cast(descriptor.integerValues().data()), descriptor.length(), cbuff), (sd::DataType::INT64, Nd4jLong), LIBND4J_TYPES); + BUILD_DOUBLE_SELECTOR(sd::DataType::INT64, dataType, sd::TypeCast::convertGeneric, (nullptr, const_cast(descriptor.integerValues().data()), descriptor.length(), cbuff->pointer()), (sd::DataType::INT64, Nd4jLong), LIBND4J_TYPES); } - ConstantDataBuffer dataBuffer(cbuff, nullptr, descriptor.length(), DataTypeUtils::sizeOf(dataType)); + ConstantDataBuffer dataBuffer(cbuff, descriptor.length(), dataType); holder->addBuffer(dataBuffer, dataType); result = holder->getConstantDataBuffer(dataType); @@ -122,8 +129,6 @@ namespace sd { else return _counters[deviceId]; } - - sd::ConstantHelper* sd::ConstantHelper::_INSTANCE = 0; } #endif \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/ConstantShapeHelper.cpp b/libnd4j/include/helpers/cpu/ConstantShapeHelper.cpp index fc8abe8aa..528527f36 100644 --- a/libnd4j/include/helpers/cpu/ConstantShapeHelper.cpp +++ b/libnd4j/include/helpers/cpu/ConstantShapeHelper.cpp @@ -24,51 +24,50 @@ #include #include #include +#include namespace sd { ConstantShapeHelper::ConstantShapeHelper() { _cache.resize(32); for (int e = 0; e < 32; e++) { - MAP_IMPL cache; + MAP_IMPL cache; _cache[e] = cache; } } - ConstantShapeHelper* ConstantShapeHelper::getInstance() { - if (!_INSTANCE) - _INSTANCE = new ConstantShapeHelper(); - - return _INSTANCE; + ConstantShapeHelper& 
ConstantShapeHelper::getInstance() { + static ConstantShapeHelper instance; + return instance; } - ConstantDataBuffer ConstantShapeHelper::bufferForShapeInfo(sd::DataType dataType, char order, const std::vector &shape) { +ConstantShapeBuffer& ConstantShapeHelper::bufferForShapeInfo(sd::DataType dataType, char order, const std::vector &shape) { ShapeDescriptor descriptor(dataType, order, shape); return bufferForShapeInfo(descriptor); } - ConstantDataBuffer ConstantShapeHelper::bufferForShapeInfo(const sd::DataType dataType, const char order, const int rank, const Nd4jLong* shape) { +ConstantShapeBuffer& ConstantShapeHelper::bufferForShapeInfo(const sd::DataType dataType, const char order, const int rank, const Nd4jLong* shape) { ShapeDescriptor descriptor(dataType, order, shape, rank); return bufferForShapeInfo(descriptor); } - ConstantDataBuffer ConstantShapeHelper::bufferForShapeInfo(const ShapeDescriptor &descriptor) { - int deviceId = 0; +ConstantShapeBuffer& ConstantShapeHelper::bufferForShapeInfo(const ShapeDescriptor &descriptor) { + int deviceId = 0; - std::lock_guard lock(_mutex); + std::lock_guard lock(_mutex); - if (_cache[deviceId].count(descriptor) == 0) { - auto hPtr = descriptor.toShapeInfo(); - ConstantDataBuffer buffer(hPtr, nullptr, shape::shapeInfoLength(hPtr)*sizeof(Nd4jLong), DataType::INT64); - ShapeDescriptor descriptor1(descriptor); - _cache[deviceId][descriptor1] = buffer; - return _cache[deviceId][descriptor1]; - } else { - return _cache[deviceId].at(descriptor); - } - } + if (_cache[deviceId].count(descriptor) == 0) { + auto hPtr = std::make_shared(descriptor.toShapeInfo(), std::make_shared()); + ConstantShapeBuffer buffer(hPtr); + ShapeDescriptor descriptor1(descriptor); + _cache[deviceId][descriptor1] = buffer; + return _cache[deviceId][descriptor1]; + } else { + return _cache[deviceId].at(descriptor); + } +} - ConstantDataBuffer ConstantShapeHelper::bufferForShapeInfo(const Nd4jLong *shapeInfo) { +ConstantShapeBuffer& ConstantShapeHelper::bufferForShapeInfo(const Nd4jLong *shapeInfo) { ShapeDescriptor descriptor(shapeInfo); return bufferForShapeInfo(descriptor); } @@ -83,7 +82,7 @@ namespace sd { const Nd4jLong* ConstantShapeHelper::createShapeInfo(const sd::DataType dataType, const char order, const int rank, const Nd4jLong* shape) { ShapeDescriptor descriptor(dataType, order, shape, rank); - return bufferForShapeInfo(descriptor).primaryAsT(); + return bufferForShapeInfo(descriptor).primary(); } const Nd4jLong* ConstantShapeHelper::createShapeInfo(const sd::DataType dataType, const Nd4jLong* shapeInfo) { @@ -92,26 +91,26 @@ namespace sd { const Nd4jLong* ConstantShapeHelper::emptyShapeInfo(const sd::DataType dataType) { auto descriptor = ShapeDescriptor::emptyDescriptor(dataType); - return bufferForShapeInfo(descriptor).primaryAsT(); + return bufferForShapeInfo(descriptor).primary(); } const Nd4jLong* ConstantShapeHelper::scalarShapeInfo(const sd::DataType dataType) { auto descriptor = ShapeDescriptor::scalarDescriptor(dataType); - return bufferForShapeInfo(descriptor).primaryAsT(); + return bufferForShapeInfo(descriptor).primary(); } const Nd4jLong* ConstantShapeHelper::vectorShapeInfo(const Nd4jLong length, const sd::DataType dataType) { auto descriptor = ShapeDescriptor::vectorDescriptor(length, dataType); - return bufferForShapeInfo(descriptor).primaryAsT(); + return bufferForShapeInfo(descriptor).primary(); } const Nd4jLong* ConstantShapeHelper::createShapeInfo(const sd::DataType dataType, const char order, const std::vector &shape) { ShapeDescriptor 
descriptor(dataType, order, shape); - return bufferForShapeInfo(descriptor).primaryAsT(); + return bufferForShapeInfo(descriptor).primary(); } const Nd4jLong* ConstantShapeHelper::createShapeInfo(const ShapeDescriptor &descriptor) { - return bufferForShapeInfo(descriptor).primaryAsT(); + return bufferForShapeInfo(descriptor).primary(); } const Nd4jLong* ConstantShapeHelper::createFromExisting(Nd4jLong *shapeInfo, bool destroyOriginal) { @@ -135,7 +134,7 @@ namespace sd { //////////////////////////////////////////////////////////////////////// -ConstantDataBuffer ConstantShapeHelper::createShapeInfoWithUnitiesForBroadcast(const Nd4jLong* maxShapeInfo, const Nd4jLong* minShapeInfo, sd::memory::Workspace* workspace, const std::vector &dimensions) { +ConstantShapeBuffer& ConstantShapeHelper::createShapeInfoWithUnitiesForBroadcast(const Nd4jLong* maxShapeInfo, const Nd4jLong* minShapeInfo, sd::memory::Workspace* workspace, const std::vector &dimensions) { Nd4jLong* newShapeInfo = nullptr; ALLOCATE(newShapeInfo, workspace, shape::shapeInfoLength(shape::rank(maxShapeInfo)), Nd4jLong); @@ -185,10 +184,6 @@ ConstantDataBuffer ConstantShapeHelper::createShapeInfoWithUnitiesForBroadcast(c return bufferForShapeInfo(descriptor); } - - -sd::ConstantShapeHelper* sd::ConstantShapeHelper::_INSTANCE = 0; - -} +} // namespace sd #endif \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/ConstantTadHelper.cpp b/libnd4j/include/helpers/cpu/ConstantTadHelper.cpp index ea32db7e6..9f859ee3e 100644 --- a/libnd4j/include/helpers/cpu/ConstantTadHelper.cpp +++ b/libnd4j/include/helpers/cpu/ConstantTadHelper.cpp @@ -21,6 +21,8 @@ #include "../ConstantTadHelper.h" #include #include +#include +#include #ifndef __CUDABLAS__ @@ -32,11 +34,9 @@ namespace sd { _cache.emplace_back(pack); } - ConstantTadHelper* ConstantTadHelper::getInstance() { - if (!_INSTANCE) - _INSTANCE = new ConstantTadHelper(); - - return _INSTANCE; + ConstantTadHelper& ConstantTadHelper::getInstance() { + static ConstantTadHelper instance; + return instance; } TadPack ConstantTadHelper::tadForDimensions(const Nd4jLong *originalShape, int dimension, const bool keepUnitiesInShape) { @@ -60,60 +60,31 @@ namespace sd { TadPack ConstantTadHelper::tadForDimensions(TadDescriptor &descriptor) { const int deviceId = 0; - _mutex.lock(); + std::lock_guard lock(_mutex); if (_cache[deviceId].count(descriptor) == 0) { - + // if there's no TadPack matching this descriptor - create one const auto shapeInfo = descriptor.originalShape().toShapeInfo(); const int rank = shape::rank(shapeInfo); const std::vector dimsToExclude = ShapeUtils::evalDimsToExclude(rank, descriptor.axis()); const Nd4jLong numOfSubArrs = ShapeUtils::getNumOfSubArrs(shapeInfo, dimsToExclude); const int subArrRank = (rank == dimsToExclude.size() || descriptor.areUnitiesinShape()) ? 
rank : rank - dimsToExclude.size(); - auto sPtr = new Nd4jLong[shape::shapeInfoLength(subArrRank)]; // shape of sub-arrays (same for all for them) - auto oPtr = new Nd4jLong[numOfSubArrs]; + auto sPtr = std::make_shared(new Nd4jLong[shape::shapeInfoLength(subArrRank)], std::make_shared()); // shape of sub-arrays (same for all of them) + auto oPtr = std::make_shared(new Nd4jLong[numOfSubArrs], std::make_shared()); if (numOfSubArrs > 0) - shape::calcSubArrsShapeInfoAndOffsets(shapeInfo, numOfSubArrs, dimsToExclude.size(), dimsToExclude.data(), sPtr, oPtr, descriptor.areUnitiesinShape()); - - - ConstantDataBuffer shapesBuffer(sPtr, nullptr, shape::shapeInfoLength(subArrRank)*sizeof(Nd4jLong), DataType::INT64); - ConstantDataBuffer offsetsBuffer(oPtr, nullptr, numOfSubArrs*sizeof(Nd4jLong), DataType::INT64); - TadPack t(shapesBuffer, offsetsBuffer, numOfSubArrs); - - - - // auto shapeInfo = descriptor.originalShape().toShapeInfo(); - // shape::TAD tad; - // tad.init(shapeInfo, descriptor.axis().data(), descriptor.axis().size()); - // tad.createTadOnlyShapeInfo(); - // tad.createOffsets(); - - // auto sPtr = new Nd4jLong[shape::shapeInfoLength(tad.tadOnlyShapeInfo)]; - // auto oPtr = new Nd4jLong[tad.numTads]; - - // memcpy(sPtr, tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); - // memcpy(oPtr, tad.tadOffsets, tad.numTads * sizeof(Nd4jLong)); - - // TadPack t(shapesBuffer, offsetsBuffer, tad.numTads); - + shape::calcSubArrsShapeInfoAndOffsets(shapeInfo, numOfSubArrs, dimsToExclude.size(), dimsToExclude.data(), sPtr->pointerAsT(), oPtr->pointerAsT(), descriptor.areUnitiesinShape()); + ConstantShapeBuffer shapeBuffer(sPtr); + ConstantOffsetsBuffer offsetsBuffer(oPtr); + TadPack t(shapeBuffer, offsetsBuffer, numOfSubArrs); _cache[deviceId][descriptor] = t; - TadPack &r = _cache[deviceId][descriptor]; - _mutex.unlock(); - delete[] shapeInfo; - - return r; - } else { - TadPack r = _cache[deviceId][descriptor]; - _mutex.unlock(); - - return r; } - } - sd::ConstantTadHelper* sd::ConstantTadHelper::_INSTANCE = 0; + return _cache[deviceId][descriptor]; + } } #endif \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/MmulHelper.cpp b/libnd4j/include/helpers/cpu/MmulHelper.cpp index 26a6643c3..437eebe1d 100644 --- a/libnd4j/include/helpers/cpu/MmulHelper.cpp +++ b/libnd4j/include/helpers/cpu/MmulHelper.cpp @@ -162,7 +162,7 @@ static void usualDot(const Nd4jLong length, const double alpha, const void* vX, const bool betaPersent = beta; T3 sum = 0; - PRAGMA_OMP_PARALLEL_FOR_ARGS(OMP_IF(length > Environment::getInstance()->elementwiseThreshold()) schedule(guided) reduction(OMP_SUMT:sum)) + PRAGMA_OMP_PARALLEL_FOR_ARGS(OMP_IF(length > Environment::getInstance().elementwiseThreshold()) schedule(guided) reduction(OMP_SUMT:sum)) for(Nd4jLong i = 0; i < length; ++i) sum += X[i * incx] * Y[i * incy]; @@ -210,7 +210,7 @@ NDArray* MmulHelper::mmulMxM(const NDArray* A, const NDArray* B, NDArray* C, con const auto cType = C->dataType(); const bool AB(aType == bType), AC(aType == cType), ABC(AB && AC); - const bool hasGemm = BlasHelper::getInstance()->hasGEMM(aType); + const bool hasGemm = BlasHelper::getInstance().hasGEMM(aType); const bool typeDouble = hasGemm && ABC && aType == DataType::DOUBLE; const bool typeFloat = hasGemm && ABC && aType == DataType::FLOAT32; @@ -261,10 +261,10 @@ NDArray* MmulHelper::mmulMxM(const NDArray* A, const NDArray* B, NDArray* C, con const int ldc = (cMcont && cNcont) ? M : !cMcont ?
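The raw new Nd4jLong[] buffers above previously had to be deleted by hand (or leaked on the cached path); wrapping them in shared pointers lets every TadPack copy share the arrays and release them exactly once. The idea reduced to standard C++, without libnd4j's wrapper and deallocator types (their template arguments are elided by the diff):

#include <cstddef>
#include <memory>

using SketchLong = long long;  // stand-in for Nd4jLong

// std::shared_ptr<T[]> (C++17) calls delete[] when the last owner disappears,
// so a cached pack and all copies handed to callers can share one array safely
std::shared_ptr<SketchLong[]> makeOffsets(std::size_t numOfSubArrs) {
    return std::shared_ptr<SketchLong[]>(new SketchLong[numOfSubArrs]);
}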
pC->strideAt(0) : pC->strideAt(1); if(typeFloat) { - BlasHelper::getInstance()->sgemm()(blasOrder, transAblas, transBblas, M, N, K, (float) alpha, pA->bufferAsT(), lda, pB->bufferAsT(), ldb, (float) beta, pC->bufferAsT(), ldc); + BlasHelper::getInstance().sgemm()(blasOrder, transAblas, transBblas, M, N, K, (float) alpha, pA->bufferAsT(), lda, pB->bufferAsT(), ldb, (float) beta, pC->bufferAsT(), ldc); } else if(typeDouble) { - BlasHelper::getInstance()->dgemm()(blasOrder, transAblas, transBblas, M, N, K, (double) alpha, pA->bufferAsT(), lda, pB->bufferAsT(), ldb, (double) beta, pC->bufferAsT(), ldc); + BlasHelper::getInstance().dgemm()(blasOrder, transAblas, transBblas, M, N, K, (double) alpha, pA->bufferAsT(), lda, pB->bufferAsT(), ldb, (double) beta, pC->bufferAsT(), ldc); } if(pC != C) { @@ -321,7 +321,7 @@ NDArray* MmulHelper::mmulMxV(const NDArray* A, const NDArray* X, sd::NDArray* Y, const auto yType = Y->dataType(); const bool AX(aType == xType), AY(aType == yType), AXY(AX && AY); - const bool hasGemv = BlasHelper::getInstance()->hasGEMV(aType); + const bool hasGemv = BlasHelper::getInstance().hasGEMV(aType); const bool typeDouble = hasGemv && AXY && aType == DataType::DOUBLE; const bool typeFloat = hasGemv && AXY && aType == DataType::FLOAT32; @@ -347,10 +347,10 @@ NDArray* MmulHelper::mmulMxV(const NDArray* A, const NDArray* X, sd::NDArray* Y, // choose appropriate cuda gemm api depending on data types if(typeDouble) { - BlasHelper::getInstance()->dgemv()(blasOrder, CblasNoTrans, M, N, alpha, (double*)pA->buffer(), lda, (double*)X->buffer(), incx, beta, (double*)Y->buffer(), incy); + BlasHelper::getInstance().dgemv()(blasOrder, CblasNoTrans, M, N, alpha, (double*)pA->buffer(), lda, (double*)X->buffer(), incx, beta, (double*)Y->buffer(), incy); } else if(typeFloat) { - BlasHelper::getInstance()->sgemv()(blasOrder, CblasNoTrans, M, N, (float)alpha, (float*)pA->buffer(), lda, (float*)X->buffer(), incx, (float)beta, (float*)Y->buffer(), incy); + BlasHelper::getInstance().sgemv()(blasOrder, CblasNoTrans, M, N, (float)alpha, (float*)pA->buffer(), lda, (float*)X->buffer(), incx, (float)beta, (float*)Y->buffer(), incy); } if(pA != A) @@ -617,7 +617,7 @@ static void usualGemm(const char cOrder, const bool transA, const bool transB, c const bool flagA = (flagC && transA) || (!flagC && !transA); const bool flagB = (flagC && transB) || (!flagC && !transB); - // PRAGMA_OMP_PARALLEL_FOR_ARGS(OMP_IF(M*N > Environment::getInstance()->elementwiseThreshold()) schedule(guided)) + // PRAGMA_OMP_PARALLEL_FOR_ARGS(OMP_IF(M*N > Environment::getInstance().elementwiseThreshold()) schedule(guided)) // for(uint row = 0; row < M; ++row) { // T3* c = flagC ? 
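mmulMxM above calls into vendor BLAS only when a GEMM exists for the type and all three operands agree on it; everything else falls through to the generic loops. The guard, isolated (DType is an illustrative enum, not libnd4j's DataType):

enum class DType { FLOAT32, DOUBLE, OTHER };

bool canUseVendorGemm(DType a, DType b, DType c, bool hasVendorGemm) {
    const bool sameType = (a == b) && (a == c);  // the ABC flag in the hunk above
    return hasVendorGemm && sameType && (a == DType::FLOAT32 || a == DType::DOUBLE);
}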
(C + row) : (C + row * ldc); diff --git a/libnd4j/include/helpers/cpu/cublasHelper.cpp b/libnd4j/include/helpers/cpu/cublasHelper.cpp index f6f718702..4b17e601d 100644 --- a/libnd4j/include/helpers/cpu/cublasHelper.cpp +++ b/libnd4j/include/helpers/cpu/cublasHelper.cpp @@ -37,11 +37,9 @@ namespace sd { } - CublasHelper* CublasHelper::getInstance() { - if (!_INSTANCE) - _INSTANCE = new sd::CublasHelper(); - - return _INSTANCE; + CublasHelper& CublasHelper::getInstance() { + static CublasHelper instance; + return instance; } void* CublasHelper::handle() { @@ -55,7 +53,4 @@ namespace sd { void* CublasHelper::handle(int deviceId) { return nullptr; } - - - sd::CublasHelper* sd::CublasHelper::_INSTANCE = 0; } \ No newline at end of file diff --git a/libnd4j/include/helpers/cublasHelper.h b/libnd4j/include/helpers/cublasHelper.h index 0300f3698..8ebdc66a7 100644 --- a/libnd4j/include/helpers/cublasHelper.h +++ b/libnd4j/include/helpers/cublasHelper.h @@ -29,7 +29,6 @@ namespace sd { class ND4J_EXPORT CublasHelper { private: - static CublasHelper *_INSTANCE; static std::mutex _mutex; std::vector _cache; @@ -37,9 +36,9 @@ namespace sd { std::vector _cudnn; CublasHelper(); - ~CublasHelper(); public: - static CublasHelper* getInstance(); + ~CublasHelper(); + static CublasHelper& getInstance(); void* cudnn(); void* solver(); diff --git a/libnd4j/include/helpers/cuda/ConstantHelper.cu b/libnd4j/include/helpers/cuda/ConstantHelper.cu index 62d932489..7eb9273e5 100644 --- a/libnd4j/include/helpers/cuda/ConstantHelper.cu +++ b/libnd4j/include/helpers/cuda/ConstantHelper.cu @@ -29,6 +29,7 @@ #include #include #include +#include #define CONSTANT_LIMIT 49152 @@ -84,11 +85,17 @@ namespace sd { throw cuda_exception::build("Final cudaSetDevice failed", res); } - ConstantHelper* ConstantHelper::getInstance() { - if (!_INSTANCE) - _INSTANCE = new sd::ConstantHelper(); +ConstantHelper::~ConstantHelper() { + for (const auto &v:_cache) { + for (const auto &c:v) { + delete c.second; + } + } +} - return _INSTANCE; + ConstantHelper& ConstantHelper::getInstance() { + static ConstantHelper instance; + return instance; } void* ConstantHelper::replicatePointer(void *src, size_t numBytes, memory::Workspace *workspace) { @@ -156,19 +163,21 @@ namespace sd { result = holder->getConstantDataBuffer(dataType); } else { auto numBytes = descriptor.length() * DataTypeUtils::sizeOf(dataType); - auto cbuff = new int8_t[numBytes]; + auto cbuff = std::make_shared(new int8_t[numBytes], std::make_shared()); _counters[deviceId] += numBytes; // create buffer with this dtype if (descriptor.isFloat()) { - BUILD_DOUBLE_SELECTOR(sd::DataType::DOUBLE, dataType, sd::SpecialTypeConverter::convertGeneric, (nullptr, const_cast(descriptor.floatValues().data()), descriptor.length(), cbuff), (sd::DataType::DOUBLE, double), LIBND4J_TYPES); + BUILD_DOUBLE_SELECTOR(sd::DataType::DOUBLE, dataType, sd::SpecialTypeConverter::convertGeneric, (nullptr, const_cast(descriptor.floatValues().data()), descriptor.length(), cbuff->pointer()), (sd::DataType::DOUBLE, double), LIBND4J_TYPES); } else if (descriptor.isInteger()) { - BUILD_DOUBLE_SELECTOR(sd::DataType::INT64, dataType, sd::SpecialTypeConverter::convertGeneric, (nullptr, const_cast(descriptor.integerValues().data()), descriptor.length(), cbuff), (sd::DataType::INT64, Nd4jLong), LIBND4J_TYPES); + BUILD_DOUBLE_SELECTOR(sd::DataType::INT64, dataType, sd::SpecialTypeConverter::convertGeneric, (nullptr, const_cast(descriptor.integerValues().data()), descriptor.length(), cbuff->pointer()), (sd::DataType::INT64, 
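A consequence of the singleton conversion shows up in the new ConstantHelper destructor above: a function-local static is destroyed at process exit, so destructor cleanup now actually runs, whereas the old new-ed _INSTANCE was never deleted. The shape of that change, with illustrative types:

#include <map>
#include <vector>

struct Holder { /* owns per-dtype constant buffers */ };

class HelperSketch {
public:
    static HelperSketch& getInstance() {
        static HelperSketch instance;  // destroyed at exit, unlike a leaked new-ed pointer
        return instance;
    }
    ~HelperSketch() {  // this body is now genuinely executed at shutdown
        for (auto& perDevice : _cache)
            for (auto& entry : perDevice)
                delete entry.second;
    }
private:
    HelperSketch() = default;
    std::vector<std::map<int, Holder*>> _cache;
};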
Nd4jLong), LIBND4J_TYPES); } - auto dbuff = replicatePointer(cbuff, descriptor.length() * DataTypeUtils::sizeOf(dataType)); + // we don't have deallocator here. + // TODO: we probably want to make use deallocator here, if we're not using constant memory + auto dbuff = std::make_shared(replicatePointer(cbuff->pointer(), descriptor.length() * DataTypeUtils::sizeOf(dataType))); - ConstantDataBuffer dataBuffer(cbuff, dbuff, descriptor.length(), DataTypeUtils::sizeOf(dataType)); + ConstantDataBuffer dataBuffer(cbuff, dbuff, descriptor.length(), dataType); holder->addBuffer(dataBuffer, dataType); result = holder->getConstantDataBuffer(dataType); @@ -184,6 +193,4 @@ namespace sd { else return _counters[deviceId]; } - - sd::ConstantHelper* sd::ConstantHelper::_INSTANCE = 0; } \ No newline at end of file diff --git a/libnd4j/include/helpers/cuda/ConstantShapeHelper.cu b/libnd4j/include/helpers/cuda/ConstantShapeHelper.cu index 2026dbb04..35ba60ca9 100644 --- a/libnd4j/include/helpers/cuda/ConstantShapeHelper.cu +++ b/libnd4j/include/helpers/cuda/ConstantShapeHelper.cu @@ -24,6 +24,8 @@ #include #include #include +#include +#include namespace sd { @@ -32,46 +34,44 @@ namespace sd { _cache.resize(numDevices); for (int e = 0; e < numDevices; e++) { - MAP_IMPL cache; + MAP_IMPL cache; _cache[e] = cache; } } - ConstantShapeHelper* ConstantShapeHelper::getInstance() { - if (!_INSTANCE) - _INSTANCE = new ConstantShapeHelper(); - - return _INSTANCE; + ConstantShapeHelper& ConstantShapeHelper::getInstance() { + static ConstantShapeHelper instance; + return instance; } - ConstantDataBuffer ConstantShapeHelper::bufferForShapeInfo(sd::DataType dataType, char order, const std::vector &shape) { + ConstantShapeBuffer& ConstantShapeHelper::bufferForShapeInfo(sd::DataType dataType, char order, const std::vector &shape) { ShapeDescriptor descriptor(dataType, order, shape); return bufferForShapeInfo(descriptor); } - ConstantDataBuffer ConstantShapeHelper::bufferForShapeInfo(const sd::DataType dataType, const char order, const int rank, const Nd4jLong* shape) { +ConstantShapeBuffer& ConstantShapeHelper::bufferForShapeInfo(const sd::DataType dataType, const char order, const int rank, const Nd4jLong* shape) { ShapeDescriptor descriptor(dataType, order, shape, rank); return bufferForShapeInfo(descriptor); } - ConstantDataBuffer ConstantShapeHelper::bufferForShapeInfo(const ShapeDescriptor &descriptor) { +ConstantShapeBuffer& ConstantShapeHelper::bufferForShapeInfo(const ShapeDescriptor &descriptor) { int deviceId = AffinityManager::currentDeviceId(); std::lock_guard lock(_mutex); if (_cache[deviceId].count(descriptor) == 0) { - auto hPtr = descriptor.toShapeInfo(); - auto dPtr = ConstantHelper::getInstance()->replicatePointer(hPtr, shape::shapeInfoByteLength(hPtr)); - ConstantDataBuffer buffer(hPtr, dPtr, shape::shapeInfoLength(hPtr) * sizeof(Nd4jLong), DataType::INT64); - ShapeDescriptor descriptor1(descriptor); - _cache[deviceId][descriptor1] = buffer; - return _cache[deviceId][descriptor1]; + auto hPtr = std::make_shared(descriptor.toShapeInfo(), std::make_shared()); + auto dPtr = std::make_shared(ConstantHelper::getInstance().replicatePointer(hPtr->pointer(), shape::shapeInfoByteLength(hPtr->pointerAsT())), std::make_shared()); + ConstantShapeBuffer buffer(hPtr, dPtr); + ShapeDescriptor descriptor1(descriptor); + _cache[deviceId][descriptor1] = buffer; + return _cache[deviceId][descriptor1]; } else { - return _cache[deviceId].at(descriptor); + return _cache[deviceId].at(descriptor); } } - ConstantDataBuffer 
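The BUILD_DOUBLE_SELECTOR calls above dispatch SpecialTypeConverter::convertGeneric, which fills the freshly allocated host buffer by casting the stored double (or int64) constants into the requested dtype. Functionally it comes down to an elementwise cast loop along these lines (a sketch, not the macro-expanded implementation):

#include <cstddef>

template <typename SrcT, typename DstT>
void convertGenericSketch(const SrcT* src, std::size_t length, DstT* dst) {
    for (std::size_t i = 0; i < length; ++i)
        dst[i] = static_cast<DstT>(src[i]);  // per-element cast into the target dtype buffer
}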
ConstantShapeHelper::bufferForShapeInfo(const Nd4jLong *shapeInfo) { +ConstantShapeBuffer& ConstantShapeHelper::bufferForShapeInfo(const Nd4jLong *shapeInfo) { ShapeDescriptor descriptor(shapeInfo); return bufferForShapeInfo(descriptor); } @@ -85,7 +85,7 @@ namespace sd { Nd4jLong const* ConstantShapeHelper::createShapeInfo(const sd::DataType dataType, const char order, const int rank, const Nd4jLong* shape) { ShapeDescriptor descriptor(dataType, order, shape, rank); - return bufferForShapeInfo(descriptor).primaryAsT(); + return bufferForShapeInfo(descriptor).primary(); } Nd4jLong const* ConstantShapeHelper::createShapeInfo(const sd::DataType dataType, const Nd4jLong* shapeInfo) { @@ -94,26 +94,26 @@ namespace sd { Nd4jLong const* ConstantShapeHelper::emptyShapeInfo(const sd::DataType dataType) { auto descriptor = ShapeDescriptor::emptyDescriptor(dataType); - return bufferForShapeInfo(descriptor).primaryAsT(); + return bufferForShapeInfo(descriptor).primary(); } Nd4jLong const* ConstantShapeHelper::scalarShapeInfo(const sd::DataType dataType) { auto descriptor = ShapeDescriptor::scalarDescriptor(dataType); - return bufferForShapeInfo(descriptor).primaryAsT(); + return bufferForShapeInfo(descriptor).primary(); } Nd4jLong const* ConstantShapeHelper::vectorShapeInfo(const Nd4jLong length, const sd::DataType dataType) { auto descriptor = ShapeDescriptor::vectorDescriptor(length, dataType); - return bufferForShapeInfo(descriptor).primaryAsT(); + return bufferForShapeInfo(descriptor).primary(); } Nd4jLong const* ConstantShapeHelper::createShapeInfo(const sd::DataType dataType, const char order, const std::vector &shape) { ShapeDescriptor descriptor(dataType, order, shape); - return bufferForShapeInfo(descriptor).primaryAsT(); + return bufferForShapeInfo(descriptor).primary(); } Nd4jLong const* ConstantShapeHelper::createShapeInfo(const ShapeDescriptor &descriptor) { - return bufferForShapeInfo(descriptor).primaryAsT(); + return bufferForShapeInfo(descriptor).primary(); } Nd4jLong const* ConstantShapeHelper::createFromExisting(Nd4jLong *shapeInfo, bool destroyOriginal) { @@ -136,7 +136,7 @@ namespace sd { } //////////////////////////////////////////////////////////////////////// -ConstantDataBuffer ConstantShapeHelper::createShapeInfoWithUnitiesForBroadcast(const Nd4jLong* maxShapeInfo, const Nd4jLong* minShapeInfo, sd::memory::Workspace* workspace, const std::vector& dimensions) { +ConstantShapeBuffer& ConstantShapeHelper::createShapeInfoWithUnitiesForBroadcast(const Nd4jLong* maxShapeInfo, const Nd4jLong* minShapeInfo, sd::memory::Workspace* workspace, const std::vector& dimensions) { Nd4jLong* newShapeInfo = nullptr; ALLOCATE(newShapeInfo, workspace, shape::shapeInfoLength(shape::rank(maxShapeInfo)), Nd4jLong); @@ -187,7 +187,4 @@ ConstantDataBuffer ConstantShapeHelper::createShapeInfoWithUnitiesForBroadcast(c return bufferForShapeInfo(descriptor); } - -sd::ConstantShapeHelper* sd::ConstantShapeHelper::_INSTANCE = 0; - } \ No newline at end of file diff --git a/libnd4j/include/helpers/cuda/ConstantTadHelper.cu b/libnd4j/include/helpers/cuda/ConstantTadHelper.cu index 8463bab9c..662c99e7c 100644 --- a/libnd4j/include/helpers/cuda/ConstantTadHelper.cu +++ b/libnd4j/include/helpers/cuda/ConstantTadHelper.cu @@ -25,6 +25,8 @@ #include #include #include +#include +#include namespace sd { ConstantTadHelper::ConstantTadHelper() { @@ -36,11 +38,9 @@ namespace sd { } } - ConstantTadHelper* ConstantTadHelper::getInstance() { - if (!_INSTANCE) - _INSTANCE = new ConstantTadHelper(); - - return _INSTANCE; 
+ ConstantTadHelper& ConstantTadHelper::getInstance() { + static ConstantTadHelper instance; + return instance; } TadPack ConstantTadHelper::tadForDimensions(const Nd4jLong *originalShape, int dimension, const bool keepUnitiesInShape) { @@ -73,25 +73,28 @@ namespace sd { const Nd4jLong numOfSubArrs = ShapeUtils::getNumOfSubArrs(shapeInfo, dimsToExclude); const int subArrRank = (rank == dimsToExclude.size() || descriptor.areUnitiesinShape()) ? rank : rank - dimsToExclude.size(); - auto sPtr = new Nd4jLong[shape::shapeInfoLength(subArrRank)]; - auto oPtr = new Nd4jLong[numOfSubArrs]; + auto sPtr = std::make_shared(new Nd4jLong[shape::shapeInfoLength(subArrRank)], std::make_shared()); + auto oPtr = std::make_shared(new Nd4jLong[numOfSubArrs], std::make_shared()); if (numOfSubArrs > 0) - shape::calcSubArrsShapeInfoAndOffsets(shapeInfo, numOfSubArrs, dimsToExclude.size(), dimsToExclude.data(), sPtr, oPtr, descriptor.areUnitiesinShape()); + shape::calcSubArrsShapeInfoAndOffsets(shapeInfo, numOfSubArrs, dimsToExclude.size(), dimsToExclude.data(), sPtr->pointerAsT(), oPtr->pointerAsT(), descriptor.areUnitiesinShape()); Nd4jPointer soPtr; auto res = cudaMalloc(reinterpret_cast(&soPtr), numOfSubArrs * sizeof(Nd4jLong)); if (res != 0) throw cuda_exception::build("Memory allocation for tadOffsets failed", res); - res = cudaMemcpy(soPtr, oPtr, numOfSubArrs * sizeof(Nd4jLong), cudaMemcpyHostToDevice); + res = cudaMemcpy(soPtr, oPtr->pointer(), numOfSubArrs * sizeof(Nd4jLong), cudaMemcpyHostToDevice); if (res != 0) throw cuda_exception::build("tadOffsets copy failed", res); - auto ssPtr = ConstantHelper::getInstance()->replicatePointer(sPtr, shape::shapeInfoByteLength(subArrRank)); + // TODO: add deallocator here? + auto ssPtr = std::make_shared(ConstantHelper::getInstance().replicatePointer(sPtr->pointer(), shape::shapeInfoByteLength(subArrRank))); - ConstantDataBuffer shapesBuffer(sPtr, ssPtr, shape::shapeInfoLength(subArrRank) * sizeof(Nd4jLong), DataType::INT64); - ConstantDataBuffer offsetsBuffer(oPtr, soPtr, numOfSubArrs * sizeof(Nd4jLong), DataType::INT64); + + + ConstantShapeBuffer shapesBuffer(sPtr, ssPtr); + ConstantOffsetsBuffer offsetsBuffer(oPtr, std::make_shared(soPtr, std::make_shared())); TadPack t(shapesBuffer, offsetsBuffer, numOfSubArrs); _cache[deviceId][descriptor] = t; @@ -107,6 +110,4 @@ namespace sd { return r; } } - - sd::ConstantTadHelper* sd::ConstantTadHelper::_INSTANCE = 0; } \ No newline at end of file diff --git a/libnd4j/include/helpers/cuda_off/MmulHelper.cu b/libnd4j/include/helpers/cuda_off/MmulHelper.cu index 0a3b466bc..d1122d794 100644 --- a/libnd4j/include/helpers/cuda_off/MmulHelper.cu +++ b/libnd4j/include/helpers/cuda_off/MmulHelper.cu @@ -238,7 +238,7 @@ NDArray* MmulHelper::mmulMxM(const NDArray* A, const NDArray* B, NDArray* C, dou if (C->isEmpty()) return C; - const int major = Environment::getInstance()->capabilities()[AffinityManager::currentDeviceId()].first(); + const int major = Environment::getInstance().capabilities()[AffinityManager::currentDeviceId()].first(); const auto aType = A->dataType(); const auto bType = B->dataType(); @@ -268,7 +268,7 @@ NDArray* MmulHelper::mmulMxM(const NDArray* A, const NDArray* B, NDArray* C, dou const int sharedMem = threadsPerBlock * sizeof(int) * 6 + 128; // 6 = aRank + bRank + cRank NDArray::prepareSpecialUse({C}, {A, B}); - // BUILD_TRIPLE_SELECTOR(aType, bType, cType, usualGemm, (blocksPerGrid, threadsPerBlock, sharedMem, stream, A->specialBuffer(), A->specialShapeInfo(), B->specialBuffer(), B->specialShapeInfo(), 
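The device path of tadForDimensions above allocates the offsets array with cudaMalloc, copies the host offsets over, and turns failures into exceptions. The same sequence in isolation, with std::runtime_error standing in for sd::cuda_exception:

#include <cuda_runtime.h>
#include <stdexcept>
#include <string>
#include <vector>

void* uploadOffsets(const std::vector<long long>& hostOffsets) {
    void* devPtr = nullptr;
    const size_t bytes = hostOffsets.size() * sizeof(long long);
    cudaError_t res = cudaMalloc(&devPtr, bytes);
    if (res != cudaSuccess)
        throw std::runtime_error("Memory allocation for tadOffsets failed: " + std::to_string(static_cast<int>(res)));
    res = cudaMemcpy(devPtr, hostOffsets.data(), bytes, cudaMemcpyHostToDevice);
    if (res != cudaSuccess) {
        cudaFree(devPtr);  // avoid leaking the allocation when only the copy fails
        throw std::runtime_error("tadOffsets copy failed: " + std::to_string(static_cast<int>(res)));
    }
    return devPtr;
}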
C->specialBuffer(), C->specialShapeInfo(), 0, 1, 0, 1, 0, 1, alpha, beta), NUMERIC_TYPES, NUMERIC_TYPES, FLOAT_TYPES); + // BUILD_TRIPLE_SELECTOR(aType, bType, cType, usualGemm, (blocksPerGrid, threadsPerBlock, sharedMem, stream, A->specialBuffer(), A->specialShapeInfo(), B->specialBuffer(), B->specialShapeInfo(), C->specialBuffer(), C->special(), 0, 1, 0, 1, 0, 1, alpha, beta), NUMERIC_TYPES, NUMERIC_TYPES, FLOAT_TYPES); BUILD_SINGLE_SELECTOR_THRICE(aType, usualGemm, (blocksPerGrid, threadsPerBlock, sharedMem, stream, A->specialBuffer(), A->specialShapeInfo(), B->specialBuffer(), B->specialShapeInfo(), C->specialBuffer(), C->specialShapeInfo(), 0, 1, 0, 1, 0, 1, alpha, beta), NUMERIC_TYPES) NDArray::registerSpecialUse({C}, {A, B}); @@ -411,7 +411,7 @@ NDArray* MmulHelper::mmulMxV(const NDArray* A, const NDArray* X, sd::NDArray* Y, const int blocksPerGrid = (M + threadsPerBlock - 1) / threadsPerBlock; NDArray::prepareSpecialUse({Y}, {A, X}); - // BUILD_TRIPLE_SELECTOR(aType, xType, yType, usualGemv, (blocksPerGrid, threadsPerBlock, stream, A->specialBuffer(), A->specialShapeInfo(), X->specialBuffer(), X->specialShapeInfo(), Y->specialBuffer(), Y->specialShapeInfo(), incx, incy, 0, alpha, beta), NUMERIC_TYPES, NUMERIC_TYPES, FLOAT_TYPES); + // BUILD_TRIPLE_SELECTOR(aType, xType, yType, usualGemv, (blocksPerGrid, threadsPerBlock, stream, A->specialBuffer(), A->specialShapeInfo(), X->specialBuffer(), X->specialShapeInfo(), Y->specialBuffer(), Y->special(), incx, incy, 0, alpha, beta), NUMERIC_TYPES, NUMERIC_TYPES, FLOAT_TYPES); BUILD_SINGLE_SELECTOR_THRICE(xType, usualGemv, (blocksPerGrid, threadsPerBlock, stream, A->specialBuffer(), A->specialShapeInfo(), X->specialBuffer(), X->specialShapeInfo(), Y->specialBuffer(), Y->specialShapeInfo(), incx, incy, 0, alpha, beta), NUMERIC_TYPES) NDArray::registerSpecialUse({Y}, {A, X}); @@ -667,7 +667,7 @@ NDArray* MmulHelper::mmulNxN(const NDArray* A, const NDArray* B, NDArray* C, con cBatchDims = reinterpret_cast(manager.replicatePointer(ShapeUtils::evalDimsToExclude(cRank, {cMaxis, cNaxis}).data(), (cRank - 2) * sizeof(int))); NDArray::prepareSpecialUse({C}, {A, B}); - // BUILD_TRIPLE_SELECTOR(A->dataType(), b->dataType(), C->dataType(), batchedGemm, (blocksPerGrid, threadsPerBlock, A->getContext()->getCudaStream(), A->specialBuffer(), A->specialShapeInfo(), B->specialBuffer(), B->specialShapeInfo(), C->specialBuffer(), C->specialShapeInfo(), aMaxis, aKaxis, bKaxis, bNaxis, cMaxis, cNaxis, alpha, beta), NUMERIC_TYPES, NUMERIC_TYPES, FLOAT_TYPES); + // BUILD_TRIPLE_SELECTOR(A->dataType(), b->dataType(), C->dataType(), batchedGemm, (blocksPerGrid, threadsPerBlock, A->getContext()->getCudaStream(), A->specialBuffer(), A->specialShapeInfo(), B->specialBuffer(), B->specialShapeInfo(), C->specialBuffer(), C->special(), aMaxis, aKaxis, bKaxis, bNaxis, cMaxis, cNaxis, alpha, beta), NUMERIC_TYPES, NUMERIC_TYPES, FLOAT_TYPES); BUILD_SINGLE_SELECTOR_THRICE(A->dataType(), batchedGemm, (blocksPerGrid, threadsPerBlock, sharedMem, A->getContext()->getCudaStream(), A->specialBuffer(), A->specialShapeInfo(), B->specialBuffer(), B->specialShapeInfo(), C->specialBuffer(), C->specialShapeInfo(), aBatchDims, bBatchDims, cBatchDims, aMaxis, aKaxis, bKaxis, bNaxis, cMaxis, cNaxis, alpha, beta), NUMERIC_TYPES) NDArray::registerSpecialUse({C}, {A, B}); diff --git a/libnd4j/include/helpers/cuda_off/cublasHelper.cu b/libnd4j/include/helpers/cuda_off/cublasHelper.cu index 7ab2d7d63..1773937ea 100644 --- a/libnd4j/include/helpers/cuda_off/cublasHelper.cu +++ 
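Every kernel launch in this file sizes its grid with the same ceil-division idiom, (M + threadsPerBlock - 1) / threadsPerBlock; a one-line helper states the intent:

// smallest number of blocks of threadsPerBlock threads that covers n items
inline int blocksFor(long long n, int threadsPerBlock) {
    return static_cast<int>((n + threadsPerBlock - 1) / threadsPerBlock);
}
// blocksFor(1000, 256) == 4: three full blocks plus one partially filled block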
b/libnd4j/include/helpers/cuda_off/cublasHelper.cu @@ -102,13 +102,9 @@ namespace sd { destroyHandle_(_cache[e]); } - CublasHelper* CublasHelper::getInstance() { - _mutex.lock(); - if (!_INSTANCE) - _INSTANCE = new sd::CublasHelper(); - _mutex.unlock(); - - return _INSTANCE; + CublasHelper& CublasHelper::getInstance() { + static CublasHelper instance; + return instance; } void* CublasHelper::cudnn() { @@ -138,7 +134,4 @@ namespace sd { return _cache[deviceId]; } - - - sd::CublasHelper* sd::CublasHelper::_INSTANCE = 0; } \ No newline at end of file diff --git a/libnd4j/include/helpers/helper_hash.h b/libnd4j/include/helpers/helper_hash.h index 1b032238f..fa44b04b7 100644 --- a/libnd4j/include/helpers/helper_hash.h +++ b/libnd4j/include/helpers/helper_hash.h @@ -31,8 +31,6 @@ namespace sd { namespace ops { class ND4J_EXPORT HashHelper { private: - static HashHelper* _INSTANCE; - Nd4jLong _byteTable[256]; const Nd4jLong HSTART = 0xBB40E64DA205B064L; const Nd4jLong HMULT = 7664345821815920749L; @@ -41,7 +39,7 @@ namespace sd { std::mutex _locker; public: - static HashHelper* getInstance(); + static HashHelper& getInstance(); Nd4jLong getLongHash(std::string& str); }; } diff --git a/libnd4j/include/helpers/impl/BlasHelper.cpp b/libnd4j/include/helpers/impl/BlasHelper.cpp index 378c8a6f1..70839fe2d 100644 --- a/libnd4j/include/helpers/impl/BlasHelper.cpp +++ b/libnd4j/include/helpers/impl/BlasHelper.cpp @@ -20,10 +20,9 @@ #include namespace sd { - BlasHelper* BlasHelper::getInstance() { - if (_instance == 0) - _instance = new BlasHelper(); - return _instance; + BlasHelper& BlasHelper::getInstance() { + static BlasHelper instance; + return instance; } @@ -74,7 +73,7 @@ namespace sd { template <> bool BlasHelper::hasGEMV() { - if (sd::Environment::getInstance()->blasFallback()) + if (sd::Environment::getInstance().blasFallback()) return false; #if defined(__EXTERNAL_BLAS__) || defined(HAVE_OPENBLAS) @@ -86,7 +85,7 @@ namespace sd { template <> bool BlasHelper::hasGEMV() { - if (sd::Environment::getInstance()->blasFallback()) + if (sd::Environment::getInstance().blasFallback()) return false; #if defined(__EXTERNAL_BLAS__) || defined(HAVE_OPENBLAS) @@ -138,7 +137,7 @@ namespace sd { bool BlasHelper::hasGEMV(const sd::DataType dtype) { if(dtype == DataType::FLOAT32) { - if (sd::Environment::getInstance()->blasFallback()) + if (sd::Environment::getInstance().blasFallback()) return false; #if defined(__EXTERNAL_BLAS__) || defined(HAVE_OPENBLAS) @@ -148,7 +147,7 @@ namespace sd { #endif } if(dtype == DataType::DOUBLE) { - if (sd::Environment::getInstance()->blasFallback()) + if (sd::Environment::getInstance().blasFallback()) return false; #if defined(__EXTERNAL_BLAS__) || defined(HAVE_OPENBLAS) @@ -162,7 +161,7 @@ namespace sd { template <> bool BlasHelper::hasGEMM() { - if (sd::Environment::getInstance()->blasFallback()) + if (sd::Environment::getInstance().blasFallback()) return false; #if defined(__EXTERNAL_BLAS__) || defined(HAVE_OPENBLAS) @@ -174,7 +173,7 @@ namespace sd { template <> bool BlasHelper::hasGEMM() { - if (sd::Environment::getInstance()->blasFallback()) + if (sd::Environment::getInstance().blasFallback()) return false; #if defined(__EXTERNAL_BLAS__) || defined(HAVE_OPENBLAS) @@ -226,7 +225,7 @@ namespace sd { bool BlasHelper:: hasGEMM(const sd::DataType dtype) { if(dtype == DataType::FLOAT32) { - if (sd::Environment::getInstance()->blasFallback()) + if (sd::Environment::getInstance().blasFallback()) return false; #if defined(__EXTERNAL_BLAS__) || defined(HAVE_OPENBLAS) @@ -236,7 +235,7 
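The deleted lock/unlock pair in CublasHelper::getInstance() is why this refactoring is safe: since C++11 the initialization of a function-local static is guaranteed to happen exactly once even under concurrent first calls, so the hand-rolled mutex (and the double-checked-locking pitfalls it invites) becomes unnecessary:

class LazyHelper {
public:
    static LazyHelper& getInstance() {
        // the compiler emits the one-time, thread-safe initialization that the
        // removed _mutex.lock()/unlock() pair used to approximate by hand
        static LazyHelper instance;
        return instance;
    }
private:
    LazyHelper() = default;
};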
@@ namespace sd { #endif } if(dtype == DataType::DOUBLE) { - if (sd::Environment::getInstance()->blasFallback()) + if (sd::Environment::getInstance().blasFallback()) return false; #if defined(__EXTERNAL_BLAS__) || defined(HAVE_OPENBLAS) @@ -251,7 +250,7 @@ namespace sd { template <> bool BlasHelper::hasBatchedGEMM() { - if (sd::Environment::getInstance()->blasFallback()) + if (sd::Environment::getInstance().blasFallback()) return false; return _hasSgemmBatch; @@ -259,7 +258,7 @@ namespace sd { template <> bool BlasHelper::hasBatchedGEMM() { - if (sd::Environment::getInstance()->blasFallback()) + if (sd::Environment::getInstance().blasFallback()) return false; return _hasDgemmBatch; @@ -362,6 +361,4 @@ namespace sd { // destructor BlasHelper::~BlasHelper() noexcept { } - - BlasHelper* BlasHelper::_instance = 0; } diff --git a/libnd4j/include/helpers/impl/OmpLaunchHelper.cpp b/libnd4j/include/helpers/impl/OmpLaunchHelper.cpp index 0e409a952..b0ef97457 100644 --- a/libnd4j/include/helpers/impl/OmpLaunchHelper.cpp +++ b/libnd4j/include/helpers/impl/OmpLaunchHelper.cpp @@ -32,7 +32,7 @@ namespace sd { //////////////////////////////////////////////////////////////////////////////// OmpLaunchHelper::OmpLaunchHelper(const Nd4jLong N, float desiredNumThreads) { - auto maxItersPerThread = Environment::getInstance()->elementwiseThreshold(); + auto maxItersPerThread = Environment::getInstance().elementwiseThreshold(); if(N < maxItersPerThread) _numThreads = 1; @@ -45,7 +45,7 @@ OmpLaunchHelper::OmpLaunchHelper(const Nd4jLong N, float desiredNumThreads) { else desiredNumThreads = sd::math::nd4j_min(omp_get_max_threads(), desiredNumThreads); #else - desiredNumThreads = sd::Environment::getInstance()->maxThreads(); + desiredNumThreads = sd::Environment::getInstance().maxThreads(); #endif _numThreads = sd::math::nd4j_min(N / maxItersPerThread, desiredNumThreads); } @@ -75,12 +75,12 @@ Nd4jLong OmpLaunchHelper::betterSpan(Nd4jLong N) { #ifdef _OPENMP return betterThreads(N, omp_get_max_threads()); #else - return betterThreads(N, sd::Environment::getInstance()->maxThreads());; + return betterThreads(N, sd::Environment::getInstance().maxThreads());; #endif } int OmpLaunchHelper::betterThreads(Nd4jLong N, int maxThreads) { - auto t = Environment::getInstance()->elementwiseThreshold(); + auto t = Environment::getInstance().elementwiseThreshold(); if (N < t) return 1; else { @@ -92,7 +92,7 @@ Nd4jLong OmpLaunchHelper::betterSpan(Nd4jLong N) { #ifdef _OPENMP auto maxThreads = omp_get_max_threads(); #else - auto maxThreads = sd::Environment::getInstance()->maxThreads(); + auto maxThreads = sd::Environment::getInstance().maxThreads(); #endif // if there's only 1 thread allowed - nothing to do here @@ -102,7 +102,7 @@ Nd4jLong OmpLaunchHelper::betterSpan(Nd4jLong N) { auto totalLength = tadLength * numTads; // if array is tiny - no need to spawn any threeds - if (totalLength < Environment::getInstance()->elementwiseThreshold()) + if (totalLength < Environment::getInstance().elementwiseThreshold()) return 1; // by default we're spawning as many threads we can, but not more than number of TADs diff --git a/libnd4j/include/helpers/impl/OpTracker.cpp b/libnd4j/include/helpers/impl/OpTracker.cpp index bb82ab0d1..e36d4ab5a 100644 --- a/libnd4j/include/helpers/impl/OpTracker.cpp +++ b/libnd4j/include/helpers/impl/OpTracker.cpp @@ -29,11 +29,9 @@ using namespace sd::graph; namespace sd { - OpTracker* OpTracker::getInstance() { - if (_INSTANCE == 0) - _INSTANCE = new OpTracker(); - - return _INSTANCE; + OpTracker& 
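The OmpLaunchHelper hunks above keep applying one heuristic: stay single-threaded until the workload clears the elementwise threshold, then grant roughly one thread per threshold-sized chunk, capped by the environment's limit. Extracted as a standalone function (the exact rounding in libnd4j may differ):

#include <algorithm>

int betterThreadsSketch(long long N, int maxThreads, long long elementwiseThreshold) {
    if (N < elementwiseThreshold)
        return 1;  // tiny arrays: not worth spawning threads
    const long long byWork = N / elementwiseThreshold;
    return static_cast<int>(std::min<long long>(byWork, maxThreads));
}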
OpTracker::getInstance() { + static OpTracker instance; + return instance; } void OpTracker::storeOperation(sd::graph::OpType opType, const OpDescriptor& descriptor) { @@ -118,6 +116,4 @@ namespace sd { return _export.c_str(); } - - sd::OpTracker* sd::OpTracker::_INSTANCE = 0; } diff --git a/libnd4j/include/helpers/impl/ShapeUtils.cpp b/libnd4j/include/helpers/impl/ShapeUtils.cpp index c327004bd..2c189cff1 100644 --- a/libnd4j/include/helpers/impl/ShapeUtils.cpp +++ b/libnd4j/include/helpers/impl/ShapeUtils.cpp @@ -130,7 +130,7 @@ std::vector ShapeUtils::evalShapeForTensorDot(const NDArray* a, cons Nd4jLong* outShapeInfo = ShapeBuilders::copyShapeInfoAndType(shapeInfo, dataType, true, workspace); ShapeDescriptor descriptor(outShapeInfo, dataType); RELEASE(outShapeInfo, workspace); - return ConstantShapeHelper::getInstance()->bufferForShapeInfo(descriptor).primaryAsT(); + return ConstantShapeHelper::getInstance().bufferForShapeInfo(descriptor).primary(); } const int rank = shape::rank(shapeInfo); @@ -168,7 +168,7 @@ std::vector ShapeUtils::evalShapeForTensorDot(const NDArray* a, cons ShapeDescriptor descriptor(outShapeInfo, dataType); RELEASE(outShapeInfo, workspace); - return ConstantShapeHelper::getInstance()->bufferForShapeInfo(descriptor).primaryAsT(); + return ConstantShapeHelper::getInstance().bufferForShapeInfo(descriptor).primary(); } const Nd4jLong* ShapeUtils::evalReduceShapeInfo(const char order, std::vector& dimsToExclude, const NDArray& arr, const bool keepDims, const bool supportOldShapes, sd::memory::Workspace* workspace) { @@ -207,20 +207,20 @@ std::vector ShapeUtils::evalShapeForTensorDot(const NDArray* a, cons ShapeDescriptor descriptor(newShapeInfo, dataType); RELEASE(newShapeInfo, workspace); - return ConstantShapeHelper::getInstance()->bufferForShapeInfo(descriptor).primaryAsT(); + return ConstantShapeHelper::getInstance().bufferForShapeInfo(descriptor).primary(); } else if(supportOldShapes) { ALLOCATE(newShapeInfo, workspace, shape::shapeInfoLength(2), Nd4jLong); shape::shapeOldScalar(dataType, newShapeInfo, 'c'); ShapeDescriptor descriptor(newShapeInfo, dataType); RELEASE(newShapeInfo, workspace); - return ConstantShapeHelper::getInstance()->bufferForShapeInfo(descriptor).primaryAsT(); + return ConstantShapeHelper::getInstance().bufferForShapeInfo(descriptor).primary(); } else { newShapeInfo = ShapeBuilders::createScalarShapeInfo(dataType, workspace); ShapeDescriptor descriptor(newShapeInfo, dataType); RELEASE(newShapeInfo, workspace); - return ConstantShapeHelper::getInstance()->bufferForShapeInfo(descriptor).primaryAsT(); + return ConstantShapeHelper::getInstance().bufferForShapeInfo(descriptor).primary(); } } @@ -241,7 +241,7 @@ std::vector ShapeUtils::evalShapeForTensorDot(const NDArray* a, cons ShapeUtils::updateStridesAndType(newShapeInfo, shapeInfo, order); ShapeDescriptor descriptor(newShapeInfo, dataType); RELEASE(newShapeInfo, workspace); - return ConstantShapeHelper::getInstance()->bufferForShapeInfo(descriptor).primaryAsT(); + return ConstantShapeHelper::getInstance().bufferForShapeInfo(descriptor).primary(); } int newRank = rank - dimSize; @@ -252,13 +252,13 @@ std::vector ShapeUtils::evalShapeForTensorDot(const NDArray* a, cons shape::shapeOldScalar(ArrayOptions::dataType(shapeInfo), newShapeInfo, 'c'); ShapeDescriptor descriptor(newShapeInfo, dataType); RELEASE(newShapeInfo, workspace); - return ConstantShapeHelper::getInstance()->bufferForShapeInfo(descriptor).primaryAsT(); + return 
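Each ShapeUtils hunk above follows the same lifetime discipline: build a transient shapeInfo (often in a workspace), register it with the constant-shape cache, RELEASE the transient copy, and return the cache-owned pointer. A toy intern table shows why the returned pointer outlives the local buffer (single-threaded illustration only):

#include <set>
#include <vector>

// stand-in for the ConstantShapeHelper cache: one stable copy per distinct shape
const std::vector<long long>& internShape(const std::vector<long long>& s) {
    static std::set<std::vector<long long>> table;  // no locking: sketch only
    return *table.insert(s).first;  // set nodes never move, references stay valid
}

const long long* makeReducedShape(std::vector<long long> tmpShapeInfo) {
    const auto& cached = internShape(tmpShapeInfo);
    // tmpShapeInfo (the RELEASE'd workspace buffer in the real code) may now die;
    // the pointer below is owned by the cache for the rest of the process
    return cached.data();
}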
ConstantShapeHelper::getInstance().bufferForShapeInfo(descriptor).primary(); } else { newShapeInfo = ShapeBuilders::createScalarShapeInfo(ArrayOptions::dataType(shapeInfo), workspace); ShapeDescriptor descriptor(newShapeInfo, dataType); RELEASE(newShapeInfo, workspace); - return ConstantShapeHelper::getInstance()->bufferForShapeInfo(descriptor).primaryAsT(); + return ConstantShapeHelper::getInstance().bufferForShapeInfo(descriptor).primary(); } } @@ -289,7 +289,7 @@ std::vector ShapeUtils::evalShapeForTensorDot(const NDArray* a, cons ShapeDescriptor descriptor(newShapeInfo, dataType); RELEASE(newShapeInfo, workspace); - return ConstantShapeHelper::getInstance()->bufferForShapeInfo(descriptor).primaryAsT(); + return ConstantShapeHelper::getInstance().bufferForShapeInfo(descriptor).primary(); } ////////////////////////////////////////////////////////////////////////// @@ -341,7 +341,7 @@ std::vector ShapeUtils::evalRepeatShape(int axis, const std::vectorbufferForShapeInfo(descriptor).primaryAsT(); + return ConstantShapeHelper::getInstance().bufferForShapeInfo(descriptor).primary(); } ////////////////////////////////////////////////////////////////////////// @@ -486,7 +486,7 @@ bool ShapeUtils::areShapesBroadcastable(const Nd4jLong *shapeInfo1, const Nd4jLo ShapeDescriptor descriptor(tmpShapeInfo); RELEASE(tmpShapeInfo, workspace); - resultShapeInfo = ConstantShapeHelper::getInstance()->bufferForShapeInfo(descriptor).primaryAsT(); + resultShapeInfo = ConstantShapeHelper::getInstance().bufferForShapeInfo(descriptor).primary(); return true; } @@ -525,7 +525,7 @@ bool ShapeUtils::areShapesBroadcastable(const Nd4jLong *shapeInfo1, const Nd4jLo ShapeDescriptor descriptor(tmpShapeInfo); RELEASE(tmpShapeInfo, workspace); - resultShapeInfo = const_cast(ConstantShapeHelper::getInstance()->createShapeInfo(descriptor)); + resultShapeInfo = const_cast(ConstantShapeHelper::getInstance().createShapeInfo(descriptor)); return true; } @@ -594,7 +594,7 @@ bool ShapeUtils::areShapesBroadcastable(const Nd4jLong *shapeInfo1, const Nd4jLo ShapeDescriptor descriptor(newShapeInfo); RELEASE(newShapeInfo, workspace); - return ConstantShapeHelper::getInstance()->bufferForShapeInfo(descriptor).primaryAsT(); + return ConstantShapeHelper::getInstance().bufferForShapeInfo(descriptor).primary(); } std::vector ShapeUtils::pullShapeFromShapeInfo(const Nd4jLong *shapeInfo) { @@ -745,7 +745,7 @@ std::vector ShapeUtils::shapeAsVector(const Nd4jLong* shapeInfo) { ShapeUtils::updateStridesAndType(outputShapeInfo, shapeInfo, shape::order(shapeInfo)); - auto result = ConstantShapeHelper::getInstance()->createShapeInfo(outputShapeInfo); + auto result = ConstantShapeHelper::getInstance().createShapeInfo(outputShapeInfo); RELEASE(outputShapeInfo, workspace); return result; } @@ -832,7 +832,7 @@ std::vector ShapeUtils::evalBroadcastBackwardAxis(const Nd4jLong *operandSh shape[1] = 1; } - auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(dtype, 'f', 2, shape); + auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(dtype, 'f', 2, shape); RELEASE(shape, workspace); diff --git a/libnd4j/include/helpers/impl/helper_hash.cpp b/libnd4j/include/helpers/impl/helper_hash.cpp index b12acb273..4fde919cd 100644 --- a/libnd4j/include/helpers/impl/helper_hash.cpp +++ b/libnd4j/include/helpers/impl/helper_hash.cpp @@ -24,11 +24,9 @@ namespace sd { namespace ops { - HashHelper* HashHelper::getInstance() { - if (_INSTANCE == 0) - _INSTANCE = new HashHelper(); - - return _INSTANCE; + HashHelper& HashHelper::getInstance() { 
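areShapesBroadcastable and the broadcast-shape evaluators above implement the usual rule: compare extents from the trailing axis, and each pair must match or contain a 1. Stated in isolation (not necessarily libnd4j's exact edge-case handling for empty or scalar shapes):

#include <vector>

bool broadcastableSketch(const std::vector<long long>& a, const std::vector<long long>& b) {
    auto ia = a.rbegin(), ib = b.rbegin();
    for (; ia != a.rend() && ib != b.rend(); ++ia, ++ib)
        if (*ia != *ib && *ia != 1 && *ib != 1)
            return false;  // e.g. {2,3} vs {4,3} fails on the leading axis
    return true;           // the shorter shape is implicitly padded with 1s
}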
+ static HashHelper instance; + return instance; } Nd4jLong HashHelper::getLongHash(std::string& str) { @@ -64,8 +62,6 @@ namespace sd { return h; } - - sd::ops::HashHelper* sd::ops::HashHelper::_INSTANCE = 0; } } diff --git a/libnd4j/include/helpers/logger.h b/libnd4j/include/helpers/logger.h index c13785ff7..b7ed88c1d 100644 --- a/libnd4j/include/helpers/logger.h +++ b/libnd4j/include/helpers/logger.h @@ -32,9 +32,9 @@ #ifndef __CUDA_ARCH__ -#define nd4j_debug(FORMAT, ...) if (sd::Environment::getInstance()->isDebug() && sd::Environment::getInstance()->isVerbose()) sd::Logger::info(FORMAT, __VA_ARGS__); -#define nd4j_logger(FORMAT, ...) if (sd::Environment::getInstance()->isDebug() && sd::Environment::getInstance()->isVerbose()) sd::Logger::info(FORMAT, __VA_ARGS__); -#define nd4j_verbose(FORMAT, ...) if (sd::Environment::getInstance()->isVerbose()) sd::Logger::info(FORMAT, __VA_ARGS__); +#define nd4j_debug(FORMAT, ...) if (sd::Environment::getInstance().isDebug() && sd::Environment::getInstance().isVerbose()) sd::Logger::info(FORMAT, __VA_ARGS__); +#define nd4j_logger(FORMAT, ...) if (sd::Environment::getInstance().isDebug() && sd::Environment::getInstance().isVerbose()) sd::Logger::info(FORMAT, __VA_ARGS__); +#define nd4j_verbose(FORMAT, ...) if (sd::Environment::getInstance().isVerbose()) sd::Logger::info(FORMAT, __VA_ARGS__); #define nd4j_printf(FORMAT, ...) sd::Logger::info(FORMAT, __VA_ARGS__); #define nd4j_printv(FORMAT, VECTOR) sd::Logger::printv(FORMAT, VECTOR); diff --git a/libnd4j/include/helpers/shape.h b/libnd4j/include/helpers/shape.h index 65cf29b66..719b086cb 100644 --- a/libnd4j/include/helpers/shape.h +++ b/libnd4j/include/helpers/shape.h @@ -384,9 +384,9 @@ namespace shape { * @param rank the rank of the shape */ - ND4J_EXPORT _CUDA_HD int isMatrix(Nd4jLong *shape, int rank); + ND4J_EXPORT _CUDA_HD int isMatrix(const Nd4jLong *shape, int rank); - INLINEDEF _CUDA_HD int isMatrix(Nd4jLong *shapeInfo); + INLINEDEF _CUDA_HD int isMatrix(const Nd4jLong *shapeInfo); /** * Returns the shape portion of an information * buffer @@ -2346,7 +2346,7 @@ INLINEDEF _CUDA_HD int numOfNonUnitDims(const int rank, const Nd4jLong* inShape) * @param shape the shape of the array * @param rank the rank of the shape */ - INLINEDEF _CUDA_HD int isMatrix(Nd4jLong *shape, int rank) { + INLINEDEF _CUDA_HD int isMatrix(const Nd4jLong *shape, int rank) { if (rank > 2) return 0; else if (rank <= 2) { @@ -2357,7 +2357,7 @@ INLINEDEF _CUDA_HD int numOfNonUnitDims(const int rank, const Nd4jLong* inShape) return 1; } - INLINEDEF _CUDA_HD int isMatrix(Nd4jLong *shapeInfo) { + INLINEDEF _CUDA_HD int isMatrix(const Nd4jLong *shapeInfo) { return isMatrix(shape::shapeOf(shapeInfo),shape::rank(shapeInfo)); } diff --git a/libnd4j/include/legacy/NativeOps.h b/libnd4j/include/legacy/NativeOps.h index c72b0d535..29c629b5a 100755 --- a/libnd4j/include/legacy/NativeOps.h +++ b/libnd4j/include/legacy/NativeOps.h @@ -1567,8 +1567,9 @@ ND4J_EXPORT void inspectArray(Nd4jPointer *extraPointers, Nd4jPointer buffer, Nd typedef sd::ConstantDataBuffer OpaqueConstantDataBuffer; +typedef sd::ConstantShapeBuffer OpaqueConstantShapeBuffer; -ND4J_EXPORT OpaqueConstantDataBuffer* shapeBuffer(int rank, Nd4jLong *shape, Nd4jLong *strides, sd::DataType dtype, char order, Nd4jLong ews, bool empty); +ND4J_EXPORT OpaqueConstantShapeBuffer* shapeBuffer(int rank, Nd4jLong *shape, Nd4jLong *strides, sd::DataType dtype, char order, Nd4jLong ews, bool empty); ND4J_EXPORT OpaqueConstantDataBuffer* constantBufferLong(sd::DataType dtype, 
Nd4jLong const* data, int length); ND4J_EXPORT OpaqueConstantDataBuffer* constantBufferDouble(sd::DataType dtype, double *data, int length); @@ -1577,9 +1578,12 @@ ND4J_EXPORT OpaqueConstantDataBuffer* constantBuffer(sd::DataType dtype, sd::Con ND4J_EXPORT Nd4jPointer getConstantDataBufferPrimary(OpaqueConstantDataBuffer* dbf); ND4J_EXPORT Nd4jPointer getConstantDataBufferSpecial(OpaqueConstantDataBuffer* dbf); ND4J_EXPORT Nd4jLong getConstantDataBufferLength(OpaqueConstantDataBuffer* dbf); -ND4J_EXPORT Nd4jLong getConstantDataBufferSizeOf(OpaqueConstantDataBuffer* dbf); -ND4J_EXPORT void deleteShapeBuffer(OpaqueConstantDataBuffer* ptr); +ND4J_EXPORT Nd4jPointer getConstantShapeBufferPrimary(OpaqueConstantShapeBuffer* dbf); +ND4J_EXPORT Nd4jPointer getConstantShapeBufferSpecial(OpaqueConstantShapeBuffer* dbf); + +ND4J_EXPORT void deleteConstantShapeBuffer(OpaqueConstantShapeBuffer* ptr); +ND4J_EXPORT void deleteConstantDataBuffer(OpaqueConstantDataBuffer* ptr); typedef sd::graph::Context OpaqueContext; typedef sd::graph::RandomGenerator OpaqueRandomGenerator; diff --git a/libnd4j/include/legacy/cpu/NativeOpExecutioner.cpp b/libnd4j/include/legacy/cpu/NativeOpExecutioner.cpp index ad75922e4..6b6c51a13 100644 --- a/libnd4j/include/legacy/cpu/NativeOpExecutioner.cpp +++ b/libnd4j/include/legacy/cpu/NativeOpExecutioner.cpp @@ -245,7 +245,7 @@ void NativeOpExecutioner::execInverseBroadcast(sd::LaunchContext *lc, if (shape::isEmpty(hXShapeInfo) || shape::isEmpty(hYShapeInfo)) return; - if (!sd::Environment::getInstance()->isExperimentalBuild()) + if (!sd::Environment::getInstance().isExperimentalBuild()) if ((yType != xType && yType != sd::DataType::BOOL) || xType != zType) throw sd::datatype_exception::build("NativeOps::execBroadcast both operands must have same data type", xType, yType); @@ -338,7 +338,7 @@ void NativeOpExecutioner::execInverseBroadcastBool(sd::LaunchContext *lc, if (shape::isEmpty(hXShapeInfo) || shape::isEmpty(hYShapeInfo)) return; - if (!sd::Environment::getInstance()->isExperimentalBuild()) + if (!sd::Environment::getInstance().isExperimentalBuild()) if (yType != xType || sd::DataType::BOOL != zType) throw sd::datatype_exception::build("NativeOps::execInverseBroadcastBool both operands must have same data type", xType, yType); @@ -496,7 +496,7 @@ void NativeOpExecutioner::execPairwiseTransform(sd::LaunchContext *lc, }; auto zLen = shape::length(hZShapeInfo); - samediff::Threads::parallel_for(func, 0, zLen, 1, sd::math::nd4j_max(1, sd::math::nd4j_min(zLen / 1024, sd::Environment::getInstance()->maxMasterThreads()))); + samediff::Threads::parallel_for(func, 0, zLen, 1, sd::math::nd4j_max(1, sd::math::nd4j_min(zLen / 1024, sd::Environment::getInstance().maxMasterThreads()))); #endif } @@ -531,7 +531,7 @@ void NativeOpExecutioner::execPairwiseBoolTransform(sd::LaunchContext *lc, }; auto zLen = shape::length(hZShapeInfo); - samediff::Threads::parallel_for(func, 0, zLen, 1, sd::math::nd4j_max(1, sd::math::nd4j_min(zLen / 1024, sd::Environment::getInstance()->maxMasterThreads()))); + samediff::Threads::parallel_for(func, 0, zLen, 1, sd::math::nd4j_max(1, sd::math::nd4j_min(zLen / 1024, sd::Environment::getInstance().maxMasterThreads()))); } @@ -564,7 +564,7 @@ void NativeOpExecutioner::execPairwiseIntTransform(sd::LaunchContext *lc, }; auto zLen = shape::length(hZShapeInfo); - samediff::Threads::parallel_for(func, 0, zLen, 1, sd::math::nd4j_max(1, sd::math::nd4j_min(zLen / 1024, sd::Environment::getInstance()->maxMasterThreads()))); + samediff::Threads::parallel_for(func, 0, 
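getConstantShapeBufferPrimary/Special and the delete functions above extend the library's existing C-ABI convention: callers (the Java bindings) hold opaque pointers, and every accessor or deleter crosses the boundary as a plain exported function. The pattern, minimized with stand-in names:

// public header side: callers only ever see an incomplete type
struct OpaqueBuf;
extern "C" void* getBufPrimary(OpaqueBuf* h);
extern "C" void* getBufSpecial(OpaqueBuf* h);
extern "C" void  deleteBuf(OpaqueBuf* h);

// implementation side: the full definition stays private to the library
struct OpaqueBuf { void* primary; void* special; };
extern "C" void* getBufPrimary(OpaqueBuf* h) { return h->primary; }
extern "C" void* getBufSpecial(OpaqueBuf* h) { return h->special; }
extern "C" void  deleteBuf(OpaqueBuf* h)     { delete h; }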
zLen, 1, sd::math::nd4j_max(1, sd::math::nd4j_min(zLen / 1024, sd::Environment::getInstance().maxMasterThreads()))); } @@ -603,7 +603,7 @@ void NativeOpExecutioner::execReduceFloat(sd::LaunchContext *lc, const sd::LoopKind::Kind kindOfLoop = sd::LoopKind::deduceKindOfLoopTadXZ(hXShapeInfo, hZShapeInfo, tadShapeInfo); - samediff::Threads::parallel_tad(func, 0, shape::length(hZShapeInfo), 1, kindOfLoop == sd::LoopKind::Kind::SMALLARR2DX ? 1 : sd::Environment::getInstance()->maxMasterThreads()); + samediff::Threads::parallel_tad(func, 0, shape::length(hZShapeInfo), 1, kindOfLoop == sd::LoopKind::Kind::SMALLARR2DX ? 1 : sd::Environment::getInstance().maxMasterThreads()); } //////////////////////////////////////////////////////////////////////// @@ -631,7 +631,7 @@ void NativeOpExecutioner::execReduceSame(sd::LaunchContext *lc, const sd::LoopKind::Kind kindOfLoop = sd::LoopKind::deduceKindOfLoopTadXZ(hXShapeInfo, hZShapeInfo, tadShapeInfo); - samediff::Threads::parallel_tad(func, 0, shape::length(hZShapeInfo), 1, kindOfLoop == sd::LoopKind::Kind::SMALLARR2DX ? 1 : sd::Environment::getInstance()->maxMasterThreads()); + samediff::Threads::parallel_tad(func, 0, shape::length(hZShapeInfo), 1, kindOfLoop == sd::LoopKind::Kind::SMALLARR2DX ? 1 : sd::Environment::getInstance().maxMasterThreads()); } //////////////////////////////////////////////////////////////////////// @@ -659,7 +659,7 @@ void NativeOpExecutioner::execReduceBool(sd::LaunchContext *lc, const sd::LoopKind::Kind kindOfLoop = sd::LoopKind::deduceKindOfLoopTadXZ(hXShapeInfo, hZShapeInfo, tadShapeInfo); - samediff::Threads::parallel_tad(func, 0, shape::length(hZShapeInfo), 1, kindOfLoop == sd::LoopKind::Kind::SMALLARR2DX ? 1 : sd::Environment::getInstance()->maxMasterThreads()); + samediff::Threads::parallel_tad(func, 0, shape::length(hZShapeInfo), 1, kindOfLoop == sd::LoopKind::Kind::SMALLARR2DX ? 1 : sd::Environment::getInstance().maxMasterThreads()); } //////////////////////////////////////////////////////////////////////// @@ -687,7 +687,7 @@ void NativeOpExecutioner::execReduceLong(sd::LaunchContext *lc, const sd::LoopKind::Kind kindOfLoop = sd::LoopKind::deduceKindOfLoopTadXZ(hXShapeInfo, hZShapeInfo, tadShapeInfo); - samediff::Threads::parallel_tad(func, 0, shape::length(hZShapeInfo), 1, kindOfLoop == sd::LoopKind::Kind::SMALLARR2DX ? 1 : sd::Environment::getInstance()->maxMasterThreads()); + samediff::Threads::parallel_tad(func, 0, shape::length(hZShapeInfo), 1, kindOfLoop == sd::LoopKind::Kind::SMALLARR2DX ? 
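The pairwise and scalar launches in this file all cap parallelism the same way: roughly one thread per 1024 output elements, never fewer than one, never more than the configured master-thread limit. As a standalone helper:

#include <algorithm>

int threadsForLength(long long zLen, int maxMasterThreads) {
    const long long byWork = std::min(zLen / 1024, static_cast<long long>(maxMasterThreads));
    return static_cast<int>(std::max(1LL, byWork));
}
// threadsForLength(500, 8) == 1; threadsForLength(100000, 8) == 8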
1 : sd::Environment::getInstance().maxMasterThreads()); } //////////////////////////////////////////////////////////////////////// @@ -844,13 +844,13 @@ void NativeOpExecutioner::execReduce3(sd::LaunchContext *lc, sd::TadPack tadPack; if(xLen == yLen) { - tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, dimensionLength); + tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(hXShapeInfo, dimension, dimensionLength); } else if(yLen > xLen) { - tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(hYShapeInfo, dimension, dimensionLength); + tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(hYShapeInfo, dimension, dimensionLength); } else { - tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, dimensionLength); + tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(hXShapeInfo, dimension, dimensionLength); } auto func = PRAGMA_THREADS_FOR { @@ -878,7 +878,7 @@ void NativeOpExecutioner::execReduce3All(sd::LaunchContext *lc, auto xType = sd::ArrayOptions::dataType(hXShapeInfo); auto zType = sd::ArrayOptions::dataType(hZShapeInfo); - auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, dimensionLength); + auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(hXShapeInfo, dimension, dimensionLength); // TODO: make it 2d auto func = PRAGMA_THREADS_FOR { @@ -911,13 +911,13 @@ void NativeOpExecutioner::execReduce3TAD(sd::LaunchContext *lc, sd::TadPack tadPack; if(xLen == yLen) { - tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, dimensionLength); + tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(hXShapeInfo, dimension, dimensionLength); } else if(yLen > xLen) { - tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(hYShapeInfo, dimension, dimensionLength); + tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(hYShapeInfo, dimension, dimensionLength); } else { - tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, dimensionLength); + tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(hXShapeInfo, dimension, dimensionLength); } auto func = PRAGMA_THREADS_FOR { @@ -969,7 +969,7 @@ void NativeOpExecutioner::execScalar(sd::LaunchContext *lc, }; auto zLen = shape::length(hZShapeInfo); - samediff::Threads::parallel_for(func, 0, zLen, 1, !allowParallelism ? 1 : sd::math::nd4j_max(1, sd::math::nd4j_min(zLen / 1024, sd::Environment::getInstance()->maxMasterThreads()))); + samediff::Threads::parallel_for(func, 0, zLen, 1, !allowParallelism ? 1 : sd::math::nd4j_max(1, sd::math::nd4j_min(zLen / 1024, sd::Environment::getInstance().maxMasterThreads()))); #endif } @@ -1006,7 +1006,7 @@ void NativeOpExecutioner::execScalar(sd::LaunchContext *lc, }; auto yLen = shape::length(hScalarShapeInfo); - samediff::Threads::parallel_tad(func, 0, yLen, 1, sd::math::nd4j_min(yLen, sd::Environment::getInstance()->maxMasterThreads())); + samediff::Threads::parallel_tad(func, 0, yLen, 1, sd::math::nd4j_min(yLen, sd::Environment::getInstance().maxMasterThreads())); #endif } @@ -1041,7 +1041,7 @@ void NativeOpExecutioner::execScalarBool(sd::LaunchContext *lc, }; auto zLen = shape::length(hZShapeInfo); - samediff::Threads::parallel_for(func, 0, zLen, 1, !allowParallelism ? 
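execReduce3 and execReduce3TAD above choose which operand to split into TADs by comparing lengths, tiling over Y only when it is strictly longer. Reduced to the decision itself:

enum class TadSource { X, Y };

TadSource pickTadSource(long long xLen, long long yLen) {
    // equal lengths and a longer X both tile over X; only a strictly longer Y wins
    return (yLen > xLen) ? TadSource::Y : TadSource::X;
}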
1 : sd::math::nd4j_max(1, sd::math::nd4j_min(zLen / 1024, sd::Environment::getInstance()->maxMasterThreads()))); + samediff::Threads::parallel_for(func, 0, zLen, 1, !allowParallelism ? 1 : sd::math::nd4j_max(1, sd::math::nd4j_min(zLen / 1024, sd::Environment::getInstance().maxMasterThreads()))); } @@ -1077,7 +1077,7 @@ void NativeOpExecutioner::execScalarBool(sd::LaunchContext *lc, }; auto yLen = shape::length(hScalarShapeInfo); - samediff::Threads::parallel_tad(func, 0, yLen, 1, sd::math::nd4j_min(yLen, sd::Environment::getInstance()->maxMasterThreads())); + samediff::Threads::parallel_tad(func, 0, yLen, 1, sd::math::nd4j_min(yLen, sd::Environment::getInstance().maxMasterThreads())); } //////////////////////////////////////////////////////////////////////// @@ -1110,7 +1110,7 @@ void NativeOpExecutioner::execScalarInt(sd::LaunchContext *lc, }; auto zLen = shape::length(hZShapeInfo); - samediff::Threads::parallel_for(func, 0, zLen, 1, !allowParallelism ? 1 : sd::math::nd4j_max(1, sd::math::nd4j_min(zLen / 1024, sd::Environment::getInstance()->maxMasterThreads()))); + samediff::Threads::parallel_for(func, 0, zLen, 1, !allowParallelism ? 1 : sd::math::nd4j_max(1, sd::math::nd4j_min(zLen / 1024, sd::Environment::getInstance().maxMasterThreads()))); } @@ -1146,7 +1146,7 @@ void NativeOpExecutioner::execScalarInt(sd::LaunchContext *lc, }; auto yLen = shape::length(hScalarShapeInfo); - samediff::Threads::parallel_tad(func, 0, yLen, 1, sd::math::nd4j_min(yLen, sd::Environment::getInstance()->maxMasterThreads())); + samediff::Threads::parallel_tad(func, 0, yLen, 1, sd::math::nd4j_min(yLen, sd::Environment::getInstance().maxMasterThreads())); } //////////////////////////////////////////////////////////////////////// @@ -1259,7 +1259,7 @@ void NativeOpExecutioner::execTransformFloat(sd::LaunchContext *lc, BUILD_DOUBLE_SELECTOR(xType, zType, functions::transform::TransformFloat, ::exec(opNum, hX, hXShapeInfo, hZ, hZShapeInfo, extraParams, thread_id, numThreads), LIBND4J_TYPES, FLOAT_TYPES); }; - samediff::Threads::parallel_do(func, sd::math::nd4j_max(1, sd::math::nd4j_min(shape::length(hZShapeInfo) / 1024, sd::Environment::getInstance()->maxMasterThreads()))); + samediff::Threads::parallel_do(func, sd::math::nd4j_max(1, sd::math::nd4j_min(shape::length(hZShapeInfo) / 1024, sd::Environment::getInstance().maxMasterThreads()))); } //////////////////////////////////////////////////////////////////////// @@ -1281,7 +1281,7 @@ void NativeOpExecutioner::execTransformBool(sd::LaunchContext *lc, BUILD_DOUBLE_SELECTOR(xType, zType, functions::transform::TransformBool, ::exec(opNum, hX, hXShapeInfo, hZ, hZShapeInfo, extraParams, thread_id, numThreads), LIBND4J_TYPES, BOOL_TYPES); }; - samediff::Threads::parallel_do(func, sd::math::nd4j_max(1, sd::math::nd4j_min(shape::length(hZShapeInfo) / 1024, sd::Environment::getInstance()->maxMasterThreads()))); + samediff::Threads::parallel_do(func, sd::math::nd4j_max(1, sd::math::nd4j_min(shape::length(hZShapeInfo) / 1024, sd::Environment::getInstance().maxMasterThreads()))); } //////////////////////////////////////////////////////////////////////// @@ -1310,7 +1310,7 @@ void NativeOpExecutioner::execTransformAny(sd::LaunchContext *lc, BUILD_DOUBLE_SELECTOR(xType, zType, functions::transform::TransformAny, ::exec(opNum, hX, hXShapeInfo, hZ, hZShapeInfo, extraParams, thread_id, numThreads), LIBND4J_TYPES, LIBND4J_TYPES); }; - samediff::Threads::parallel_do(func, sd::math::nd4j_max(1, sd::math::nd4j_min(shape::length(hZShapeInfo) / 1024, 
sd::Environment::getInstance()->maxMasterThreads()))); + samediff::Threads::parallel_do(func, sd::math::nd4j_max(1, sd::math::nd4j_min(shape::length(hZShapeInfo) / 1024, sd::Environment::getInstance().maxMasterThreads()))); } } @@ -1333,7 +1333,7 @@ void NativeOpExecutioner::execTransformSame(sd::LaunchContext *lc, BUILD_SINGLE_SELECTOR(xType, functions::transform::TransformSame, ::exec(opNum, hX, hXShapeInfo, hZ, hZShapeInfo, extraParams, thread_id, numThreads), LIBND4J_TYPES); }; - samediff::Threads::parallel_do(func, sd::math::nd4j_max(1, sd::math::nd4j_min(shape::length(hZShapeInfo) / 1024, sd::Environment::getInstance()->maxMasterThreads()))); + samediff::Threads::parallel_do(func, sd::math::nd4j_max(1, sd::math::nd4j_min(shape::length(hZShapeInfo) / 1024, sd::Environment::getInstance().maxMasterThreads()))); } //////////////////////////////////////////////////////////////////////// @@ -1355,7 +1355,7 @@ void NativeOpExecutioner::execTransformStrict(sd::LaunchContext *lc, BUILD_SINGLE_SELECTOR(xType, functions::transform::TransformStrict, ::exec(opNum, hX, hXShapeInfo, hZ, hZShapeInfo, extraParams, thread_id, numThreads), FLOAT_TYPES); }; - samediff::Threads::parallel_do(func, sd::math::nd4j_max(1, sd::math::nd4j_min(shape::length(hZShapeInfo) / 1024, sd::Environment::getInstance()->maxMasterThreads()))); + samediff::Threads::parallel_do(func, sd::math::nd4j_max(1, sd::math::nd4j_min(shape::length(hZShapeInfo) / 1024, sd::Environment::getInstance().maxMasterThreads()))); } //////////////////////////////////////////////////////////////////////// diff --git a/libnd4j/include/legacy/cpu/NativeOps.cpp b/libnd4j/include/legacy/cpu/NativeOps.cpp index ae8a22a6a..f9e3f669c 100644 --- a/libnd4j/include/legacy/cpu/NativeOps.cpp +++ b/libnd4j/include/legacy/cpu/NativeOps.cpp @@ -85,12 +85,12 @@ using namespace sd; void setElementThreshold(int num) { if (num > 0) - sd::Environment::getInstance()->setElementwiseThreshold(num); + sd::Environment::getInstance().setElementwiseThreshold(num); } void setTADThreshold(int num) { if (num > 0) - sd::Environment::getInstance()->setTadThreshold(num); + sd::Environment::getInstance().setTadThreshold(num); } /** @@ -133,7 +133,7 @@ void execIndexReduce(Nd4jPointer *extraPointers,int opNum, auto dimension = reinterpret_cast(dbDimension->primary()); int dimensionLength = static_cast(shape::length(hDimensionShape)); - auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, + auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(hXShapeInfo, dimension, dimensionLength); auto hTADShapeInfo = tadPack.primaryShapeInfo(); @@ -184,8 +184,8 @@ void execBroadcast(Nd4jPointer *extraPointers, auto dimension = reinterpret_cast(dbDimension->primary()); auto dimensionLength = static_cast(shape::length(hDimensionShape)); - auto tadPackX = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, dimensionLength); - auto tadPackZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(hZShapeInfo, dimension, dimensionLength); + auto tadPackX = sd::ConstantTadHelper::getInstance().tadForDimensions(hXShapeInfo, dimension, dimensionLength); + auto tadPackZ = sd::ConstantTadHelper::getInstance().tadForDimensions(hZShapeInfo, dimension, dimensionLength); auto hTADShapeInfo = tadPackX.primaryShapeInfo(); auto hTADOffsets = tadPackX.primaryOffsets(); @@ -223,8 +223,8 @@ void execBroadcastBool(Nd4jPointer *extraPointers, auto dimension = reinterpret_cast(dbDimension->primary()); auto dimensionLength = 
static_cast<int>(shape::length(hDimensionShape));

-    auto tadPackX = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, dimensionLength);
-    auto tadPackZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(hZShapeInfo, dimension, dimensionLength);
+    auto tadPackX = sd::ConstantTadHelper::getInstance().tadForDimensions(hXShapeInfo, dimension, dimensionLength);
+    auto tadPackZ = sd::ConstantTadHelper::getInstance().tadForDimensions(hZShapeInfo, dimension, dimensionLength);

     auto hTADShapeInfo = tadPackX.primaryShapeInfo();
     auto hTADOffsets = tadPackX.primaryOffsets();
@@ -450,7 +450,7 @@ void execReduceFloat2(Nd4jPointer *extraPointers,
     auto dimension = reinterpret_cast<int *>(dbDimension->primary());
     auto dimensionLength = static_cast<int>(shape::length(hDimensionShape));

-    auto tadPackX = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, dimensionLength);
+    auto tadPackX = sd::ConstantTadHelper::getInstance().tadForDimensions(hXShapeInfo, dimension, dimensionLength);

     auto hTADShapeInfo = tadPackX.primaryShapeInfo();
     auto hTADOffsets = tadPackX.primaryOffsets();
@@ -485,7 +485,7 @@ void execReduceBool2(Nd4jPointer *extraPointers,
     auto dimension = reinterpret_cast<int *>(dbDimension->primary());
     auto dimensionLength = static_cast<int>(shape::length(hDimensionShape));

-    auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension,
+    auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(hXShapeInfo, dimension,
                                                                           dimensionLength);

     auto hTADShapeInfo = tadPack.primaryShapeInfo();
@@ -521,7 +521,7 @@ void execReduceSame2(Nd4jPointer *extraPointers,
     auto dimension = reinterpret_cast<int *>(dbDimension->primary());
     int dimensionLength = static_cast<int>(shape::length(hDimensionShape));

-    auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension,
+    auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(hXShapeInfo, dimension,
                                                                           dimensionLength);

     auto hTADShapeInfo = tadPack.primaryShapeInfo();
@@ -557,7 +557,7 @@ void execReduceLong2(Nd4jPointer *extraPointers,
     auto dimension = reinterpret_cast<int *>(dbDimension->primary());
     int dimensionLength = static_cast<int>(shape::length(hDimensionShape));

-    auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, dimensionLength);
+    auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(hXShapeInfo, dimension, dimensionLength);

     auto hTADShapeInfo = tadPack.primaryShapeInfo();
     auto hTADOffsets = tadPack.primaryOffsets();
@@ -663,7 +663,7 @@ void execReduce3Tad(Nd4jPointer *extraPointers,
                                        yTadOnlyShapeInfo, yTadOffsets);
     } else {
         // going tad-way
-        auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension,
+        auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(hXShapeInfo, dimension,
                                                                               dimensionLength);

         auto hTADShapeInfo = tadPack.primaryShapeInfo();
@@ -1060,7 +1060,7 @@ void initializeDevicesAndFunctions() {
 }

 void initializeFunctions(Nd4jPointer *functions) {
-    sd::BlasHelper::getInstance()->initializeFunctions(functions);
+    sd::BlasHelper::getInstance().initializeFunctions(functions);
 }

 /**
@@ -1208,11 +1208,11 @@ int getAvailableDevices() {
 }

 void enableDebugMode(bool reallyEnable) {
-    sd::Environment::getInstance()->setDebug(reallyEnable);
+    sd::Environment::getInstance().setDebug(reallyEnable);
 }

 void enableVerboseMode(bool reallyEnable) {
-    sd::Environment::getInstance()->setVerbose(reallyEnable);
+    sd::Environment::getInstance().setVerbose(reallyEnable);
 }
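
All of these hunks apply the same mechanical migration: singleton accessors such as sd::Environment::getInstance() now return a reference to a function-local static instead of a pointer to a lazily new-ed instance, so every call site switches from -> to . member access. A minimal sketch of the idiom, with a hypothetical Registry class standing in for the real singletons (assumes C++11 or later, where initialization of a function-local static is thread-safe):

// Minimal sketch of the accessor idiom adopted by this patch; 'Registry' is a
// hypothetical stand-in, not one of the real libnd4j singletons.
class Registry {
 public:
    static Registry& getInstance() {
        static Registry instance;   // constructed once, on first use; thread-safe since C++11
        return instance;
    }

    void setVerbose(bool value) { _verbose = value; }
    bool isVerbose() const      { return _verbose; }

    Registry(const Registry&) = delete;             // a singleton is non-copyable
    Registry& operator=(const Registry&) = delete;

 private:
    Registry() = default;
    bool _verbose = false;
};

// Call sites change exactly as in the hunks above:
//   before: Registry::getInstance()->setVerbose(true);
//   after:  Registry::getInstance().setVerbose(true);

Besides thread-safe initialization, the function-local static removes the class-level _instance pointer and its never-freed heap allocation; the Environment.cpp hunk further down deletes exactly that machinery.
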
 void setGridLimit(int gridSize) {
@@ -1222,7 +1222,7 @@ void setGridLimit(int gridSize) {
 sd::TadPack* tadOnlyShapeInfo(Nd4jLong const* hXShapeInfo, int *dimension, int dimensionLength) {
     auto pack = new TadPack();
     try {
-        *pack = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, dimensionLength);
+        *pack = sd::ConstantTadHelper::getInstance().tadForDimensions(hXShapeInfo, dimension, dimensionLength);
     } catch (std::exception &e) {
         sd::LaunchContext::defaultContext()->errorReference()->setErrorCode(1);
         sd::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what());
@@ -1285,7 +1285,7 @@ void pullRowsGeneric(void *vx,
     int elementsPerThread = n / TAD_THRESHOLD;
     int _threads = sd::math::nd4j_max(1, elementsPerThread);
-    _threads = sd::math::nd4j_min(_threads, sd::Environment::getInstance()->maxThreads());
+    _threads = sd::math::nd4j_min(_threads, sd::Environment::getInstance().maxThreads());

     auto func = PRAGMA_THREADS_FOR {
         for (auto idx = start; idx < stop; idx++) {
@@ -1557,7 +1557,7 @@ void shuffle(Nd4jPointer *extras,

 bool isExperimentalEnabled() {
-    return sd::Environment::getInstance()->isExperimentalBuild();
+    return sd::Environment::getInstance().isExperimentalBuild();
 }
@@ -1920,7 +1920,7 @@ Nd4jPointer getResultWrapperPointer(sd::graph::ResultWrapper* ptr) {
 }

 const char* getAllCustomOps() {
-    return sd::ops::OpRegistrator::getInstance()->getAllCustomOperations();
+    return sd::ops::OpRegistrator::getInstance().getAllCustomOperations();
 }

 template
@@ -2016,7 +2016,7 @@ sd::ShapeList* _calculateOutputShapes(Nd4jPointer* extraPointers, sd::ops::Decla
 sd::ShapeList* calculateOutputShapes2(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer* inputBuffers, Nd4jPointer* inputShapes, int numInputShapes, double* tArgs, int numTArgs, Nd4jLong *iArgs, int numIArgs, bool *bArgs, int numBArgs, int *dArgs, int numDArgs) {
     try {
-        auto op = sd::ops::OpRegistrator::getInstance()->getOperation(hash);
+        auto op = sd::ops::OpRegistrator::getInstance().getOperation(hash);

         return _calculateOutputShapes(extraPointers, op, inputBuffers, inputShapes, numInputShapes, tArgs, numTArgs, iArgs, numIArgs, bArgs, numBArgs, dArgs, numDArgs);
     } catch (std::exception &e) {
@@ -2047,7 +2047,7 @@ sd::ShapeList* _calculateOutputShapes(Nd4jPointer* extraPointers, sd::ops::Decla
 sd::ShapeList* calculateOutputShapes(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer* inputShapes, int numInputShapes, double* tArgs, int numTArgs, Nd4jLong *iArgs, int numIArgs) {
     try {
-        auto op = sd::ops::OpRegistrator::getInstance()->getOperation(hash);
+        auto op = sd::ops::OpRegistrator::getInstance().getOperation(hash);

         return _calculateOutputShapes(extraPointers, op, inputShapes, numInputShapes, tArgs, numTArgs, iArgs, numIArgs);
     } catch (std::exception &e) {
@@ -2059,7 +2059,7 @@ sd::ShapeList* calculateOutputShapes(Nd4jPointer* extraPointers, Nd4jLong hash,
 int execCustomOp2(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer opContext) {
     try {
-        auto op = sd::ops::OpRegistrator::getInstance()->getOperation(hash);
+        auto op = sd::ops::OpRegistrator::getInstance().getOperation(hash);
         auto context = reinterpret_cast(opContext);

         return op->execute(context);
@@ -2157,7 +2157,7 @@ Nd4jStatus realExec(sd::ops::DeclarableOp* op, Nd4jPointer* extraPointers, Nd4jL
 int execCustomOp(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer* inputBuffers, Nd4jPointer* inputShapes, int numInputs, Nd4jPointer* outputBuffers, Nd4jPointer* outputShapes, int numOutputs, double* tArgs, int
numTArgs, Nd4jLong *iArgs, int numIArgs, bool* bArgs, int numBArgs, bool isInplace) {
     try {
-        auto op = sd::ops::OpRegistrator::getInstance()->getOperation(hash);
+        auto op = sd::ops::OpRegistrator::getInstance().getOperation(hash);

         return realExec(op, extraPointers, hash, inputBuffers, inputShapes, numInputs, outputBuffers, outputShapes, numOutputs, tArgs, numTArgs, iArgs, numIArgs, bArgs, numBArgs, isInplace);
     } catch (std::exception &e) {
         sd::LaunchContext::defaultContext()->errorReference()->setErrorCode(1);
@@ -2170,7 +2170,7 @@ int registerGraph(Nd4jPointer *extraPointers, Nd4jLong graphId, Nd4jPointer flat
     try {
         auto graph = sd::graph::GraphExecutioner::importFromFlatPointer(flatBufferPointer);

-        sd::graph::GraphHolder::getInstance()->registerGraph(graphId, graph);
+        sd::graph::GraphHolder::getInstance().registerGraph(graphId, graph);

         return ND4J_STATUS_OK;
     } catch (std::exception &e) {
@@ -2181,7 +2181,7 @@ int registerGraph(Nd4jPointer *extraPointers, Nd4jLong graphId, Nd4jPointer flat
 }

 static VariablesSet* executeStoredGraphT(Nd4jPointer *extraPointers, Nd4jLong graphId, Nd4jPointer *inputBuffers, Nd4jPointer *inputShapes, int* inputIndices, int numInputs) {
-    auto graph = sd::graph::GraphHolder::getInstance()->cloneGraph(graphId);
+    auto graph = sd::graph::GraphHolder::getInstance().cloneGraph(graphId);
     auto varSpace = graph->getVariableSpace();

     std::vector handles;
@@ -2264,7 +2264,7 @@ void* getVariableBuffer(sd::graph::Variable* variable) {

 int unregisterGraph(Nd4jPointer *extraPointers, Nd4jLong graphId) {

-    sd::graph::GraphHolder::getInstance()->dropGraphAny(graphId);
+    sd::graph::GraphHolder::getInstance().dropGraphAny(graphId);

     return sd::Status::OK();
 }
@@ -2294,7 +2294,7 @@ void deleteVariablesSet(sd::graph::VariablesSet* pointer) {
 }

 const char* getAllOperations() {
-    return sd::OpTracker::getInstance()->exportOperations();
+    return sd::OpTracker::getInstance().exportOperations();
 }
@@ -2694,10 +2694,10 @@ void tryPointer(Nd4jPointer extra, Nd4jPointer p, int len) {
     }
 }

-sd::ConstantDataBuffer* shapeBuffer(int rank, Nd4jLong *shape, Nd4jLong *strides, sd::DataType dtype, char order, Nd4jLong ews, bool empty) {
+sd::ConstantShapeBuffer* shapeBuffer(int rank, Nd4jLong *shape, Nd4jLong *strides, sd::DataType dtype, char order, Nd4jLong ews, bool empty) {
     try {
-        auto buffer = new ConstantDataBuffer();
-        *buffer = sd::ConstantShapeHelper::getInstance()->bufferForShapeInfo(
+        auto buffer = new ConstantShapeBuffer();
+        *buffer = sd::ConstantShapeHelper::getInstance().bufferForShapeInfo(
             ShapeDescriptor(dtype, order, shape, strides, rank, ews, empty));
         return buffer;
     } catch (std::exception &e) {
@@ -2707,10 +2707,14 @@ sd::ConstantDataBuffer* shapeBuffer(int rank, Nd4jLong *shape, Nd4jLong *strides
     }
 }

-void deleteShapeBuffer(sd::ConstantDataBuffer* ptr) {
+void deleteConstantShapeBuffer(sd::ConstantShapeBuffer* ptr) {
     delete ptr;
 }

+void deleteConstantDataBuffer(sd::ConstantDataBuffer* ptr) {
+    delete ptr;
+}
+
 void deleteTadPack(sd::TadPack* ptr) {
     delete ptr;
 }
@@ -2725,7 +2729,7 @@ sd::ConstantDataBuffer* constantBufferDouble(sd::DataType dtype, double *data, i

 sd::ConstantDataBuffer* constantBuffer(sd::DataType dtype, sd::ConstantDescriptor *descriptor) {
     try {
-        return sd::ConstantHelper::getInstance()->constantBuffer(*descriptor, dtype);
+        return sd::ConstantHelper::getInstance().constantBuffer(*descriptor, dtype);
     } catch (std::exception &e) {
         sd::LaunchContext::defaultContext()->errorReference()->setErrorCode(1);
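
The surrounding entry points all share one convention: look the op up through the (now reference-returning) OpRegistrator, and translate any C++ exception into an error code and message on the default LaunchContext, since exceptions must not propagate across the JNI boundary. A condensed sketch of that shape follows; execByHash is a hypothetical simplification, the real functions also carry buffer, shape and argument arrays:

// Condensed sketch of the entry-point convention used throughout this file;
// execByHash is hypothetical, real signatures take buffer/shape/arg arrays.
int execByHash(Nd4jLong hash, sd::graph::Context *context) {
    try {
        // reference-returning singleton, hence '.' rather than '->'
        auto op = sd::ops::OpRegistrator::getInstance().getOperation(hash);
        return op->execute(context);
    } catch (std::exception &e) {
        // never let an exception escape to the JVM: report via the error reference
        sd::LaunchContext::defaultContext()->errorReference()->setErrorCode(1);
        sd::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what());
        return 1;
    }
}
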
sd::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); @@ -2733,6 +2737,14 @@ sd::ConstantDataBuffer* constantBuffer(sd::DataType dtype, sd::ConstantDescripto } } +Nd4jPointer getConstantShapeBufferPrimary(sd::ConstantShapeBuffer* dbf) { + return const_cast(dbf->primary()); +} + +Nd4jPointer getConstantShapeBufferSpecial(sd::ConstantShapeBuffer* dbf) { + return const_cast(dbf->special()); +} + Nd4jPointer getConstantDataBufferPrimary(sd::ConstantDataBuffer* dbf) { return dbf->primary(); } @@ -2884,7 +2896,7 @@ Nd4jPointer shapeBufferForNumpy(Nd4jPointer npyArray) { } else { shapeBuffer = sd::ShapeBuilders::createShapeInfo(dtype, arr.fortranOrder ? 'f' : 'c', shape); } - return const_cast(sd::ConstantShapeHelper::getInstance()->createFromExisting(shapeBuffer, true)); + return const_cast(sd::ConstantShapeHelper::getInstance().createFromExisting(shapeBuffer, true)); } catch (std::exception &e) { sd::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); sd::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); @@ -2983,7 +2995,7 @@ const char* runLightBenchmarkSuit(bool printOut) { } Nd4jLong getCachedMemory(int deviceId) { - return sd::ConstantHelper::getInstance()->getCachedAmount(deviceId); + return sd::ConstantHelper::getInstance().getCachedAmount(deviceId); } const char* runFullBenchmarkSuit(bool printOut) { diff --git a/libnd4j/include/legacy/cuda/NativeOpExecutioner.cu b/libnd4j/include/legacy/cuda/NativeOpExecutioner.cu index f01daffd7..14cbf306a 100644 --- a/libnd4j/include/legacy/cuda/NativeOpExecutioner.cu +++ b/libnd4j/include/legacy/cuda/NativeOpExecutioner.cu @@ -252,7 +252,7 @@ void NativeOpExecutioner::execBroadcastBool(sd::LaunchContext *lc, if (yType != xType) throw std::runtime_error("NativeOpExecutioner::execBroadcastBool requires both X & Y operands to have same type"); - if (sd::Environment::getInstance()->isDebugAndVerbose()) + if (sd::Environment::getInstance().isDebugAndVerbose()) printf("F3B opNum:[%i]\n", opNum); dim3 launchDims(256, 256, 1024); @@ -437,7 +437,7 @@ void NativeOpExecutioner::execInverseBroadcastInt(sd::LaunchContext *lc, if (yType != xType || zType != xType) throw std::runtime_error("NativeOpExecutioner::execBroadcastInt requires both X & Y operands to have same type"); - if (sd::Environment::getInstance()->isDebugAndVerbose()) + if (sd::Environment::getInstance().isDebugAndVerbose()) printf("F3BI opNum:[%i]\n", opNum); dim3 launchDims(256, 256, 1024); @@ -583,7 +583,7 @@ void NativeOpExecutioner::execReduceSame(sd::LaunchContext *lc, auto stream = lc->getCudaStream(); auto reductionPointer = lc->getReductionPointer(); - if (sd::Environment::getInstance()->isDebugAndVerbose()) + if (sd::Environment::getInstance().isDebugAndVerbose()) printf("SF7 opNum:[%i]\n", opNum); auto xType = sd::ArrayOptions::dataType(hXShapeInfo); @@ -618,7 +618,7 @@ void NativeOpExecutioner::execReduceLong(sd::LaunchContext *lc, auto stream = lc->getCudaStream(); auto reductionPointer = lc->getReductionPointer(); - if (sd::Environment::getInstance()->isDebugAndVerbose()) + if (sd::Environment::getInstance().isDebugAndVerbose()) printf("LF7 opNum:[%i]\n", opNum); auto xType = sd::ArrayOptions::dataType(hXShapeInfo); @@ -654,7 +654,7 @@ void NativeOpExecutioner::execReduceBool(sd::LaunchContext *lc, auto stream = lc->getCudaStream(); auto reductionPointer = lc->getReductionPointer(); - if (sd::Environment::getInstance()->isDebugAndVerbose()) + if (sd::Environment::getInstance().isDebugAndVerbose()) printf("BF7 
opNum:[%i]\n", opNum); auto xType = sd::ArrayOptions::dataType(hXShapeInfo); @@ -701,7 +701,7 @@ void NativeOpExecutioner::execIndexReduce(sd::LaunchContext *lc, auto reductionPointer = lc->getReductionPointer(); auto allocationPointer = lc->getAllocationPointer(); - if (sd::Environment::getInstance()->isDebugAndVerbose()) + if (sd::Environment::getInstance().isDebugAndVerbose()) printf("F2 opNum:[%i]\n", opNum); auto xType = sd::ArrayOptions::dataType(hXShapeInfo); @@ -745,7 +745,7 @@ void NativeOpExecutioner::execReduceFloat(sd::LaunchContext *lc, auto stream = lc->getCudaStream(); auto reductionPointer = lc->getReductionPointer(); - if (sd::Environment::getInstance()->isDebugAndVerbose()) + if (sd::Environment::getInstance().isDebugAndVerbose()) printf("F8 opNum:[%i]\n", opNum); auto xType = sd::ArrayOptions::dataType(hXShapeInfo); @@ -780,7 +780,7 @@ void NativeOpExecutioner::execIndexReduceScalar(sd::LaunchContext *lc, void *hZ, Nd4jLong const* hZShapeInfo, void *dZ, Nd4jLong const* dZShapeInfo){ - if (sd::Environment::getInstance()->isDebug()) + if (sd::Environment::getInstance().isDebug()) printf("F1 opNum:[%i]\n", opNum); auto stream = lc->getCudaStream(); @@ -792,7 +792,7 @@ void NativeOpExecutioner::execIndexReduceScalar(sd::LaunchContext *lc, auto numBlocks = CudaLaunchHelper::getReductionBlocks(xLength, blockWidth); dim3 launchDims(numBlocks == 0 ? 1 : numBlocks, blockWidth, 32768); - if (sd::Environment::getInstance()->isDebugAndVerbose() && launchDims.x == 1) + if (sd::Environment::getInstance().isDebugAndVerbose() && launchDims.x == 1) printf("AF1 opNum:[%i]\n", opNum); auto xType = sd::ArrayOptions::dataType(hXShapeInfo); @@ -1649,12 +1649,12 @@ void NativeOpExecutioner::execReduce3All(sd::LaunchContext *lc, auto allocationPointer = lc->getAllocationPointer(); auto reductionPointer = lc->getReductionPointer(); - if (sd::Environment::getInstance()->isDebugAndVerbose()) + if (sd::Environment::getInstance().isDebugAndVerbose()) printf("D119 opNum:[%i]\n", opNum); dim3 launchDims(shape::length(hZShapeInfo), 256, 32768); - if (sd::Environment::getInstance()->isVerbose() && launchDims.x == 1) + if (sd::Environment::getInstance().isVerbose() && launchDims.x == 1) printf("AD119 opNum:[%i]\n", opNum); auto xType = sd::ArrayOptions::dataType(hXShapeInfo); diff --git a/libnd4j/include/legacy/cuda/NativeOps.cu b/libnd4j/include/legacy/cuda/NativeOps.cu index 465029207..1ccc2c7d5 100755 --- a/libnd4j/include/legacy/cuda/NativeOps.cu +++ b/libnd4j/include/legacy/cuda/NativeOps.cu @@ -237,9 +237,9 @@ void execPairwiseTransform( Nd4jPointer *extraPointers, InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX, dbY}); LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - NativeOpExecutioner::execPairwiseTransform(&lc, opNum, dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), - dbY->primary(), hYShapeInfo, dbY->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hYShapeInfo).specialAsT(), - dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT(), extraParams); + NativeOpExecutioner::execPairwiseTransform(&lc, opNum, dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), + dbY->primary(), hYShapeInfo, dbY->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hYShapeInfo).special(), + dbZ->primary(), hZShapeInfo, 
dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special(), extraParams); InteropDataBuffer::registerSpecialUse({dbZ}, {dbX, dbY}); } catch (std::exception &e) { @@ -260,9 +260,9 @@ void execPairwiseTransformBool(Nd4jPointer *extraPointers, LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); NativeOpExecutioner::execPairwiseBoolTransform(&lc, opNum, - dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), - dbY->primary(), hYShapeInfo, dbY->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hYShapeInfo).specialAsT(), - dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT(), + dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), + dbY->primary(), hYShapeInfo, dbY->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hYShapeInfo).special(), + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special(), extraParams); InteropDataBuffer::registerSpecialUse({dbZ}, {dbX, dbY}); @@ -284,9 +284,9 @@ void execSummaryStatsScalar(Nd4jPointer *extraPointers, LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); NativeOpExecutioner::execSummaryStatsScalar(&lc, opNum, - dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), + dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), extraParams, - dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT(), + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special(), biasCorrected); InteropDataBuffer::registerSpecialUse({dbZ}, {dbX}); @@ -319,9 +319,9 @@ void execBroadcastBool(Nd4jPointer *extraPointers, LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); NativeOpExecutioner::execBroadcastBool(&lc, opNum, - dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), - dbY->primary(), hYShapeInfo, dbY->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hYShapeInfo).specialAsT(), - dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT(), + dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), + dbY->primary(), hYShapeInfo, dbY->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hYShapeInfo).special(), + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special(), extraParams, dimension, dimensionLength, tadOnlyShapeInfo, tadOffsets, tadOnlyShapeInfoZ, tadOffsetsZ); @@ -373,9 +373,9 @@ void execBroadcast( LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); NativeOpExecutioner::execBroadcast(&lc, opNum, - dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), - dbY->primary(), hYShapeInfo, dbY->special(), 
ConstantShapeHelper::getInstance()->bufferForShapeInfo(hYShapeInfo).specialAsT(), - dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT(), + dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), + dbY->primary(), hYShapeInfo, dbY->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hYShapeInfo).special(), + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special(), dimension, dimensionLength, tadOnlyShapeInfo, tadOffsets, tadOnlyShapeInfoZ, tadOffsetsZ); @@ -407,9 +407,9 @@ void execReduceFloat(Nd4jPointer *extraPointers, LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); NativeOpExecutioner::execReduceFloatScalar(&lc, opNum, - dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), + dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), extraParams, - dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT()); + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special()); InteropDataBuffer::registerSpecialUse({dbZ}, {dbX}); } catch (std::exception &e) { @@ -429,9 +429,9 @@ void execReduceSame(Nd4jPointer *extraPointers, LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); NativeOpExecutioner::execReduceSameScalar(&lc, opNum, - dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), + dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), extraParams, - dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT()); + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special()); InteropDataBuffer::registerSpecialUse({dbZ}, {dbX}); } catch (std::exception &e) { @@ -454,15 +454,15 @@ void execReduceSame2(Nd4jPointer *extraPointers, auto dimension = reinterpret_cast(dbDimension->primary()); int dimensionLength = static_cast(shape::length(hDimensionShape)); - auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, + auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(hXShapeInfo, dimension, shape::length(hDimensionShape)); LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); NativeOpExecutioner::execReduceSame(&lc, opNum, - dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), + dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), extraParams, - dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT(), + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special(), dimension, dimensionLength, tadPack.specialShapeInfo(), tadPack.specialOffsets()); @@ -487,15 +487,15 @@ void execReduceLong2(Nd4jPointer *extraPointers, auto dimension = 
reinterpret_cast(dbDimension->primary()); int dimensionLength = static_cast(shape::length(hDimensionShape)); - auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, + auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(hXShapeInfo, dimension, shape::length(hDimensionShape)); LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); NativeOpExecutioner::execReduceLong(&lc, opNum, - dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), + dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), extraParams, - dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT(), + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special(), dimension, dimensionLength, tadPack.specialShapeInfo(), tadPack.specialOffsets()); @@ -534,9 +534,9 @@ void execReduceLong(Nd4jPointer *extraPointers, BUILD_DOUBLE_SELECTOR(xType, zType, functions::reduce::ReduceLongFunction, ::execReduceScalar(launchDims, stream, opNum, - dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), hXShapeInfo, + dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), hXShapeInfo, extraParams, - dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT(), hXShapeInfo, + dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special(), hXShapeInfo, nullptr, 0, reductionPointer, dTADShapeInfo), LIBND4J_TYPES, LONG_TYPES); sd::DebugHelper::checkErrorCode(stream, "execReduceLong(...) 
failed"); @@ -562,15 +562,15 @@ void execReduceBool2(Nd4jPointer *extraPointers, auto dimension = reinterpret_cast(dbDimension->primary()); int dimensionLength = static_cast(shape::length(hDimensionShape)); - auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, + auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(hXShapeInfo, dimension, shape::length(hDimensionShape)); LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); NativeOpExecutioner::execReduceBool(&lc, opNum, - dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), + dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), extraParams, - dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT(), + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special(), dimension, dimensionLength, tadPack.specialShapeInfo(), tadPack.specialOffsets()); @@ -609,9 +609,9 @@ void execReduceBool(Nd4jPointer *extraPointers, BUILD_DOUBLE_SELECTOR(xType, zType, functions::reduce::ReduceBoolFunction, ::execReduceScalar(launchDims, stream, opNum, - dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), hXShapeInfo, + dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), hXShapeInfo, extraParams, - dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT(), hZShapeInfo, + dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special(), hZShapeInfo, nullptr, 0, reductionPointer, dTADShapeInfo), LIBND4J_TYPES, BOOL_TYPES); sd::DebugHelper::checkErrorCode(stream, "execReduceBool(...) 
failed"); @@ -648,15 +648,15 @@ void execIndexReduce(Nd4jPointer *extraPointers, auto dimension = reinterpret_cast(dbDimension->primary()); int dimensionLength = static_cast(shape::length(hDimensionShape)); - auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, + auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(hXShapeInfo, dimension, shape::length(hDimensionShape)); LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); NativeOpExecutioner::execIndexReduce(&lc, opNum, - dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), + dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), extraParams, - dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT(), + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special(), (int *) dbDimension->special(), dimensionLength, tadPack.specialShapeInfo(), tadPack.specialOffsets()); @@ -690,15 +690,15 @@ void execReduceFloat2(Nd4jPointer *extraPointers, auto dimension = reinterpret_cast(dbDimension->primary()); int dimensionLength = static_cast(shape::length(hDimensionShape)); - auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, + auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(hXShapeInfo, dimension, shape::length(hDimensionShape)); LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); NativeOpExecutioner::execReduceFloat(&lc, opNum, - dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), + dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), extraParams, - dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT(), + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special(), dimension, dimensionLength, tadPack.specialShapeInfo(), tadPack.specialOffsets()); @@ -728,9 +728,9 @@ void execIndexReduceScalar( LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); NativeOpExecutioner::execIndexReduceScalar(&lc, opNum, - dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), + dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), extraParams, - dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT()); + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special()); InteropDataBuffer::registerSpecialUse({dbZ}, {dbX}); } catch (std::exception &e) { @@ -752,8 +752,8 @@ void execTransformSame(Nd4jPointer *extraPointers,int opNum, LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); NativeOpExecutioner::execTransformSame(&lc, opNum, - dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), - dbZ->primary(), hZShapeInfo, dbZ->special(), 
ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT(), + dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special(), extraParams, tadShapeInfo, tadOffsets); @@ -777,8 +777,8 @@ void execTransformBool(Nd4jPointer *extraPointers,int opNum, LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); NativeOpExecutioner::execTransformBool(&lc, opNum, - dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), - dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT(), + dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special(), extraParams, tadShapeInfo, tadOffsets); @@ -803,8 +803,8 @@ void execTransformAny(Nd4jPointer *extraPointers,int opNum, reinterpret_cast(extraPointers[6])); NativeOpExecutioner::execTransformAny(&lc, opNum, - dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), - dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT(), + dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special(), extraParams, nullptr, nullptr); @@ -828,8 +828,8 @@ void execTransformStrict(Nd4jPointer *extraPointers,int opNum, LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); NativeOpExecutioner::execTransformStrict(&lc, opNum, - dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), - dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT(), + dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special(), extraParams, tadShapeInfo, tadOffsets); @@ -853,8 +853,8 @@ void execTransformFloat(Nd4jPointer *extraPointers,int opNum, LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); NativeOpExecutioner::execTransformFloat(&lc, opNum, - dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), - dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT(), + dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special(), extraParams, tadShapeInfo, tadOffsets); @@ -939,7 +939,7 @@ void enableP2P(bool enable) { cudaDeviceDisablePeerAccess(dY); } } else { - if (sd::Environment::getInstance()->isVerbose()) printf("Peer 
access [%i] -> [%i] isn't possible\n", dX, dY); + if (sd::Environment::getInstance().isVerbose()) printf("Peer access [%i] -> [%i] isn't possible\n", dX, dY); } } } @@ -983,7 +983,7 @@ void initializeDevicesAndFunctions() { } void initializeFunctions(Nd4jPointer *functions) { - sd::BlasHelper::getInstance()->initializeDeviceFunctions(functions); + sd::BlasHelper::getInstance().initializeDeviceFunctions(functions); /* cublasSgemv = (CublasSgemv)functions[0]; cublasDgemv = (CublasDgemv)functions[1]; @@ -1317,7 +1317,7 @@ int getAvailableDevices() { } void enableDebugMode(bool reallyEnable) { - sd::Environment::getInstance()->setDebug(reallyEnable); + sd::Environment::getInstance().setDebug(reallyEnable); } void setGridLimit(int gridSize) { @@ -1345,7 +1345,7 @@ void setOmpNumThreads(int threads) { } void enableVerboseMode(bool reallyEnable) { - sd::Environment::getInstance()->setVerbose(reallyEnable); + sd::Environment::getInstance().setVerbose(reallyEnable); } int getDeviceMajor(int device) { @@ -1386,7 +1386,7 @@ void specialConcat( sd::TadPack* tadOnlyShapeInfo(Nd4jLong const* dXShapeInfo, int *dimension, int dimensionLength) { try { auto pack = new TadPack(); - *pack = sd::ConstantTadHelper::getInstance()->tadForDimensions(dXShapeInfo, dimension, dimensionLength); + *pack = sd::ConstantTadHelper::getInstance().tadForDimensions(dXShapeInfo, dimension, dimensionLength); return pack; } catch (std::exception &e) { sd::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); @@ -1502,7 +1502,7 @@ void average(Nd4jPointer *extras, auto dX = reinterpret_cast(dx); - if (sd::Environment::getInstance()->isDebugAndVerbose()) + if (sd::Environment::getInstance().isDebugAndVerbose()) printf("averageFloat called\n"); auto xType = sd::ArrayOptions::dataType(xShapeInfo); @@ -1536,7 +1536,7 @@ void accumulate(Nd4jPointer *extras, auto dX = reinterpret_cast(dx); - if (sd::Environment::getInstance()->isDebugAndVerbose()) + if (sd::Environment::getInstance().isDebugAndVerbose()) printf("accumulateFloat called\n"); auto xType = sd::ArrayOptions::dataType(xShapeInfo); @@ -1591,7 +1591,7 @@ void shuffle(Nd4jPointer *extras, } bool isExperimentalEnabled() { - return sd::Environment::getInstance()->isExperimentalBuild(); + return sd::Environment::getInstance().isExperimentalBuild(); } void setOmpMinThreads(int threads) { @@ -1623,9 +1623,9 @@ void execSummaryStats(Nd4jPointer *extraPointers, LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); NativeOpExecutioner::execSummaryStats(&lc, opNum, - dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), + dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), extraParams, - dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT(), + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special(), biasCorrected); InteropDataBuffer::registerSpecialUse({dbZ}, {dbX}); @@ -1653,9 +1653,9 @@ void execSummaryStatsTad(Nd4jPointer *extraPointers, LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); NativeOpExecutioner::execSummaryStats(&lc, opNum, - dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), + dbX->primary(), hXShapeInfo, dbX->special(), 
ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), extraParams, - dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT(), + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special(), reinterpret_cast(dbDimension->special()), dimensionLength, tadShapeInfo, tadOffsets, biasCorrected); @@ -1679,10 +1679,10 @@ void execReduce3(Nd4jPointer *extraPointers, LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); NativeOpExecutioner::execReduce3(&lc, opNum, - dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), + dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), extraParams, - dbY->primary(), hYShapeInfo, dbY->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hYShapeInfo).specialAsT(), - dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT()); + dbY->primary(), hYShapeInfo, dbY->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hYShapeInfo).special(), + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special()); InteropDataBuffer::registerSpecialUse({dbZ}, {dbX, dbY}); } catch (std::exception &e) { @@ -1708,7 +1708,7 @@ void execReduce3Tad(Nd4jPointer *extraPointers, auto dimension = reinterpret_cast(dbDimension->primary()); int dimensionLength = static_cast(shape::length(hDimensionShape)); - auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, + auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(hXShapeInfo, dimension, shape::length(hDimensionShape)); auto tadLength = shape::length(tadPack.primaryShapeInfo()); @@ -1720,18 +1720,18 @@ void execReduce3Tad(Nd4jPointer *extraPointers, if (tadLength == yLength || tadLength == xLength) { // nd4j_printf("== way\n",""); NativeOpExecutioner::execReduce3(&lc, opNum, - dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), + dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), extraParams, - dbY->primary(), hYShapeInfo, dbY->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hYShapeInfo).specialAsT(), - dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT(), + dbY->primary(), hYShapeInfo, dbY->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hYShapeInfo).special(), + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special(), dimension, dimensionLength, tadOnlyShapeInfo, tadOffsets, yTadOnlyShapeInfo, yTadOffsets); } else NativeOpExecutioner::execReduce3TAD(&lc, opNum, - dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), + dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), extraParams, - dbY->primary(), hYShapeInfo, dbY->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hYShapeInfo).specialAsT(), - dbZ->primary(), hZShapeInfo, dbZ->special(), 
ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT(), + dbY->primary(), hYShapeInfo, dbY->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hYShapeInfo).special(), + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special(), dimension, dimensionLength, tadOnlyShapeInfo, yTadOffsets, yTadOnlyShapeInfo, yTadOffsets); @@ -1753,10 +1753,10 @@ void execReduce3Scalar(Nd4jPointer *extraPointers,int opNum, LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); NativeOpExecutioner::execReduce3Scalar(&lc, opNum, - dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), + dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), extraParams, - dbY->primary(), hYShapeInfo, dbY->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hYShapeInfo).specialAsT(), - dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT()); + dbY->primary(), hYShapeInfo, dbY->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hYShapeInfo).special(), + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special()); InteropDataBuffer::registerSpecialUse({dbZ}, {dbX, dbY}); } catch (std::exception &e) { @@ -1777,9 +1777,9 @@ void execScalarBool(Nd4jPointer *extraPointers, LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); NativeOpExecutioner::execScalarBool(&lc, opNum, - dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), - dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT(), - dbScalar->primary(), hScalarShapeInfo, dbScalar->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hScalarShapeInfo).specialAsT(), + dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special(), + dbScalar->primary(), hScalarShapeInfo, dbScalar->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hScalarShapeInfo).special(), extraParams); InteropDataBuffer::registerSpecialUse({dbZ}, {dbX, dbScalar}); @@ -1808,10 +1808,10 @@ void execScalarBoolTad(Nd4jPointer *extraPointers, LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); NativeOpExecutioner::execScalarBool(&lc, opNum, - dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), + dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), extraParams, - dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT(), - dbScalars->primary(), hScalarShapeInfo, dbScalars->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hScalarShapeInfo).specialAsT(), + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special(), + dbScalars->primary(), hScalarShapeInfo, dbScalars->special(), 
ConstantShapeHelper::getInstance().bufferForShapeInfo(hScalarShapeInfo).special(), dimension, dimensionLength, tadShapeInfo, tadOffsets, tadShapeInfoZ, tadOffsetsZ); @@ -1834,9 +1834,9 @@ void execScalar(Nd4jPointer *extraPointers, LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); NativeOpExecutioner::execScalar(&lc, opNum, - dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), - dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT(), - dbScalar->primary(), hScalarShapeInfo, dbScalar->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hScalarShapeInfo).specialAsT(), + dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special(), + dbScalar->primary(), hScalarShapeInfo, dbScalar->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hScalarShapeInfo).special(), extraParams); InteropDataBuffer::registerSpecialUse({dbZ}, {dbX, dbScalar}); @@ -1877,7 +1877,7 @@ void execScalarTad(Nd4jPointer *extraPointers, #ifdef __ND4J_EXPERIMENTAL__ BUILD_PAIRWISE_SELECTOR(xType, yType, zType, functions::scalar::ScalarTransform, ::executeCudaAlongDimension(launchDims, stream, opNum, dX, dXShapeInfo, dZ, dZShapeInfo, dScalars, extraParams, dimension, dimensionLength, tadShapeInfo, tadOffsets, tadShapeInfoZ, tadOffsetsZ), LIBND4J_TYPES, LIBND4J_TYPES); #else - BUILD_SINGLE_SELECTOR_THRICE(xType, functions::scalar::ScalarTransform, ::executeCudaAlongDimension(launchDims, stream, opNum, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT(), dbScalars->special(), extraParams, dimension, dimensionLength, tadShapeInfo, tadOffsets, tadShapeInfoZ, tadOffsetsZ), LIBND4J_TYPES); + BUILD_SINGLE_SELECTOR_THRICE(xType, functions::scalar::ScalarTransform, ::executeCudaAlongDimension(launchDims, stream, opNum, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special(), dbScalars->special(), extraParams, dimension, dimensionLength, tadShapeInfo, tadOffsets, tadShapeInfoZ, tadOffsetsZ), LIBND4J_TYPES); #endif DEBUG_KERNEL(stream, opNum); @@ -1938,7 +1938,7 @@ void execRandom(Nd4jPointer *extraPointers, LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); NativeOpExecutioner::execRandom(&lc, opNum, stateHost, - dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT(), + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special(), extraArguments); InteropDataBuffer::registerSpecialUse({dbZ}, {}); @@ -1958,8 +1958,8 @@ void execRandom2(Nd4jPointer *extraPointers, int opNum, Nd4jPointer stateHost, LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); NativeOpExecutioner::execRandom(&lc, opNum, stateHost, - dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), - dbZ->primary(), hZShapeInfo, 
dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT(), + dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special(), extraArguments); InteropDataBuffer::registerSpecialUse({dbZ}, {dbX}); @@ -1980,9 +1980,9 @@ void execRandom3(Nd4jPointer *extraPointers, int opNum, Nd4jPointer stateHost, LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); NativeOpExecutioner::execRandom(&lc, opNum, stateHost, - dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), - dbY->primary(), hYShapeInfo, dbY->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hYShapeInfo).specialAsT(), - dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT(), + dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), + dbY->primary(), hYShapeInfo, dbY->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hYShapeInfo).special(), + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special(), extraArguments); InteropDataBuffer::registerSpecialUse({dbZ}, {dbX, dbY}); @@ -2216,10 +2216,10 @@ void execReduce3All(Nd4jPointer *extraPointers, LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); NativeOpExecutioner::execReduce3All(&lc, opNum, - dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), + dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), extraParamsVals, - dbY->primary(), hYShapeInfo, dbY->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hYShapeInfo).specialAsT(), - dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT(), + dbY->primary(), hYShapeInfo, dbY->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hYShapeInfo).special(), + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special(), reinterpret_cast(dbDimension->special()), dimensionLength, xTadShapeInfo, xOffsets, yTadShapeInfo, yOffsets); @@ -2458,7 +2458,7 @@ void sortTadByKey(Nd4jPointer *extraPointers, auto stream = reinterpret_cast(extraPointers[1]); auto context = extraPointers[0] == 0 ? LaunchContext::defaultContext() : reinterpret_cast(extraPointers[0]); - auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(xShapeInfo, dimension, dimensionLength); + auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(xShapeInfo, dimension, dimensionLength); dim3 launchDims((int) tadPack.numberOfTads(), 256, 2048); auto xType = sd::ArrayOptions::dataType(xShapeInfo); auto yType = sd::ArrayOptions::dataType(yShapeInfo); @@ -2485,7 +2485,7 @@ void sortTadByValue(Nd4jPointer *extraPointers, auto stream = reinterpret_cast(extraPointers[1]); auto context = extraPointers[0] == 0 ? 
LaunchContext::defaultContext() : reinterpret_cast(extraPointers[0]); - auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(xShapeInfo, dimension, dimensionLength); + auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(xShapeInfo, dimension, dimensionLength); dim3 launchDims((int) tadPack.numberOfTads(), 256, 2048); auto xType = sd::ArrayOptions::dataType(yShapeInfo); auto yType = sd::ArrayOptions::dataType(xShapeInfo); @@ -2515,7 +2515,7 @@ void sortTad(Nd4jPointer *extraPointers, auto stream = reinterpret_cast(extraPointers[1]); auto context = extraPointers[0] == 0 ? LaunchContext::defaultContext() : reinterpret_cast(extraPointers[0]); - auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(xShapeInfo, dimension, dimensionLength); + auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(xShapeInfo, dimension, dimensionLength); dim3 launchDims((int) tadPack.numberOfTads(), 512, 33768); auto xType = sd::ArrayOptions::dataType(xShapeInfo); BUILD_SINGLE_SELECTOR(xType, oesTadGeneric, @@ -2561,7 +2561,7 @@ Nd4jPointer getResultWrapperPointer(sd::graph::ResultWrapper* ptr) { const char* getAllCustomOps() { - return sd::ops::OpRegistrator::getInstance()->getAllCustomOperations(); + return sd::ops::OpRegistrator::getInstance().getAllCustomOperations(); } @@ -2608,7 +2608,7 @@ sd::ShapeList* _calculateOutputShapes(Nd4jPointer* extraPointers, sd::ops::Decla sd::ShapeList* calculateOutputShapes2(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer* inputBuffers, Nd4jPointer* inputShapes, int numInputShapes, double* tArgs, int numTArgs, Nd4jLong *iArgs, int numIArgs, bool *bArgs, int numBArgs, int *dArgs, int numDArgs) { try { - auto op = sd::ops::OpRegistrator::getInstance()->getOperation(hash); + auto op = sd::ops::OpRegistrator::getInstance().getOperation(hash); return _calculateOutputShapes(extraPointers, op, inputBuffers, inputShapes, numInputShapes, tArgs, numTArgs, iArgs, numIArgs, bArgs, numBArgs, dArgs, numDArgs); @@ -2639,7 +2639,7 @@ sd::ShapeList* _calculateOutputShapes(Nd4jPointer* extraPointers, sd::ops::Decla sd::ShapeList* calculateOutputShapes(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer* inputShapes, int numInputShapes, double* tArgs, int numTArgs, Nd4jLong *iArgs, int numIArgs) { try { - auto op = sd::ops::OpRegistrator::getInstance()->getOperation(hash); + auto op = sd::ops::OpRegistrator::getInstance().getOperation(hash); return _calculateOutputShapes(extraPointers, op, inputShapes, numInputShapes, tArgs, numTArgs, iArgs, numIArgs); } catch (std::exception &e) { @@ -2742,7 +2742,7 @@ static FORCEINLINE Nd4jStatus realExec(sd::ops::DeclarableOp* op, Nd4jPointer* e int execCustomOp(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer* inputBuffers, Nd4jPointer* inputShapes, int numInputs, Nd4jPointer* outputBuffers, Nd4jPointer* outputShapes, int numOutputs, double* tArgs, int numTArgs, Nd4jLong *iArgs, int numIArgs, bool* bArgs, int numBArgs, bool isInplace) { try { - auto op = sd::ops::OpRegistrator::getInstance()->getOperation(hash); + auto op = sd::ops::OpRegistrator::getInstance().getOperation(hash); return realExec(op, extraPointers, hash, inputBuffers, inputShapes, numInputs, outputBuffers, outputShapes, numOutputs, tArgs, numTArgs, iArgs, numIArgs, bArgs, numBArgs, isInplace); @@ -2755,7 +2755,7 @@ int execCustomOp(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer* inputBu int execCustomOp2(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer opContext) { try { - auto op = 
sd::ops::OpRegistrator::getInstance()->getOperation(hash); + auto op = sd::ops::OpRegistrator::getInstance().getOperation(hash); auto context = reinterpret_cast(opContext); auto result = op->execute(context); @@ -2786,7 +2786,7 @@ int registerGraph(Nd4jPointer *extraPointers, Nd4jLong graphId, Nd4jPointer flat try { auto graph = sd::graph::GraphExecutioner::importFromFlatPointer(flatBufferPointer); - sd::graph::GraphHolder::getInstance()->registerGraph(graphId, graph); + sd::graph::GraphHolder::getInstance().registerGraph(graphId, graph); return ND4J_STATUS_OK; } catch (std::exception &e) { @@ -2798,7 +2798,7 @@ int registerGraph(Nd4jPointer *extraPointers, Nd4jLong graphId, Nd4jPointer flat static VariablesSet* executeStoredGraphT(Nd4jPointer *extraPointers, Nd4jLong graphId, Nd4jPointer *inputBuffers, Nd4jPointer *inputShapes, int* inputIndices, int numInputs) { - auto graph = sd::graph::GraphHolder::getInstance()->pullGraph(graphId); + auto graph = sd::graph::GraphHolder::getInstance().pullGraph(graphId); auto varSpace = graph->getVariableSpace()->clone(); std::vector handles; @@ -2887,7 +2887,7 @@ void* getVariableBuffer(sd::graph::Variable* variable) { int unregisterGraph(Nd4jPointer *extraPointers, Nd4jLong graphId) { try { - sd::graph::GraphHolder::getInstance()->dropGraphAny(graphId); + sd::graph::GraphHolder::getInstance().dropGraphAny(graphId); return ND4J_STATUS_OK; } catch (std::exception &e) { @@ -2929,7 +2929,7 @@ void deleteShapeList(Nd4jPointer shapeList) { } const char* getAllOperations() { - return sd::OpTracker::getInstance()->exportOperations(); + return sd::OpTracker::getInstance().exportOperations(); } Nd4jPointer getGraphState(Nd4jLong id) { @@ -3360,7 +3360,7 @@ void tryPointer(Nd4jPointer extra, Nd4jPointer p, int len) { cudaStream_t stream; cudaStreamCreate(&stream); - tryPointerKernel << < 256, 512, len + 64, stream >> > (p, len); + tryPointerKernel <<< 256, 512, len + 64, stream>>> (p, len); auto e = cudaStreamSynchronize(stream); if (e != 0) @@ -3376,10 +3376,11 @@ void tryPointer(Nd4jPointer extra, Nd4jPointer p, int len) { int dataTypeFromNpyHeader(void *header) { return (int) cnpy::dataTypeFromHeader(reinterpret_cast(header)); } -sd::ConstantDataBuffer* shapeBuffer(int rank, Nd4jLong *shape, Nd4jLong *strides, sd::DataType dtype, char order, Nd4jLong ews, bool empty) { + +OpaqueConstantShapeBuffer* shapeBuffer(int rank, Nd4jLong *shape, Nd4jLong *strides, sd::DataType dtype, char order, Nd4jLong ews, bool empty) { try { - auto buffer = new ConstantDataBuffer(); - *buffer = sd::ConstantShapeHelper::getInstance()->bufferForShapeInfo( + auto buffer = new ConstantShapeBuffer(); + *buffer = sd::ConstantShapeHelper::getInstance().bufferForShapeInfo( ShapeDescriptor(dtype, order, shape, strides, rank, ews, empty)); return buffer; } catch (std::exception &e) { @@ -3389,19 +3390,23 @@ sd::ConstantDataBuffer* shapeBuffer(int rank, Nd4jLong *shape, Nd4jLong *strides } } -void deleteShapeBuffer(sd::ConstantDataBuffer* ptr) { +void deleteConstantShapeBuffer(OpaqueConstantShapeBuffer* ptr) { delete ptr; } +void deleteConstantDataBuffer(OpaqueConstantDataBuffer* ptr) { + delete ptr; +} + void deleteTadPack(sd::TadPack* ptr) { delete ptr; } bool isBlasVersionMatches(int major, int minor, int build) { - auto result = major == Environment::getInstance()->_blasMajorVersion && minor == Environment::getInstance()->_blasMinorVersion && build == Environment::getInstance()->_blasPatchVersion; + auto result = major == Environment::getInstance()._blasMajorVersion && minor == 
Environment::getInstance()._blasMinorVersion && build == Environment::getInstance()._blasPatchVersion; if (!result) { - nd4j_printf("CUDA/cuBLAS version mismatch. Expected: %i.%i.%i but got %i.%i.%i instead\n", Environment::getInstance()->_blasMajorVersion, Environment::getInstance()->_blasMinorVersion, Environment::getInstance()->_blasPatchVersion, major, minor, build); + nd4j_printf("CUDA/cuBLAS version mismatch. Expected: %i.%i.%i but got %i.%i.%i instead\n", Environment::getInstance()._blasMajorVersion, Environment::getInstance()._blasMinorVersion, Environment::getInstance()._blasPatchVersion, major, minor, build); sd::LaunchContext::defaultContext()->errorReference()->setErrorCode(152); sd::LaunchContext::defaultContext()->errorReference()->setErrorMessage("CUDA/cuBLAS version mismatch"); } @@ -3410,15 +3415,15 @@ bool isBlasVersionMatches(int major, int minor, int build) { } sd::ConstantDataBuffer* constantBufferLong(sd::DataType dtype, Nd4jLong const* data, int length) { - return sd::ConstantHelper::getInstance()->constantBuffer(ConstantDescriptor(data, length), dtype); + return sd::ConstantHelper::getInstance().constantBuffer(ConstantDescriptor(data, length), dtype); } sd::ConstantDataBuffer* constantBufferDouble(sd::DataType dtype, double *data, int length) { - return sd::ConstantHelper::getInstance()->constantBuffer(ConstantDescriptor(data, length), dtype); + return sd::ConstantHelper::getInstance().constantBuffer(ConstantDescriptor(data, length), dtype); } sd::ConstantDataBuffer* constantBuffer(sd::DataType dtype, sd::ConstantDescriptor *descriptor) { - return sd::ConstantHelper::getInstance()->constantBuffer(*descriptor, dtype); + return sd::ConstantHelper::getInstance().constantBuffer(*descriptor, dtype); } @@ -3435,6 +3440,13 @@ Nd4jLong getConstantDataBufferSizeOf(sd::ConstantDataBuffer* dbf) { return dbf->sizeOf(); } +Nd4jPointer getConstantShapeBufferPrimary(sd::ConstantShapeBuffer* dbf) { + return const_cast(dbf->primary()); +} + +Nd4jPointer getConstantShapeBufferSpecial(sd::ConstantShapeBuffer* dbf) { + return const_cast(dbf->special()); +} sd::graph::Context* createGraphContext(int nodeId) { return new sd::graph::Context(nodeId); @@ -3563,7 +3575,7 @@ Nd4jPointer shapeBufferForNumpy(Nd4jPointer npyArray) { } else { shapeBuffer = sd::ShapeBuilders::createShapeInfo(dtype, arr.fortranOrder ? 
'f' : 'c', shape);
        }
-        return (Nd4jPointer)(sd::ConstantShapeHelper::getInstance()->createFromExisting(shapeBuffer, true)); // TO DO: this can lead to unpleasant crash sometimes
+        return (Nd4jPointer)(sd::ConstantShapeHelper::getInstance().createFromExisting(shapeBuffer, true)); // TO DO: this can lead to unpleasant crash sometimes
     } catch (std::exception &e) {
         sd::LaunchContext::defaultContext()->errorReference()->setErrorCode(1);
         sd::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what());
@@ -3612,7 +3624,7 @@ const char* runFullBenchmarkSuit(bool printOut) {
 }

 Nd4jLong getCachedMemory(int deviceId) {
-    return sd::ConstantHelper::getInstance()->getCachedAmount(deviceId);
+    return sd::ConstantHelper::getInstance().getCachedAmount(deviceId);
 }

 sd::LaunchContext* defaultLaunchContext() {
diff --git a/libnd4j/include/legacy/impl/Environment.cpp b/libnd4j/include/legacy/impl/Environment.cpp
index b19a7147b..38d7e82ed 100644
--- a/libnd4j/include/legacy/impl/Environment.cpp
+++ b/libnd4j/include/legacy/impl/Environment.cpp
@@ -214,11 +214,9 @@ namespace sd {
         _maxDeviceMemory = maxBytes;
     }

-    Environment *Environment::getInstance() {
-        if (_instance == 0)
-            _instance = new Environment();
-
-        return _instance;
+    Environment& Environment::getInstance() {
+        static Environment instance;
+        return instance;
     }

     bool Environment::isVerbose() {
@@ -353,27 +351,27 @@ namespace sd {
     }

     void Environment::setGroupLimit(int group, Nd4jLong numBytes) {
-        sd::memory::MemoryCounter::getInstance()->setGroupLimit((sd::memory::MemoryType) group, numBytes);
+        sd::memory::MemoryCounter::getInstance().setGroupLimit((sd::memory::MemoryType) group, numBytes);
     }

     void Environment::setDeviceLimit(int deviceId, Nd4jLong numBytes) {
-        sd::memory::MemoryCounter::getInstance()->setDeviceLimit(deviceId, numBytes);
+        sd::memory::MemoryCounter::getInstance().setDeviceLimit(deviceId, numBytes);
     }

     Nd4jLong Environment::getGroupLimit(int group) {
-        return sd::memory::MemoryCounter::getInstance()->groupLimit((sd::memory::MemoryType) group);
+        return sd::memory::MemoryCounter::getInstance().groupLimit((sd::memory::MemoryType) group);
     }

     Nd4jLong Environment::getDeviceLimit(int deviceId) {
-        return sd::memory::MemoryCounter::getInstance()->deviceLimit(deviceId);
+        return sd::memory::MemoryCounter::getInstance().deviceLimit(deviceId);
     }

     Nd4jLong Environment::getGroupCounter(int group) {
-        return sd::memory::MemoryCounter::getInstance()->allocatedGroup((sd::memory::MemoryType) group);
+        return sd::memory::MemoryCounter::getInstance().allocatedGroup((sd::memory::MemoryType) group);
     }

     Nd4jLong Environment::getDeviceCounter(int deviceId) {
-        return sd::memory::MemoryCounter::getInstance()->allocatedDevice(deviceId);
+        return sd::memory::MemoryCounter::getInstance().allocatedDevice(deviceId);
     }

     uint64_t Environment::maxPrimaryMemory() {
@@ -383,7 +381,4 @@ namespace sd {
     uint64_t Environment::maxSpecialMemory() {
         return _maxTotalSpecialMemory.load();
     }
-
-    sd::Environment *sd::Environment::_instance = 0;
-
 }
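The Environment hunk above is the template for every `getInstance()` change in this patch: a hand-rolled pointer singleton becomes a function-local static returned by reference (the Meyers singleton idiom), which is why call sites everywhere switch from `->` to `.` and the out-of-line `_instance` definitions disappear. A minimal sketch of the before/after shape, using an illustrative class name rather than the real `sd::Environment`:

```cpp
#include <cstdio>

// Before: lazy heap allocation behind a raw pointer. The check-then-new
// sequence is not thread-safe, and the instance is never destroyed.
class SingletonOld {
    static SingletonOld* _instance;
public:
    static SingletonOld* getInstance() {
        if (_instance == nullptr)
            _instance = new SingletonOld();
        return _instance;
    }
};
SingletonOld* SingletonOld::_instance = nullptr;

// After: a function-local static. C++11 guarantees exactly-once,
// thread-safe construction, and the object is destroyed at process exit.
class SingletonNew {
public:
    static SingletonNew& getInstance() {
        static SingletonNew instance;
        return instance;
    }
};

int main() {
    SingletonOld::getInstance();                    // old call sites use ->
    SingletonNew& s = SingletonNew::getInstance();  // new call sites use .
    (void) s;
    std::printf("both singletons constructed\n");
    return 0;
}
```

Beyond the thread-safety guarantee, returning a reference makes the "never null" contract explicit, which is what lets the headers later in this patch drop their `static X* _INSTANCE` members entirely.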
diff --git a/libnd4j/include/loops/cpu/broadcasting.hpp b/libnd4j/include/loops/cpu/broadcasting.hpp
index c0f22313b..4c59de0ec 100644
--- a/libnd4j/include/loops/cpu/broadcasting.hpp
+++ b/libnd4j/include/loops/cpu/broadcasting.hpp
@@ -103,7 +103,7 @@ namespace broadcast {
         auto tadOffsets = xTadOffset;

         if (xTadShapeInfo == nullptr || tadOffsets == nullptr) {
-            auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(xShapeInfo, dimension, dimensionLength);
+            auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(xShapeInfo, dimension, dimensionLength);

             xTadShapeShapeInfo = tadPack.primaryShapeInfo();
             tadOffsets = tadPack.primaryOffsets();
@@ -396,7 +396,7 @@ namespace broadcast {
         auto tadOffsets = yTadOffset;

         if (yTadShapeInfo == nullptr || tadOffsets == nullptr) {
-            auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(yShapeInfo, dimension, dimensionLength);
+            auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(yShapeInfo, dimension, dimensionLength);

             yTadShapeShapeInfo = tadPack.primaryShapeInfo();
             tadOffsets = tadPack.primaryOffsets();
@@ -416,7 +416,7 @@ namespace broadcast {
         int tadsPerThread = tads / TAD_THRESHOLD;
         int threads = sd::math::nd4j_max(1, tadsPerThread);
-        threads = sd::math::nd4j_min(threads, sd::Environment::getInstance()->maxThreads());
+        threads = sd::math::nd4j_min(threads, sd::Environment::getInstance().maxThreads());

         auto yEws = shape::elementWiseStride(yTadShapeShapeInfo);
         auto xEws = shape::elementWiseStride(xShapeInfo);
diff --git a/libnd4j/include/loops/cpu/broadcasting_bool.hpp b/libnd4j/include/loops/cpu/broadcasting_bool.hpp
index 18c8705e2..a15935124 100644
--- a/libnd4j/include/loops/cpu/broadcasting_bool.hpp
+++ b/libnd4j/include/loops/cpu/broadcasting_bool.hpp
@@ -115,7 +115,7 @@ namespace broadcast {
         auto tadOffsets = xTadOffset;

         if (xTadShapeInfo == nullptr || tadOffsets == nullptr) {
-            auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(xShapeInfo, dimension, dimensionLength);
+            auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(xShapeInfo, dimension, dimensionLength);

             xTadShapeShapeInfo = const_cast(tadPack.primaryShapeInfo());
             tadOffsets = const_cast(tadPack.primaryOffsets());
@@ -135,7 +135,7 @@ namespace broadcast {
         int tadsPerThread = tads / TAD_THRESHOLD;
         int threads = sd::math::nd4j_max(1, tadsPerThread);
-        threads = sd::math::nd4j_min(threads, sd::Environment::getInstance()->maxThreads());
+        threads = sd::math::nd4j_min(threads, sd::Environment::getInstance().maxThreads());

         auto xEws = shape::elementWiseStride(xTadShapeShapeInfo);
         auto yEws = shape::elementWiseStride(yShapeInfo);
@@ -280,7 +280,7 @@ namespace broadcast {
         auto tadOffsets = yTadOffset;

         if (yTadShapeInfo == nullptr || tadOffsets == nullptr) {
-            auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(yShapeInfo, dimension, dimensionLength);
+            auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(yShapeInfo, dimension, dimensionLength);

             yTadShapeShapeInfo = const_cast(tadPack.primaryShapeInfo());
             tadOffsets = const_cast(tadPack.primaryOffsets());
@@ -300,7 +300,7 @@ namespace broadcast {
         int tadsPerThread = tads / TAD_THRESHOLD;
         int threads = sd::math::nd4j_max(1, tadsPerThread);
-        threads = sd::math::nd4j_min(threads, sd::Environment::getInstance()->maxThreads());
+        threads = sd::math::nd4j_min(threads, sd::Environment::getInstance().maxThreads());

         auto yEws = shape::elementWiseStride(yTadShapeShapeInfo);
         auto xEws = shape::elementWiseStride(xShapeInfo);
diff --git a/libnd4j/include/loops/cpu/broadcasting_int.hpp b/libnd4j/include/loops/cpu/broadcasting_int.hpp
index 7d0a995d6..39b251594 100644
--- a/libnd4j/include/loops/cpu/broadcasting_int.hpp
+++ b/libnd4j/include/loops/cpu/broadcasting_int.hpp
@@ -108,7 +108,7 @@ namespace functions {
         auto tadOffsets = xTadOffset;

         if (xTadShapeInfo == nullptr || tadOffsets == nullptr) {
-            auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(xShapeInfo, dimension, dimensionLength);
+            auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(xShapeInfo, dimension, dimensionLength);

             xTadShapeShapeInfo = const_cast(tadPack.primaryShapeInfo());
             tadOffsets = const_cast(tadPack.primaryOffsets());
@@ -128,7 +128,7 @@ namespace functions {
         int tadsPerThread = tads / TAD_THRESHOLD;
         int threads = sd::math::nd4j_max(1, tadsPerThread);
-        threads = sd::math::nd4j_min(threads, sd::Environment::getInstance()->maxThreads());
+        threads = sd::math::nd4j_min(threads, sd::Environment::getInstance().maxThreads());

         auto xEws = shape::elementWiseStride(xTadShapeShapeInfo);
         auto yEws = shape::elementWiseStride(yShapeInfo);
@@ -271,7 +271,7 @@ namespace functions {
         auto tadOffsets = yTadOffset;

         if (yTadShapeInfo == nullptr || tadOffsets == nullptr) {
-            auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(yShapeInfo, dimension, dimensionLength);
+            auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(yShapeInfo, dimension, dimensionLength);

             yTadShapeShapeInfo = const_cast(tadPack.primaryShapeInfo());
             tadOffsets = const_cast(tadPack.primaryOffsets());
@@ -291,7 +291,7 @@ namespace functions {
         int tadsPerThread = tads / TAD_THRESHOLD;
         int threads = sd::math::nd4j_max(1, tadsPerThread);
-        threads = sd::math::nd4j_min(threads, sd::Environment::getInstance()->maxThreads());
+        threads = sd::math::nd4j_min(threads, sd::Environment::getInstance().maxThreads());

         auto yEws = shape::elementWiseStride(yTadShapeShapeInfo);
         auto xEws = shape::elementWiseStride(xShapeInfo);
diff --git a/libnd4j/include/loops/cpu/indexreduce.hpp b/libnd4j/include/loops/cpu/indexreduce.hpp
index 9373e3feb..d46dd89d7 100644
--- a/libnd4j/include/loops/cpu/indexreduce.hpp
+++ b/libnd4j/include/loops/cpu/indexreduce.hpp
@@ -64,7 +64,7 @@ Nd4jLong IndexReduce::execScalar(const void *vx, const Nd4jLong *xShapeInf
     uint xShapeInfoCast[MAX_RANK];
     bool canCastX = sd::DataTypeUtils::castShapeInfo(xShapeInfo, xShapeInfoCast);

-    int maxThreads = sd::math::nd4j_min(64, sd::Environment::getInstance()->maxThreads());
+    int maxThreads = sd::math::nd4j_min(64, sd::Environment::getInstance().maxThreads());
     IndexValue intermediatery[64];
     for (int e = 0; e < maxThreads; e++)
         intermediatery[e].index = -1;
@@ -142,7 +142,7 @@ void IndexReduce::exec(const void *vx, const Nd4jLong *xShapeInfo,
         if (dimensionLength < 1)
             return;

-        auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(xShapeInfo, dimension, dimensionLength);
+        auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(xShapeInfo, dimension, dimensionLength);

         tadOnlyShapeInfo = tadPack.primaryShapeInfo();
         tadOffsets = tadPack.primaryOffsets();
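The `execScalar` hunk above also shows the threading pattern these legacy loops share: cap the worker count at 64, give each worker a private slot in a fixed-size scratch array (`intermediatery[64]` here, `Z intermediate[64]` in the reduce loops that follow), and merge the slots sequentially at the end. A runnable sketch of that pattern for a scalar argmax; plain loops stand in for libnd4j's thread pool, and the hypothetical `maxThreadsHint` parameter stands in for `sd::Environment::getInstance().maxThreads()`:

```cpp
#include <algorithm>
#include <cstdint>

// Each "thread" t folds elements t, t + maxThreads, ... into its own slot;
// slots are merged once at the end, so the hot loop needs no synchronization.
int64_t argMaxScalar(const float* x, int64_t length, int maxThreadsHint) {
    const int maxThreads = std::min(64, maxThreadsHint);
    int64_t bestIndex[64];
    float bestValue[64] = {0.0f};
    for (int t = 0; t < maxThreads; t++)
        bestIndex[t] = -1;  // -1 marks "no candidate yet", as in the hunk above

    for (int t = 0; t < maxThreads; t++) {            // parallel in the real code
        for (int64_t i = t; i < length; i += maxThreads) {
            if (bestIndex[t] < 0 || x[i] > bestValue[t]) {
                bestValue[t] = x[i];
                bestIndex[t] = i;
            }
        }
    }

    // sequential merge of at most 64 per-thread candidates
    int64_t winner = -1;
    float winnerValue = 0.0f;
    for (int t = 0; t < maxThreads; t++) {
        if (bestIndex[t] >= 0 && (winner < 0 || bestValue[t] > winnerValue)) {
            winner = bestIndex[t];
            winnerValue = bestValue[t];
        }
    }
    return winner;  // -1 for empty input
}
```

Keeping one slot per worker instead of a shared accumulator keeps the hot path lock-free, and the merge is O(maxThreads) and runs once. Tie-breaking in this sketch is first-candidate-wins and may not match libnd4j's exact index-reduction semantics.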
diff --git a/libnd4j/include/loops/cpu/reduce/reduce_bool.cpp b/libnd4j/include/loops/cpu/reduce/reduce_bool.cpp
index 708f3c0d7..94e156705 100644
--- a/libnd4j/include/loops/cpu/reduce/reduce_bool.cpp
+++ b/libnd4j/include/loops/cpu/reduce/reduce_bool.cpp
@@ -166,7 +166,7 @@ namespace functions {
             if (dimensionLength < 1)
                 return;

-            auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(xShapeInfo, dimension, dimensionLength);
+            auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(xShapeInfo, dimension, dimensionLength);
             tadOnlyShapeInfo = tadPack.primaryShapeInfo();
             tadOffsets = tadPack.primaryOffsets();
         }
@@ -193,7 +193,7 @@ namespace functions {
         Z _CUDA_H ReduceBoolFunction::execScalar(const void *vx, Nd4jLong xEws, Nd4jLong length, void *vextraParams) {
             auto x = reinterpret_cast(vx);
             auto extraParams = reinterpret_cast(vextraParams);
-            int maxThreads = sd::math::nd4j_min(64, sd::Environment::getInstance()->maxThreads());
+            int maxThreads = sd::math::nd4j_min(64, sd::Environment::getInstance().maxThreads());
             Z intermediate[64];

             PRAGMA_OMP_SIMD
diff --git a/libnd4j/include/loops/cpu/reduce/reduce_float.hpp b/libnd4j/include/loops/cpu/reduce/reduce_float.hpp
index 1795dbc3d..6be93b1c4 100644
--- a/libnd4j/include/loops/cpu/reduce/reduce_float.hpp
+++ b/libnd4j/include/loops/cpu/reduce/reduce_float.hpp
@@ -70,7 +70,7 @@ namespace functions {
             auto startingValue = OpType::startingValue(x);
             uint xShapeInfoCast[MAX_RANK];
             const bool canCastX = sd::DataTypeUtils::castShapeInfo(xShapeInfo, xShapeInfoCast);
-            int maxThreads = sd::math::nd4j_min(64, sd::Environment::getInstance()->maxThreads());
+            int maxThreads = sd::math::nd4j_min(64, sd::Environment::getInstance().maxThreads());
             Z intermediate[64];

             PRAGMA_OMP_SIMD
@@ -200,7 +200,7 @@ namespace functions {
             if (dimensionLength < 0)
                 return;

-            auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(xShapeInfo, dimension, dimensionLength);
+            auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(xShapeInfo, dimension, dimensionLength);
             tadOnlyShapeInfo = tadPack.primaryShapeInfo();
             tadOffsets = tadPack.primaryOffsets();
         }
@@ -229,7 +229,7 @@ namespace functions {
             auto x = reinterpret_cast(vx);
             auto extraParams = reinterpret_cast(vextraParams);

-            int maxThreads = sd::math::nd4j_min(64, sd::Environment::getInstance()->maxThreads());
+            int maxThreads = sd::math::nd4j_min(64, sd::Environment::getInstance().maxThreads());
             Z intermediate[64];

             PRAGMA_OMP_SIMD
diff --git a/libnd4j/include/loops/cpu/reduce/reduce_long.cpp b/libnd4j/include/loops/cpu/reduce/reduce_long.cpp
index c1fd4385c..a4fae3228 100644
--- a/libnd4j/include/loops/cpu/reduce/reduce_long.cpp
+++ b/libnd4j/include/loops/cpu/reduce/reduce_long.cpp
@@ -65,7 +65,7 @@ namespace functions {
             auto startingValue = OpType::startingValue(x);
             uint xShapeInfoCast[MAX_RANK];
             const bool canCastX = sd::DataTypeUtils::castShapeInfo(xShapeInfo, xShapeInfoCast);
-            int maxThreads = sd::math::nd4j_min(64, sd::Environment::getInstance()->maxThreads());
+            int maxThreads = sd::math::nd4j_min(64, sd::Environment::getInstance().maxThreads());
             Z intermediate[64];

             PRAGMA_OMP_SIMD
@@ -187,7 +187,7 @@ namespace functions {
             if (dimensionLength < 1)
                 return;

-            auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(xShapeInfo, dimension, dimensionLength);
+            auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(xShapeInfo, dimension, dimensionLength);
             tadOnlyShapeInfo = tadPack.primaryShapeInfo();
             tadOffsets = tadPack.primaryOffsets();
         }
@@ -215,7 +215,7 @@ namespace functions {
             auto x = reinterpret_cast(vx);
             auto extraParams = reinterpret_cast(vextraParams);

-            int maxThreads = sd::math::nd4j_min(64, sd::Environment::getInstance()->maxThreads());
+            int maxThreads = sd::math::nd4j_min(64, sd::Environment::getInstance().maxThreads());
             Z intermediate[64];

             PRAGMA_OMP_SIMD
diff --git a/libnd4j/include/loops/cpu/reduce/reduce_same.cpp b/libnd4j/include/loops/cpu/reduce/reduce_same.cpp
index 2516767b6..10607fb6d 100644
--- a/libnd4j/include/loops/cpu/reduce/reduce_same.cpp
+++ b/libnd4j/include/loops/cpu/reduce/reduce_same.cpp
@@ -67,7 +67,7 @@ namespace functions {
             auto startingValue = OpType::startingValue(x);
             uint xShapeInfoCast[MAX_RANK];
             const bool canCastX = sd::DataTypeUtils::castShapeInfo(xShapeInfo, xShapeInfoCast);
-            int maxThreads = sd::math::nd4j_min(64, sd::Environment::getInstance()->maxThreads());
+
int maxThreads = sd::math::nd4j_min(64, sd::Environment::getInstance().maxThreads()); X intermediate[64]; PRAGMA_OMP_SIMD @@ -196,7 +196,7 @@ namespace functions { if (dimensionLength < 1) return; - auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(xShapeInfo, dimension, dimensionLength); + auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(xShapeInfo, dimension, dimensionLength); tadOnlyShapeInfo = tadPack.primaryShapeInfo(); tadOffsets = tadPack.primaryOffsets(); } @@ -224,7 +224,7 @@ namespace functions { auto x = reinterpret_cast(vx); auto extraParams = reinterpret_cast(vextraParams); - int maxThreads = sd::math::nd4j_min(64, sd::Environment::getInstance()->maxThreads()); + int maxThreads = sd::math::nd4j_min(64, sd::Environment::getInstance().maxThreads()); X intermediate[64]; PRAGMA_OMP_SIMD diff --git a/libnd4j/include/loops/cpu/reduce3.hpp b/libnd4j/include/loops/cpu/reduce3.hpp index 3a830377e..a19c7c1a1 100644 --- a/libnd4j/include/loops/cpu/reduce3.hpp +++ b/libnd4j/include/loops/cpu/reduce3.hpp @@ -65,7 +65,7 @@ void Reduce3::execScalar(const void *vx, const Nd4jLong *xShapeInfo, const bool canCastX = sd::DataTypeUtils::castShapeInfo(xShapeInfo, xShapeInfoCast); Z startingVal = OpType::startingValue(x); - int maxThreads = sd::math::nd4j_min(64, sd::Environment::getInstance()->maxThreads()); + int maxThreads = sd::math::nd4j_min(64, sd::Environment::getInstance().maxThreads()); Z intermediate[64]; Z extraParamsLocal[3 * 64]; diff --git a/libnd4j/include/loops/cpu/scalar.hpp b/libnd4j/include/loops/cpu/scalar.hpp index 236ba7e25..f539f387f 100644 --- a/libnd4j/include/loops/cpu/scalar.hpp +++ b/libnd4j/include/loops/cpu/scalar.hpp @@ -65,7 +65,7 @@ void ScalarTransform::transform(const void *vx, const Nd4jLong *xShapeI return; } - int num_threads = sd::math::nd4j_min(numTads, sd::Environment::getInstance()->maxThreads()); + int num_threads = sd::math::nd4j_min(numTads, sd::Environment::getInstance().maxThreads()); if (kindOfLoop == sd::LoopKind::EWS1) { for (auto r = start; r < stop; r++) { diff --git a/libnd4j/include/loops/cpu/scalar_bool.cpp b/libnd4j/include/loops/cpu/scalar_bool.cpp index 72513c10d..63182bdc3 100644 --- a/libnd4j/include/loops/cpu/scalar_bool.cpp +++ b/libnd4j/include/loops/cpu/scalar_bool.cpp @@ -66,7 +66,7 @@ namespace functions { return; } - int num_threads = sd::math::nd4j_min(numTads, sd::Environment::getInstance()->maxThreads()); + int num_threads = sd::math::nd4j_min(numTads, sd::Environment::getInstance().maxThreads()); if (kindOfLoop == sd::LoopKind::EWS1) { for (auto r = start; r < stop; r++) { diff --git a/libnd4j/include/loops/cpu/scalar_int.cpp b/libnd4j/include/loops/cpu/scalar_int.cpp index 1a8f5bcca..adf53e7f6 100644 --- a/libnd4j/include/loops/cpu/scalar_int.cpp +++ b/libnd4j/include/loops/cpu/scalar_int.cpp @@ -66,7 +66,7 @@ namespace functions { return; } - int num_threads = sd::math::nd4j_min(numTads, sd::Environment::getInstance()->maxThreads()); + int num_threads = sd::math::nd4j_min(numTads, sd::Environment::getInstance().maxThreads()); if (kindOfLoop == sd::LoopKind::EWS1) { for (auto r = start; r < stop; r++) { diff --git a/libnd4j/include/loops/cpu/summarystatsreduce.cpp b/libnd4j/include/loops/cpu/summarystatsreduce.cpp index 2d53671d2..63993d853 100644 --- a/libnd4j/include/loops/cpu/summarystatsreduce.cpp +++ b/libnd4j/include/loops/cpu/summarystatsreduce.cpp @@ -127,7 +127,7 @@ namespace functions { if (dimensionLength < 1) return; - auto tadPack = 
sd::ConstantTadHelper::getInstance()->tadForDimensions(xShapeInfo, dimension, dimensionLength); + auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(xShapeInfo, dimension, dimensionLength); //pre squeezed: this is for keeping the pointer to the original //shape information for tad offset diff --git a/libnd4j/include/loops/cuda/legacy/transform.legacy b/libnd4j/include/loops/cuda/legacy/transform.legacy index e7f76751a..88a4ceb16 100644 --- a/libnd4j/include/loops/cuda/legacy/transform.legacy +++ b/libnd4j/include/loops/cuda/legacy/transform.legacy @@ -173,7 +173,7 @@ namespace functions { DISPATCH_SIMPLE(transformShaped, float16, PARAMS(x, xShape, xRank, extraParams, z, zShape, zRank, allocationPointer, reductionPointer, tadShapeInfo, tadOffsets), OPS_A(TRANSFORM_OPS)) - if (sd::Environment::getInstance()->isDebug()) + if (sd::Environment::getInstance().isDebug()) checkCudaErrors(cudaStreamSynchronize(*stream)); } diff --git a/libnd4j/include/loops/cuda/scalar.chpp b/libnd4j/include/loops/cuda/scalar.chpp index b412e4957..93b76f910 100644 --- a/libnd4j/include/loops/cuda/scalar.chpp +++ b/libnd4j/include/loops/cuda/scalar.chpp @@ -152,7 +152,7 @@ void _CUDA_H ScalarTransform::intermediateAlongDimension(dim3& launchDims template void ScalarTransform::executeCudaShaped(dim3& launchDims, cudaStream_t *stream, int opNum, void const* vx, Nd4jLong const* xShapeInfo, Nd4jLong const* hxShapeInfo, void *vz, Nd4jLong const* zShapeInfo, Nd4jLong const* hzShapeInfo, void const* vscalar, void *vextraParams) { - if (sd::Environment::getInstance()->isDebugAndVerbose()) + if (sd::Environment::getInstance().isDebugAndVerbose()) printf("H14 opNum:[%i]\n", opNum); DISPATCH_BY_OPNUM_TTT(intermediateShaped, PARAMS(launchDims, stream, vx, xShapeInfo, hxShapeInfo, vz, zShapeInfo, hzShapeInfo, vscalar, vextraParams, nullptr), SCALAR_OPS); diff --git a/libnd4j/include/loops/cuda/scalar_bool.cu b/libnd4j/include/loops/cuda/scalar_bool.cu index e23560778..0976e60ad 100644 --- a/libnd4j/include/loops/cuda/scalar_bool.cu +++ b/libnd4j/include/loops/cuda/scalar_bool.cu @@ -218,7 +218,7 @@ void ScalarBoolTransform::executeCudaShaped(dim3& launchDims, cudaStream_t void const* vscalar, void const* vextraParams) { - if (sd::Environment::getInstance()->isDebugAndVerbose()) + if (sd::Environment::getInstance().isDebugAndVerbose()) printf("H14 opNum:[%i]\n", opNum); DISPATCH_BY_OPNUM_TT(intermediateShaped, PARAMS(launchDims, stream, vx, xShapeInfo, vz, zShapeInfo, vscalar, const_cast(vextraParams), nullptr), SCALAR_BOOL_OPS); diff --git a/libnd4j/include/loops/cuda/scalar_int.cu b/libnd4j/include/loops/cuda/scalar_int.cu index 2ca0ade26..b8cac0846 100644 --- a/libnd4j/include/loops/cuda/scalar_int.cu +++ b/libnd4j/include/loops/cuda/scalar_int.cu @@ -216,7 +216,7 @@ void ScalarIntTransform::executeCudaShaped(dim3& launchDims, cudaStream_t *st void const* vscalar, void* vextraParams) { - if (sd::Environment::getInstance()->isDebugAndVerbose()) + if (sd::Environment::getInstance().isDebugAndVerbose()) printf("H14 opNum:[%i]\n", opNum); DISPATCH_BY_OPNUM_T(intermediateShaped, PARAMS(launchDims, stream, vx, xShapeInfo, vz, zShapeInfo, vscalar, vextraParams, nullptr), SCALAR_INT_OPS); diff --git a/libnd4j/include/loops/cuda/summarystatsreduce.cu b/libnd4j/include/loops/cuda/summarystatsreduce.cu index 3d94b9097..521ac5b06 100644 --- a/libnd4j/include/loops/cuda/summarystatsreduce.cu +++ b/libnd4j/include/loops/cuda/summarystatsreduce.cu @@ -344,7 +344,7 @@ void _CUDA_G summaryStatsReduceT(int op, void const* dx, 
Nd4jLong const* xShapeI
     auto z = reinterpret_cast(vz);
     auto reductionPointerA = reinterpret_cast(reductionBuffer);

-    if (sd::Environment::getInstance()->isDebugAndVerbose())
+    if (sd::Environment::getInstance().isDebugAndVerbose())
         printf("D16 opNum:[%i]\n", opNum);

     summaryStatsReduceT<<>>(
@@ -369,7 +369,7 @@ void _CUDA_G summaryStatsReduceT(int op, void const* dx, Nd4jLong const* xShapeI
         auto z = static_cast(vz);
         auto extraParams = static_cast(vextraParams);

-        if (sd::Environment::getInstance()->isDebugAndVerbose())
+        if (sd::Environment::getInstance().isDebugAndVerbose())
             printf("F17 opNum:[%i]\n", opNum);

         auto reductionPointerA = reinterpret_cast(reductionBuffer);
@@ -396,7 +396,7 @@ void _CUDA_G summaryStatsReduceT(int op, void const* dx, Nd4jLong const* xShapeI
         auto z = static_cast(vz);
         auto extraParams = static_cast(vextraParams);

-        if (sd::Environment::getInstance()->isDebugAndVerbose())
+        if (sd::Environment::getInstance().isDebugAndVerbose())
             printf("D18 opNum:[%i]\n", opNum);

         summaryStatsReduceT<<>>(
diff --git a/libnd4j/include/memory/MemoryCounter.h b/libnd4j/include/memory/MemoryCounter.h
index 91aaeecff..160c24379 100644
--- a/libnd4j/include/memory/MemoryCounter.h
+++ b/libnd4j/include/memory/MemoryCounter.h
@@ -34,8 +34,6 @@ namespace sd {
          */
         class ND4J_EXPORT MemoryCounter {
         private:
-            static MemoryCounter* _INSTANCE;
-
             // used for synchronization
             std::mutex _locker;
@@ -56,7 +54,7 @@ namespace sd {
             ~MemoryCounter() = default;
         public:
-            static MemoryCounter *getInstance();
+            static MemoryCounter & getInstance();

             /**
              * This method checks if allocation of numBytes won't break through per-group or per-device limit
diff --git a/libnd4j/include/memory/MemoryRegistrator.h b/libnd4j/include/memory/MemoryRegistrator.h
index ad1b0333a..70afafb42 100644
--- a/libnd4j/include/memory/MemoryRegistrator.h
+++ b/libnd4j/include/memory/MemoryRegistrator.h
@@ -32,7 +32,6 @@ namespace sd {
     namespace memory {
         class ND4J_EXPORT MemoryRegistrator {
         protected:
-            static MemoryRegistrator* _INSTANCE;
             Workspace* _workspace;
             MAP_IMPL _footprint;
             std::mutex _lock;
@@ -40,7 +39,7 @@ namespace sd {
             MemoryRegistrator();
             ~MemoryRegistrator() = default;
         public:
-            static MemoryRegistrator* getInstance();
+            static MemoryRegistrator& getInstance();
             bool hasWorkspaceAttached();
             Workspace* getWorkspace();
             void attachWorkspace(Workspace* workspace);
diff --git a/libnd4j/include/memory/MemoryTracker.h b/libnd4j/include/memory/MemoryTracker.h
index 38bb926ca..dd99905bd 100644
--- a/libnd4j/include/memory/MemoryTracker.h
+++ b/libnd4j/include/memory/MemoryTracker.h
@@ -35,7 +35,6 @@ namespace sd {
          */
         class ND4J_EXPORT MemoryTracker {
         private:
-            static MemoryTracker* _INSTANCE;
             std::map _allocations;
             std::map _released;
             std::mutex _locker;
@@ -43,7 +42,7 @@ namespace sd {
             MemoryTracker();
             ~MemoryTracker() = default;
         public:
-            static MemoryTracker* getInstance();
+            static MemoryTracker& getInstance();

             void countIn(MemoryType type, Nd4jPointer ptr, Nd4jLong numBytes);
             void countOut(Nd4jPointer ptr);
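The three memory headers above all get the same treatment: the `_INSTANCE` member goes away and `getInstance()` returns a reference. `MemoryCounter` is the interesting one, since it layers per-group limits over running counters (initialized from `Environment` in the implementation hunk that follows). A self-contained sketch of that counter-plus-limit idea; the class and method names are illustrative, not the real `sd::memory::MemoryCounter` API:

```cpp
#include <cstdint>
#include <map>
#include <mutex>

enum class MemoryKind { HOST, DEVICE };

// Mutex-guarded running counters with a per-group ceiling, in the spirit
// of the MemoryCounter declaration above (illustrative, not the real class).
class GroupMemoryCounter {
    std::mutex _locker;
    std::map<MemoryKind, int64_t> _counters;
    std::map<MemoryKind, int64_t> _limits;
public:
    GroupMemoryCounter(int64_t hostLimit, int64_t deviceLimit) {
        _limits[MemoryKind::HOST] = hostLimit;
        _limits[MemoryKind::DEVICE] = deviceLimit;
        _counters[MemoryKind::HOST] = 0;
        _counters[MemoryKind::DEVICE] = 0;
    }

    // true if numBytes still fits under the group limit; false means the
    // caller should refuse (or spill) the allocation
    bool tryCountIn(MemoryKind group, int64_t numBytes) {
        std::lock_guard<std::mutex> lock(_locker);
        if (_counters[group] + numBytes > _limits[group])
            return false;
        _counters[group] += numBytes;
        return true;
    }

    void countOut(MemoryKind group, int64_t numBytes) {
        std::lock_guard<std::mutex> lock(_locker);
        _counters[group] -= numBytes;
    }
};
```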
diff --git a/libnd4j/include/memory/impl/MemoryCounter.cpp b/libnd4j/include/memory/impl/MemoryCounter.cpp
index 96be34681..287b19897 100644
--- a/libnd4j/include/memory/impl/MemoryCounter.cpp
+++ b/libnd4j/include/memory/impl/MemoryCounter.cpp
@@ -36,19 +36,17 @@ namespace sd {
            }

            // setting initial values for limits
-           _groupLimits[sd::memory::MemoryType::HOST] = sd::Environment::getInstance()->maxPrimaryMemory();
-           _groupLimits[sd::memory::MemoryType::DEVICE] = sd::Environment::getInstance()->maxSpecialMemory();
+           _groupLimits[sd::memory::MemoryType::HOST] = sd::Environment::getInstance().maxPrimaryMemory();
+           _groupLimits[sd::memory::MemoryType::DEVICE] = sd::Environment::getInstance().maxSpecialMemory();

            // setting initial counter values
            _groupCounters[sd::memory::MemoryType::HOST] = 0;
            _groupCounters[sd::memory::MemoryType::DEVICE] = 0;
        }

-       MemoryCounter* MemoryCounter::getInstance() {
-           if (_INSTANCE == 0)
-               _INSTANCE = new MemoryCounter();
-
-           return _INSTANCE;
+       MemoryCounter& MemoryCounter::getInstance() {
+           static MemoryCounter instance;
+           return instance;
        }

        void MemoryCounter::countIn(int deviceId, Nd4jLong numBytes) {
@@ -127,7 +125,5 @@ namespace sd {
            std::lock_guard lock(_locker);
            return _groupLimits[group];
        }
-
-       MemoryCounter* MemoryCounter::_INSTANCE = 0;
    }
}
\ No newline at end of file
diff --git a/libnd4j/include/memory/impl/MemoryRegistrator.cpp b/libnd4j/include/memory/impl/MemoryRegistrator.cpp
index 31b4b0eae..0ac2bf0cb 100644
--- a/libnd4j/include/memory/impl/MemoryRegistrator.cpp
+++ b/libnd4j/include/memory/impl/MemoryRegistrator.cpp
@@ -27,11 +27,9 @@ namespace sd {
            _workspace = nullptr;
        };

-       MemoryRegistrator* MemoryRegistrator::getInstance() {
-           if (_INSTANCE == 0)
-               _INSTANCE = new MemoryRegistrator();
-
-           return _INSTANCE;
+       MemoryRegistrator& MemoryRegistrator::getInstance() {
+           static MemoryRegistrator instance;
+           return instance;
        }

        bool MemoryRegistrator::hasWorkspaceAttached() {
@@ -83,8 +81,5 @@ namespace sd {

            return result;
        }
-
-       MemoryRegistrator* MemoryRegistrator::_INSTANCE = 0;
-
    }
}
\ No newline at end of file
diff --git a/libnd4j/include/memory/impl/MemoryTracker.cpp b/libnd4j/include/memory/impl/MemoryTracker.cpp
index 5ebb4fd16..cf2b975cf 100644
--- a/libnd4j/include/memory/impl/MemoryTracker.cpp
+++ b/libnd4j/include/memory/impl/MemoryTracker.cpp
@@ -40,11 +40,9 @@ namespace sd {
        //
        }

-       MemoryTracker* MemoryTracker::getInstance() {
-           if (_INSTANCE == 0)
-               _INSTANCE = new MemoryTracker();
-
-           return _INSTANCE;
+       MemoryTracker& MemoryTracker::getInstance() {
+           static MemoryTracker instance;
+           return instance;
        }

#if defined(__GNUC__) && !defined(__MINGW64__) && !defined(SD_ANDROID_BUILD) && !defined(SD_IOS_BUILD) && !defined(SD_APPLE_BUILD)
@@ -99,7 +97,7 @@ namespace sd {

    void MemoryTracker::countIn(MemoryType type, Nd4jPointer ptr, Nd4jLong numBytes) {
#if defined(__GNUC__) && !defined(__MINGW64__) && !defined(SD_ANDROID_BUILD) && !defined(SD_IOS_BUILD) && !defined(SD_APPLE_BUILD)
-       if (Environment::getInstance()->isDetectingLeaks()) {
+       if (Environment::getInstance().isDetectingLeaks()) {
           auto lptr = reinterpret_cast(ptr);

           _locker.lock();
@@ -133,7 +131,7 @@ namespace sd {

    void MemoryTracker::countOut(Nd4jPointer ptr) {
#if defined(__GNUC__) && !defined(__MINGW64__) && !defined(SD_ANDROID_BUILD) && !defined(SD_IOS_BUILD) && !defined(SD_APPLE_BUILD)
-       if (Environment::getInstance()->isDetectingLeaks()) {
+       if (Environment::getInstance().isDetectingLeaks()) {
           auto lptr = reinterpret_cast(ptr);

           _locker.lock();
@@ -172,7 +170,5 @@ namespace sd {
           _allocations.clear();
           _released.clear();
        }
-
-       MemoryTracker* MemoryTracker::_INSTANCE = 0;
    }
}
diff --git a/libnd4j/include/ops/declarable/OpRegistrator.h b/libnd4j/include/ops/declarable/OpRegistrator.h
index 3a9fb3df6..a4967d877 100644
--- a/libnd4j/include/ops/declarable/OpRegistrator.h
+++ b/libnd4j/include/ops/declarable/OpRegistrator.h
@@ -97,7 +97,7 @@ namespace sd {
        public:
            ~OpRegistrator();

-           static OpRegistrator* getInstance();
+           static OpRegistrator& getInstance();

            static void exitHandler();
            static void sigIntHandler(int sig);
diff --git a/libnd4j/include/ops/declarable/generic/bitwise/bits_hamming_distance.cpp b/libnd4j/include/ops/declarable/generic/bitwise/bits_hamming_distance.cpp
index 65f81b428..693ebf7c6 100644
--- a/libnd4j/include/ops/declarable/generic/bitwise/bits_hamming_distance.cpp
+++ b/libnd4j/include/ops/declarable/generic/bitwise/bits_hamming_distance.cpp
@@ -41,7 +41,7 @@ namespace sd {
        }

        DECLARE_SHAPE_FN(bits_hamming_distance) {
-           return SHAPELIST(ConstantShapeHelper::getInstance()->scalarShapeInfo(sd::DataType::INT64));
+           return SHAPELIST(ConstantShapeHelper::getInstance().scalarShapeInfo(sd::DataType::INT64));
        }

        DECLARE_TYPES(bits_hamming_distance) {
diff --git a/libnd4j/include/ops/declarable/generic/blas/batched_gemm.cpp b/libnd4j/include/ops/declarable/generic/blas/batched_gemm.cpp
index 194af35b8..79227e2ba 100644
--- a/libnd4j/include/ops/declarable/generic/blas/batched_gemm.cpp
+++ b/libnd4j/include/ops/declarable/generic/blas/batched_gemm.cpp
@@ -110,7 +110,7 @@ DECLARE_SHAPE_FN(batched_gemm) {
    auto shapeList = SHAPELIST();

    if (!(M > 0 && N > 0 && K > 0 && ldA > 0 && ldB > 0 && ldC > 0 && batchSize > 0)) {
-       shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inputShape->at(0)), 'c', {1, 1}));
+       shapeList->push_back(ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inputShape->at(0)), 'c', {1, 1}));
        return shapeList;
    }
@@ -118,7 +118,7 @@ DECLARE_SHAPE_FN(batched_gemm) {
    std::vector shape({M, N});

    for (int e = 0; e < batchSize; e++) {
-       auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inputShape->at(0)), 'f', shape);
+       auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inputShape->at(0)), 'f', shape);
        shapeList->push_back(newShape);
    }
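Both shape functions above illustrate the other constant running through this patch: `DECLARE_SHAPE_FN` bodies never build shape-info buffers by hand. They ask `ConstantShapeHelper` (now a reference-returning singleton) for a cached buffer and push the result into a `SHAPELIST` without any matching free. A sketch of why such a cache can hand out raw pointers safely; `ShapeCache` is an illustrative stand-in, not the real `ConstantShapeHelper`:

```cpp
#include <cstdint>
#include <map>
#include <mutex>
#include <vector>

// Deduplicating cache keyed by the shape itself. Entries are never evicted,
// so the pointers it returns stay valid for the life of the process and
// callers can share them freely without ownership bookkeeping.
class ShapeCache {
    std::mutex _lock;
    std::map<std::vector<int64_t>, std::vector<int64_t>> _cache;
    ShapeCache() = default;
public:
    static ShapeCache& getInstance() {
        static ShapeCache instance;  // same Meyers-singleton shape as above
        return instance;
    }

    const int64_t* bufferForShape(const std::vector<int64_t>& shape) {
        std::lock_guard<std::mutex> guard(_lock);
        auto it = _cache.find(shape);
        if (it == _cache.end())
            it = _cache.emplace(shape, shape).first;  // first use: copy in
        return it->second.data();
    }
};
```

That ownership model is also why hunks later in this section (eye, trace, svd) can `RELEASE` their temporary shape buffers immediately after handing a descriptor to the helper: the pointer that comes back belongs to the cache, not to the temporary.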
diff --git a/libnd4j/include/ops/declarable/generic/blas/matmul.cpp b/libnd4j/include/ops/declarable/generic/blas/matmul.cpp
index c9d8c9476..f8ee952a8 100644
--- a/libnd4j/include/ops/declarable/generic/blas/matmul.cpp
+++ b/libnd4j/include/ops/declarable/generic/blas/matmul.cpp
@@ -131,7 +131,7 @@ DECLARE_SHAPE_FN(matmul) {
    // we just pick the higher data type out of X and Y
    auto dtypeZ = dtypeX > dtypeY ? dtypeX : dtypeY;

-   auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(dtypeZ, zOrder, zShapeOnly);
+   auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(dtypeZ, zOrder, zShapeOnly);

    return SHAPELIST(newShape);
 }
diff --git a/libnd4j/include/ops/declarable/generic/blas/tensormmul.cpp b/libnd4j/include/ops/declarable/generic/blas/tensormmul.cpp
index 889bd4957..0ae64b8cd 100644
--- a/libnd4j/include/ops/declarable/generic/blas/tensormmul.cpp
+++ b/libnd4j/include/ops/declarable/generic/blas/tensormmul.cpp
@@ -80,7 +80,7 @@ DECLARE_SHAPE_FN(tensormmul) {
    std::vector shapeAt, shapeBt;

    auto outShape = sd::ShapeUtils::evalShapeForTensorDot(aShapeInfo, bShapeInfo, axes_0, axes_1, permutAt, permutBt, shapeAt, shapeBt);

-   return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(ArrayOptions::dataType(aShapeInfo), 'c', outShape)));
+   return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(ArrayOptions::dataType(aShapeInfo), 'c', outShape)));
 }

////////////////////////////////////////////////////////////////////////
diff --git a/libnd4j/include/ops/declarable/generic/boolean/choose.cpp b/libnd4j/include/ops/declarable/generic/boolean/choose.cpp
index e5d67baf1..a28d8230b 100644
--- a/libnd4j/include/ops/declarable/generic/boolean/choose.cpp
+++ b/libnd4j/include/ops/declarable/generic/boolean/choose.cpp
@@ -86,9 +86,9 @@ namespace sd {
                helpers::chooseFunctorScalar(block.launchContext(), first, scalar, mode, nullptr, &numResults);
            }

-           auto newShape = ConstantShapeHelper::getInstance()->vectorShapeInfo(numResults.e(0), ArrayOptions::dataType(inputShape->at(0)));
+           auto newShape = ConstantShapeHelper::getInstance().vectorShapeInfo(numResults.e(0), ArrayOptions::dataType(inputShape->at(0)));

-           auto shapeScalar = ConstantShapeHelper::getInstance()->scalarShapeInfo(sd::DataType::INT64);
+           auto shapeScalar = ConstantShapeHelper::getInstance().scalarShapeInfo(sd::DataType::INT64);

            return SHAPELIST(newShape, shapeScalar);
        }
diff --git a/libnd4j/include/ops/declarable/generic/boolean/where.cpp b/libnd4j/include/ops/declarable/generic/boolean/where.cpp
index c26179179..a72de2ee0 100644
--- a/libnd4j/include/ops/declarable/generic/boolean/where.cpp
+++ b/libnd4j/include/ops/declarable/generic/boolean/where.cpp
@@ -117,7 +117,7 @@ namespace sd {
                theNewShape = CONSTANT(newShape);
            }
            else {
-               theNewShape = ConstantShapeHelper::getInstance()->emptyShapeInfo(sd::DataType::INT64);
+               theNewShape = ConstantShapeHelper::getInstance().emptyShapeInfo(sd::DataType::INT64);
            }

            return SHAPELIST(theNewShape);
diff --git a/libnd4j/include/ops/declarable/generic/boolean/where_np.cpp b/libnd4j/include/ops/declarable/generic/boolean/where_np.cpp
index 65cb52cdd..23284b2f9 100644
--- a/libnd4j/include/ops/declarable/generic/boolean/where_np.cpp
+++ b/libnd4j/include/ops/declarable/generic/boolean/where_np.cpp
@@ -139,11 +139,11 @@ namespace sd {
                // output shape - a tuple of rank(inShape) 1D tensors with numOfTrue len
                if (numOfTrue) {
                    for (Nd4jLong e = 0; e < condition->rankOf(); ++e) {
-                       shapes->push_back(ConstantShapeHelper::getInstance()->vectorShapeInfo(numOfTrue, sd::DataType::INT64));
+                       shapes->push_back(ConstantShapeHelper::getInstance().vectorShapeInfo(numOfTrue, sd::DataType::INT64));
                    }
                }
                else {
-                   shapes->push_back(ConstantShapeHelper::getInstance()->emptyShapeInfo(sd::DataType::INT64));
+                   shapes->push_back(ConstantShapeHelper::getInstance().emptyShapeInfo(sd::DataType::INT64));
                }
            }
            return shapes;
diff --git
a/libnd4j/include/ops/declarable/generic/compat/compat_sparse_to_dense.cpp b/libnd4j/include/ops/declarable/generic/compat/compat_sparse_to_dense.cpp index 95dbdfcea..a2dcd6b14 100644 --- a/libnd4j/include/ops/declarable/generic/compat/compat_sparse_to_dense.cpp +++ b/libnd4j/include/ops/declarable/generic/compat/compat_sparse_to_dense.cpp @@ -56,7 +56,7 @@ namespace sd { auto dtype = values->dataType(); // basically output shape is defined by the type of input, and desired shape input - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(dtype, 'c', shape->getBufferAsVector())); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(dtype, 'c', shape->getBufferAsVector())); } DECLARE_TYPES(compat_sparse_to_dense) { diff --git a/libnd4j/include/ops/declarable/generic/compat/compat_string_split.cpp b/libnd4j/include/ops/declarable/generic/compat/compat_string_split.cpp index 40e080a8f..009652178 100644 --- a/libnd4j/include/ops/declarable/generic/compat/compat_string_split.cpp +++ b/libnd4j/include/ops/declarable/generic/compat/compat_string_split.cpp @@ -121,8 +121,8 @@ namespace sd { // values tensor is going to be vector always // indices tensor is going to be vector with length equal to values.length * output rank - auto valuesShape = ConstantShapeHelper::getInstance()->vectorShapeInfo(cnt, sd::DataType::UTF8); - auto indicesShape = ConstantShapeHelper::getInstance()->vectorShapeInfo(cnt * (input->rankOf() + 1), sd::DataType::INT64); + auto valuesShape = ConstantShapeHelper::getInstance().vectorShapeInfo(cnt, sd::DataType::UTF8); + auto indicesShape = ConstantShapeHelper::getInstance().vectorShapeInfo(cnt * (input->rankOf() + 1), sd::DataType::INT64); return SHAPELIST(indicesShape, valuesShape); } diff --git a/libnd4j/include/ops/declarable/generic/compression/bitmap.cpp b/libnd4j/include/ops/declarable/generic/compression/bitmap.cpp index 4b77e2a45..7e89ce2c0 100644 --- a/libnd4j/include/ops/declarable/generic/compression/bitmap.cpp +++ b/libnd4j/include/ops/declarable/generic/compression/bitmap.cpp @@ -75,8 +75,8 @@ namespace sd { auto input = inputShape->at(0); auto outputLength = shape::length(input) / 16 + 5; - auto encodedShape = ConstantShapeHelper::getInstance()->vectorShapeInfo(outputLength, DataType::INT32); - auto encodedCounter = ConstantShapeHelper::getInstance()->scalarShapeInfo(DataType::INT32); + auto encodedShape = ConstantShapeHelper::getInstance().vectorShapeInfo(outputLength, DataType::INT32); + auto encodedCounter = ConstantShapeHelper::getInstance().scalarShapeInfo(DataType::INT32); return SHAPELIST(input, encodedShape, encodedCounter); } diff --git a/libnd4j/include/ops/declarable/generic/compression/threshold.cpp b/libnd4j/include/ops/declarable/generic/compression/threshold.cpp index 9512621e8..83836bb8f 100644 --- a/libnd4j/include/ops/declarable/generic/compression/threshold.cpp +++ b/libnd4j/include/ops/declarable/generic/compression/threshold.cpp @@ -65,7 +65,7 @@ namespace sd { elements = 0; // result array must have 4 additional int elements for header - return SHAPELIST(x->shapeInfo(), sd::ConstantShapeHelper::getInstance()->vectorShapeInfo(elements + 4, DataType::INT32)); + return SHAPELIST(x->shapeInfo(), sd::ConstantShapeHelper::getInstance().vectorShapeInfo(elements + 4, DataType::INT32)); } DECLARE_TYPES(encode_threshold) { diff --git a/libnd4j/include/ops/declarable/generic/datatypes/bitcast.cpp b/libnd4j/include/ops/declarable/generic/datatypes/bitcast.cpp index fe42d7057..294406cb8 100644 --- 
a/libnd4j/include/ops/declarable/generic/datatypes/bitcast.cpp +++ b/libnd4j/include/ops/declarable/generic/datatypes/bitcast.cpp @@ -64,11 +64,11 @@ namespace sd { auto outputSize = DataTypeUtils::sizeOf(newType); if (shape::length(inShape) == 0) - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(inShape, newType))); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(inShape, newType))); if (inputSize == outputSize) { // only type should be changed - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(inShape, newType))); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(inShape, newType))); } else if (inputSize > outputSize) { // range of output increased by 1 with inputSize / outputSize as last dimension @@ -78,7 +78,7 @@ namespace sd { shapeOf[i] = inShape[i + 1]; } shapeOf[i] = inputSize / outputSize; - auto outputShape = ConstantShapeHelper::getInstance()->createShapeInfo(newType, shape::order(inShape), shapeOf); + auto outputShape = ConstantShapeHelper::getInstance().createShapeInfo(newType, shape::order(inShape), shapeOf); return SHAPELIST(outputShape); } REQUIRE_TRUE(shape::sizeAt(inShape, -1) == outputSize / inputSize, 0, "BITCAST: %llu > %llu. So last dimension should be %i, but %i given.", inputSize, outputSize, outputSize / inputSize, shape::sizeAt(inShape, -1)); @@ -88,7 +88,7 @@ namespace sd { shapeOf[i] = inShape[i + 1]; } - auto outputShape = ConstantShapeHelper::getInstance()->createShapeInfo(newType, shape::order(inShape), shapeOf); + auto outputShape = ConstantShapeHelper::getInstance().createShapeInfo(newType, shape::order(inShape), shapeOf); return SHAPELIST(outputShape); } diff --git a/libnd4j/include/ops/declarable/generic/datatypes/cast.cpp b/libnd4j/include/ops/declarable/generic/datatypes/cast.cpp index cf8729d2f..ff071f7a9 100644 --- a/libnd4j/include/ops/declarable/generic/datatypes/cast.cpp +++ b/libnd4j/include/ops/declarable/generic/datatypes/cast.cpp @@ -49,7 +49,7 @@ namespace sd { auto it = INT_ARG(0); DataType newType = DataTypeUtils::fromInt(it); - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(inShape, newType))); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(inShape, newType))); } DECLARE_TYPES(cast) { diff --git a/libnd4j/include/ops/declarable/generic/helpers/BroadcastHelper.h b/libnd4j/include/ops/declarable/generic/helpers/BroadcastHelper.h index 7df331c4d..af7f2d8d7 100644 --- a/libnd4j/include/ops/declarable/generic/helpers/BroadcastHelper.h +++ b/libnd4j/include/ops/declarable/generic/helpers/BroadcastHelper.h @@ -40,7 +40,7 @@ namespace sd { } std::unique_ptr ptr; - if (!Environment::getInstance()->isExperimentalBuild()) { + if (!Environment::getInstance().isExperimentalBuild()) { if (y->dataType() != x->dataType()) { y = new NDArray(y->cast(x->dataType())); std::unique_ptr ptr2(y); diff --git a/libnd4j/include/ops/declarable/generic/images/crop_and_resize.cpp b/libnd4j/include/ops/declarable/generic/images/crop_and_resize.cpp index b8ce12d64..3c101070d 100644 --- a/libnd4j/include/ops/declarable/generic/images/crop_and_resize.cpp +++ b/libnd4j/include/ops/declarable/generic/images/crop_and_resize.cpp @@ -76,7 +76,7 @@ namespace sd { outputShape[2] = height; outputShape[3] = in[4]; - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(ArrayOptions::dataType(in), shape::order(in), outputShape, 
4))); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(ArrayOptions::dataType(in), shape::order(in), outputShape, 4))); } DECLARE_TYPES(crop_and_resize) { diff --git a/libnd4j/include/ops/declarable/generic/images/image_resize.cpp b/libnd4j/include/ops/declarable/generic/images/image_resize.cpp index 4e680b337..8e6e29d3a 100644 --- a/libnd4j/include/ops/declarable/generic/images/image_resize.cpp +++ b/libnd4j/include/ops/declarable/generic/images/image_resize.cpp @@ -82,7 +82,7 @@ namespace sd { auto dtype = DataType::FLOAT32; if (method == helpers::ImageResizeMethods::kResizeNearest) dtype = ArrayOptions::dataType(in); - auto shape = ConstantShapeHelper::getInstance()->createShapeInfo(dtype, 'c', shape::rank(in) == 4?std::vector{in[1], height, width, in[4]}:std::vector{ height, width, in[4]}); + auto shape = ConstantShapeHelper::getInstance().createShapeInfo(dtype, 'c', shape::rank(in) == 4?std::vector{in[1], height, width, in[4]}:std::vector{ height, width, in[4]}); return SHAPELIST(shape); } diff --git a/libnd4j/include/ops/declarable/generic/images/resize_images.cpp b/libnd4j/include/ops/declarable/generic/images/resize_images.cpp index 18d048450..a26e47746 100644 --- a/libnd4j/include/ops/declarable/generic/images/resize_images.cpp +++ b/libnd4j/include/ops/declarable/generic/images/resize_images.cpp @@ -118,7 +118,7 @@ namespace sd { else if (shape::rank(in) == 3) shape = {height, width, in[3]}; - auto outShape = ConstantShapeHelper::getInstance()->createShapeInfo(DataType::FLOAT32, shape::order(in), shape); + auto outShape = ConstantShapeHelper::getInstance().createShapeInfo(DataType::FLOAT32, shape::order(in), shape); return SHAPELIST(outShape); } DECLARE_TYPES(resize_images) { diff --git a/libnd4j/include/ops/declarable/generic/images/rgbToGrs.cpp b/libnd4j/include/ops/declarable/generic/images/rgbToGrs.cpp index f7378d333..a6d80365c 100644 --- a/libnd4j/include/ops/declarable/generic/images/rgbToGrs.cpp +++ b/libnd4j/include/ops/declarable/generic/images/rgbToGrs.cpp @@ -67,7 +67,7 @@ DECLARE_SHAPE_FN(rgb_to_grs) { auto nShape = input->getShapeAsVector(); nShape[dimC] = 1; - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(input->dataType(), input->ordering(), nShape)); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(input->dataType(), input->ordering(), nShape)); } } diff --git a/libnd4j/include/ops/declarable/generic/kernels/knn_mindistance.cpp b/libnd4j/include/ops/declarable/generic/kernels/knn_mindistance.cpp index 8ef699aa2..334014ee7 100644 --- a/libnd4j/include/ops/declarable/generic/kernels/knn_mindistance.cpp +++ b/libnd4j/include/ops/declarable/generic/kernels/knn_mindistance.cpp @@ -45,7 +45,7 @@ namespace sd { auto input = inputShape->at(0); // always return scalar here - return SHAPELIST(ConstantShapeHelper::getInstance()->scalarShapeInfo(ArrayOptions::dataType(input))); + return SHAPELIST(ConstantShapeHelper::getInstance().scalarShapeInfo(ArrayOptions::dataType(input))); } DECLARE_TYPES(knn_mindistance) { diff --git a/libnd4j/include/ops/declarable/generic/linalg/diagPart.cpp b/libnd4j/include/ops/declarable/generic/linalg/diagPart.cpp index 925c4b6c1..6562a02a8 100644 --- a/libnd4j/include/ops/declarable/generic/linalg/diagPart.cpp +++ b/libnd4j/include/ops/declarable/generic/linalg/diagPart.cpp @@ -72,7 +72,7 @@ namespace ops { ShapeUtils::updateStridesAndType(outShapeInfo, inputShapeInfo, shape::order(inputShapeInfo)); - return 
SHAPELIST(ConstantShapeHelper::getInstance()->createFromExisting(outShapeInfo, block.workspace())); + return SHAPELIST(ConstantShapeHelper::getInstance().createFromExisting(outShapeInfo, block.workspace())); } diff --git a/libnd4j/include/ops/declarable/generic/linalg/eye.cpp b/libnd4j/include/ops/declarable/generic/linalg/eye.cpp index 41469468c..4bf339614 100644 --- a/libnd4j/include/ops/declarable/generic/linalg/eye.cpp +++ b/libnd4j/include/ops/declarable/generic/linalg/eye.cpp @@ -100,7 +100,7 @@ namespace ops { } shape::updateStrides(outShapeInfo, static_cast(-params[0])); - auto result = ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(outShapeInfo, dtype)); + auto result = ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(outShapeInfo, dtype)); RELEASE(outShapeInfo, block.getWorkspace()); return SHAPELIST(result); } diff --git a/libnd4j/include/ops/declarable/generic/linalg/lstsq.cpp b/libnd4j/include/ops/declarable/generic/linalg/lstsq.cpp index 81831e3fc..5078ff6f1 100644 --- a/libnd4j/include/ops/declarable/generic/linalg/lstsq.cpp +++ b/libnd4j/include/ops/declarable/generic/linalg/lstsq.cpp @@ -92,10 +92,10 @@ namespace sd { if (shape::isEmpty(in0) || shape::isEmpty(in1)) { shapeOf[rank - 1] = 0; // set output shape to empty } - auto resShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(in0), shape::order(in1), shapeOf);//ShapeBuilders::copyShapeInfoAndType(in1, in0, true, block.workspace()); + auto resShape = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(in0), shape::order(in1), shapeOf);//ShapeBuilders::copyShapeInfoAndType(in1, in0, true, block.workspace()); if (shapeOf[rank - 1] == 0) { // ArrayOptions::setPropertyBit(resShape, ARRAY_EMPTY); - resShape = ConstantShapeHelper::getInstance()->emptyShapeInfo(ArrayOptions::dataType(in0)); + resShape = ConstantShapeHelper::getInstance().emptyShapeInfo(ArrayOptions::dataType(in0)); } return SHAPELIST(resShape); } @@ -116,9 +116,9 @@ namespace sd { if (shape::isEmpty(in0) || shape::isEmpty(in1)) { shapeOf[rank - 1] = 0; // set output shape to empty } - auto resShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(in0), shape::order(in1), shapeOf);//ShapeBuilders::copyShapeInfoAndType(in1, in0, true, block.workspace()); + auto resShape = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(in0), shape::order(in1), shapeOf);//ShapeBuilders::copyShapeInfoAndType(in1, in0, true, block.workspace()); if (shapeOf[rank - 1] == 0) { - resShape = ConstantShapeHelper::getInstance()->emptyShapeInfo(ArrayOptions::dataType(in1)); + resShape = ConstantShapeHelper::getInstance().emptyShapeInfo(ArrayOptions::dataType(in1)); // ArrayOptions::setPropertyBit(resShape, ARRAY_EMPTY); } return SHAPELIST(resShape); diff --git a/libnd4j/include/ops/declarable/generic/linalg/matrixDiagPart.cpp b/libnd4j/include/ops/declarable/generic/linalg/matrixDiagPart.cpp index deabe8443..db73fac75 100644 --- a/libnd4j/include/ops/declarable/generic/linalg/matrixDiagPart.cpp +++ b/libnd4j/include/ops/declarable/generic/linalg/matrixDiagPart.cpp @@ -46,7 +46,7 @@ namespace sd { int lastDimension = sd::math::nd4j_min(shape::sizeAt(in, -1), shape::sizeAt(in, -2)); if(outRank == 1) { //output shape is a vector with size min(sizeAt(0), sizeAt(1)) - outShapeInfo = ConstantShapeHelper::getInstance()->vectorShapeInfo(lastDimension, ArrayOptions::dataType(in)); + outShapeInfo = 
ConstantShapeHelper::getInstance().vectorShapeInfo(lastDimension, ArrayOptions::dataType(in)); } else { Nd4jLong* anShapeInfo; diff --git a/libnd4j/include/ops/declarable/generic/linalg/matrix_determinant.cpp b/libnd4j/include/ops/declarable/generic/linalg/matrix_determinant.cpp index edd10e6ea..7046b69f9 100644 --- a/libnd4j/include/ops/declarable/generic/linalg/matrix_determinant.cpp +++ b/libnd4j/include/ops/declarable/generic/linalg/matrix_determinant.cpp @@ -42,13 +42,13 @@ namespace sd { int targetRank = shape::rank(inShape) - 2; // last two dimensions will be reduced to scalar if (targetRank == 0) { // scalar only - determinantShape = ConstantShapeHelper::getInstance()->scalarShapeInfo(ArrayOptions::dataType(inShape)); + determinantShape = ConstantShapeHelper::getInstance().scalarShapeInfo(ArrayOptions::dataType(inShape)); } else if (targetRank == 1) { // vector - determinantShape = ConstantShapeHelper::getInstance()->vectorShapeInfo(shape::sizeAt(inShape, 0), ArrayOptions::dataType(inShape)); + determinantShape = ConstantShapeHelper::getInstance().vectorShapeInfo(shape::sizeAt(inShape, 0), ArrayOptions::dataType(inShape)); } else { // only two last dimensions are excluded - determinantShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inShape), shape::order(inShape), targetRank, shape::shapeOf(inShape)); + determinantShape = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inShape), shape::order(inShape), targetRank, shape::shapeOf(inShape)); } return SHAPELIST(determinantShape); } @@ -89,13 +89,13 @@ namespace sd { int targetRank = shape::rank(inShape) - 2; // last two dimensions will be reduced to scalar if (targetRank == 0) { // scalar only - determinantShape = ConstantShapeHelper::getInstance()->scalarShapeInfo(ArrayOptions::dataType(inShape)); + determinantShape = ConstantShapeHelper::getInstance().scalarShapeInfo(ArrayOptions::dataType(inShape)); } else if (targetRank == 1) { // vector - determinantShape = ConstantShapeHelper::getInstance()->vectorShapeInfo(shape::sizeAt(inShape, 0), ArrayOptions::dataType(inShape)); + determinantShape = ConstantShapeHelper::getInstance().vectorShapeInfo(shape::sizeAt(inShape, 0), ArrayOptions::dataType(inShape)); } else { // only two last dimensions are excluded - determinantShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inShape), shape::order(inShape), targetRank, shape::shapeOf(inShape)); + determinantShape = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inShape), shape::order(inShape), targetRank, shape::shapeOf(inShape)); } return SHAPELIST(determinantShape); } @@ -130,13 +130,13 @@ namespace sd { int targetRank = shape::rank(inShape) - 2; // last two dimensions will be reduced to scalar if (targetRank == 0) { // scalar only - determinantShape = ConstantShapeHelper::getInstance()->scalarShapeInfo(ArrayOptions::dataType(inShape)); + determinantShape = ConstantShapeHelper::getInstance().scalarShapeInfo(ArrayOptions::dataType(inShape)); } else if (targetRank == 1) { // vector - determinantShape = ConstantShapeHelper::getInstance()->vectorShapeInfo(shape::sizeAt(inShape, 0), ArrayOptions::dataType(inShape)); + determinantShape = ConstantShapeHelper::getInstance().vectorShapeInfo(shape::sizeAt(inShape, 0), ArrayOptions::dataType(inShape)); } else { // only two last dimensions are excluded - determinantShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inShape), shape::order(inShape), 
targetRank, shape::shapeOf(inShape)); + determinantShape = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inShape), shape::order(inShape), targetRank, shape::shapeOf(inShape)); } return SHAPELIST(determinantShape); } diff --git a/libnd4j/include/ops/declarable/generic/linalg/qr.cpp b/libnd4j/include/ops/declarable/generic/linalg/qr.cpp index 9a351a13f..1cdfc6884 100644 --- a/libnd4j/include/ops/declarable/generic/linalg/qr.cpp +++ b/libnd4j/include/ops/declarable/generic/linalg/qr.cpp @@ -57,20 +57,20 @@ namespace sd { if (!fullMatricies) { // outputs are: Q is MxN and R is NxN shape[targetRank - 1] = shape::sizeAt(inShape, -1); shape[targetRank - 2] = shape[targetRank - 1]; - shapeQ = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inShape), + shapeQ = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inShape), shape::order(inShape), targetRank, shape::shapeOf(inShape)); - shapeR = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inShape), + shapeR = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inShape), shape::order(inShape), shape); } else {// otherwise outputs are Q is MxM and R is MxN with zero filled rows shape[targetRank - 1] = shape::sizeAt(inShape, -2); shape[targetRank - 2] = shape[targetRank - 1]; - shapeR = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inShape), + shapeR = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inShape), shape::order(inShape), targetRank, shape::shapeOf(inShape)); - shapeQ = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inShape), + shapeQ = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inShape), shape::order(inShape), shape); } diff --git a/libnd4j/include/ops/declarable/generic/linalg/sufficient_statistics.cpp b/libnd4j/include/ops/declarable/generic/linalg/sufficient_statistics.cpp index 9a9fb730b..915ba5fb9 100644 --- a/libnd4j/include/ops/declarable/generic/linalg/sufficient_statistics.cpp +++ b/libnd4j/include/ops/declarable/generic/linalg/sufficient_statistics.cpp @@ -71,14 +71,14 @@ namespace sd { helpers::adjustAxis(input->rankOf(), axisVector, axis); //std::vector dims = ShapeUtils::evalDimsToExclude(input->rankOf(), {axis}); - auto scalarShape = ConstantShapeHelper::getInstance()->scalarShapeInfo(ArrayOptions::dataType(inputShape->at(0))); + auto scalarShape = ConstantShapeHelper::getInstance().scalarShapeInfo(ArrayOptions::dataType(inputShape->at(0))); auto sumShape = ShapeUtils::evalReduceShapeInfo('c', axis, *input, false, false, block.workspace()); auto squareShape = ShapeUtils::evalReduceShapeInfo('c', axis, *input, false, false, block.workspace()); auto shapeList = SHAPELIST(scalarShape, sumShape, squareShape); if (block.numT() > 0) - shapeList->push_back(ConstantShapeHelper::getInstance()->scalarShapeInfo(ArrayOptions::dataType(inputShape->at(0)))); + shapeList->push_back(ConstantShapeHelper::getInstance().scalarShapeInfo(ArrayOptions::dataType(inputShape->at(0)))); return shapeList; } diff --git a/libnd4j/include/ops/declarable/generic/linalg/svd.cpp b/libnd4j/include/ops/declarable/generic/linalg/svd.cpp index ca5fd52c2..3331dcdd8 100644 --- a/libnd4j/include/ops/declarable/generic/linalg/svd.cpp +++ b/libnd4j/include/ops/declarable/generic/linalg/svd.cpp @@ -101,14 +101,14 @@ DECLARE_SHAPE_FN(svd) { shape::updateStrides(uShapeInfo, shape::order(inShapeInfo)); shape::updateStrides(vShapeInfo, 
shape::order(inShapeInfo)); - auto result = SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(sShapeInfo)), ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(uShapeInfo)), ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(vShapeInfo))); + auto result = SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(sShapeInfo)), ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(uShapeInfo)), ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(vShapeInfo))); RELEASE(sShapeInfo, block.workspace()); RELEASE(uShapeInfo, block.workspace()); RELEASE(vShapeInfo, block.workspace()); return result; } - return SHAPELIST(ConstantShapeHelper::getInstance()->createFromExisting(sShapeInfo, block.workspace())); + return SHAPELIST(ConstantShapeHelper::getInstance().createFromExisting(sShapeInfo, block.workspace())); } diff --git a/libnd4j/include/ops/declarable/generic/linalg/trace.cpp b/libnd4j/include/ops/declarable/generic/linalg/trace.cpp index fa9fd5f56..1a67ec754 100644 --- a/libnd4j/include/ops/declarable/generic/linalg/trace.cpp +++ b/libnd4j/include/ops/declarable/generic/linalg/trace.cpp @@ -58,7 +58,7 @@ DECLARE_SHAPE_FN(trace) { outShapeInfo[i] = inShapeInfo[i]; shape::updateStrides(outShapeInfo, shape::order(inShapeInfo)); - auto result = ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(outShapeInfo, ArrayOptions::dataType(inShapeInfo))); + auto result = ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(outShapeInfo, ArrayOptions::dataType(inShapeInfo))); RELEASE(outShapeInfo, block.getWorkspace()); return SHAPELIST(result); } diff --git a/libnd4j/include/ops/declarable/generic/linalg/tri.cpp b/libnd4j/include/ops/declarable/generic/linalg/tri.cpp index c7e1a125b..d0c1f7a6f 100644 --- a/libnd4j/include/ops/declarable/generic/linalg/tri.cpp +++ b/libnd4j/include/ops/declarable/generic/linalg/tri.cpp @@ -53,7 +53,7 @@ DECLARE_SHAPE_FN(tri) { auto dtype = block.numD() ? 
D_ARG(0) : DataType::FLOAT32; - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(dtype, 'c', {rows, cols})); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(dtype, 'c', {rows, cols})); } diff --git a/libnd4j/include/ops/declarable/generic/loss/absoluteDifference.cpp b/libnd4j/include/ops/declarable/generic/loss/absoluteDifference.cpp index d745b0209..0d5d1d011 100644 --- a/libnd4j/include/ops/declarable/generic/loss/absoluteDifference.cpp +++ b/libnd4j/include/ops/declarable/generic/loss/absoluteDifference.cpp @@ -121,9 +121,9 @@ DECLARE_SHAPE_FN(absolute_difference_loss) { Nd4jLong const* outShapeInfo = nullptr; if(INT_ARG(0) != 0) // in this case output is scalar - outShapeInfo = ConstantShapeHelper::getInstance()->scalarShapeInfo(outType); + outShapeInfo = ConstantShapeHelper::getInstance().scalarShapeInfo(outType); else // in this case output has the same shape as labels and predictions - outShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(outType, shape::order(labelsShapeInfo), shape::shapeOf(labelsShapeInfo), shape::rank(labelsShapeInfo))); + outShapeInfo = ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(outType, shape::order(labelsShapeInfo), shape::shapeOf(labelsShapeInfo), shape::rank(labelsShapeInfo))); return SHAPELIST(outShapeInfo); } diff --git a/libnd4j/include/ops/declarable/generic/loss/cosineDistance.cpp b/libnd4j/include/ops/declarable/generic/loss/cosineDistance.cpp index 4d134f6b1..99cf2e3c1 100644 --- a/libnd4j/include/ops/declarable/generic/loss/cosineDistance.cpp +++ b/libnd4j/include/ops/declarable/generic/loss/cosineDistance.cpp @@ -143,7 +143,7 @@ DECLARE_SHAPE_FN(cosine_distance_loss) { // evaluate output shapeInfo Nd4jLong const* outShapeInfo = nullptr; if(INT_ARG(0) != 0) // in this case output is scalar - outShapeInfo = ConstantShapeHelper::getInstance()->scalarShapeInfo(outType); + outShapeInfo = ConstantShapeHelper::getInstance().scalarShapeInfo(outType); else { // in this case output has the same shape as labels reduced by dim axis std::vector<int> dimensions = {dim}; diff --git a/libnd4j/include/ops/declarable/generic/loss/hingeLoss.cpp b/libnd4j/include/ops/declarable/generic/loss/hingeLoss.cpp index fe66387a8..71e7489ea 100644 --- a/libnd4j/include/ops/declarable/generic/loss/hingeLoss.cpp +++ b/libnd4j/include/ops/declarable/generic/loss/hingeLoss.cpp @@ -128,9 +128,9 @@ namespace sd { Nd4jLong const* outShapeInfo = nullptr; if(INT_ARG(0) != 0) // in this case output is scalar - outShapeInfo = ConstantShapeHelper::getInstance()->scalarShapeInfo(outType); + outShapeInfo = ConstantShapeHelper::getInstance().scalarShapeInfo(outType); else // in this case output has the same shape as labels and predictions - outShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(outType, shape::order(labelsShapeInfo), shape::shapeOf(labelsShapeInfo), shape::rank(labelsShapeInfo))); + outShapeInfo = ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(outType, shape::order(labelsShapeInfo), shape::shapeOf(labelsShapeInfo), shape::rank(labelsShapeInfo))); return SHAPELIST(outShapeInfo); diff --git a/libnd4j/include/ops/declarable/generic/loss/huberLoss.cpp b/libnd4j/include/ops/declarable/generic/loss/huberLoss.cpp index df57092e1..2d0b44b3c 100644 --- a/libnd4j/include/ops/declarable/generic/loss/huberLoss.cpp +++ b/libnd4j/include/ops/declarable/generic/loss/huberLoss.cpp @@ -133,9 +133,9 @@ DECLARE_SHAPE_FN(huber_loss) { Nd4jLong const*
outShapeInfo = nullptr; if(INT_ARG(0) != 0) // in this case output is scalar - outShapeInfo = ConstantShapeHelper::getInstance()->scalarShapeInfo(outType); + outShapeInfo = ConstantShapeHelper::getInstance().scalarShapeInfo(outType); else // in this case output has the same shape as labels and predictions - outShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(outType, shape::order(labelsShapeInfo), shape::shapeOf(labelsShapeInfo), shape::rank(labelsShapeInfo))); + outShapeInfo = ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(outType, shape::order(labelsShapeInfo), shape::shapeOf(labelsShapeInfo), shape::rank(labelsShapeInfo))); return SHAPELIST(outShapeInfo); } diff --git a/libnd4j/include/ops/declarable/generic/loss/l2_loss.cpp b/libnd4j/include/ops/declarable/generic/loss/l2_loss.cpp index 3afeea2ba..48f3a64fa 100644 --- a/libnd4j/include/ops/declarable/generic/loss/l2_loss.cpp +++ b/libnd4j/include/ops/declarable/generic/loss/l2_loss.cpp @@ -38,7 +38,7 @@ namespace sd { return Status::OK(); } DECLARE_SHAPE_FN(l2_loss) { - return SHAPELIST(ConstantShapeHelper::getInstance()->scalarShapeInfo(ArrayOptions::dataType(inputShape->at(0)))); + return SHAPELIST(ConstantShapeHelper::getInstance().scalarShapeInfo(ArrayOptions::dataType(inputShape->at(0)))); } DECLARE_TYPES(l2_loss) { diff --git a/libnd4j/include/ops/declarable/generic/loss/logLoss.cpp b/libnd4j/include/ops/declarable/generic/loss/logLoss.cpp index e43e7b1d1..ab0c8923e 100644 --- a/libnd4j/include/ops/declarable/generic/loss/logLoss.cpp +++ b/libnd4j/include/ops/declarable/generic/loss/logLoss.cpp @@ -130,9 +130,9 @@ DECLARE_SHAPE_FN(log_loss) { Nd4jLong const* outShapeInfo = nullptr; if(INT_ARG(0) != 0) // in this case output is scalar - outShapeInfo = ConstantShapeHelper::getInstance()->scalarShapeInfo(outType); + outShapeInfo = ConstantShapeHelper::getInstance().scalarShapeInfo(outType); else // in this case output has the same shape as labels and predictions - outShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(outType, shape::order(labelsShapeInfo), shape::shapeOf(labelsShapeInfo), shape::rank(labelsShapeInfo))); + outShapeInfo = ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(outType, shape::order(labelsShapeInfo), shape::shapeOf(labelsShapeInfo), shape::rank(labelsShapeInfo))); return SHAPELIST(outShapeInfo); } diff --git a/libnd4j/include/ops/declarable/generic/loss/log_poisson_loss.cpp b/libnd4j/include/ops/declarable/generic/loss/log_poisson_loss.cpp index b39326071..5cc6b60ab 100644 --- a/libnd4j/include/ops/declarable/generic/loss/log_poisson_loss.cpp +++ b/libnd4j/include/ops/declarable/generic/loss/log_poisson_loss.cpp @@ -133,9 +133,9 @@ namespace ops { Nd4jLong const* outShapeInfo = nullptr; if(INT_ARG(0) != 0) // in this case output is scalar - outShapeInfo = ConstantShapeHelper::getInstance()->scalarShapeInfo(outType); + outShapeInfo = ConstantShapeHelper::getInstance().scalarShapeInfo(outType); else // in this case output has the same shape as labels and predictions - outShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(labelsShapeInfo, outType)); + outShapeInfo = ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(labelsShapeInfo, outType)); return SHAPELIST(outShapeInfo); } diff --git a/libnd4j/include/ops/declarable/generic/loss/meanPairWsSqErr.cpp b/libnd4j/include/ops/declarable/generic/loss/meanPairWsSqErr.cpp index 5a0e20807..f36fa3c62 100644 --- 
a/libnd4j/include/ops/declarable/generic/loss/meanPairWsSqErr.cpp +++ b/libnd4j/include/ops/declarable/generic/loss/meanPairWsSqErr.cpp @@ -200,7 +200,7 @@ namespace sd { Nd4jLong const* outShapeInfo = nullptr; if(INT_ARG(0) != 0) // in this case output is scalar - outShapeInfo = ConstantShapeHelper::getInstance()->scalarShapeInfo(outType); + outShapeInfo = ConstantShapeHelper::getInstance().scalarShapeInfo(outType); else { // in this case output has the shape as labels and logits minus last dimension std::vector<int> dimensions = {-1}; outShapeInfo = ShapeUtils::evalReduceShapeInfo(shape::order(predictionsShapeInfo), dimensions, predictionsShapeInfo, false, true, block.getWorkspace()); diff --git a/libnd4j/include/ops/declarable/generic/loss/meanSqErr.cpp b/libnd4j/include/ops/declarable/generic/loss/meanSqErr.cpp index fd00a0364..6c54706c4 100644 --- a/libnd4j/include/ops/declarable/generic/loss/meanSqErr.cpp +++ b/libnd4j/include/ops/declarable/generic/loss/meanSqErr.cpp @@ -129,9 +129,9 @@ DECLARE_SHAPE_FN(mean_sqerr_loss) { Nd4jLong const* outShapeInfo = nullptr; if(INT_ARG(0) != 0) // in this case output is scalar - outShapeInfo = ConstantShapeHelper::getInstance()->scalarShapeInfo(outType); + outShapeInfo = ConstantShapeHelper::getInstance().scalarShapeInfo(outType); else // in this case output has the same shape as labels and predictions - outShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(outType, shape::order(labelsShapeInfo), shape::shapeOf(labelsShapeInfo), shape::rank(labelsShapeInfo))); + outShapeInfo = ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(outType, shape::order(labelsShapeInfo), shape::shapeOf(labelsShapeInfo), shape::rank(labelsShapeInfo))); return SHAPELIST(outShapeInfo); diff --git a/libnd4j/include/ops/declarable/generic/loss/sigmCrossEntropy.cpp b/libnd4j/include/ops/declarable/generic/loss/sigmCrossEntropy.cpp index f2e665bdb..ddd28d43d 100644 --- a/libnd4j/include/ops/declarable/generic/loss/sigmCrossEntropy.cpp +++ b/libnd4j/include/ops/declarable/generic/loss/sigmCrossEntropy.cpp @@ -140,9 +140,9 @@ DECLARE_SHAPE_FN(sigm_cross_entropy_loss) { Nd4jLong const* outShapeInfo = nullptr; if(INT_ARG(0) != 0) // in this case output is scalar - outShapeInfo = ConstantShapeHelper::getInstance()->scalarShapeInfo(outType); + outShapeInfo = ConstantShapeHelper::getInstance().scalarShapeInfo(outType); else // in this case output has the same shape as labels and logits - outShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(outType, shape::order(labelsShapeInfo), shape::shapeOf(labelsShapeInfo), shape::rank(labelsShapeInfo))); + outShapeInfo = ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(outType, shape::order(labelsShapeInfo), shape::shapeOf(labelsShapeInfo), shape::rank(labelsShapeInfo))); return SHAPELIST(outShapeInfo); } diff --git a/libnd4j/include/ops/declarable/generic/loss/softmaxCrossEntropy.cpp b/libnd4j/include/ops/declarable/generic/loss/softmaxCrossEntropy.cpp index f70a58a10..79d46e448 100644 --- a/libnd4j/include/ops/declarable/generic/loss/softmaxCrossEntropy.cpp +++ b/libnd4j/include/ops/declarable/generic/loss/softmaxCrossEntropy.cpp @@ -161,7 +161,7 @@ DECLARE_SHAPE_FN(softmax_cross_entropy_loss) { Nd4jLong const* outShapeInfo = nullptr; if(INT_ARG(0) != 0) // in this case output is scalar - outShapeInfo = ConstantShapeHelper::getInstance()->scalarShapeInfo(outType); + outShapeInfo = ConstantShapeHelper::getInstance().scalarShapeInfo(outType); else { // in
this case output has the shape as labels and logits minus last dimension std::vector<int> dimensions = {-1}; outShapeInfo = ShapeUtils::evalReduceShapeInfo(shape::order(logitsShapeInfo), dimensions, logitsShapeInfo, false, true, block.getWorkspace()); @@ -384,9 +384,9 @@ DECLARE_SHAPE_FN(softmax_cross_entropy_loss_grad) { auto outType = DataTypeUtils::pickFloatingType(ArrayOptions::dataType(logitsShapeInfo)); - auto dLdpShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(outType, shape::order(logitsShapeInfo), shape::shapeOf(logitsShapeInfo), shape::rank(logitsShapeInfo))); - auto dLdwShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(outType, shape::order(weightsShapeInfo), shape::shapeOf(weightsShapeInfo), shape::rank(weightsShapeInfo))); - auto dLdlShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(outType, shape::order(labelsShapeInfo), shape::shapeOf(labelsShapeInfo), shape::rank(labelsShapeInfo))); + auto dLdpShapeInfo = ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(outType, shape::order(logitsShapeInfo), shape::shapeOf(logitsShapeInfo), shape::rank(logitsShapeInfo))); + auto dLdwShapeInfo = ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(outType, shape::order(weightsShapeInfo), shape::shapeOf(weightsShapeInfo), shape::rank(weightsShapeInfo))); + auto dLdlShapeInfo = ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(outType, shape::order(labelsShapeInfo), shape::shapeOf(labelsShapeInfo), shape::rank(labelsShapeInfo))); return SHAPELIST(dLdpShapeInfo, dLdwShapeInfo, dLdlShapeInfo); } diff --git a/libnd4j/include/ops/declarable/generic/loss/softmaxCrossEntropyWithLogits.cpp b/libnd4j/include/ops/declarable/generic/loss/softmaxCrossEntropyWithLogits.cpp index 6dab14365..0636450c7 100644 --- a/libnd4j/include/ops/declarable/generic/loss/softmaxCrossEntropyWithLogits.cpp +++ b/libnd4j/include/ops/declarable/generic/loss/softmaxCrossEntropyWithLogits.cpp @@ -127,8 +127,8 @@ DECLARE_SHAPE_FN(softmax_cross_entropy_loss_with_logits_grad) { DataType outType = DataTypeUtils::pickFloatingType(ArrayOptions::dataType(logitsShapeInfo)); - auto dLdpShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(outType, shape::order(logitsShapeInfo), shape::shapeOf(logitsShapeInfo), shape::rank(logitsShapeInfo))); - auto dLdlShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(outType, shape::order(labelsShapeInfo), shape::shapeOf(labelsShapeInfo), shape::rank(labelsShapeInfo))); + auto dLdpShapeInfo = ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(outType, shape::order(logitsShapeInfo), shape::shapeOf(logitsShapeInfo), shape::rank(logitsShapeInfo))); + auto dLdlShapeInfo = ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(outType, shape::order(labelsShapeInfo), shape::shapeOf(labelsShapeInfo), shape::rank(labelsShapeInfo))); return SHAPELIST(dLdpShapeInfo, dLdlShapeInfo); } diff --git a/libnd4j/include/ops/declarable/generic/nn/activations/crelu.cpp b/libnd4j/include/ops/declarable/generic/nn/activations/crelu.cpp index 539b21145..df107451a 100644 --- a/libnd4j/include/ops/declarable/generic/nn/activations/crelu.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/activations/crelu.cpp @@ -61,7 +61,7 @@ namespace sd { shape.emplace_back(shape::shapeOf(inShape)[e]); shape[shape.size()-1] *= 2; - auto newShape =
ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inShape), shape::order(inShape), shape); + auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inShape), shape::order(inShape), shape); return SHAPELIST(newShape); } @@ -106,7 +106,7 @@ namespace sd { DECLARE_SHAPE_FN(crelu_bp) { auto inShape = inputShape->at(0); - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(inShape))); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(inShape))); } } } diff --git a/libnd4j/include/ops/declarable/generic/nn/batchnorm.cpp b/libnd4j/include/ops/declarable/generic/nn/batchnorm.cpp index 56684c569..7018ae342 100644 --- a/libnd4j/include/ops/declarable/generic/nn/batchnorm.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/batchnorm.cpp @@ -333,10 +333,10 @@ DECLARE_SHAPE_FN(batchnorm_bp) { auto shapes = SHAPELIST(); // dLdI shapeInfo - shapes->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(outType, inShapeInfo)); + shapes->push_back(ConstantShapeHelper::getInstance().createShapeInfo(outType, inShapeInfo)); // dLdM shapeInfo - shapes->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(outType, meanShapeInfo)); + shapes->push_back(ConstantShapeHelper::getInstance().createShapeInfo(outType, meanShapeInfo)); // dLdV shapeInfo (same as dLdM) shapes->push_back(shapes->at(shapes->size()-1)); diff --git a/libnd4j/include/ops/declarable/generic/nn/bias_add.cpp b/libnd4j/include/ops/declarable/generic/nn/bias_add.cpp index eec864c5e..bc164e952 100644 --- a/libnd4j/include/ops/declarable/generic/nn/bias_add.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/bias_add.cpp @@ -58,7 +58,7 @@ DECLARE_SHAPE_FN(biasadd) { auto yShape = inputShape->at(1); auto dtype = ArrayOptions::dataType(yShape); - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(xShape, dtype))); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(xShape, dtype))); } DECLARE_TYPES(biasadd) { diff --git a/libnd4j/include/ops/declarable/generic/nn/convo/deconv2d.cpp b/libnd4j/include/ops/declarable/generic/nn/convo/deconv2d.cpp index e0440692b..d62a98d52 100644 --- a/libnd4j/include/ops/declarable/generic/nn/convo/deconv2d.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/convo/deconv2d.cpp @@ -159,7 +159,7 @@ DECLARE_SHAPE_FN(deconv2d) { outputShape[3] = oC; } - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(ArrayOptions::dataType(weightsShapeInfo), shape::order(inputShapeInfo), outputShape, 4))); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(ArrayOptions::dataType(weightsShapeInfo), shape::order(inputShapeInfo), outputShape, 4))); } DECLARE_TYPES(deconv2d_bp) { diff --git a/libnd4j/include/ops/declarable/generic/nn/convo/deconv2d_tf.cpp b/libnd4j/include/ops/declarable/generic/nn/convo/deconv2d_tf.cpp index ae97c3d65..9af389bf6 100644 --- a/libnd4j/include/ops/declarable/generic/nn/convo/deconv2d_tf.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/convo/deconv2d_tf.cpp @@ -144,7 +144,7 @@ DECLARE_SHAPE_FN(deconv2d_tf) { shape[3] = iC; } - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(weightsShapeInfo), shape::order(gradOShapeInfo), 4, shape)); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(weightsShapeInfo), shape::order(gradOShapeInfo), 4, shape)); } } 
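The change repeated throughout these hunks is mechanical: ConstantShapeHelper::getInstance() now returns a reference rather than a pointer, so every call site flips from "->" to ".". Below is a minimal sketch of the reference-returning (Meyers) singleton pattern this implies; ShapeCache, cachedShapes and _count are illustrative stand-ins, not the actual ConstantShapeHelper API:

#include <cstdio>

class ShapeCache {
public:
    // Returning a reference means callers write getInstance().foo():
    // there is no pointer to null-check, copy, or accidentally delete.
    static ShapeCache& getInstance() {
        static ShapeCache instance;   // constructed on first use; thread-safe since C++11
        return instance;
    }
    ShapeCache(const ShapeCache&) = delete;             // singletons are non-copyable
    ShapeCache& operator=(const ShapeCache&) = delete;

    int cachedShapes() const { return _count; }

private:
    ShapeCache() : _count(0) {}
    int _count;
};

int main() {
    // The patch rewrites call sites mechanically:
    //   old: ShapeCache::getInstance()->cachedShapes()
    //   new: ShapeCache::getInstance().cachedShapes()
    std::printf("cached shapes: %d\n", ShapeCache::getInstance().cachedShapes());
    return 0;
}

Under that reading, each edited call site is purely syntactic; the behavior of the shape-info cache itself is unchanged.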
diff --git a/libnd4j/include/ops/declarable/generic/nn/convo/dilation2d.cpp b/libnd4j/include/ops/declarable/generic/nn/convo/dilation2d.cpp index c3ecddf53..b3a0e1667 100644 --- a/libnd4j/include/ops/declarable/generic/nn/convo/dilation2d.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/convo/dilation2d.cpp @@ -107,7 +107,7 @@ namespace ops { rates = r->template asVectorT<int>(); } else { if (block.numI() < 9) { - auto newShape = ConstantShapeHelper::getInstance()->scalarShapeInfo(block.dataType()); + auto newShape = ConstantShapeHelper::getInstance().scalarShapeInfo(block.dataType()); return SHAPELIST(newShape); } @@ -127,7 +127,7 @@ namespace ops { helpers::dilation_hw(block.launchContext(), input, weights, strides, rates, isSameShape, &sH, &sW, &pH, &pW, &dH, &dW, &oH, &oW); std::array<Nd4jLong, 4> shape = {{bS, oH, oW, iC}}; - auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(weights), 'c', 4, shape.data()); + auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(weights), 'c', 4, shape.data()); return SHAPELIST(newShape); } } diff --git a/libnd4j/include/ops/declarable/generic/nn/dot_product_attention.cpp b/libnd4j/include/ops/declarable/generic/nn/dot_product_attention.cpp index c80608e03..49dc52a03 100644 --- a/libnd4j/include/ops/declarable/generic/nn/dot_product_attention.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/dot_product_attention.cpp @@ -113,8 +113,8 @@ namespace ops { auto keys_shape = inputShape->at(1); auto values_shape = inputShape->at(2); - auto weights_shape = ConstantShapeHelper::getInstance()->createShapeInfo(sd::ArrayOptions::dataType(values_shape), 'c', ShapeUtils::evalShapeForMatmul(keys_shape, query_shape, true, false)); - auto output_shape = ConstantShapeHelper::getInstance()->createShapeInfo(sd::ArrayOptions::dataType(values_shape), 'c', ShapeUtils::evalShapeForMatmul(values_shape, weights_shape, false, false)); + auto weights_shape = ConstantShapeHelper::getInstance().createShapeInfo(sd::ArrayOptions::dataType(values_shape), 'c', ShapeUtils::evalShapeForMatmul(keys_shape, query_shape, true, false)); + auto output_shape = ConstantShapeHelper::getInstance().createShapeInfo(sd::ArrayOptions::dataType(values_shape), 'c', ShapeUtils::evalShapeForMatmul(values_shape, weights_shape, false, false)); if(INT_ARG(1)){ return SHAPELIST(output_shape, weights_shape); diff --git a/libnd4j/include/ops/declarable/generic/nn/embedding_lookup.cpp b/libnd4j/include/ops/declarable/generic/nn/embedding_lookup.cpp index 0888854ee..0f4a01e03 100644 --- a/libnd4j/include/ops/declarable/generic/nn/embedding_lookup.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/embedding_lookup.cpp @@ -94,7 +94,7 @@ DECLARE_SHAPE_FN(embedding_lookup) { for (int e = 1; e < outRank; e++) shapeInfo[e] = shape::sizeAt(inShapeInfo, e); - auto outShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inShapeInfo), shape::order(inShapeInfo), shapeInfo); + auto outShapeInfo = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inShapeInfo), shape::order(inShapeInfo), shapeInfo); return SHAPELIST(outShapeInfo); } @@ -106,7 +106,7 @@ DECLARE_SHAPE_FN(embedding_lookup) { for (int e = 1; e < outRank; e++) shapeInfo[e] = shape::sizeAt(inShapeInfo, e); - auto outShapeInfo =
ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inShapeInfo), shape::order(inShapeInfo), shapeInfo); return SHAPELIST(outShapeInfo); } diff --git a/libnd4j/include/ops/declarable/generic/nn/multi_head_dot_product_attention.cpp b/libnd4j/include/ops/declarable/generic/nn/multi_head_dot_product_attention.cpp index f9b7284f1..7ff8eb4c5 100644 --- a/libnd4j/include/ops/declarable/generic/nn/multi_head_dot_product_attention.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/multi_head_dot_product_attention.cpp @@ -138,8 +138,8 @@ namespace ops { auto numHeads = shape::sizeAt(WkShape, 0); auto timeSteps = shape::sizeAt(keysShape, 2); - auto weightsShape = ConstantShapeHelper::getInstance()->createShapeInfo(sd::ArrayOptions::dataType(valuesShape), 'c', {batchSize, numHeads, timeSteps, queryCount}); - auto outputShape = ConstantShapeHelper::getInstance()->createShapeInfo(sd::ArrayOptions::dataType(valuesShape), 'c', {batchSize, outSize, queryCount}); + auto weightsShape = ConstantShapeHelper::getInstance().createShapeInfo(sd::ArrayOptions::dataType(valuesShape), 'c', {batchSize, numHeads, timeSteps, queryCount}); + auto outputShape = ConstantShapeHelper::getInstance().createShapeInfo(sd::ArrayOptions::dataType(valuesShape), 'c', {batchSize, outSize, queryCount}); if(INT_ARG(1)){ return SHAPELIST(outputShape, weightsShape); diff --git a/libnd4j/include/ops/declarable/generic/nn/pooling/avgpool2d.cpp b/libnd4j/include/ops/declarable/generic/nn/pooling/avgpool2d.cpp index b93cbe47f..fde075667 100644 --- a/libnd4j/include/ops/declarable/generic/nn/pooling/avgpool2d.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/pooling/avgpool2d.cpp @@ -133,7 +133,7 @@ DECLARE_SHAPE_FN(avgpool2d) { newShape[3] = iD; } - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(ArrayOptions::dataType(inShape), shape::order(inShape), newShape, 4))); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(ArrayOptions::dataType(inShape), shape::order(inShape), newShape, 4))); } DECLARE_TYPES(avgpool2d_bp) { @@ -210,7 +210,7 @@ DECLARE_SHAPE_FN(avgpool2d_bp) { REQUIRE_TRUE(inputShape->at(0)[0] == 4, 0, "AVGPOOL2D_BP op: input array must be 4D, but got %i instead!", inputShape->at(0)[0]); REQUIRE_TRUE(inputShape->at(1)[0] == 4, 0, "AVGPOOL2D_BP op: output's gradient array (next epsilon) must be 4D, but got %i instead!", inputShape->at(1)[0]); - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(inputShape->at(0), ArrayOptions::dataType(inputShape->at(1))))); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(inputShape->at(0), ArrayOptions::dataType(inputShape->at(1))))); } diff --git a/libnd4j/include/ops/declarable/generic/nn/pooling/avgpool3d.cpp b/libnd4j/include/ops/declarable/generic/nn/pooling/avgpool3d.cpp index 85b8d8833..d8df11385 100644 --- a/libnd4j/include/ops/declarable/generic/nn/pooling/avgpool3d.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/pooling/avgpool3d.cpp @@ -135,7 +135,7 @@ DECLARE_SHAPE_FN(avgpool3dnew) { outputShape[4] = iC; } // TF DOC: A Tensor. Has the same type as input. 
- return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(ArrayOptions::dataType(inputShapeInfo), shape::order(inputShapeInfo), outputShape, 5))); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(ArrayOptions::dataType(inputShapeInfo), shape::order(inputShapeInfo), outputShape, 5))); } DECLARE_TYPES(avgpool3dnew_bp) { @@ -202,7 +202,7 @@ CUSTOM_OP_IMPL(avgpool3dnew_bp, 2, 1, false, 0, 14) { DECLARE_SHAPE_FN(avgpool3dnew_bp) { - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(inputShape->at(0), ArrayOptions::dataType(inputShape->at(1))))); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(inputShape->at(0), ArrayOptions::dataType(inputShape->at(1))))); } diff --git a/libnd4j/include/ops/declarable/generic/nn/pooling/maxpool2d.cpp b/libnd4j/include/ops/declarable/generic/nn/pooling/maxpool2d.cpp index 31dd72fc3..8a37b90b0 100644 --- a/libnd4j/include/ops/declarable/generic/nn/pooling/maxpool2d.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/pooling/maxpool2d.cpp @@ -136,7 +136,7 @@ DECLARE_SHAPE_FN(maxpool2d) { newShape[3] = iC; } - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(ArrayOptions::dataType(inShape), order, newShape, 4))); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(ArrayOptions::dataType(inShape), order, newShape, 4))); } DECLARE_TYPES(maxpool2d_bp) { @@ -215,7 +215,7 @@ DECLARE_SHAPE_FN(maxpool2d_bp) { REQUIRE_TRUE(inputShape->at(0)[0] == 4, 0, "MAXPOOL2D_BP op: input array must be 4D, but got %i instead!", inputShape->at(0)[0]); REQUIRE_TRUE(inputShape->at(1)[0] == 4, 0, "MAXPOOL2D_BP op: output's gradient array (next epsilon) must be 4D, but got %i instead!", inputShape->at(1)[0]); - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(inputShape->at(0), ArrayOptions::dataType(inputShape->at(1))))); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(inputShape->at(0), ArrayOptions::dataType(inputShape->at(1))))); } diff --git a/libnd4j/include/ops/declarable/generic/nn/pooling/maxpool3d.cpp b/libnd4j/include/ops/declarable/generic/nn/pooling/maxpool3d.cpp index d1b5928b6..fd28901cc 100644 --- a/libnd4j/include/ops/declarable/generic/nn/pooling/maxpool3d.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/pooling/maxpool3d.cpp @@ -137,7 +137,7 @@ DECLARE_SHAPE_FN(maxpool3dnew) { outputShape[4] = iC; } - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(ArrayOptions::dataType(inputShapeInfo), shape::order(inputShapeInfo), outputShape, 5))); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(ArrayOptions::dataType(inputShapeInfo), shape::order(inputShapeInfo), outputShape, 5))); } DECLARE_TYPES(maxpool3dnew_bp) { @@ -217,7 +217,7 @@ CUSTOM_OP_IMPL(maxpool3dnew_bp, 2, 1, false, 0, 14) { DECLARE_SHAPE_FN(maxpool3dnew_bp) { - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(inputShape->at(0), ArrayOptions::dataType(inputShape->at(1))))); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(inputShape->at(0), ArrayOptions::dataType(inputShape->at(1))))); } diff --git a/libnd4j/include/ops/declarable/generic/nn/pooling/maxpool_with_argmax.cpp b/libnd4j/include/ops/declarable/generic/nn/pooling/maxpool_with_argmax.cpp index 111846584..eced3c2b4 100644 --- 
a/libnd4j/include/ops/declarable/generic/nn/pooling/maxpool_with_argmax.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/pooling/maxpool_with_argmax.cpp @@ -53,8 +53,8 @@ namespace sd { DECLARE_SHAPE_FN(max_pool_with_argmax) { auto in = inputShape->at(0); - auto valuesShape = ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(in)); - auto indicesShape = ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(in, DataType::INT64)); + auto valuesShape = ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(in)); + auto indicesShape = ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(in, DataType::INT64)); return SHAPELIST(valuesShape, indicesShape); } diff --git a/libnd4j/include/ops/declarable/generic/nn/pooling/pnormpool2d.cpp b/libnd4j/include/ops/declarable/generic/nn/pooling/pnormpool2d.cpp index adcd40daa..927627ff8 100644 --- a/libnd4j/include/ops/declarable/generic/nn/pooling/pnormpool2d.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/pooling/pnormpool2d.cpp @@ -130,7 +130,7 @@ namespace sd { newShape[3] = iC; } - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(ArrayOptions::dataType(inShape), order, newShape, 4))); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(ArrayOptions::dataType(inShape), order, newShape, 4))); } @@ -225,7 +225,7 @@ DECLARE_SHAPE_FN(pnormpool2d_bp) { REQUIRE_TRUE(inputShape->at(0)[0] == 4, 0, "PNORMPOOL2D_BP op: input array must be 4D, but got %i instead!", inputShape->at(0)[0]); REQUIRE_TRUE(inputShape->at(1)[0] == 4, 0, "PNORMPOOL2D_BP op: output's gradient array (next epsilon) must be 4D, but got %i instead!", inputShape->at(1)[0]); - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(inputShape->at(0), ArrayOptions::dataType(inputShape->at(1))))); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(inputShape->at(0), ArrayOptions::dataType(inputShape->at(1))))); } } diff --git a/libnd4j/include/ops/declarable/generic/nn/recurrent/gru.cpp b/libnd4j/include/ops/declarable/generic/nn/recurrent/gru.cpp index a0b1e707b..0be3c8393 100644 --- a/libnd4j/include/ops/declarable/generic/nn/recurrent/gru.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/recurrent/gru.cpp @@ -91,7 +91,7 @@ DECLARE_SHAPE_FN(gru) { REQUIRE_TRUE(Wh->isSameShape(whCorrectShape), 0, "GRU operation: wrong shape of hidden-to-hidden weights array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(whCorrectShape).c_str(), ShapeUtils::shapeAsString(Wh).c_str()); REQUIRE_TRUE(b->isSameShape(bCorrectShape), 0, "GRU operation: wrong shape of biases array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(bCorrectShape).c_str(), ShapeUtils::shapeAsString(b).c_str()); - auto hShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(hI->dataType(), hI->ordering(), {time, bS, nOut}); + auto hShapeInfo = ConstantShapeHelper::getInstance().createShapeInfo(hI->dataType(), hI->ordering(), {time, bS, nOut}); return SHAPELIST(hShapeInfo); } @@ -173,11 +173,11 @@ DECLARE_SHAPE_FN(gru_bp) { REQUIRE_TRUE(b->isSameShape(bCorrectShape), 0, "GRU_BP operation: wrong shape of biases array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(bCorrectShape).c_str(), ShapeUtils::shapeAsString(b).c_str()); REQUIRE_TRUE(dLdh->isSameShape(hCorrectShape),0, "GRU_BP operation: wrong shape of gradient vs. 
ff output, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(hCorrectShape).c_str(), ShapeUtils::shapeAsString(dLdh).c_str()); - auto dLdxShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(dLdh->dataType(), x->shapeInfo()); - auto dLdhIShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(dLdh->dataType(), hI->shapeInfo()); - auto dLdWxShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(dLdh->dataType(), Wx->shapeInfo()); - auto dLdWhShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(dLdh->dataType(), Wh->shapeInfo()); - auto dLdbShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(dLdh->dataType(), b->shapeInfo()); + auto dLdxShapeInfo = ConstantShapeHelper::getInstance().createShapeInfo(dLdh->dataType(), x->shapeInfo()); + auto dLdhIShapeInfo = ConstantShapeHelper::getInstance().createShapeInfo(dLdh->dataType(), hI->shapeInfo()); + auto dLdWxShapeInfo = ConstantShapeHelper::getInstance().createShapeInfo(dLdh->dataType(), Wx->shapeInfo()); + auto dLdWhShapeInfo = ConstantShapeHelper::getInstance().createShapeInfo(dLdh->dataType(), Wh->shapeInfo()); + auto dLdbShapeInfo = ConstantShapeHelper::getInstance().createShapeInfo(dLdh->dataType(), b->shapeInfo()); return SHAPELIST(dLdxShapeInfo, dLdhIShapeInfo, dLdWxShapeInfo, dLdWhShapeInfo, dLdbShapeInfo); } diff --git a/libnd4j/include/ops/declarable/generic/nn/recurrent/gruCell.cpp b/libnd4j/include/ops/declarable/generic/nn/recurrent/gruCell.cpp index 037f09736..25c8d3744 100644 --- a/libnd4j/include/ops/declarable/generic/nn/recurrent/gruCell.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/recurrent/gruCell.cpp @@ -111,7 +111,7 @@ DECLARE_SHAPE_FN(gruCell) { s0[2] = nU; ShapeUtils::updateStridesAndType(s0, x, shape::order(hLast)); - auto ts0 = ConstantShapeHelper::getInstance()->createFromExisting(s0, block.workspace()); + auto ts0 = ConstantShapeHelper::getInstance().createFromExisting(s0, block.workspace()); //4 output shapes, all [bs, nU] return SHAPELIST(ts0, ts0, ts0, ts0); diff --git a/libnd4j/include/ops/declarable/generic/nn/recurrent/lstmCell.cpp b/libnd4j/include/ops/declarable/generic/nn/recurrent/lstmCell.cpp index 20a9e6710..32cb481ee 100644 --- a/libnd4j/include/ops/declarable/generic/nn/recurrent/lstmCell.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/recurrent/lstmCell.cpp @@ -136,7 +136,7 @@ DECLARE_SHAPE_FN(lstmCell) { ShapeUtils::updateStridesAndType(hShapeInfo, xtShapeInfo, shape::order(ht_1ShapeInfo)); ShapeUtils::updateStridesAndType(cShapeInfo, xtShapeInfo, shape::order(ct_1ShapeInfo)); - auto result = SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(hShapeInfo), ConstantShapeHelper::getInstance()->createShapeInfo(cShapeInfo)); + auto result = SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(hShapeInfo), ConstantShapeHelper::getInstance().createShapeInfo(cShapeInfo)); RELEASE(hShapeInfo, block.workspace()); RELEASE(cShapeInfo, block.workspace()); return result; diff --git a/libnd4j/include/ops/declarable/generic/nn/recurrent/lstmLayer.cpp b/libnd4j/include/ops/declarable/generic/nn/recurrent/lstmLayer.cpp index a5c8b8d28..0a0754a8e 100644 --- a/libnd4j/include/ops/declarable/generic/nn/recurrent/lstmLayer.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/recurrent/lstmLayer.cpp @@ -362,7 +362,7 @@ DECLARE_SHAPE_FN(lstmLayer) { hShape = {sL, 2, bS, nOut}; } - shapes->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(type, x->ordering(), hShape)); + 
shapes->push_back(ConstantShapeHelper::getInstance().createShapeInfo(type, x->ordering(), hShape)); } // evaluate hL shape (output at last step) @@ -375,7 +375,7 @@ DECLARE_SHAPE_FN(lstmLayer) { else hLShape = {2, bS, nOut}; - shapes->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(type, x->ordering(), hLShape)); + shapes->push_back(ConstantShapeHelper::getInstance().createShapeInfo(type, x->ordering(), hLShape)); if(retLastC) // cL and hL have same shapes shapes->push_back(shapes->at(shapes->size() - 1)); @@ -391,7 +391,7 @@ DECLARE_SHAPE_FN(lstmLayer) { else cLShape = {2, bS, nOut}; - shapes->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(type, x->ordering(), cLShape)); + shapes->push_back(ConstantShapeHelper::getInstance().createShapeInfo(type, x->ordering(), cLShape)); } return shapes; diff --git a/libnd4j/include/ops/declarable/generic/nn/recurrent/sru.cpp b/libnd4j/include/ops/declarable/generic/nn/recurrent/sru.cpp index 84dd6356a..ba4e3d52f 100644 --- a/libnd4j/include/ops/declarable/generic/nn/recurrent/sru.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/recurrent/sru.cpp @@ -127,7 +127,7 @@ DECLARE_SHAPE_FN(sru) { ShapeUtils::updateStridesAndType(newShapeInfo1, xShapeInfo, shape::order(xShapeInfo)); ShapeDescriptor descriptor(newShapeInfo1); RELEASE(newShapeInfo1, block.getWorkspace()); - auto result = ConstantShapeHelper::getInstance()->createShapeInfo(descriptor); + auto result = ConstantShapeHelper::getInstance().createShapeInfo(descriptor); return SHAPELIST(result, result); } @@ -311,7 +311,7 @@ DECLARE_SHAPE_FN(sru_bp) { ShapeDescriptor descriptor3(ArrayOptions::dataType(inShape), order, {1, 2 * inSize}); ShapeDescriptor descriptor4(ArrayOptions::dataType(inShape), order, {bS, inSize}); - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(descriptor1), ConstantShapeHelper::getInstance()->createShapeInfo(descriptor2), ConstantShapeHelper::getInstance()->createShapeInfo(descriptor3), ConstantShapeHelper::getInstance()->createShapeInfo(descriptor4)); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(descriptor1), ConstantShapeHelper::getInstance().createShapeInfo(descriptor2), ConstantShapeHelper::getInstance().createShapeInfo(descriptor3), ConstantShapeHelper::getInstance().createShapeInfo(descriptor4)); } @@ -396,7 +396,7 @@ DECLARE_SHAPE_FN(sru_bi) { char order = shape::order(xShapeInfo); ShapeDescriptor descriptor(ArrayOptions::dataType(xShapeInfo), order, {time, bS, 2 * inSize}); - auto result = ConstantShapeHelper::getInstance()->createShapeInfo(descriptor); + auto result = ConstantShapeHelper::getInstance().createShapeInfo(descriptor); return SHAPELIST(result, result); } @@ -505,7 +505,7 @@ DECLARE_SHAPE_FN(sru_bi_bp) { ShapeDescriptor descriptor3(ArrayOptions::dataType(xShapeInfo), order, {4 * inSize}); ShapeDescriptor descriptor4(ArrayOptions::dataType(xShapeInfo), order, {bS, 2 * inSize}); - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(descriptor1), ConstantShapeHelper::getInstance()->createShapeInfo(descriptor2), ConstantShapeHelper::getInstance()->createShapeInfo(descriptor3), ConstantShapeHelper::getInstance()->createShapeInfo(descriptor4)); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(descriptor1), ConstantShapeHelper::getInstance().createShapeInfo(descriptor2), ConstantShapeHelper::getInstance().createShapeInfo(descriptor3), ConstantShapeHelper::getInstance().createShapeInfo(descriptor4)); } } @@ -771,7 +771,7 @@ DECLARE_SHAPE_FN(sru_bi_bp) { // 
ShapeUtils::updateStridesAndType(newShapeInfo1, inShape, order); -// auto result = ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(newShapeInfo1)); +// auto result = ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(newShapeInfo1)); // RELEASE(newShapeInfo1, block.getWorkspace()); // return SHAPELIST(result, result); // } @@ -935,5 +935,5 @@ DECLARE_SHAPE_FN(sru_bi_bp) { // ShapeDescriptor descriptor3(ArrayOptions::dataType(inShape), order, {1, 2 * inSize}); // ShapeDescriptor descriptor4(ArrayOptions::dataType(inShape), order, {bS, inSize}); -// return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(descriptor1), ConstantShapeHelper::getInstance()->createShapeInfo(descriptor2), ConstantShapeHelper::getInstance()->createShapeInfo(descriptor3), ConstantShapeHelper::getInstance()->createShapeInfo(descriptor4)); +// return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(descriptor1), ConstantShapeHelper::getInstance().createShapeInfo(descriptor2), ConstantShapeHelper::getInstance().createShapeInfo(descriptor3), ConstantShapeHelper::getInstance().createShapeInfo(descriptor4)); // } diff --git a/libnd4j/include/ops/declarable/generic/nn/recurrent/sruCell.cpp b/libnd4j/include/ops/declarable/generic/nn/recurrent/sruCell.cpp index ee446037c..3268da453 100644 --- a/libnd4j/include/ops/declarable/generic/nn/recurrent/sruCell.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/recurrent/sruCell.cpp @@ -97,7 +97,7 @@ DECLARE_SHAPE_FN(sruCell) { ShapeUtils::updateStridesAndType(hShapeInfo, ct_1ShapeInfo, shape::order(ct_1ShapeInfo)); ShapeUtils::updateStridesAndType(cShapeInfo, ct_1ShapeInfo, shape::order(ct_1ShapeInfo)); - return SHAPELIST(ConstantShapeHelper::getInstance()->createFromExisting(hShapeInfo, block.workspace()), ConstantShapeHelper::getInstance()->createFromExisting(cShapeInfo, block.workspace())); + return SHAPELIST(ConstantShapeHelper::getInstance().createFromExisting(hShapeInfo, block.workspace()), ConstantShapeHelper::getInstance().createFromExisting(cShapeInfo, block.workspace())); } diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/bincount.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/bincount.cpp index 3b9fc3916..45b864f26 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/bincount.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/bincount.cpp @@ -111,7 +111,7 @@ namespace sd { outLength = sd::math::nd4j_min(outLength, max->e<Nd4jLong>(0)); } - auto newshape = ConstantShapeHelper::getInstance()->vectorShapeInfo(outLength, dtype); + auto newshape = ConstantShapeHelper::getInstance().vectorShapeInfo(outLength, dtype); shapeList->push_back(newshape); return shapeList; diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/broadcast_dynamic_shape.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/broadcast_dynamic_shape.cpp index 4fc31dd51..d954a0b44 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/broadcast_dynamic_shape.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/broadcast_dynamic_shape.cpp @@ -82,7 +82,7 @@ DECLARE_SHAPE_FN(broadcast_dynamic_shape) { const int maxRank = xRank > yRank ?
xRank : yRank; - auto outputShapeInfo = ConstantShapeHelper::getInstance()->vectorShapeInfo(maxRank, ArrayOptions::dataType(inputShape->at(0))); + auto outputShapeInfo = ConstantShapeHelper::getInstance().vectorShapeInfo(maxRank, ArrayOptions::dataType(inputShape->at(0))); return SHAPELIST(outputShapeInfo); } diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/check_numerics.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/check_numerics.cpp index 561c6bb5b..3d06d4ced 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/check_numerics.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/check_numerics.cpp @@ -41,7 +41,7 @@ namespace sd { } DECLARE_SHAPE_FN(check_numerics) { - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(inputShape->at(0)))); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(inputShape->at(0)))); } DECLARE_TYPES(check_numerics) { diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/compare_and_bitpack.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/compare_and_bitpack.cpp index 1decc65f0..f694502b3 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/compare_and_bitpack.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/compare_and_bitpack.cpp @@ -53,7 +53,7 @@ namespace sd { auto inShape = inputShape->at(0); DataType newType = DataType::UINT8; - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(inShape, newType))); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(inShape, newType))); } } diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/confusion_matrix.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/confusion_matrix.cpp index f90513ca3..f5c5cbb91 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/confusion_matrix.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/confusion_matrix.cpp @@ -77,7 +77,7 @@ namespace sd { } std::array<Nd4jLong, 2> shape = {{numClasses,numClasses}}; - auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(dtype, 'c', 2, shape.data()); + auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(dtype, 'c', 2, shape.data()); return SHAPELIST(newShape); } } diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/expose.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/expose.cpp index fd3315157..d9c931f21 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/expose.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/expose.cpp @@ -61,7 +61,7 @@ namespace sd { auto var = block.getVariable(e); if (var->variableType() == VariableType::NDARRAY) { auto inShape = inputShape->at(e); - shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(inShape))); + shapeList->push_back(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(inShape))); } } diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/in_top_k.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/in_top_k.cpp index a243842d2..7618de5b1 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/in_top_k.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/in_top_k.cpp @@ -46,7 +46,7 @@ namespace sd { auto in = inputShape->at(1); int shapeRank = shape::rank(in); - auto aShape = ConstantShapeHelper::getInstance()->createShapeInfo(sd::DataType::BOOL, shape::order(in), shape::rank(in), shape::shapeOf(in)); + auto aShape =
ConstantShapeHelper::getInstance().createShapeInfo(sd::DataType::BOOL, shape::order(in), shape::rank(in), shape::shapeOf(in)); shapeList->push_back(aShape); return shapeList; } diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/listdiff.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/listdiff.cpp index 49c7a2957..86a37619e 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/listdiff.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/listdiff.cpp @@ -54,8 +54,8 @@ namespace sd { REQUIRE_TRUE(saved > 0, 0, "ListDiff: no matches found"); - auto shapeX = ConstantShapeHelper::getInstance()->vectorShapeInfo(saved, values->dataType()); - auto shapeY = ConstantShapeHelper::getInstance()->vectorShapeInfo(saved, DataType::INT64); + auto shapeX = ConstantShapeHelper::getInstance().vectorShapeInfo(saved, values->dataType()); + auto shapeY = ConstantShapeHelper::getInstance().vectorShapeInfo(saved, DataType::INT64); return SHAPELIST(shapeX, shapeY); } diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/non_max_suppression.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/non_max_suppression.cpp index ecddab3bc..91512b2f7 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/non_max_suppression.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/non_max_suppression.cpp @@ -106,7 +106,7 @@ namespace sd { if (actualIndicesCount < maxOutputSize) maxOutputSize = actualIndicesCount; } - outputShape = ConstantShapeHelper::getInstance()->vectorShapeInfo(maxOutputSize, DataType::INT32); + outputShape = ConstantShapeHelper::getInstance().vectorShapeInfo(maxOutputSize, DataType::INT32); return SHAPELIST(outputShape); } @@ -211,7 +211,7 @@ namespace sd { if (len > 0) len = helpers::nonMaxSuppressionV3(block.launchContext(), boxes, scales, maxOutputSize, overlayThreshold, scoreThreshold, nullptr); - auto outputShape = ConstantShapeHelper::getInstance()->vectorShapeInfo(len, DataType::INT32); + auto outputShape = ConstantShapeHelper::getInstance().vectorShapeInfo(len, DataType::INT32); return SHAPELIST(outputShape); } diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/non_max_suppression_overlaps.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/non_max_suppression_overlaps.cpp index 30f59ff35..1cc4addbc 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/non_max_suppression_overlaps.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/non_max_suppression_overlaps.cpp @@ -75,7 +75,7 @@ namespace sd { if (boxSize < maxOutputSize) maxOutputSize = boxSize; - auto outputShape = ConstantShapeHelper::getInstance()->vectorShapeInfo(maxOutputSize, DataType::INT32); + auto outputShape = ConstantShapeHelper::getInstance().vectorShapeInfo(maxOutputSize, DataType::INT32); return SHAPELIST(outputShape); } diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/nth_element.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/nth_element.cpp index b0a549c43..b9326a981 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/nth_element.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/nth_element.cpp @@ -61,11 +61,11 @@ namespace sd { outShape = CONSTANT(outputShape); } else if (outRank == 1) { - outShape = ConstantShapeHelper::getInstance()->vectorShapeInfo(shape::sizeAt(in, 0), ArrayOptions::dataType(in)); + outShape = ConstantShapeHelper::getInstance().vectorShapeInfo(shape::sizeAt(in, 0), ArrayOptions::dataType(in)); } else { //outputShape = shape::createScalarShapeInfo(); - outShape = 
ConstantShapeHelper::getInstance()->scalarShapeInfo(ArrayOptions::dataType(in)); + outShape = ConstantShapeHelper::getInstance().scalarShapeInfo(ArrayOptions::dataType(in)); } return SHAPELIST(outShape); } diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/onehot.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/onehot.cpp index 6349b84fe..5b25ea7e6 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/onehot.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/onehot.cpp @@ -100,7 +100,7 @@ namespace sd { shape.push_back(shape::shapeOf(inShape)[e]); shape.insert(shape.begin() + axis, depth); - auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(dtype, 'c', rank + 1, shape.data()); + auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(dtype, 'c', rank + 1, shape.data()); return SHAPELIST(newShape); } diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/top_k.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/top_k.cpp index 799572794..b042e94fe 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/top_k.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/top_k.cpp @@ -76,7 +76,7 @@ namespace sd { aShape[shapeRank] = k; shape::updateStrides(aShape, shape::order(in)); - shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(aShape, (e == 0?ArrayOptions::dataType(in):sd::DataType::INT64)))); + shapeList->push_back(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(aShape, (e == 0?ArrayOptions::dataType(in):sd::DataType::INT64)))); RELEASE(aShape, block.getWorkspace()); } diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/unique.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/unique.cpp index 9005348a1..9d234abaa 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/unique.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/unique.cpp @@ -46,14 +46,14 @@ namespace sd { int uniqueCount = helpers::uniqueCount(block.launchContext(), source); if (uniqueCount == 0) { // empty value Shape - valuesShape = ConstantShapeHelper::getInstance()->emptyShapeInfo(source->dataType()); + valuesShape = ConstantShapeHelper::getInstance().emptyShapeInfo(source->dataType()); } else { // all output shapes are 1D arrays (vectors) - valuesShape = ConstantShapeHelper::getInstance()->vectorShapeInfo(uniqueCount, ArrayOptions::dataType(in)); + valuesShape = ConstantShapeHelper::getInstance().vectorShapeInfo(uniqueCount, ArrayOptions::dataType(in)); } // second output is always LONG - indicesShape = ConstantShapeHelper::getInstance()->vectorShapeInfo(shape::length(in), sd::DataType::INT64); + indicesShape = ConstantShapeHelper::getInstance().vectorShapeInfo(shape::length(in), sd::DataType::INT64); //COPY_SHAPE_EX(in, indicesShape, block.getWorkspace()); @@ -77,13 +77,13 @@ namespace sd { int uniqueCount = helpers::uniqueCount(block.launchContext(), source); // all output shapes are 1D arrays (vectors) // all output shapes are 1D arrays (vectors) - auto valuesShape = ConstantShapeHelper::getInstance()->vectorShapeInfo(uniqueCount, source->dataType()); + auto valuesShape = ConstantShapeHelper::getInstance().vectorShapeInfo(uniqueCount, source->dataType()); // second output is always LONG - auto indicesShape = ConstantShapeHelper::getInstance()->vectorShapeInfo(source->lengthOf(), sd::DataType::INT64); + auto indicesShape = ConstantShapeHelper::getInstance().vectorShapeInfo(source->lengthOf(), sd::DataType::INT64); // third one as well - auto 
countsShape = ConstantShapeHelper::getInstance()->vectorShapeInfo(uniqueCount, sd::DataType::INT64); + auto countsShape = ConstantShapeHelper::getInstance().vectorShapeInfo(uniqueCount, sd::DataType::INT64); return SHAPELIST(valuesShape, indicesShape, countsShape); } diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/zero_fraction.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/zero_fraction.cpp index f70e92cf5..91f0a564d 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/zero_fraction.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/zero_fraction.cpp @@ -48,7 +48,7 @@ namespace sd { return Status::OK(); } DECLARE_SHAPE_FN(zero_fraction) { - return SHAPELIST(ConstantShapeHelper::getInstance()->scalarShapeInfo(sd::DataType::DOUBLE)); + return SHAPELIST(ConstantShapeHelper::getInstance().scalarShapeInfo(sd::DataType::DOUBLE)); } DECLARE_TYPES(zero_fraction) { diff --git a/libnd4j/include/ops/declarable/generic/random/bernoulli.cpp b/libnd4j/include/ops/declarable/generic/random/bernoulli.cpp index f0b2b587b..ded5bfee5 100644 --- a/libnd4j/include/ops/declarable/generic/random/bernoulli.cpp +++ b/libnd4j/include/ops/declarable/generic/random/bernoulli.cpp @@ -53,7 +53,7 @@ namespace sd { auto in = INPUT_VARIABLE(0); auto shape = in->template asVectorT<Nd4jLong>(); - auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(block.dataType(), 'c', shape); + auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(block.dataType(), 'c', shape); return SHAPELIST(newShape); } diff --git a/libnd4j/include/ops/declarable/generic/random/exponential.cpp b/libnd4j/include/ops/declarable/generic/random/exponential.cpp index cac3d1a88..735bab583 100644 --- a/libnd4j/include/ops/declarable/generic/random/exponential.cpp +++ b/libnd4j/include/ops/declarable/generic/random/exponential.cpp @@ -42,7 +42,7 @@ namespace sd { auto in = INPUT_VARIABLE(0); auto shape = in->template asVectorT<Nd4jLong>(); - auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(block.dataType(), 'c', shape); + auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(block.dataType(), 'c', shape); return SHAPELIST(newShape); } diff --git a/libnd4j/include/ops/declarable/generic/random/gamma.cpp b/libnd4j/include/ops/declarable/generic/random/gamma.cpp index e21458530..a00ce2b7e 100644 --- a/libnd4j/include/ops/declarable/generic/random/gamma.cpp +++ b/libnd4j/include/ops/declarable/generic/random/gamma.cpp @@ -68,7 +68,7 @@ namespace sd { auto dtype = ArrayOptions::dataType(alphaShape); for (auto i = 0; i < shape::rank(additionalShape); i++) shape.push_back(shape::sizeAt(additionalShape, i)); - auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(dtype, 'c', shape); + auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(dtype, 'c', shape); return SHAPELIST(newShape); } diff --git a/libnd4j/include/ops/declarable/generic/random/get_seed.cpp b/libnd4j/include/ops/declarable/generic/random/get_seed.cpp index 7042ae6dd..9f768e9f3 100644 --- a/libnd4j/include/ops/declarable/generic/random/get_seed.cpp +++ b/libnd4j/include/ops/declarable/generic/random/get_seed.cpp @@ -36,7 +36,7 @@ namespace sd { } DECLARE_SHAPE_FN(get_seed) { - auto newshape = ConstantShapeHelper::getInstance()->scalarShapeInfo(DataType::INT64); + auto newshape = ConstantShapeHelper::getInstance().scalarShapeInfo(DataType::INT64); return SHAPELIST(newshape); } diff --git a/libnd4j/include/ops/declarable/generic/random/multinomial.cpp
b/libnd4j/include/ops/declarable/generic/random/multinomial.cpp index 5361d1bbb..2e8225d2c 100644 --- a/libnd4j/include/ops/declarable/generic/random/multinomial.cpp +++ b/libnd4j/include/ops/declarable/generic/random/multinomial.cpp @@ -99,7 +99,7 @@ namespace sd { nShape[dimA] = numOfSamples; DataType nType = (argSize > 1) ? ( INT_ARG(1) >= 0 ? static_cast<DataType>(INT_ARG(1)) : sd::DataType::INT64) : sd::DataType::INT64; - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(nType, input->ordering(), nShape)); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(nType, input->ordering(), nShape)); } DECLARE_TYPES(random_multinomial) { diff --git a/libnd4j/include/ops/declarable/generic/random/normal.cpp b/libnd4j/include/ops/declarable/generic/random/normal.cpp index f81a06786..701570784 100644 --- a/libnd4j/include/ops/declarable/generic/random/normal.cpp +++ b/libnd4j/include/ops/declarable/generic/random/normal.cpp @@ -48,7 +48,7 @@ namespace sd { auto in = INPUT_VARIABLE(0); auto shape = in->template asVectorT<Nd4jLong>(); - auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(block.dataType(), 'c', shape); + auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(block.dataType(), 'c', shape); return SHAPELIST(newShape); } diff --git a/libnd4j/include/ops/declarable/generic/random/poisson.cpp b/libnd4j/include/ops/declarable/generic/random/poisson.cpp index 74f3a8570..eedfbbe1f 100644 --- a/libnd4j/include/ops/declarable/generic/random/poisson.cpp +++ b/libnd4j/include/ops/declarable/generic/random/poisson.cpp @@ -51,7 +51,7 @@ namespace sd { for (auto d = 0; d < shape::rank(lambdaShape); ++d ) { shape.emplace_back(shape::sizeAt(lambdaShape, d)); } - auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(dtype, 'c', shape); + auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(dtype, 'c', shape); return SHAPELIST(newShape); } diff --git a/libnd4j/include/ops/declarable/generic/random/random_crop.cpp b/libnd4j/include/ops/declarable/generic/random/random_crop.cpp index 2ac2495d3..1b30b2f91 100644 --- a/libnd4j/include/ops/declarable/generic/random/random_crop.cpp +++ b/libnd4j/include/ops/declarable/generic/random/random_crop.cpp @@ -59,7 +59,7 @@ DECLARE_SHAPE_FN(random_crop) { for (int e = 0; e < shape.size(); e++) shape[e] = (*in).e<Nd4jLong>(e); - auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(typeShape), 'c', shape); + auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(typeShape), 'c', shape); return SHAPELIST(newShape); } diff --git a/libnd4j/include/ops/declarable/generic/random/set_seed.cpp b/libnd4j/include/ops/declarable/generic/random/set_seed.cpp index f4c240d50..f7050f3ab 100644 --- a/libnd4j/include/ops/declarable/generic/random/set_seed.cpp +++ b/libnd4j/include/ops/declarable/generic/random/set_seed.cpp @@ -48,7 +48,7 @@ namespace sd { } DECLARE_SHAPE_FN(set_seed) { - auto newshape = ConstantShapeHelper::getInstance()->scalarShapeInfo(block.dataType()); + auto newshape = ConstantShapeHelper::getInstance().scalarShapeInfo(block.dataType()); return SHAPELIST(newshape); } diff --git a/libnd4j/include/ops/declarable/generic/random/uniform.cpp b/libnd4j/include/ops/declarable/generic/random/uniform.cpp index 94df6b32d..d4abccf78 100644 --- a/libnd4j/include/ops/declarable/generic/random/uniform.cpp +++ b/libnd4j/include/ops/declarable/generic/random/uniform.cpp @@ -80,7 +80,7 @@ namespace sd { if (block.width() > 1)
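
// Aside on the RNG shape functions above (bernoulli through uniform): input 0
// is a 1-D shape tensor, so the output shape is simply that tensor read back
// as a vector of longs via asVectorT<Nd4jLong>() and passed to createShapeInfo
// with 'c' ordering; the ops differ only in where the output data type comes
// from (block.dataType(), a D_ARG, or another input's dtype).
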
REQUIRE_TRUE(dtype == INPUT_VARIABLE(1)->dataType(), 0, "RandomUniform: data type of output and min/max args should be the same"); - auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(dtype, 'c', shape); + auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(dtype, 'c', shape); return SHAPELIST(newShape); } diff --git a/libnd4j/include/ops/declarable/generic/reduce/argamax.cpp b/libnd4j/include/ops/declarable/generic/reduce/argamax.cpp index 5fb452227..a347c398a 100644 --- a/libnd4j/include/ops/declarable/generic/reduce/argamax.cpp +++ b/libnd4j/include/ops/declarable/generic/reduce/argamax.cpp @@ -84,7 +84,7 @@ namespace sd { // special case - output is scalar if (dims.empty() || (dims.size() == 1 && dims.at(0) == sd::DataTypeUtils::max())) { - return SHAPELIST(ConstantShapeHelper::getInstance()->scalarShapeInfo(dtype)); + return SHAPELIST(ConstantShapeHelper::getInstance().scalarShapeInfo(dtype)); } return SHAPELIST(ShapeUtils::evalReduceShapeInfo('c', dims, inputShape->at(0), dtype, keepDims, false, block.getWorkspace())); diff --git a/libnd4j/include/ops/declarable/generic/reduce/argamin.cpp b/libnd4j/include/ops/declarable/generic/reduce/argamin.cpp index 4f590aae8..68ad9d2e5 100644 --- a/libnd4j/include/ops/declarable/generic/reduce/argamin.cpp +++ b/libnd4j/include/ops/declarable/generic/reduce/argamin.cpp @@ -84,7 +84,7 @@ namespace sd { // special case - output is scalar if (dims.empty() || (dims.size() == 1 && dims.at(0) == sd::DataTypeUtils::max())) { - return SHAPELIST(ConstantShapeHelper::getInstance()->scalarShapeInfo(dtype)); + return SHAPELIST(ConstantShapeHelper::getInstance().scalarShapeInfo(dtype)); } return SHAPELIST(ShapeUtils::evalReduceShapeInfo('c', dims, inputShape->at(0), dtype, keepDims, false, block.getWorkspace())); diff --git a/libnd4j/include/ops/declarable/generic/reduce/argmax.cpp b/libnd4j/include/ops/declarable/generic/reduce/argmax.cpp index 9c45b4c37..f8a2486fa 100644 --- a/libnd4j/include/ops/declarable/generic/reduce/argmax.cpp +++ b/libnd4j/include/ops/declarable/generic/reduce/argmax.cpp @@ -86,7 +86,7 @@ namespace sd { // special case - output is scalar if (dims.empty() || (dims.size() == 1 && dims.at(0) == sd::DataTypeUtils::max())) { - return SHAPELIST(ConstantShapeHelper::getInstance()->scalarShapeInfo(dtype)); + return SHAPELIST(ConstantShapeHelper::getInstance().scalarShapeInfo(dtype)); } return SHAPELIST(ShapeUtils::evalReduceShapeInfo('c', dims, inputShape->at(0), dtype, keepDims, false, block.getWorkspace())); diff --git a/libnd4j/include/ops/declarable/generic/reduce/argmin.cpp b/libnd4j/include/ops/declarable/generic/reduce/argmin.cpp index 97430a24f..40648b7f6 100644 --- a/libnd4j/include/ops/declarable/generic/reduce/argmin.cpp +++ b/libnd4j/include/ops/declarable/generic/reduce/argmin.cpp @@ -88,7 +88,7 @@ namespace sd { // special case - output is scalar if (dims.empty() || (dims.size() == 1 && dims.at(0) == sd::DataTypeUtils::max())) { - return SHAPELIST(ConstantShapeHelper::getInstance()->scalarShapeInfo(dtype)); + return SHAPELIST(ConstantShapeHelper::getInstance().scalarShapeInfo(dtype)); } return SHAPELIST(ShapeUtils::evalReduceShapeInfo('c', dims, inputShape->at(0), dtype, keepDims, false, block.getWorkspace())); diff --git a/libnd4j/include/ops/declarable/generic/shape/broadcast_to.cpp b/libnd4j/include/ops/declarable/generic/shape/broadcast_to.cpp index 49961bfe2..18d10be7b 100644 --- a/libnd4j/include/ops/declarable/generic/shape/broadcast_to.cpp +++ 
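
The argamax/argamin/argmax/argmin shape functions above share a special case worth spelling out: an empty axis list, or a single axis equal to an int-max sentinel (sd::DataTypeUtils::max() in the hunks, assumed here to be the integer maximum), means "reduce over all dimensions", so the output collapses to a scalar. A standalone sketch of that predicate, not the exact libnd4j code:

    #include <vector>
    #include <limits>

    // true when an argmax/argmin-style reduction collapses to a scalar
    bool reducesToScalar(const std::vector<int>& dims) {
        return dims.empty() ||
               (dims.size() == 1 && dims[0] == std::numeric_limits<int>::max());
    }
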
b/libnd4j/include/ops/declarable/generic/shape/broadcast_to.cpp @@ -76,7 +76,7 @@ DECLARE_SHAPE_FN(broadcast_to) { for(int i = 1; i <= inputRank; ++i) REQUIRE_TRUE(inputShapeInfo[inputRank+1-i] == outShape[shapeLen-i] || inputShapeInfo[inputRank+1-i] == 1, 0, "BROADCAST_TO op: shape of input array %s can't be broadcasted to the shape %s !", ShapeUtils::shapeAsString(inputShapeInfo).c_str(), ShapeUtils::shapeAsString(outShape).c_str()); - auto outShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inputShapeInfo), shape::order(inputShapeInfo), outShape); + auto outShapeInfo = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inputShapeInfo), shape::order(inputShapeInfo), outShape); return SHAPELIST(outShapeInfo); } diff --git a/libnd4j/include/ops/declarable/generic/shape/evaluate_reduction_shape.cpp b/libnd4j/include/ops/declarable/generic/shape/evaluate_reduction_shape.cpp index 6a0ad187c..c35a81279 100644 --- a/libnd4j/include/ops/declarable/generic/shape/evaluate_reduction_shape.cpp +++ b/libnd4j/include/ops/declarable/generic/shape/evaluate_reduction_shape.cpp @@ -34,7 +34,7 @@ namespace sd { auto shape = inputShape->asVectorT(); - auto tempShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(sd::DataType::INT64, 'c', shape); + auto tempShapeInfo = ConstantShapeHelper::getInstance().createShapeInfo(sd::DataType::INT64, 'c', shape); auto tempReductionShapeInfo = ShapeUtils::evalReduceShapeInfo('c', axis, tempShapeInfo, keepDims, oldFormat, block.workspace()); REQUIRE_TRUE(output->lengthOf() == shape::rank(tempReductionShapeInfo), 0, "evaluate_reduction_shape: output length should be %i, but got %i instead", shape::rank(tempReductionShapeInfo), output->lengthOf()); @@ -73,7 +73,7 @@ namespace sd { } } - return SHAPELIST(ConstantShapeHelper::getInstance()->vectorShapeInfo(length, sd::DataType::INT64)); + return SHAPELIST(ConstantShapeHelper::getInstance().vectorShapeInfo(length, sd::DataType::INT64)); } } } diff --git a/libnd4j/include/ops/declarable/generic/shape/expand_dims.cpp b/libnd4j/include/ops/declarable/generic/shape/expand_dims.cpp index 86900c264..df31f5109 100644 --- a/libnd4j/include/ops/declarable/generic/shape/expand_dims.cpp +++ b/libnd4j/include/ops/declarable/generic/shape/expand_dims.cpp @@ -70,13 +70,13 @@ namespace sd { if (shape::rank(inShape) == 0) { Nd4jLong x = 1; - auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inShape), 'c', 1, &x); + auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inShape), 'c', 1, &x); return SHAPELIST(newShape); } // FIXME: temp workaround for TF if (shape::isScalar(inShape)) { - auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inShape), 'c', 2, shape::shapeOf(inShape)); + auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inShape), 'c', 2, shape::shapeOf(inShape)); return SHAPELIST(newShape); } @@ -94,7 +94,7 @@ namespace sd { shape.insert(shape.begin() + axis, 1); - auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inShape), order, shape); + auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inShape), order, shape); return SHAPELIST(newShape); } } diff --git a/libnd4j/include/ops/declarable/generic/shape/flatten.cpp b/libnd4j/include/ops/declarable/generic/shape/flatten.cpp index 19cc4f469..8327ca1a1 100644 --- 
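
The expand_dims hunk above inserts a unit dimension at the requested axis. A standalone sketch of the shape arithmetic; the negative-axis normalization is assumed from the usual convention and is not part of the hunk itself:

    #include <cstdint>
    #include <vector>

    // insert a unit dimension at `axis`; assumes 0 <= axis <= rank after
    // normalization, e.g. {3,4} with axis = 1 becomes {3,1,4}
    std::vector<int64_t> expandDims(std::vector<int64_t> shape, int axis) {
        const int rank = static_cast<int>(shape.size());
        if (axis < 0) axis += rank + 1;        // normalize negative axis
        shape.insert(shape.begin() + axis, 1); // new unit dimension
        return shape;
    }
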
a/libnd4j/include/ops/declarable/generic/shape/flatten.cpp +++ b/libnd4j/include/ops/declarable/generic/shape/flatten.cpp @@ -60,7 +60,7 @@ namespace sd { REQUIRE_TRUE(dtype == ArrayOptions::dataType(inputShape->at(e)), 0, "Flatten: all input arrays must have the same datatype"); } - return SHAPELIST(ConstantShapeHelper::getInstance()->vectorShapeInfo(length, dtype)); + return SHAPELIST(ConstantShapeHelper::getInstance().vectorShapeInfo(length, dtype)); } } } diff --git a/libnd4j/include/ops/declarable/generic/shape/order.cpp b/libnd4j/include/ops/declarable/generic/shape/order.cpp index 5b978f48f..2d7e0994c 100644 --- a/libnd4j/include/ops/declarable/generic/shape/order.cpp +++ b/libnd4j/include/ops/declarable/generic/shape/order.cpp @@ -45,7 +45,7 @@ namespace sd { auto isFOrder = INT_ARG(0) == 1; - auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(input), isFOrder ? 'f' : 'c', shape::rank(input), shape::shapeOf(input)); + auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(input), isFOrder ? 'f' : 'c', shape::rank(input), shape::shapeOf(input)); return SHAPELIST(newShape); } } diff --git a/libnd4j/include/ops/declarable/generic/shape/rank.cpp b/libnd4j/include/ops/declarable/generic/shape/rank.cpp index 8a617dc59..d12e15239 100644 --- a/libnd4j/include/ops/declarable/generic/shape/rank.cpp +++ b/libnd4j/include/ops/declarable/generic/shape/rank.cpp @@ -37,7 +37,7 @@ namespace sd { return Status::OK(); } DECLARE_SHAPE_FN(rank) { - return SHAPELIST(ConstantShapeHelper::getInstance()->scalarShapeInfo(sd::DataType::INT32)); + return SHAPELIST(ConstantShapeHelper::getInstance().scalarShapeInfo(sd::DataType::INT32)); } diff --git a/libnd4j/include/ops/declarable/generic/shape/reshape.cpp b/libnd4j/include/ops/declarable/generic/shape/reshape.cpp index 023e9bf89..38bae587e 100644 --- a/libnd4j/include/ops/declarable/generic/shape/reshape.cpp +++ b/libnd4j/include/ops/declarable/generic/shape/reshape.cpp @@ -42,7 +42,7 @@ CUSTOM_OP_IMPL(reshape, 1, 1, false, 0, -2) { REQUIRE_TRUE(x->lengthOf() == z->lengthOf(), 0, "Reshape: lengths before and after reshape should match, but got %i vs %i", x->lengthOf(), z->lengthOf()); - if (Environment::getInstance()->isDebugAndVerbose()) + if (Environment::getInstance().isDebugAndVerbose()) nd4j_printv("Reshape: new shape", z->getShapeAsVector()); z->assign(x->reshape(z->ordering(), z->getShapeAsVector())); @@ -159,7 +159,7 @@ DECLARE_SHAPE_FN(reshape) { auto len = shape::prodLong(shapeNew.data(), shapeNew.size()); REQUIRE_TRUE(x->lengthOf() == len, 0, "Reshape: lengths before and after reshape should match, but got %i vs %i", x->lengthOf(), len); - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(x->dataType(), orderNew, shapeNew)); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(x->dataType(), orderNew, shapeNew)); } diff --git a/libnd4j/include/ops/declarable/generic/shape/shape.cpp b/libnd4j/include/ops/declarable/generic/shape/shape.cpp index e2db3db3e..098825df3 100644 --- a/libnd4j/include/ops/declarable/generic/shape/shape.cpp +++ b/libnd4j/include/ops/declarable/generic/shape/shape.cpp @@ -46,7 +46,7 @@ namespace sd { if (block.numI() > 0) dtype = DataTypeUtils::fromInt(INT_ARG(0)); - return SHAPELIST(ConstantShapeHelper::getInstance()->vectorShapeInfo(shape::rank(inShape), dtype)); + return SHAPELIST(ConstantShapeHelper::getInstance().vectorShapeInfo(shape::rank(inShape), dtype)); }; DECLARE_TYPES(shape_of) { diff --git 
a/libnd4j/include/ops/declarable/generic/shape/shapes.cpp b/libnd4j/include/ops/declarable/generic/shape/shapes.cpp index 6481d1db3..3f5428122 100644 --- a/libnd4j/include/ops/declarable/generic/shape/shapes.cpp +++ b/libnd4j/include/ops/declarable/generic/shape/shapes.cpp @@ -43,7 +43,7 @@ namespace sd { for (int e = 0; e < inputShape->size(); e++) { auto inShape = inputShape->at(e); - shapeList->push_back(ConstantShapeHelper::getInstance()->vectorShapeInfo(shape::rank(inShape), sd::DataType::INT64)); + shapeList->push_back(ConstantShapeHelper::getInstance().vectorShapeInfo(shape::rank(inShape), sd::DataType::INT64)); } return shapeList; diff --git a/libnd4j/include/ops/declarable/generic/shape/size.cpp b/libnd4j/include/ops/declarable/generic/shape/size.cpp index d31e782c6..c30ed1b58 100644 --- a/libnd4j/include/ops/declarable/generic/shape/size.cpp +++ b/libnd4j/include/ops/declarable/generic/shape/size.cpp @@ -37,7 +37,7 @@ namespace sd { return Status::OK(); } DECLARE_SHAPE_FN(size) { - return SHAPELIST(ConstantShapeHelper::getInstance()->scalarShapeInfo(sd::DataType::INT64)); + return SHAPELIST(ConstantShapeHelper::getInstance().scalarShapeInfo(sd::DataType::INT64)); } DECLARE_TYPES(size) { diff --git a/libnd4j/include/ops/declarable/generic/shape/size_at.cpp b/libnd4j/include/ops/declarable/generic/shape/size_at.cpp index 2c27b018a..46491e688 100644 --- a/libnd4j/include/ops/declarable/generic/shape/size_at.cpp +++ b/libnd4j/include/ops/declarable/generic/shape/size_at.cpp @@ -42,7 +42,7 @@ namespace sd { } DECLARE_SHAPE_FN(size_at) { - return SHAPELIST(ConstantShapeHelper::getInstance()->scalarShapeInfo(sd::DataType::INT64)); + return SHAPELIST(ConstantShapeHelper::getInstance().scalarShapeInfo(sd::DataType::INT64)); } DECLARE_TYPES(size_at) { diff --git a/libnd4j/include/ops/declarable/generic/shape/squeeze.cpp b/libnd4j/include/ops/declarable/generic/shape/squeeze.cpp index 0b71dae52..5698f957f 100644 --- a/libnd4j/include/ops/declarable/generic/shape/squeeze.cpp +++ b/libnd4j/include/ops/declarable/generic/shape/squeeze.cpp @@ -99,7 +99,7 @@ namespace sd { auto length = shape::length(in); if (rank == 0 || (rank == 1 && length == 1)) { - shapeList->push_back(ConstantShapeHelper::getInstance()->scalarShapeInfo(ArrayOptions::dataType(in))); + shapeList->push_back(ConstantShapeHelper::getInstance().scalarShapeInfo(ArrayOptions::dataType(in))); return shapeList; } @@ -144,11 +144,11 @@ namespace sd { } if (shape.size() == 0) { - shapeList->push_back(ConstantShapeHelper::getInstance()->scalarShapeInfo(ArrayOptions::dataType(in))); + shapeList->push_back(ConstantShapeHelper::getInstance().scalarShapeInfo(ArrayOptions::dataType(in))); return shapeList; } - auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(in), order, shape); + auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(in), order, shape); shapeList->push_back(newShape); return shapeList; } diff --git a/libnd4j/include/ops/declarable/generic/shape/tile_to_shape.cpp b/libnd4j/include/ops/declarable/generic/shape/tile_to_shape.cpp index 687d79f25..ec0476e04 100644 --- a/libnd4j/include/ops/declarable/generic/shape/tile_to_shape.cpp +++ b/libnd4j/include/ops/declarable/generic/shape/tile_to_shape.cpp @@ -48,7 +48,7 @@ namespace ops { auto conv = ArrayUtils::toLongVector(*block.getIArguments()); - auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(in), shape::order(in), conv); + auto newShape = 
ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(in), shape::order(in), conv); return SHAPELIST(newShape); } diff --git a/libnd4j/include/ops/declarable/generic/tensor/create.cpp b/libnd4j/include/ops/declarable/generic/tensor/create.cpp index c79b55497..c692a74d8 100644 --- a/libnd4j/include/ops/declarable/generic/tensor/create.cpp +++ b/libnd4j/include/ops/declarable/generic/tensor/create.cpp @@ -44,7 +44,7 @@ namespace sd { auto shape = shapeInput->getBufferAsVector(); - return SHAPELIST(sd::ConstantShapeHelper::getInstance()->createShapeInfo(dtype, order, shape)); + return SHAPELIST(sd::ConstantShapeHelper::getInstance().createShapeInfo(dtype, order, shape)); } DECLARE_TYPES(create) { diff --git a/libnd4j/include/ops/declarable/generic/tensor/fill.cpp b/libnd4j/include/ops/declarable/generic/tensor/fill.cpp index 18b9ce2b8..81cece901 100644 --- a/libnd4j/include/ops/declarable/generic/tensor/fill.cpp +++ b/libnd4j/include/ops/declarable/generic/tensor/fill.cpp @@ -80,7 +80,7 @@ namespace sd { if (block.width() > 1) { dataType = INPUT_VARIABLE(1)->dataType(); } else if (block.numT() > 0) { - dataType = Environment::getInstance()->defaultFloatDataType(); + dataType = Environment::getInstance().defaultFloatDataType(); } else if (block.numI() > 0) { dataType = sd::DataType::INT32; } else if (block.numB() > 0) { diff --git a/libnd4j/include/ops/declarable/generic/tensor/lin_space.cpp b/libnd4j/include/ops/declarable/generic/tensor/lin_space.cpp index 374456be6..97f7b390f 100644 --- a/libnd4j/include/ops/declarable/generic/tensor/lin_space.cpp +++ b/libnd4j/include/ops/declarable/generic/tensor/lin_space.cpp @@ -58,7 +58,7 @@ namespace ops { auto dataType = (nInputs > 0) ? ArrayOptions::dataType(inputShape->at(0)) : ( block.numD() > 0 ? static_cast(D_ARG(0)) : DataType::FLOAT32) ; Nd4jLong steps = (nInputs > 0) ? INPUT_VARIABLE(2)->e(0) : static_cast(I_ARG(0)); - return SHAPELIST(ConstantShapeHelper::getInstance()->vectorShapeInfo(steps, dataType)); + return SHAPELIST(ConstantShapeHelper::getInstance().vectorShapeInfo(steps, dataType)); } diff --git a/libnd4j/include/ops/declarable/generic/tensor/ones_as.cpp b/libnd4j/include/ops/declarable/generic/tensor/ones_as.cpp index 32ce54300..0fb8fe283 100644 --- a/libnd4j/include/ops/declarable/generic/tensor/ones_as.cpp +++ b/libnd4j/include/ops/declarable/generic/tensor/ones_as.cpp @@ -36,7 +36,7 @@ namespace sd { DECLARE_SHAPE_FN(ones_as) { auto in = inputShape->at(0); auto dtype = block.numD() ? 
D_ARG(0) : ArrayOptions::dataType(in); - auto shape = sd::ConstantShapeHelper::getInstance()->createShapeInfo(dtype, in); + auto shape = sd::ConstantShapeHelper::getInstance().createShapeInfo(dtype, in); //nd4j_printf("numD: %i; dtype: %s\n", block.numD(), DataTypeUtils::asString(dtype).c_str()); diff --git a/libnd4j/include/ops/declarable/generic/tensor/range.cpp b/libnd4j/include/ops/declarable/generic/tensor/range.cpp index a39e07912..2f88b819b 100644 --- a/libnd4j/include/ops/declarable/generic/tensor/range.cpp +++ b/libnd4j/include/ops/declarable/generic/tensor/range.cpp @@ -153,7 +153,7 @@ DECLARE_SHAPE_FN(range) { if (limit == start){ //Return [0] to match TF - return SHAPELIST(ConstantShapeHelper::getInstance()->vectorShapeInfo(0, dtype)); + return SHAPELIST(ConstantShapeHelper::getInstance().vectorShapeInfo(0, dtype)); } REQUIRE_TRUE(delta != 0, 0, "CUSTOM RANGE OP: delta should not be equal to zero !"); @@ -183,7 +183,7 @@ DECLARE_SHAPE_FN(range) { if (limit == start){ //Return [0] to match TF - return SHAPELIST(ConstantShapeHelper::getInstance()->vectorShapeInfo(0, dtype)); + return SHAPELIST(ConstantShapeHelper::getInstance().vectorShapeInfo(0, dtype)); } REQUIRE_TRUE(delta != 0, 0, "CUSTOM RANGE OP: delta should not be equal to zero !"); @@ -213,7 +213,7 @@ DECLARE_SHAPE_FN(range) { if (limit == start){ //Return [0] to match TF - return SHAPELIST(ConstantShapeHelper::getInstance()->vectorShapeInfo(0, sd::DataType::INT32)); + return SHAPELIST(ConstantShapeHelper::getInstance().vectorShapeInfo(0, sd::DataType::INT32)); } REQUIRE_TRUE(delta != 0, 0, "CUSTOM RANGE OP: delta should not be equal to zero !"); @@ -247,7 +247,7 @@ DECLARE_SHAPE_FN(range) { if (limit == start){ //Return [0] to match TF - return SHAPELIST(ConstantShapeHelper::getInstance()->vectorShapeInfo(0, Environment::getInstance()->defaultFloatDataType())); + return SHAPELIST(ConstantShapeHelper::getInstance().vectorShapeInfo(0, Environment::getInstance().defaultFloatDataType())); } @@ -256,10 +256,10 @@ DECLARE_SHAPE_FN(range) { steps = static_cast((limit - start) / delta); if (!block.numD()) { - if (Environment::getInstance()->precisionBoostAllowed()) + if (Environment::getInstance().precisionBoostAllowed()) dataType = sd::DataType::DOUBLE; else - dataType = Environment::getInstance()->defaultFloatDataType(); + dataType = Environment::getInstance().defaultFloatDataType(); } if(math::nd4j_abs(start + steps * delta) < math::nd4j_abs(limit)) @@ -270,7 +270,7 @@ DECLARE_SHAPE_FN(range) { REQUIRE_TRUE(steps > 0, 0, "CUSTOM RANGE OP: value of (limit-start)/delta should be positive !"); - return SHAPELIST(ConstantShapeHelper::getInstance()->vectorShapeInfo(steps, dataType)); + return SHAPELIST(ConstantShapeHelper::getInstance().vectorShapeInfo(steps, dataType)); } diff --git a/libnd4j/include/ops/declarable/generic/tensor/strided_slice.cpp b/libnd4j/include/ops/declarable/generic/tensor/strided_slice.cpp index 88b06a631..bbdc84ce5 100644 --- a/libnd4j/include/ops/declarable/generic/tensor/strided_slice.cpp +++ b/libnd4j/include/ops/declarable/generic/tensor/strided_slice.cpp @@ -416,13 +416,13 @@ namespace sd { Nd4jLong offset; shape::calcSubArrShapeInfoAndOffset(indices.data(), x->shapeInfo(), subArrShapeInfo, offset, true, true); - auto subArrShapeInfoPack = ConstantShapeHelper::getInstance()->bufferForShapeInfo(subArrShapeInfo); + auto subArrShapeInfoPack = ConstantShapeHelper::getInstance().bufferForShapeInfo(subArrShapeInfo); NDArray::prepareSpecialUse({z}, {x}); 
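
// Aside on the change just below: the reinterpret_cast around
// subArrShapeInfoPack.primary()/special() disappears because the pack returned
// by ConstantShapeHelper::getInstance().bufferForShapeInfo() evidently now
// exposes its host and device buffers as typed shape-info pointers (presumably
// const Nd4jLong*), so they can be handed to the executioner directly.
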
NativeOpExecutioner::execTransformAny(block.launchContext(), sd::transform::Assign, - x->bufferWithOffset(offset), reinterpret_cast(subArrShapeInfoPack.primary()), - x->specialBufferWithOffset(offset), reinterpret_cast(subArrShapeInfoPack.special()), + x->bufferWithOffset(offset), subArrShapeInfoPack.primary(), + x->specialBufferWithOffset(offset), subArrShapeInfoPack.special(), z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), nullptr, nullptr, nullptr, true); @@ -518,18 +518,18 @@ namespace sd { std::vector indices; bool result = _preprocess_strided_slice(&indices, &shape, input_shape, begin, end, strides, begin_mask, ellipsis_mask, end_mask, new_axis_mask, shrink_axis_mask, &is_identity, &is_simple_slice, &is_dim0); if (indices.size()) { - auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inShape), 'c', + auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inShape), 'c', shape); // if (inputLen > 1) { -// newShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inShape), 'c', +// newShape = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inShape), 'c', // shape); // } else { -// newShape = ConstantShapeHelper::getInstance()->scalarShapeInfo(ArrayOptions::dataType(inShape)); +// newShape = ConstantShapeHelper::getInstance().scalarShapeInfo(ArrayOptions::dataType(inShape)); // } return SHAPELIST(newShape); } - return SHAPELIST(ConstantShapeHelper::getInstance()->emptyShapeInfo(ArrayOptions::dataType(inShape))); + return SHAPELIST(ConstantShapeHelper::getInstance().emptyShapeInfo(ArrayOptions::dataType(inShape))); } diff --git a/libnd4j/include/ops/declarable/generic/tensor/zeros_as.cpp b/libnd4j/include/ops/declarable/generic/tensor/zeros_as.cpp index 6d475af53..7935c567e 100644 --- a/libnd4j/include/ops/declarable/generic/tensor/zeros_as.cpp +++ b/libnd4j/include/ops/declarable/generic/tensor/zeros_as.cpp @@ -39,7 +39,7 @@ namespace sd { DECLARE_SHAPE_FN(zeros_as) { auto in = inputShape->at(0); auto dtype = block.numD() ? 
D_ARG(0) : ArrayOptions::dataType(in); - auto shape = sd::ConstantShapeHelper::getInstance()->createShapeInfo(dtype, in); + auto shape = sd::ConstantShapeHelper::getInstance().createShapeInfo(dtype, in); return SHAPELIST(shape); } diff --git a/libnd4j/include/ops/declarable/generic/tests/test_scalar.cpp b/libnd4j/include/ops/declarable/generic/tests/test_scalar.cpp index 437222052..e67122b05 100644 --- a/libnd4j/include/ops/declarable/generic/tests/test_scalar.cpp +++ b/libnd4j/include/ops/declarable/generic/tests/test_scalar.cpp @@ -50,7 +50,7 @@ namespace sd { ArrayOptions::setDataType(newShape, ArrayOptions::dataType(inputShape->at(0))); - auto shape = ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(newShape)); + auto shape = ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(newShape)); RELEASE(newShape, block.getWorkspace()); return SHAPELIST(shape); } diff --git a/libnd4j/include/ops/declarable/generic/tests/testcustom.cpp b/libnd4j/include/ops/declarable/generic/tests/testcustom.cpp index 89480e5bc..e8d7fc6c3 100644 --- a/libnd4j/include/ops/declarable/generic/tests/testcustom.cpp +++ b/libnd4j/include/ops/declarable/generic/tests/testcustom.cpp @@ -39,7 +39,7 @@ namespace sd { for (int e = 0; e < shape::rank(inputShape->at(0)); e++) shapeOf[e] = inputShape->at(0)[e+1] * 2; - auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(block.dataType(), 'c', shape::rank(inputShape->at(0)), shapeOf); + auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(block.dataType(), 'c', shape::rank(inputShape->at(0)), shapeOf); RELEASE(shapeOf, block.getWorkspace()); return SHAPELIST(newShape); } diff --git a/libnd4j/include/ops/declarable/generic/thrid_party/firas_sparse.cpp b/libnd4j/include/ops/declarable/generic/thrid_party/firas_sparse.cpp index 7860036ed..3a115b8db 100644 --- a/libnd4j/include/ops/declarable/generic/thrid_party/firas_sparse.cpp +++ b/libnd4j/include/ops/declarable/generic/thrid_party/firas_sparse.cpp @@ -93,7 +93,7 @@ namespace sd { auto inP = inputShape->at(0); std::vector shape({shape::shapeOf(inP)[0], (Nd4jLong) block.getIArguments()->size()}); - auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inP), 'c', shape); + auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inP), 'c', shape); return SHAPELIST(newShape); } diff --git a/libnd4j/include/ops/declarable/generic/transforms/batch_to_space.cpp b/libnd4j/include/ops/declarable/generic/transforms/batch_to_space.cpp index 607980f0d..0ffad12a2 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/batch_to_space.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/batch_to_space.cpp @@ -119,7 +119,7 @@ DECLARE_SHAPE_FN(batch_to_space) { REQUIRE_TRUE(oW >= 0, 0, "BatchToSpace: crop left/right values are too big and cause negative output width dimension !"); // we always give out C order here - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inputShapeInfo), 'c', {dim0 / (blockSize * blockSize), oH, oW, inputShapeInfo[4]})); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inputShapeInfo), 'c', {dim0 / (blockSize * blockSize), oH, oW, inputShapeInfo[4]})); } diff --git a/libnd4j/include/ops/declarable/generic/transforms/batch_to_space_nd.cpp b/libnd4j/include/ops/declarable/generic/transforms/batch_to_space_nd.cpp index f62921cc2..1ae1a2e61 100644 --- 
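
The batch_to_space shape hunk above returns {dim0 / (blockSize * blockSize), oH, oW, inputShapeInfo[4]}. The same arithmetic as a standalone sketch, assuming NHWC layout and with oH/oW expanded to the crop-adjusted values the real shape function computes and validates earlier:

    #include <array>
    #include <cstdint>

    // batch shrinks by blockSize^2 while H and W grow by blockSize minus crops
    std::array<int64_t, 4> batchToSpaceShape(const std::array<int64_t, 4>& in,
                                             int64_t block,
                                             int64_t cropTop, int64_t cropBottom,
                                             int64_t cropLeft, int64_t cropRight) {
        const int64_t oH = in[1] * block - cropTop - cropBottom;
        const int64_t oW = in[2] * block - cropLeft - cropRight;
        return { in[0] / (block * block), oH, oW, in[3] };
    }
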
a/libnd4j/include/ops/declarable/generic/transforms/batch_to_space_nd.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/batch_to_space_nd.cpp @@ -118,7 +118,7 @@ DECLARE_SHAPE_FN(batch_to_space_nd) { for (uint i = 0; i < numOfSpatialDims; ++i) outShape[i + 1] = outShape[i + 1] * INPUT_VARIABLE(1)->e(i) - INPUT_VARIABLE(2)->e(i,0) - INPUT_VARIABLE(2)->e(i,1); - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inputShapeInfo), 'c', outShape)); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inputShapeInfo), 'c', outShape)); } diff --git a/libnd4j/include/ops/declarable/generic/transforms/clip_by_global_norm.cpp b/libnd4j/include/ops/declarable/generic/transforms/clip_by_global_norm.cpp index 99a01d390..7758cf298 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/clip_by_global_norm.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/clip_by_global_norm.cpp @@ -55,7 +55,7 @@ DECLARE_SHAPE_FN(clip_by_global_norm) { shapeList->push_back(CONSTANT(newShape)); } - shapeList->push_back(ConstantShapeHelper::getInstance()->scalarShapeInfo(ArrayOptions::dataType(inputShape->at(0)))); + shapeList->push_back(ConstantShapeHelper::getInstance().scalarShapeInfo(ArrayOptions::dataType(inputShape->at(0)))); return shapeList; } diff --git a/libnd4j/include/ops/declarable/generic/transforms/concat.cpp b/libnd4j/include/ops/declarable/generic/transforms/concat.cpp index 1cf750e00..6c0901201 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/concat.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/concat.cpp @@ -140,9 +140,9 @@ DECLARE_SHAPE_FN(concat) { if(inputShape->at(i)[0] == 0) { if (shape::isEmpty(inputShape->at(i))) - arrShapes.push_back(ConstantShapeHelper::getInstance()->vectorShapeInfo(0, INPUT_VARIABLE(0)->dataType())); + arrShapes.push_back(ConstantShapeHelper::getInstance().vectorShapeInfo(0, INPUT_VARIABLE(0)->dataType())); else - arrShapes.push_back(ConstantShapeHelper::getInstance()->vectorShapeInfo(1, INPUT_VARIABLE(0)->dataType())); + arrShapes.push_back(ConstantShapeHelper::getInstance().vectorShapeInfo(1, INPUT_VARIABLE(0)->dataType())); } else{ arrShapes.push_back(inputShape->at(i)); @@ -191,7 +191,7 @@ DECLARE_SHAPE_FN(concat) { // for(int index : shapesToDelete) // RELEASE(arrShapes[index], block.getWorkspace()); - auto result = ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(outShapeInfo)); + auto result = ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(outShapeInfo)); RELEASE(outShapeInfo, block.getWorkspace()); return SHAPELIST(result); } @@ -244,7 +244,7 @@ DECLARE_SHAPE_FN(concat) { // if (_dimension < 0) // _dimension += first->rankOf(); - // if (sd::Environment::getInstance()->isDebugAndVerbose()) { + // if (sd::Environment::getInstance().isDebugAndVerbose()) { // printf("Shape %i: ", 0); // shape::printShapeInfoLinear((Nd4jLong *) shapes[0]); // } @@ -262,12 +262,12 @@ DECLARE_SHAPE_FN(concat) { // oldScalars &= array->rankOf() == 2 && array->isScalar(); - // if (sd::Environment::getInstance()->isDebugAndVerbose()) { + // if (sd::Environment::getInstance().isDebugAndVerbose()) { // printf("Shape %i: ", e); // shape::printShapeInfoLinear(array->shapeInfo()); // } // } - // if (sd::Environment::getInstance()->isDebugAndVerbose()) + // if (sd::Environment::getInstance().isDebugAndVerbose()) // fflush(stdout); // if (oldScalars) { @@ -279,7 +279,7 @@ DECLARE_SHAPE_FN(concat) { // STORE_RESULT(*output); - // 
if (sd::Environment::getInstance()->isDebugAndVerbose()) + // if (sd::Environment::getInstance().isDebugAndVerbose()) // output->printShapeInfo("Concat result shape"); // delete[] buffers; @@ -428,7 +428,7 @@ DECLARE_SHAPE_FN(concat_bp) { for (int e = 0; e < numOfInArrs - 1; e++) { auto inShape = inputShape->at(e); - shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(ArrayOptions::dataType(inShape), shape::order(inShape), shape::shapeOf(inShape), shape::rank(inShape)))); + shapeList->push_back(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(ArrayOptions::dataType(inShape), shape::order(inShape), shape::shapeOf(inShape), shape::rank(inShape)))); } return shapeList; diff --git a/libnd4j/include/ops/declarable/generic/transforms/depth_to_space.cpp b/libnd4j/include/ops/declarable/generic/transforms/depth_to_space.cpp index dcf827eb1..cb966472f 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/depth_to_space.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/depth_to_space.cpp @@ -82,7 +82,7 @@ namespace ops { else shape = {{bS, oD, oH, oW }}; - auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(in), 'c', 4, shape.data()); + auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(in), 'c', 4, shape.data()); return SHAPELIST(newShape); } } diff --git a/libnd4j/include/ops/declarable/generic/transforms/dynamic_stitch.cpp b/libnd4j/include/ops/declarable/generic/transforms/dynamic_stitch.cpp index ecf0e5324..d3c419b55 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/dynamic_stitch.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/dynamic_stitch.cpp @@ -79,7 +79,7 @@ namespace ops { for(int i = 1; i < outRank; ++i) outShape[i] = shape::sizeAt(restShape, i); - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(ArrayOptions::dataType(restShape), shape::order(firstShape), outShape))); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(ArrayOptions::dataType(restShape), shape::order(firstShape), outShape))); } } } diff --git a/libnd4j/include/ops/declarable/generic/transforms/gather.cpp b/libnd4j/include/ops/declarable/generic/transforms/gather.cpp index 79ce8ad29..a979c5abd 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/gather.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/gather.cpp @@ -161,7 +161,7 @@ DECLARE_SHAPE_FN(gather) { ArrayOptions::setPropertyBit(outputShapeInfo, ARRAY_EMPTY); } - auto result = ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(outputShapeInfo)); + auto result = ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(outputShapeInfo)); RELEASE(outputShapeInfo, block.getWorkspace()); return SHAPELIST(result); diff --git a/libnd4j/include/ops/declarable/generic/transforms/hashcode.cpp b/libnd4j/include/ops/declarable/generic/transforms/hashcode.cpp index 4196385c1..0ef9d71ce 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/hashcode.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/hashcode.cpp @@ -41,7 +41,7 @@ namespace sd { }; DECLARE_SHAPE_FN(hashcode) { - return SHAPELIST(ConstantShapeHelper::getInstance()->scalarShapeInfo(sd::DataType::INT64)); + return SHAPELIST(ConstantShapeHelper::getInstance().scalarShapeInfo(sd::DataType::INT64)); } diff --git a/libnd4j/include/ops/declarable/generic/transforms/histogram.cpp 
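
The concat_bp shape hunk above builds one output descriptor per forward input, copying that input's data type, ordering, and shape verbatim (the final input is the incoming gradient and gets no descriptor). A sketch of that contract with stand-in types, not the real ShapeDescriptor API:

    #include <cstdint>
    #include <vector>

    struct ShapeDescLike {            // stand-in for ShapeDescriptor
        int dtype;
        char order;
        std::vector<int64_t> shape;
    };

    // inputs[0..n-2] are the forward inputs, the last entry is grad-out;
    // each gradient output mirrors its forward input exactly
    std::vector<ShapeDescLike> concatBpShapes(const std::vector<ShapeDescLike>& in) {
        std::vector<ShapeDescLike> out;
        for (size_t e = 0; e + 1 < in.size(); ++e)
            out.push_back(in[e]);
        return out;
    }
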
b/libnd4j/include/ops/declarable/generic/transforms/histogram.cpp index 415361894..e08fcdbf5 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/histogram.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/histogram.cpp @@ -43,7 +43,7 @@ namespace sd { DECLARE_SHAPE_FN(histogram) { auto numBins = INT_ARG(0); - return SHAPELIST(ConstantShapeHelper::getInstance()->vectorShapeInfo(numBins, sd::DataType::INT64)); + return SHAPELIST(ConstantShapeHelper::getInstance().vectorShapeInfo(numBins, sd::DataType::INT64)); } diff --git a/libnd4j/include/ops/declarable/generic/transforms/histogram_fixed_width.cpp b/libnd4j/include/ops/declarable/generic/transforms/histogram_fixed_width.cpp index 36175fc01..208baa5a9 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/histogram_fixed_width.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/histogram_fixed_width.cpp @@ -57,7 +57,7 @@ DECLARE_TYPES(histogram_fixed_width) { DECLARE_SHAPE_FN(histogram_fixed_width) { const int nbins = block.width() == 3 ? INPUT_VARIABLE(2)->e(0) : block.getIArguments()->empty() ? 100 : INT_ARG(0); - auto outShapeInfo = ConstantShapeHelper::getInstance()->vectorShapeInfo(nbins, DataType::INT64); + auto outShapeInfo = ConstantShapeHelper::getInstance().vectorShapeInfo(nbins, DataType::INT64); return SHAPELIST(outShapeInfo); } diff --git a/libnd4j/include/ops/declarable/generic/transforms/merge_add.cpp b/libnd4j/include/ops/declarable/generic/transforms/merge_add.cpp index 64858001a..0fade28bf 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/merge_add.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/merge_add.cpp @@ -86,7 +86,7 @@ DECLARE_SYN(accumulate_n, mergeadd); for (int e = 0; e < numOfInArrs; e++) { auto inShape = inputShape->at(e); - shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(ArrayOptions::dataType(inShape), shape::order(inShape), shape::shapeOf(inShape), shape::rank(inShape)))); + shapeList->push_back(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(ArrayOptions::dataType(inShape), shape::order(inShape), shape::shapeOf(inShape), shape::rank(inShape)))); } return shapeList; diff --git a/libnd4j/include/ops/declarable/generic/transforms/merge_avg.cpp b/libnd4j/include/ops/declarable/generic/transforms/merge_avg.cpp index 83a448170..2ea0d501b 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/merge_avg.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/merge_avg.cpp @@ -80,7 +80,7 @@ OP_IMPL(mergeavg, -1, 1, false) { for (int e = 0; e < numOfInArrs; e++) { auto inShape = inputShape->at(e); - shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(ArrayOptions::dataType(inShape), shape::order(inShape), shape::shapeOf(inShape), shape::rank(inShape)))); + shapeList->push_back(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(ArrayOptions::dataType(inShape), shape::order(inShape), shape::shapeOf(inShape), shape::rank(inShape)))); } return shapeList; diff --git a/libnd4j/include/ops/declarable/generic/transforms/merge_max.cpp b/libnd4j/include/ops/declarable/generic/transforms/merge_max.cpp index 49ab78f7c..e95092f38 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/merge_max.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/merge_max.cpp @@ -85,7 +85,7 @@ DECLARE_SYN(MergeMax, mergemax); for (int e = 0; e < numOfInArrs; e++) { auto inShape = inputShape->at(e); - 
shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(ArrayOptions::dataType(inShape), shape::order(inShape), shape::shapeOf(inShape), shape::rank(inShape)))); + shapeList->push_back(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(ArrayOptions::dataType(inShape), shape::order(inShape), shape::shapeOf(inShape), shape::rank(inShape)))); } return shapeList; diff --git a/libnd4j/include/ops/declarable/generic/transforms/mirrorPad.cpp b/libnd4j/include/ops/declarable/generic/transforms/mirrorPad.cpp index 143e57a80..403272530 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/mirrorPad.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/mirrorPad.cpp @@ -81,7 +81,7 @@ DECLARE_SHAPE_FN(mirror_pad) { if(rank == 1) { Nd4jLong len = input->lengthOf() + paddings->e(0) + paddings->e(1); - return SHAPELIST(ConstantShapeHelper::getInstance()->vectorShapeInfo(len, input->dataType())); + return SHAPELIST(ConstantShapeHelper::getInstance().vectorShapeInfo(len, input->dataType())); } Nd4jLong* outShapeInfo(nullptr); diff --git a/libnd4j/include/ops/declarable/generic/transforms/pad.cpp b/libnd4j/include/ops/declarable/generic/transforms/pad.cpp index d5d38aaeb..d09063a95 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/pad.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/pad.cpp @@ -104,7 +104,7 @@ DECLARE_SHAPE_FN(pad) { ShapeUtils::updateStridesAndType(outShapeInfo, inputShapeInfo, shape::order(inputShapeInfo)); ShapeDescriptor descriptor(outShapeInfo); RELEASE(outShapeInfo, block.getWorkspace()); - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(descriptor)); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(descriptor)); } diff --git a/libnd4j/include/ops/declarable/generic/transforms/repeat.cpp b/libnd4j/include/ops/declarable/generic/transforms/repeat.cpp index 99ab3d635..b02f7010c 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/repeat.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/repeat.cpp @@ -66,7 +66,7 @@ DECLARE_SHAPE_FN(repeat) { auto outShape = ShapeUtils::evalRepeatShape(axis, repeats, *input); - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(input->dataType(), input->ordering(), outShape))); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(input->dataType(), input->ordering(), outShape))); } } diff --git a/libnd4j/include/ops/declarable/generic/transforms/slice.cpp b/libnd4j/include/ops/declarable/generic/transforms/slice.cpp index 96e7fe6b3..822f48681 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/slice.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/slice.cpp @@ -87,13 +87,13 @@ namespace sd { shape::calcSubArrShapeInfoAndOffset(indices.data(), input->shapeInfo(), subArrShapeInfo, offset, true); - auto subArrShapeInfoPack = ConstantShapeHelper::getInstance()->bufferForShapeInfo(subArrShapeInfo); + auto subArrShapeInfoPack = ConstantShapeHelper::getInstance().bufferForShapeInfo(subArrShapeInfo); NDArray::prepareSpecialUse({output}, {input}); NativeOpExecutioner::execTransformAny(block.launchContext(), sd::transform::Assign, - input->bufferWithOffset(offset), reinterpret_cast(subArrShapeInfoPack.primary()), - input->specialBufferWithOffset(offset), reinterpret_cast(subArrShapeInfoPack.special()), + input->bufferWithOffset(offset), subArrShapeInfoPack.primary(), + input->specialBufferWithOffset(offset), 
subArrShapeInfoPack.special(), output->buffer(), output->shapeInfo(), output->specialBuffer(), output->specialShapeInfo(), nullptr, nullptr, nullptr, true); @@ -160,7 +160,7 @@ namespace sd { shape.emplace_back(size); } - auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inShape), 'c', shape); + auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inShape), 'c', shape); return SHAPELIST(newShape); } diff --git a/libnd4j/include/ops/declarable/generic/transforms/space_to_batch.cpp b/libnd4j/include/ops/declarable/generic/transforms/space_to_batch.cpp index 9a1683818..ffffb5396 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/space_to_batch.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/space_to_batch.cpp @@ -91,7 +91,7 @@ DECLARE_SHAPE_FN(space_to_batch) { REQUIRE_TRUE((inputShapeInfo[2] + padBottom + padTop) % blockSize == 0 && (inputShapeInfo[3] + padLeft + padRight) % blockSize == 0, 0, "SpaceToBatch: after padding, second and third dimensions of input array must be divisible by blockSize !"); - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inputShapeInfo), 'c', {inputShapeInfo[1] * blockSize * blockSize, (inputShapeInfo[2] + padBottom + padTop) / blockSize, (inputShapeInfo[3] + padLeft + padRight) / blockSize, inputShapeInfo[4]})); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inputShapeInfo), 'c', {inputShapeInfo[1] * blockSize * blockSize, (inputShapeInfo[2] + padBottom + padTop) / blockSize, (inputShapeInfo[3] + padLeft + padRight) / blockSize, inputShapeInfo[4]})); } } diff --git a/libnd4j/include/ops/declarable/generic/transforms/space_to_batch_nd.cpp b/libnd4j/include/ops/declarable/generic/transforms/space_to_batch_nd.cpp index 0b8c4152d..5adc35ee6 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/space_to_batch_nd.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/space_to_batch_nd.cpp @@ -96,7 +96,7 @@ DECLARE_SHAPE_FN(space_to_batch_nd) { for (uint i = 0; i < numOfSpatialDims; ++i) outShape[i + 1] = (outShape[i + 1] + INPUT_VARIABLE(2)->e(i,0) + INPUT_VARIABLE(2)->e(i,1)) / INPUT_VARIABLE(1)->e(i); - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inputShapeInfo), 'c', outShape)); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inputShapeInfo), 'c', outShape)); } } diff --git a/libnd4j/include/ops/declarable/generic/transforms/space_to_depth.cpp b/libnd4j/include/ops/declarable/generic/transforms/space_to_depth.cpp index b831dce2f..7e108028a 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/space_to_depth.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/space_to_depth.cpp @@ -80,7 +80,7 @@ namespace ops { else shape = {{bS, oD, oH, oW }}; - auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(in), 'c', 4, shape.data()); + auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(in), 'c', 4, shape.data()); return SHAPELIST(newShape); } } diff --git a/libnd4j/include/ops/declarable/generic/transforms/split.cpp b/libnd4j/include/ops/declarable/generic/transforms/split.cpp index 462f2c77e..3fb925dfc 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/split.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/split.cpp @@ -115,7 +115,7 @@ namespace ops { //Edge case: 
splitting empty array (mainly for TF import compatibility) -> return N empty arrays // if(INPUT_VARIABLE(inputVar)->isEmpty()){ // for (int e = 0; e < num_splits; e++) { - // auto empty = ConstantShapeHelper::getInstance()->emptyShapeInfo(dataType); + // auto empty = ConstantShapeHelper::getInstance().emptyShapeInfo(dataType); // shapes->push_back(empty); // } // return shapes; @@ -136,7 +136,7 @@ namespace ops { shape[e] = shape::sizeAt(input, e); for (int e = 0; e < num_splits; e++) { - auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(dataType, shape::order(input), shape); + auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(dataType, shape::order(input), shape); shapes->push_back(newShape); } diff --git a/libnd4j/include/ops/declarable/generic/transforms/split_v.cpp b/libnd4j/include/ops/declarable/generic/transforms/split_v.cpp index 0bda3a6be..decda2e2d 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/split_v.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/split_v.cpp @@ -116,7 +116,7 @@ namespace ops { shape[d] = c_size; } - auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(input), shape::order(input), shape); + auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(input), shape::order(input), shape); shapeList->push_back(newShape); } diff --git a/libnd4j/include/ops/declarable/generic/transforms/stack.cpp b/libnd4j/include/ops/declarable/generic/transforms/stack.cpp index 65cd41a3a..af03d5ef1 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/stack.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/stack.cpp @@ -82,16 +82,16 @@ DECLARE_SHAPE_FN(stack) { case 0: { // we're going to return rank 1 here if (block.width() == 1) { - return SHAPELIST(ConstantShapeHelper::getInstance()->vectorShapeInfo(0, ArrayOptions::dataType(inShapeInfo))); + return SHAPELIST(ConstantShapeHelper::getInstance().vectorShapeInfo(0, ArrayOptions::dataType(inShapeInfo))); } else { - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inShapeInfo), 'c', {(Nd4jLong) block.width(), 0})); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inShapeInfo), 'c', {(Nd4jLong) block.width(), 0})); } } } } if(rank == 0) { - return SHAPELIST(ConstantShapeHelper::getInstance()->vectorShapeInfo(block.width(), ArrayOptions::dataType(inShapeInfo))); + return SHAPELIST(ConstantShapeHelper::getInstance().vectorShapeInfo(block.width(), ArrayOptions::dataType(inShapeInfo))); } //the rank of output ShapeInfo is larger by one compared to input ShapeInfo @@ -99,7 +99,7 @@ DECLARE_SHAPE_FN(stack) { // insert (int) block.width() at dim position of input shape to get output shape outShape.insert(outShape.begin() + Nd4jLong(dim), (Nd4jLong) block.width()); - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(ArrayOptions::dataType(inShapeInfo), shape::order(inShapeInfo), outShape))); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(ArrayOptions::dataType(inShapeInfo), shape::order(inShapeInfo), outShape))); } diff --git a/libnd4j/include/ops/declarable/generic/transforms/tear.cpp b/libnd4j/include/ops/declarable/generic/transforms/tear.cpp index 61850ab0e..b2292e2b9 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/tear.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/tear.cpp @@ -57,12 +57,12 @@ namespace 
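
// A note on the split shape function above: every one of the num_splits
// outputs is created with the same data type, the input's ordering, and a
// shape copied from the input; the split axis itself is presumably reduced to
// sizeAt(input, axis) / num_splits in the part of the function not shown in
// this hunk. split_v (next hunk) differs only in taking explicit per-output
// sizes instead of an even division.
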
sd { if (dims.size() > 1) std::sort(dims.begin(), dims.end()); - auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(inShape, dims); + auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(inShape, dims); auto numTads = tadPack.numberOfTads(); auto result = SHAPELIST(); for (Nd4jLong e = 0; e < numTads; e++) { - auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(block.dataType(), shape::order(inShape), shape::rank(tadPack.primaryShapeInfo()), shape::shapeOf(tadPack.primaryShapeInfo())); + auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(block.dataType(), shape::order(inShape), shape::rank(tadPack.primaryShapeInfo()), shape::shapeOf(tadPack.primaryShapeInfo())); result->push_back(newShape); } diff --git a/libnd4j/include/ops/declarable/generic/transforms/tile.cpp b/libnd4j/include/ops/declarable/generic/transforms/tile.cpp index 4dc259bba..e8a502e74 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/tile.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/tile.cpp @@ -93,7 +93,7 @@ DECLARE_SHAPE_FN(tile) { for (int e = 0; e < shape::rank(inShape); e++) shape[e] = shape::sizeAt(inShape, e) * reps[e]; - auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inShape), shape::order(inShape), shape); + auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inShape), shape::order(inShape), shape); return SHAPELIST(newShape); } diff --git a/libnd4j/include/ops/declarable/generic/transforms/unstack.cpp b/libnd4j/include/ops/declarable/generic/transforms/unstack.cpp index beebcad86..0dfe1e54c 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/unstack.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/unstack.cpp @@ -77,7 +77,7 @@ DECLARE_SHAPE_FN(unstack) { auto result = SHAPELIST(); for(uint i = 0; i < numTads; ++i) - result->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inShapeInfo), shape::order(inShapeInfo), outShape)); + result->push_back(ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inShapeInfo), shape::order(inShapeInfo), outShape)); return result; } @@ -88,7 +88,7 @@ DECLARE_SHAPE_FN(unstack) { auto result = SHAPELIST(); for (Nd4jLong e = 0; e < shape::length(inShapeInfo); e++) - result->push_back(ConstantShapeHelper::getInstance()->scalarShapeInfo(ArrayOptions::dataType(inShapeInfo))); + result->push_back(ConstantShapeHelper::getInstance().scalarShapeInfo(ArrayOptions::dataType(inShapeInfo))); return result; } @@ -110,7 +110,7 @@ DECLARE_SHAPE_FN(unstack) { auto result = SHAPELIST(); for (int e = 0; e < shape::shapeOf(inShapeInfo)[dim]; e++) { - auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inShapeInfo), shape::order(inShapeInfo), subArrShape); + auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inShapeInfo), shape::order(inShapeInfo), subArrShape); result->push_back(newShape); } return result; diff --git a/libnd4j/include/ops/declarable/generic/util/print_affinity.cpp b/libnd4j/include/ops/declarable/generic/util/print_affinity.cpp index 5518588e4..f7a758af6 100644 --- a/libnd4j/include/ops/declarable/generic/util/print_affinity.cpp +++ b/libnd4j/include/ops/declarable/generic/util/print_affinity.cpp @@ -44,7 +44,7 @@ namespace sd { } DECLARE_SHAPE_FN(print_affinity) { - return SHAPELIST(ConstantShapeHelper::getInstance()->scalarShapeInfo(DataType::INT32)); + 
return SHAPELIST(ConstantShapeHelper::getInstance().scalarShapeInfo(DataType::INT32)); } } } diff --git a/libnd4j/include/ops/declarable/generic/util/print_variable.cpp b/libnd4j/include/ops/declarable/generic/util/print_variable.cpp index 9d3369627..74ff99fd2 100644 --- a/libnd4j/include/ops/declarable/generic/util/print_variable.cpp +++ b/libnd4j/include/ops/declarable/generic/util/print_variable.cpp @@ -42,7 +42,7 @@ namespace sd { if (block.numB() > 0) printSpecial = B_ARG(0); - if (printSpecial && !sd::Environment::getInstance()->isCPU()) { + if (printSpecial && !sd::Environment::getInstance().isCPU()) { // only specific backends support special printout. for cpu-based backends it's the same as regular print if (block.width() == 2) @@ -69,7 +69,7 @@ namespace sd { } DECLARE_SHAPE_FN(print_variable) { - return SHAPELIST(ConstantShapeHelper::getInstance()->scalarShapeInfo(DataType::INT32)); + return SHAPELIST(ConstantShapeHelper::getInstance().scalarShapeInfo(DataType::INT32)); } } } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/addBias.cpp b/libnd4j/include/ops/declarable/helpers/cpu/addBias.cpp index a03b4504f..aa86ea041 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/addBias.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/addBias.cpp @@ -482,7 +482,7 @@ namespace sd { if (isContinuous) { //we can choose other inc and index for that case //but for now lets choose all till the last one - uint32_t req_numThreads = sd::Environment::getInstance()->maxMasterThreads(); + uint32_t req_numThreads = sd::Environment::getInstance().maxMasterThreads(); isContinuous = false; if (rank > 2) { if (req_numThreads < 2 || bases[rank - 1] >= req_numThreads) { @@ -582,7 +582,7 @@ namespace sd { if (order == 'c' && isContinuous) { //sometimes last dimension is too big and multithreading could suffer using unfair partitioning //so we will do it only when inc is smaller our value or multithreading turned off - uint32_t req_numThreads = sd::Environment::getInstance()->maxMasterThreads(); + uint32_t req_numThreads = sd::Environment::getInstance().maxMasterThreads(); if (req_numThreads < 2 || numNC >= req_numThreads || inc <= 2 * 8196 || rank == 3) { inc = numHW; } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/adjust_hue.cpp b/libnd4j/include/ops/declarable/helpers/cpu/adjust_hue.cpp index 20d91ee8b..3f37666e7 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/adjust_hue.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/adjust_hue.cpp @@ -59,8 +59,8 @@ static void adjustHue_(const NDArray *input, const NDArray* deltaScalarArr, NDAr } else { - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimC); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimC); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimC); + auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimC); const Nd4jLong numOfTads = packX.numberOfTads(); const Nd4jLong xDimCstride = input->stridesOf()[dimC]; diff --git a/libnd4j/include/ops/declarable/helpers/cpu/adjust_saturation.cpp b/libnd4j/include/ops/declarable/helpers/cpu/adjust_saturation.cpp index 6610b69ac..63f26c90f 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/adjust_saturation.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/adjust_saturation.cpp @@ -58,8 +58,8 @@ static void adjustSaturation_(const NDArray *input, const NDArray* factorScalarA 
diff --git a/libnd4j/include/ops/declarable/helpers/cpu/adjust_saturation.cpp b/libnd4j/include/ops/declarable/helpers/cpu/adjust_saturation.cpp
index 6610b69ac..63f26c90f 100644
--- a/libnd4j/include/ops/declarable/helpers/cpu/adjust_saturation.cpp
+++ b/libnd4j/include/ops/declarable/helpers/cpu/adjust_saturation.cpp
@@ -58,8 +58,8 @@ static void adjustSaturation_(const NDArray *input, const NDArray* factorScalarA
         samediff::Threads::parallel_for(func, 0, input->lengthOf(), 3);
     }
     else {
-        auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimC);
-        auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimC);
+        auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimC);
+        auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimC);

         const Nd4jLong numOfTads = packX.numberOfTads();
         const Nd4jLong xDimCstride = input->stridesOf()[dimC];

diff --git a/libnd4j/include/ops/declarable/helpers/cpu/batched_gemm.cpp b/libnd4j/include/ops/declarable/helpers/cpu/batched_gemm.cpp
index ec8f040a9..0c9338a8e 100644
--- a/libnd4j/include/ops/declarable/helpers/cpu/batched_gemm.cpp
+++ b/libnd4j/include/ops/declarable/helpers/cpu/batched_gemm.cpp
@@ -35,7 +35,7 @@ void bgemm_(const std::vector<NDArray*>& vA, const std::vector<NDArray*>& vB, st
     int batchSize = vA.size();

-    if (BlasHelper::getInstance()->hasBatchedGEMM()) {
+    if (BlasHelper::getInstance().hasBatchedGEMM()) {
         auto arr = vA.at(0);
         CBLAS_TRANSPOSE *tA, *tB;
         int *tM, *tN, *tK, *tldA, *tldB, *tldC, *tsize;
@@ -72,9 +72,9 @@
         }

         if (std::is_same<T, double>::value) {
-            BlasHelper::getInstance()->dgemmBatched()(CblasColMajor, tA, tB, tM, tN, tK, (double *) alphas->buffer(), (double **) buffersA.data(), tldA, (double **) buffersB.data(), tldB, (double *) betas->buffer(),(double **) buffersC.data(), tldC, vA.size(), tsize);
+            BlasHelper::getInstance().dgemmBatched()(CblasColMajor, tA, tB, tM, tN, tK, (double *) alphas->buffer(), (double **) buffersA.data(), tldA, (double **) buffersB.data(), tldB, (double *) betas->buffer(),(double **) buffersC.data(), tldC, vA.size(), tsize);
         } else if (std::is_same<T, float>::value) {
-            BlasHelper::getInstance()->sgemmBatched()(CblasColMajor, tA, tB, tM, tN, tK, (float *) alphas->buffer(), (float **) buffersA.data(), tldA, (float **) buffersB.data(), tldB, (float *) betas->buffer(), (float **) buffersC.data(), tldC, vA.size(), tsize);
+            BlasHelper::getInstance().sgemmBatched()(CblasColMajor, tA, tB, tM, tN, tK, (float *) alphas->buffer(), (float **) buffersA.data(), tldA, (float **) buffersB.data(), tldB, (float *) betas->buffer(), (float **) buffersC.data(), tldC, vA.size(), tsize);
         }

         // release temporary arrays
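NOTE (editor): the bgemm_ hunk above dispatches to a double or float BLAS backend with
std::is_same on the template parameter (the extraction had stripped the template
arguments; they are restored above as std::is_same<T, double> / <T, float>). A minimal,
self-contained sketch of the same pattern; gemmF/gemmD are hypothetical stand-ins for the
sgemm/dgemm function pointers:

    #include <cstdio>
    #include <type_traits>

    static void gemmF(const float*)  { std::printf("float gemm\n"); }
    static void gemmD(const double*) { std::printf("double gemm\n"); }

    template <typename T>
    void bgemmSketch(const T* a) {
        if (std::is_same<T, double>::value)
            gemmD(reinterpret_cast<const double*>(a));   // double backend
        else if (std::is_same<T, float>::value)
            gemmF(reinterpret_cast<const float*>(a));    // float backend
    }

    int main() {
        float x = 1.f; double y = 2.0;
        bgemmSketch(&x);   // prints "float gemm"
        bgemmSketch(&y);   // prints "double gemm"
    }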
diff --git a/libnd4j/include/ops/declarable/helpers/cpu/dynamic.cpp b/libnd4j/include/ops/declarable/helpers/cpu/dynamic.cpp
index 2b6b4cd02..89cf680d4 100644
--- a/libnd4j/include/ops/declarable/helpers/cpu/dynamic.cpp
+++ b/libnd4j/include/ops/declarable/helpers/cpu/dynamic.cpp
@@ -38,7 +38,7 @@ namespace sd {
             unsigned int outSize = outputList.size();

-            //PRAGMA_OMP_PARALLEL_FOR_IF(outSize > Environment::getInstance()->tadThreshold())
+            //PRAGMA_OMP_PARALLEL_FOR_IF(outSize > Environment::getInstance().tadThreshold())
             for (unsigned int i = 0; i < outSize; i++) {
                 outputs[i].first = outputList[i];
                 std::vector<int> outDims(outputs[i].first->rankOf() - 1);
@@ -52,7 +52,7 @@ namespace sd {
                 outputs[i].second = 0;

-                //PRAGMA_OMP_PARALLEL_FOR_IF(indices->lengthOf() > Environment::getInstance()->elementwiseThreshold())
+                //PRAGMA_OMP_PARALLEL_FOR_IF(indices->lengthOf() > Environment::getInstance().elementwiseThreshold())
                 for (Nd4jLong e = 0; e < indices->lengthOf(); ++e)
                     if ((*indices).e(e) == i)
                         listOutForCurrent.at(outputs[i].second++)->assign(listOfTensors.at(e));

diff --git a/libnd4j/include/ops/declarable/helpers/cpu/gather.cpp b/libnd4j/include/ops/declarable/helpers/cpu/gather.cpp
index 1deb12752..c28101558 100644
--- a/libnd4j/include/ops/declarable/helpers/cpu/gather.cpp
+++ b/libnd4j/include/ops/declarable/helpers/cpu/gather.cpp
@@ -78,8 +78,8 @@ void gather(sd::LaunchContext * context, const NDArray* input, const NDArray* in
     const Nd4jLong numOfSubArrs = indices->lengthOf();

-    auto inTadPack = ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimsIn);
-    auto outTadPack = ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimsOut);
+    auto inTadPack = ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimsIn);
+    auto outTadPack = ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimsOut);

     auto inTadShapeInfo = inTadPack.primaryShapeInfo();
     auto outTadShapeInfo = outTadPack.primaryShapeInfo();
@@ -105,8 +105,8 @@ void gather(sd::LaunchContext * context, const NDArray* input, const NDArray* in
             auto outBuff = output->bufferWithOffset(outTadPack.primaryOffsets()[i]);

             NativeOpExecutioner::execTransformAny(input->getContext(), transform::Assign,
-                inBuff, inTadShapeInfo, nullptr/*input specialBuffer*/, nullptr/*input specialShapeInfo*/,
-                outBuff, outTadShapeInfo, nullptr/*output specialBuffer*/, nullptr/*output specialShapeInfo*/,
+                inBuff, inTadShapeInfo, nullptr/*input specialBuffer*/, nullptr/*input specialShapeInfo*/,
+                outBuff, outTadShapeInfo, nullptr/*output specialBuffer*/, nullptr/*output specialShapeInfo*/,
                 nullptr, nullptr, nullptr, false/*allowParallelism*/);
         }
     };
@@ -129,8 +129,8 @@ void gather(sd::LaunchContext * context, const NDArray* input, const NDArray* in
             std::vector<int> dims = ShapeUtils::evalDimsToExclude(input->rankOf(), {axis});

-            auto inTadPack = ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dims);
-            auto outTadPack = ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dims);
+            auto inTadPack = ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dims);
+            auto outTadPack = ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dims);

             auto inTadShapeInfo = inTadPack.primaryShapeInfo();
             auto outTadShapeInfo = outTadPack.primaryShapeInfo();
@@ -158,8 +158,8 @@ void gather(sd::LaunchContext * context, const NDArray* input, const NDArray* in
                     auto outBuff = output->bufferWithOffset(outTadPack.primaryOffsets()[i]);

                     NativeOpExecutioner::execTransformAny(input->getContext(), transform::Assign,
-                        inBuff, inTadShapeInfo, nullptr/*input specialBuffer*/, nullptr/*input specialShapeInfo*/,
-                        outBuff, outTadShapeInfo, nullptr/*output specialBuffer*/, nullptr/*output specialShapeInfo*/,
+                        inBuff, inTadShapeInfo, nullptr/*input specialBuffer*/, nullptr/*input specialShapeInfo*/,
+                        outBuff, outTadShapeInfo, nullptr/*output specialBuffer*/, nullptr/*output specialShapeInfo*/,
                         nullptr, nullptr, nullptr, false/*allowParallelism*/);
                 }
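NOTE (editor): the gather helper above works on TADs (tensors along dimensions): the TAD
pack enumerates the offsets of every sub-array along the excluded axis, and gather copies
input TAD indices[i] into output TAD i. A toy row-major, rank-2 version of that idea;
gatherRows is hypothetical and the real code delegates the copy to execTransformAny:

    #include <cstring>
    #include <vector>

    void gatherRows(const float* in, float* out, int cols,
                    const std::vector<int>& indices) {
        for (std::size_t i = 0; i < indices.size(); ++i) {
            const float* src = in + static_cast<std::size_t>(indices[i]) * cols; // input TAD offset
            float* dst = out + i * static_cast<std::size_t>(cols);               // output TAD offset
            std::memcpy(dst, src, cols * sizeof(float));
        }
    }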
diff --git a/libnd4j/include/ops/declarable/helpers/cpu/gatherTransforms.cpp b/libnd4j/include/ops/declarable/helpers/cpu/gatherTransforms.cpp
index db62c4b4f..e6f1a3896 100644
--- a/libnd4j/include/ops/declarable/helpers/cpu/gatherTransforms.cpp
+++ b/libnd4j/include/ops/declarable/helpers/cpu/gatherTransforms.cpp
@@ -116,7 +116,7 @@ static void gather_(NDArray* input, const NDArray* indices, NDArray* output, con
             output->assign(scalarNDArray);
         } else {
             auto dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {axis});
-            auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions);
+            auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimensions);

             auto tadArr = NDArray(reinterpret_cast(reinterpret_cast(input->buffer()) + tadPack.primaryOffsets()[indices->e(0)]), tadPack.primaryShapeInfo(), output->getContext());
             output->assign(&tadArr);

diff --git a/libnd4j/include/ops/declarable/helpers/cpu/imagesHelpers.cpp b/libnd4j/include/ops/declarable/helpers/cpu/imagesHelpers.cpp
index 2183b7d5a..108804f38 100644
--- a/libnd4j/include/ops/declarable/helpers/cpu/imagesHelpers.cpp
+++ b/libnd4j/include/ops/declarable/helpers/cpu/imagesHelpers.cpp
@@ -91,8 +91,8 @@ FORCEINLINE static void rgbToFromYuv_(const NDArray& input, NDArray& output, con
         return;
     }

-    auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), dimC);
-    auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output.shapeInfo(), dimC);
+    auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input.shapeInfo(), dimC);
+    auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output.shapeInfo(), dimC);

     const Nd4jLong numOfTads = packX.numberOfTads();
     const Nd4jLong xDimCstride = input.stridesOf()[dimC];
@@ -149,8 +149,8 @@ FORCEINLINE static void tripleTransformer(const NDArray* input, NDArray* output,
         samediff::Threads::parallel_for(func, 0, input->lengthOf(), 3);
     }
     else {
-        auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimC);
-        auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimC);
+        auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimC);
+        auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimC);

         const Nd4jLong numOfTads = packX.numberOfTads();
         const Nd4jLong xDimCstride = input->stridesOf()[dimC];
@@ -199,8 +199,8 @@ FORCEINLINE static void tripleTransformer(const NDArray* input, NDArray* output,
         samediff::Threads::parallel_for(func, 0, input->lengthOf(), 3);
     }
     else {
-        auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimC);
-        auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimC);
+        auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimC);
+        auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimC);

         const Nd4jLong numOfTads = packX.numberOfTads();
         const Nd4jLong xDimCstride = input->stridesOf()[dimC];

diff --git a/libnd4j/include/ops/declarable/helpers/cpu/indexReductions.hpp b/libnd4j/include/ops/declarable/helpers/cpu/indexReductions.hpp
index 7d376e012..910e10314 100644
--- a/libnd4j/include/ops/declarable/helpers/cpu/indexReductions.hpp
+++ b/libnd4j/include/ops/declarable/helpers/cpu/indexReductions.hpp
@@ -389,7 +389,7 @@ namespace sd {
     {
         Nd4jLong inner_total;
         Nd4jLong inner_last = 0;
-        int maxThreads = sd::Environment::getInstance()->maxMasterThreads();
+        int maxThreads = sd::Environment::getInstance().maxMasterThreads();
         if (second_rank == 1) {
             inner_total = inner_bases[0];
             if (inner_total < threadingThreshold) {
@@ -764,7 +764,7 @@ namespace sd {
         func(0, 0, total, 1);
#else
        //
-        uint32_t numThreads = sd::Environment::getInstance()->maxMasterThreads();
+        uint32_t numThreads = sd::Environment::getInstance().maxMasterThreads();
         Nd4jLong inner_total = getLength(inner_bases, second_rank);
         if (total * inner_total <= threadingThreshold) {
             numThreads = 1;
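NOTE (editor): the indexReductions.hpp hunks above show the new index-reduction kernels
clamping their thread count: start from the backend's maxMasterThreads() and drop to one
thread when total work is below a threshold. A small sketch of that decision, assuming
only the logic visible in the hunks (pickThreadCount is illustrative, not the real API):

    #include <algorithm>
    #include <cstdint>

    int pickThreadCount(std::int64_t total, std::int64_t innerTotal,
                        int maxThreads, std::int64_t threshold) {
        if (total * innerTotal <= threshold)
            return 1;                               // too small to amortize fork/join
        return static_cast<int>(std::max<std::int64_t>(1,
                   std::min<std::int64_t>(maxThreads, total)));
    }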
diff --git a/libnd4j/include/ops/declarable/helpers/cpu/ismax.cpp b/libnd4j/include/ops/declarable/helpers/cpu/ismax.cpp
index 687153f99..c2bcb8399 100644
--- a/libnd4j/include/ops/declarable/helpers/cpu/ismax.cpp
+++ b/libnd4j/include/ops/declarable/helpers/cpu/ismax.cpp
@@ -125,8 +125,8 @@ static void ismax_(const NDArray* input, NDArray* output, const std::vector
         //moving all dimensions (in sorted order)
         //to the back.
         //permuted version of the input shape info for setting up the tad problem
-        auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), const_cast<int*>(dimensions.data()), dimensionsLength);
-        auto tadPackZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), const_cast<int*>(dimensions.data()), dimensionsLength);
+        auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), const_cast<int*>(dimensions.data()), dimensionsLength);
+        auto tadPackZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), const_cast<int*>(dimensions.data()), dimensionsLength);

         auto tadShapeShapeInfo = tadPack.primaryShapeInfo();

diff --git a/libnd4j/include/ops/declarable/helpers/cpu/lrn.cpp b/libnd4j/include/ops/declarable/helpers/cpu/lrn.cpp
index 8dc31d8c0..b49f8e61c 100644
--- a/libnd4j/include/ops/declarable/helpers/cpu/lrn.cpp
+++ b/libnd4j/include/ops/declarable/helpers/cpu/lrn.cpp
@@ -35,13 +35,13 @@ static int lrnFunctor_(sd::graph::Context& block, NDArray* input, NDArray* outpu
     const int rank = input->rankOf();

-    TadPack inTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), {rank - 1});
+    TadPack inTadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), {rank - 1});
     TadPack outTadPack;

     if(shape::haveSameShapeAndStrides(input->shapeInfo(), output->shapeInfo()))
         outTadPack = inTadPack;
     else
-        outTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {rank - 1});
+        outTadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), {rank - 1});

     const Nd4jLong numOfTads = inTadPack.numberOfTads();
     const Nd4jLong tadLen = input->sizeAt(-1);
@@ -151,13 +151,13 @@ static void lrnBP_(const NDArray& input, const NDArray& gradO, NDArray& gradI, c
     const int rank = input.rankOf();

-    TadPack inTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), {rank - 1});
+    TadPack inTadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(input.shapeInfo(), {rank - 1});
     TadPack gradITadPack;

     if(shape::haveSameShapeAndStrides(input.shapeInfo(), gradI.shapeInfo()))
         gradITadPack = inTadPack;
     else
-        gradITadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradI.shapeInfo(), {rank - 1});
+        gradITadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(gradI.shapeInfo(), {rank - 1});

     const Nd4jLong numOfTads = inTadPack.numberOfTads();
     const Nd4jLong tadLen = input.sizeAt(-1);

diff --git a/libnd4j/include/ops/declarable/helpers/cpu/lup.cpp b/libnd4j/include/ops/declarable/helpers/cpu/lup.cpp
index 482709455..8f45c696b 100644
--- a/libnd4j/include/ops/declarable/helpers/cpu/lup.cpp
+++ b/libnd4j/include/ops/declarable/helpers/cpu/lup.cpp
@@ -111,7 +111,7 @@ namespace helpers {
             invertedMatrix->r(i, i) /= inputMatrix->t(i, i);
         };

-        //PRAGMA_OMP_PARALLEL_FOR_IF(n > Environment::getInstance()->elementwiseThreshold())
+        //PRAGMA_OMP_PARALLEL_FOR_IF(n > Environment::getInstance().elementwiseThreshold())
         auto invertUpDiagonals = PRAGMA_THREADS_FOR {
             for (auto i = start; i < stop; i += increment)
                invertedMatrix->r(i, i + 1) -= (inputMatrix->t(i, i + 1) * invertedMatrix->t(i + 1, i + 1) /

diff --git a/libnd4j/include/ops/declarable/helpers/cpu/nth_element.cpp b/libnd4j/include/ops/declarable/helpers/cpu/nth_element.cpp
index 53565f3c1..b9225e40d 100644
--- a/libnd4j/include/ops/declarable/helpers/cpu/nth_element.cpp
+++ b/libnd4j/include/ops/declarable/helpers/cpu/nth_element.cpp
@@ -47,7 +47,7 @@ namespace helpers {
         else { // rank greater than 1
             std::vector<int> lastDims({input->rankOf() - 1});// = ShapeUtils::evalDimsToExclude(input->rankOf(), {input->rankOf() - 1});

-            auto pack = sd::ConstantTadHelper::getInstance()->tadForDimensions(sortedVals.shapeInfo(), lastDims);
+            auto pack = sd::ConstantTadHelper::getInstance().tadForDimensions(sortedVals.shapeInfo(), lastDims);

             SpecialMethods<T>::sortTadGeneric(sortedVals.buffer(), sortedVals.shapeInfo(), lastDims.data(), lastDims.size(), pack.primaryShapeInfo(), pack.primaryOffsets(), reverse);

diff --git a/libnd4j/include/ops/declarable/helpers/cpu/one_hot.cpp b/libnd4j/include/ops/declarable/helpers/cpu/one_hot.cpp
index 2aa14585b..41a265ca9 100644
--- a/libnd4j/include/ops/declarable/helpers/cpu/one_hot.cpp
+++ b/libnd4j/include/ops/declarable/helpers/cpu/one_hot.cpp
@@ -31,7 +31,7 @@ namespace sd {
             auto output = reinterpret_cast(voutput);
             auto indices = reinterpret_cast(vindices);

-            auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(zShapeInfo, {axis});
+            auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(zShapeInfo, {axis});

             auto iLen = static_cast(shape::length(iShapeInfo));
             auto tLen = static_cast(shape::length(tadPack.primaryShapeInfo()));

diff --git a/libnd4j/include/ops/declarable/helpers/cpu/randomShuffle.cpp b/libnd4j/include/ops/declarable/helpers/cpu/randomShuffle.cpp
index a7f40899a..ea529112d 100644
--- a/libnd4j/include/ops/declarable/helpers/cpu/randomShuffle.cpp
+++ b/libnd4j/include/ops/declarable/helpers/cpu/randomShuffle.cpp
@@ -46,7 +46,7 @@ void randomShuffle_(NDArray& input, NDArray& output, sd::graph::RandomGenerator&
     // apply Fisher-Yates shuffle
     if(isInplace) {
-        //PRAGMA_OMP_PARALLEL_FOR_IF((firstDim-1) > Environment::getInstance()->tadThreshold())
+        //PRAGMA_OMP_PARALLEL_FOR_IF((firstDim-1) > Environment::getInstance().tadThreshold())
         for(int i = firstDim-1; i > 0; --i) {
             int r = rng.relativeInt(i) % i;
             if(i == r)
@@ -84,7 +84,7 @@ void randomShuffle_(NDArray& input, NDArray& output, sd::graph::RandomGenerator&
         // apply Fisher-Yates shuffle
         if(isInplace) {
-            //PRAGMA_OMP_PARALLEL_FOR_IF((firstDim-1) > Environment::getInstance()->elementwiseThreshold())
+            //PRAGMA_OMP_PARALLEL_FOR_IF((firstDim-1) > Environment::getInstance().elementwiseThreshold())
             for(int i = firstDim - 1; i > 0; --i) {
                 int r = rng.relativeInt(i) % i;
@@ -99,7 +99,7 @@ void randomShuffle_(NDArray& input, NDArray& output, sd::graph::RandomGenerator&
             std::vector<int> indices(firstDim);
             std::iota(indices.begin(), indices.end(), 0);
             bool isZeroShuffled = false;
-            //PRAGMA_OMP_PARALLEL_FOR_IF((firstDim-1) > Environment::getInstance()->tadThreshold())
+            //PRAGMA_OMP_PARALLEL_FOR_IF((firstDim-1) > Environment::getInstance().tadThreshold())
             for(int i = firstDim - 1; i > 0; --i) {
                 int r = rng.relativeInt(i) % i;
                 subArrsListOut.at(i)->assign(subArrsListIn.at(indices[r]));
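NOTE (editor): randomShuffle_ above is a Fisher-Yates shuffle walking i from the last
index down and swapping with a random earlier element (the helper draws
rng.relativeInt(i) % i, i.e. r in [0, i)). A minimal standalone version of the same loop
shape, using std::mt19937 in place of sd's RandomGenerator:

    #include <numeric>
    #include <random>
    #include <vector>

    std::vector<int> shuffledIndices(int n, std::mt19937& rng) {
        std::vector<int> idx(n);
        std::iota(idx.begin(), idx.end(), 0);       // 0, 1, ..., n-1
        for (int i = n - 1; i > 0; --i) {
            int r = static_cast<int>(rng() % static_cast<unsigned>(i)); // r in [0, i)
            std::swap(idx[i], idx[r]);
        }
        return idx;
    }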
diff --git a/libnd4j/include/ops/declarable/helpers/cpu/roll.cpp b/libnd4j/include/ops/declarable/helpers/cpu/roll.cpp
index 278f3bcf5..2e3d983cd 100644
--- a/libnd4j/include/ops/declarable/helpers/cpu/roll.cpp
+++ b/libnd4j/include/ops/declarable/helpers/cpu/roll.cpp
@@ -43,7 +43,7 @@ namespace helpers {
     int remainShift = fullLen % actualShift;

     // stage 1) swap last actualShift elements with first ones.
-    //PRAGMA_OMP_PARALLEL_FOR //_IF(actualShift > Environment::getInstance()->elementwiseThreshold())
+    //PRAGMA_OMP_PARALLEL_FOR //_IF(actualShift > Environment::getInstance().elementwiseThreshold())
     for (int e = 0; e < actualShift; ++e) {
         int sourceIndex = fullLen - actualShift + e;
@@ -56,7 +56,7 @@ namespace helpers {
     }

     // stage 2) swap swapped actualShift elements with rest remainShiftCount times.
-    //PRAGMA_OMP_PARALLEL_FOR //_IF(shiftCount > Environment::getInstance()->tadThreshold())
+    //PRAGMA_OMP_PARALLEL_FOR //_IF(shiftCount > Environment::getInstance().tadThreshold())
     for (int count = 1; count < shiftCount; ++count) {
         for (int e = 0; e < actualShift; ++e) {
             int destinationIndex = fullLen - (count + 1) * actualShift + e;

diff --git a/libnd4j/include/ops/declarable/helpers/cpu/scatter.cpp b/libnd4j/include/ops/declarable/helpers/cpu/scatter.cpp
index e19eb5dea..0693406bf 100644
--- a/libnd4j/include/ops/declarable/helpers/cpu/scatter.cpp
+++ b/libnd4j/include/ops/declarable/helpers/cpu/scatter.cpp
@@ -87,7 +87,7 @@ void scatter(sd::LaunchContext *context, pairwise::Ops op, const NDArray& indic
             }
         };

-        samediff::Threads::parallel_tad(func, 0, indLen, 1, lock ? 1 : sd::Environment::getInstance()->maxThreads());
+        samediff::Threads::parallel_tad(func, 0, indLen, 1, lock ? 1 : sd::Environment::getInstance().maxThreads());
     }
     else {      // outRank > 1
@@ -107,7 +107,7 @@ void scatter(sd::LaunchContext *context, pairwise::Ops op, const NDArray& indic
             }
         };

-        samediff::Threads::parallel_tad(func, 0, indLen, 1, lock ? 1 : sd::Environment::getInstance()->maxThreads());
+        samediff::Threads::parallel_tad(func, 0, indLen, 1, lock ? 1 : sd::Environment::getInstance().maxThreads());
     }
 }
@@ -129,7 +129,7 @@ void scatterND(sd::LaunchContext *context, pairwise::Ops op, const NDArray& ind
             }
         };

-        samediff::Threads::parallel_tad(func, 0, indLen, 1, lock ? 1 : sd::Environment::getInstance()->maxThreads());
+        samediff::Threads::parallel_tad(func, 0, indLen, 1, lock ? 1 : sd::Environment::getInstance().maxThreads());
     }
     else {
         std::vector<int> dimsToExcludeInd = ShapeUtils::evalDimsToExclude(indRank, {indRank-1});
@@ -154,7 +154,7 @@ void scatterND(sd::LaunchContext *context, pairwise::Ops op, const NDArray& ind
             }
         };
-        samediff::Threads::parallel_tad(func, 0, indLen / indLastDim, 1, lock ? 1 : sd::Environment::getInstance()->maxThreads());
+        samediff::Threads::parallel_tad(func, 0, indLen / indLastDim, 1, lock ? 1 : sd::Environment::getInstance().maxThreads());
     }
 }

diff --git a/libnd4j/include/ops/declarable/helpers/cpu/softmax.cpp b/libnd4j/include/ops/declarable/helpers/cpu/softmax.cpp
index bfd44629c..7fd03f8e4 100644
--- a/libnd4j/include/ops/declarable/helpers/cpu/softmax.cpp
+++ b/libnd4j/include/ops/declarable/helpers/cpu/softmax.cpp
@@ -187,7 +187,7 @@ namespace sd {
         }
         else if(input.isSameShapeStrict(output)) {
-            TadPack tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), dimension);
+            TadPack tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(input.shapeInfo(), dimension);
             auto tadShapeInfo = tadPack.primaryShapeInfo();
             auto tadOffsets = tadPack.primaryOffsets();
             const uint numOfSubArrs = tadPack.numberOfTads();
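NOTE (editor): the softmax helper above normalizes each TAD along the chosen dimension
independently. For reference, a contiguous rank-2 version of what each TAD computes, with
the standard max-subtraction for numerical stability (softmaxRows is illustrative only):

    #include <cmath>

    void softmaxRows(float* x, int rows, int cols) {
        for (int r = 0; r < rows; ++r) {
            float* row = x + static_cast<long>(r) * cols;   // one TAD
            float mx = row[0];
            for (int c = 1; c < cols; ++c) mx = std::fmax(mx, row[c]);
            float sum = 0.f;
            for (int c = 0; c < cols; ++c) { row[c] = std::exp(row[c] - mx); sum += row[c]; }
            for (int c = 0; c < cols; ++c) row[c] /= sum;
        }
    }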
diff --git a/libnd4j/include/ops/declarable/helpers/cpu/stack.cpp b/libnd4j/include/ops/declarable/helpers/cpu/stack.cpp
index 694ced4cb..3db322fc8 100644
--- a/libnd4j/include/ops/declarable/helpers/cpu/stack.cpp
+++ b/libnd4j/include/ops/declarable/helpers/cpu/stack.cpp
@@ -47,7 +47,7 @@ static void stack_(const std::vector<const NDArray*>& inArrs, NDArray& output, c
     }
     else {
-        auto zTadPack = ConstantTadHelper::getInstance()->tadForDimensions(output.shapeInfo(), ShapeUtils::evalDimsToExclude(output.rankOf(), {dim}));
+        auto zTadPack = ConstantTadHelper::getInstance().tadForDimensions(output.shapeInfo(), ShapeUtils::evalDimsToExclude(output.rankOf(), {dim}));
         auto zTadShapeInfo = zTadPack.primaryShapeInfo();

         auto func = PRAGMA_THREADS_FOR {
@@ -57,8 +57,8 @@ static void stack_(const std::vector<const NDArray*>& inArrs, NDArray& output, c
                 void* zBuff = output.bufferWithOffset(zTadPack.primaryOffsets()[i]);

                 NativeOpExecutioner::execTransformAny(inArrs[0]->getContext(), transform::Assign,
-                    inArrs[i]->buffer(), inArrs[i]->shapeInfo(), nullptr/*input specialBuffer*/, nullptr/*input specialShapeInfo*/,
-                    zBuff, zTadShapeInfo, nullptr/*output specialBuffer*/, nullptr/*output specialShapeInfo*/,
+                    inArrs[i]->buffer(), inArrs[i]->shapeInfo(), nullptr/*input specialBuffer*/, nullptr/*input specialShapeInfo*/,
+                    zBuff, zTadShapeInfo, nullptr/*output specialBuffer*/, nullptr/*output specialShapeInfo*/,
                     nullptr, nullptr, nullptr, false/*allowParallelism*/);
             }
         };
@@ -92,7 +92,7 @@ static void unstack_(const NDArray& input, const std::vector<NDArray*>& outArrs,
     }
     else {
-        auto xTadPack = ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), ShapeUtils::evalDimsToExclude(input.rankOf(), {dim}));
+        auto xTadPack = ConstantTadHelper::getInstance().tadForDimensions(input.shapeInfo(), ShapeUtils::evalDimsToExclude(input.rankOf(), {dim}));
         auto xTadShapeInfo = xTadPack.primaryShapeInfo();

         auto func = PRAGMA_THREADS_FOR {
@@ -100,8 +100,8 @@ static void unstack_(const NDArray& input, const std::vector<NDArray*>& outArrs,
                 auto xBuff = input.bufferWithOffset(xTadPack.primaryOffsets()[i]);

                 NativeOpExecutioner::execTransformAny(input.getContext(), transform::Assign,
-                    xBuff, xTadShapeInfo, nullptr/*input specialBuffer*/, nullptr/*input specialShapeInfo*/,
-                    outArrs[i]->buffer(), outArrs[i]->shapeInfo(), nullptr/*output specialBuffer*/, nullptr/*output specialShapeInfo*/,
+                    xBuff, xTadShapeInfo, nullptr/*input specialBuffer*/, nullptr/*input specialShapeInfo*/,
+                    outArrs[i]->buffer(), outArrs[i]->shapeInfo(), nullptr/*output specialBuffer*/, nullptr/*output specialShapeInfo*/,
                     nullptr, nullptr, nullptr, false/*allowParallelism*/);
             }
         };

diff --git a/libnd4j/include/ops/declarable/helpers/cuda/activations.cu b/libnd4j/include/ops/declarable/helpers/cuda/activations.cu
index c8bc709a0..e675342d9 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/activations.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/activations.cu
@@ -321,8 +321,8 @@ void softmax(sd::LaunchContext * context, const NDArray& input, NDArray& output,
     }
     else {
-        auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), {dimension});
-        auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output.shapeInfo(), {dimension});
+        auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input.shapeInfo(), {dimension});
+        auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output.shapeInfo(), {dimension});

         const int threadsPerBlock = MAX_NUM_THREADS / 4;
         const int blocksPerGrid = packZ.numberOfTads();
diff --git a/libnd4j/include/ops/declarable/helpers/cuda/adjust_hue.cu b/libnd4j/include/ops/declarable/helpers/cuda/adjust_hue.cu
index 9ce00f318..fff4bfb11 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/adjust_hue.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/adjust_hue.cu
@@ -81,8 +81,8 @@ static _CUDA_H void adjustHueCudaLauncher(const int blocksPerGrid, const int thr
////////////////////////////////////////////////////////////////////////
void adjustHue(sd::LaunchContext* context, const NDArray *input, const NDArray* deltaScalarArr, NDArray *output, const int dimC) {

-    auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), {dimC});
-    auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {dimC});
+    auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), {dimC});
+    auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), {dimC});

     const Nd4jLong numOfTads = packX.numberOfTads();
@@ -170,11 +170,11 @@ static void _adjust_hue_single(sd::LaunchContext * context, NDArray *array, NDAr
     // numChannels is always 3
     auto tuples = array->lengthOf() / 3;
     if (isNHWC) {
-        adjustHueSingleNHWCKernel<<<256, 256, 1024, *context->getCudaStream()>>>(array->specialBuffer(), array->specialShapeInfo(), output->specialBuffer(), output->specialShapeInfo(), tuples, delta);
+        adjustHueSingleNHWCKernel<<<256, 256, 1024, *context->getCudaStream()>>>(array->specialBuffer(), array->specialShapeInfo(), output->specialBuffer(), output->specialShapeInfo(), tuples, delta);
     } else {
         // TODO: check this one
-        auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(array->shapeInfo(), {1, 2});
-        auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {1, 2});
+        auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(array->shapeInfo(), {1, 2});
+        auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), {1, 2});

         auto tadLength = shape::length(packX.primaryShapeInfo());
@@ -195,12 +195,12 @@ static void _adjust_hue_batch(sd::LaunchContext * context, NDArray *array, NDArr
         BUILD_SINGLE_SELECTOR(xType, _adjust_hue_single, (context, array, output, delta, isNHWC);, FLOAT_TYPES);
     } else {
         // TODO: check this one
-        auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(array->shapeInfo(), {0, 2, 3});
-        auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {0, 2, 3});
+        auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(array->shapeInfo(), {0, 2, 3});
+        auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), {0, 2, 3});

-        auto tadLength = shape::length(packX.primaryShapeInfo());
+        auto tadLength = shape::length(packX.primaryShapeInfo());

-        adjustHueSingleNCHWKernel<<<256, 256, 1024, *context->getCudaStream()>>>(array->specialBuffer(), packX.platformShapeInfo(), packX.platformOffsets(), output->specialBuffer(), packZ.platformShapeInfo(), packZ.platformOffsets(), tadLength, tuples, delta);
+        adjustHueSingleNCHWKernel<<<256, 256, 1024, *context->getCudaStream()>>>(array->specialBuffer(), packX.platformShapeInfo(), packX.platformOffsets(), output->specialBuffer(), packZ.platformShapeInfo(), packZ.platformOffsets(), tadLength, tuples, delta);
     }
 }

diff --git a/libnd4j/include/ops/declarable/helpers/cuda/adjust_saturation.cu b/libnd4j/include/ops/declarable/helpers/cuda/adjust_saturation.cu
index fd413f8cd..36837db29 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/adjust_saturation.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/adjust_saturation.cu
@@ -83,8 +83,8 @@ static _CUDA_H void adjustSaturationCudaLauncher(const int blocksPerGrid, const
////////////////////////////////////////////////////////////////////////
void adjustSaturation(sd::LaunchContext* context, const NDArray *input, const NDArray* factorScalarArr, NDArray *output, const int dimC) {

-    auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), {dimC});
-    auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {dimC});
+    auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), {dimC});
+    auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), {dimC});

     const Nd4jLong numOfTads = packX.numberOfTads();
@@ -162,10 +162,10 @@ static void _adjust_saturation_single(sd::LaunchContext * context, NDArray *arra
     auto tuples = array->lengthOf() / 3;

     if (isNHWC) {
-        adjustSaturationSingleNHWCKernel<<<256, 256, 1024, *context->getCudaStream()>>>(array->specialBuffer(), array->specialShapeInfo(), output->specialBuffer(), output->specialShapeInfo(), tuples, delta);
+        adjustSaturationSingleNHWCKernel<<<256, 256, 1024, *context->getCudaStream()>>>(array->specialBuffer(), array->specialShapeInfo(), output->specialBuffer(), output->specialShapeInfo(), tuples, delta);
     } else {
-        auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(array->shapeInfo(), {1, 2});
-        auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {1, 2});
+        auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(array->shapeInfo(), {1, 2});
+        auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), {1, 2});

         auto tadLength = shape::length(packX.primaryShapeInfo());
@@ -185,12 +185,12 @@ static void _adjust_saturation_batch(sd::LaunchContext * context, NDArray *array
         BUILD_SINGLE_SELECTOR(xType, _adjust_saturation_single, (context, array, output, delta, isNHWC);, FLOAT_TYPES);
     } else {
         // TODO: check this one
-        auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(array->shapeInfo(), {0, 2, 3});
-        auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {0, 2, 3});
+        auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(array->shapeInfo(), {0, 2, 3});
+        auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), {0, 2, 3});

-        auto tadLength = shape::length(packX.primaryShapeInfo());
+        auto tadLength = shape::length(packX.primaryShapeInfo());
-        adjustSaturationSingleNCHWKernel<<<256, 256, 1024, *context->getCudaStream()>>>(array->specialBuffer(), packX.platformShapeInfo(), packX.platformOffsets(), output->specialBuffer(), packZ.platformShapeInfo(), packZ.platformOffsets(), tadLength, tuples, delta);
+        adjustSaturationSingleNCHWKernel<<<256, 256, 1024, *context->getCudaStream()>>>(array->specialBuffer(), packX.platformShapeInfo(), packX.platformOffsets(), output->specialBuffer(), packZ.platformShapeInfo(), packZ.platformOffsets(), tadLength, tuples, delta);
     }
 }

diff --git a/libnd4j/include/ops/declarable/helpers/cuda/batchnorm.cu b/libnd4j/include/ops/declarable/helpers/cuda/batchnorm.cu
index 791953ab7..f7f8bf966 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/batchnorm.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/batchnorm.cu
@@ -201,8 +201,8 @@ void batchnorm(const NDArray* input, const NDArray* mean, const NDArray* varianc
     // std::vector dimsToExclude = ShapeUtils::evalDimsToExclude(input->rankOf(), axes);

-    // auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimsToExclude);
-    // auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimsToExclude);
+    // auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimsToExclude);
+    // auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimsToExclude);

     // const int threadsPerBlock = MAX_NUM_THREADS / 2;
     // const int blocksPerGrid = (mean->lengthOf() + threadsPerBlock - 1) / threadsPerBlock;
@@ -210,7 +210,7 @@ void batchnorm(const NDArray* input, const NDArray* mean, const NDArray* varianc
     // PointersManager manager(input->getContext(), "batchnorm");

     // NDArray::prepareSpecialUse({output}, {input, mean, variance, gamma, beta});
-    // BUILD_SINGLE_SELECTOR(input->dataType(), batchnormCudaLauncher, (blocksPerGrid, threadsPerBlock, input->getContext()->getCudaStream(), input->specialBuffer(), input->specialShapeInfo(), mean->specialBuffer(), mean->specialShapeInfo(), variance->specialBuffer(), variance->specialShapeInfo(), gamma ? gamma->specialBuffer() : nullptr, gamma ? gamma->specialShapeInfo() : nullptr, beta ? beta->specialBuffer() : nullptr, beta ? beta->specialShapeInfo() : nullptr, output->specialBuffer(), output->specialShapeInfo(), packX.platformShapeInfo(), packX.platformOffsets(), packZ.platformShapeInfo(), packZ.platformOffsets(), epsilon), FLOAT_TYPES);
+    // BUILD_SINGLE_SELECTOR(input->dataType(), batchnormCudaLauncher, (blocksPerGrid, threadsPerBlock, input->getContext()->getCudaStream(), input->specialBuffer(), input->specialShapeInfo(), mean->specialBuffer(), mean->specialShapeInfo(), variance->specialBuffer(), variance->specialShapeInfo(), gamma ? gamma->specialBuffer() : nullptr, gamma ? gamma->specialShapeInfo() : nullptr, beta ? beta->specialBuffer() : nullptr, beta ? beta->specialShapeInfo() : nullptr, output->specialBuffer(), output->specialShapeInfo(), packX.platformShapeInfo(), packX.platformOffsets(), packZ.platformShapeInfo(), packZ.platformOffsets(), epsilon), FLOAT_TYPES);

     // NDArray::registerSpecialUse({output}, {input, mean, variance, gamma, beta});
     // manager.synchronize();

diff --git a/libnd4j/include/ops/declarable/helpers/cuda/confusion.cu b/libnd4j/include/ops/declarable/helpers/cuda/confusion.cu
index dfa86124a..fd676ba83 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/confusion.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/confusion.cu
@@ -68,7 +68,7 @@ namespace helpers {
     void _confusionFunctor(sd::LaunchContext * context, NDArray* labels, NDArray* predictions, NDArray* weights, NDArray* output) {
         auto stream = context->getCudaStream();

-        auto pack = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), 1);
+        auto pack = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), 1);

         PointersManager manager(context, "helpers::confusion");

diff --git a/libnd4j/include/ops/declarable/helpers/cuda/dynamic.cu b/libnd4j/include/ops/declarable/helpers/cuda/dynamic.cu
index 6f29995d3..bce7316ef 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/dynamic.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/dynamic.cu
@@ -124,7 +124,7 @@ namespace sd {
                 for (int i = sourceDimsLen; i > 0; i--)
                     sourceDims[sourceDimsLen - i] = input->rankOf() - i;
                 //compute tad array for given dimensions
-                auto packX = ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), sourceDims);
+                auto packX = ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), sourceDims);

                 std::vector outBuffers(outSize);
                 std::vector tadShapes(outSize);
@@ -140,7 +140,7 @@ namespace sd {
                     for (int k = 1; k < r; k++)
                         outDims[k - 1] = k;

-                    auto packZ = ConstantTadHelper::getInstance()->tadForDimensions(outputList.at(i)->shapeInfo(), outDims);
+                    auto packZ = ConstantTadHelper::getInstance().tadForDimensions(outputList.at(i)->shapeInfo(), outDims);

                     outBuffers[i] = outputList.at(i)->specialBuffer();
                     tadShapes[i] = packZ.platformShapeInfo();
@@ -262,7 +262,7 @@ namespace sd {
                 for (int i = restDims.size(); i > 0; i--)
                     restDims[restDims.size() - i] = output->rankOf() - i;

-                auto packZ = ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), restDims);
+                auto packZ = ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), restDims);

                 std::vector inputBuffers(inputSize);
                 std::vector inputTadShapes(inputSize);
@@ -276,7 +276,7 @@ namespace sd {
                     for (int i = sourceDims.size(); i > 0; i--)
                         sourceDims[sourceDims.size() - i] = inputs[e]->rankOf() - i;

-                    auto packX = ConstantTadHelper::getInstance()->tadForDimensions(inputs[e]->shapeInfo(), sourceDims);
+                    auto packX = ConstantTadHelper::getInstance().tadForDimensions(inputs[e]->shapeInfo(), sourceDims);

                     indicesBuffers[e] = indices[e]->specialBuffer();
                     indicesShapes[e] = indices[e]->specialShapeInfo();

diff --git a/libnd4j/include/ops/declarable/helpers/cuda/extract_patches.cu b/libnd4j/include/ops/declarable/helpers/cuda/extract_patches.cu
index c5e8848cb..e1c506879 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/extract_patches.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/extract_patches.cu
@@ -114,8 +114,8 @@ namespace helpers {
         if (sizeCol * rateCol < 3)
             colCast = 0;

-        auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(images->shapeInfo(), restDims.data(), restDims.size());
-        auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), restDims.data(), restDims.size());
+        auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(images->shapeInfo(), restDims.data(), restDims.size());
+        auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), restDims.data(), restDims.size());

         int batchCount = packX.numberOfTads();

         PointersManager manager(context, "helpers::extractPatches");

diff --git a/libnd4j/include/ops/declarable/helpers/cuda/histogramFixedWidth.cu b/libnd4j/include/ops/declarable/helpers/cuda/histogramFixedWidth.cu
index adb5a3ec4..c6041b33b 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/histogramFixedWidth.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/histogramFixedWidth.cu
@@ -164,9 +164,9 @@ void histogramFixedWidth(sd::LaunchContext* context, const NDArray& input, const
//     cudaError_t err = cudaMalloc(&outputBuffer, output.lengthOf() * sizeof(Nd4jLong));
//     if (err != 0)
//         throw cuda_exception::build("helpers::histogramFixedWidth: Cannot allocate memory for output", err);
-//     copyBuffers<<<256, 512, 8192, *stream>>>(outputBuffer, output.specialBuffer(), output.specialShapeInfo(), output.lengthOf());
-//     histogramFixedWidthKernel<<<256, 512, 8192, *stream>>>(outputBuffer, output.lengthOf(), input.specialBuffer(), input.specialShapeInfo(), input.lengthOf(), leftEdge, binWidth, secondEdge, lastButOneEdge);
-//     returnBuffers<<<256, 512, 8192, *stream>>>(output.specialBuffer(), outputBuffer, output.specialShapeInfo(), output.lengthOf());
+//     copyBuffers<<<256, 512, 8192, *stream>>>(outputBuffer, output.specialBuffer(), output.specialShapeInfo(), output.lengthOf());
+//     histogramFixedWidthKernel<<<256, 512, 8192, *stream>>>(outputBuffer, output.lengthOf(), input.specialBuffer(), input.specialShapeInfo(), input.lengthOf(), leftEdge, binWidth, secondEdge, lastButOneEdge);
+//     returnBuffers<<<256, 512, 8192, *stream>>>(output.specialBuffer(), outputBuffer, output.specialShapeInfo(), output.lengthOf());
//     //cudaSyncStream(*stream);
//     err = cudaFree(outputBuffer);
//     if (err != 0)
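NOTE (editor): for reference on the commented-out launcher above, fixed-width histogram
binning places each value into one of nbins equal bins over [leftEdge, rightEdge], with
out-of-range values clamped into the first/last bin. A host-side sketch under those
assumed semantics (the real helper reads the range from an input array):

    #include <cstdint>
    #include <vector>

    std::vector<std::int64_t> histogramFixedWidthSketch(const std::vector<float>& v,
                                                        float leftEdge, float rightEdge,
                                                        int nbins) {
        std::vector<std::int64_t> bins(nbins, 0);
        const float width = (rightEdge - leftEdge) / nbins;   // assumes rightEdge > leftEdge
        for (float x : v) {
            int b = static_cast<int>((x - leftEdge) / width);
            if (b < 0) b = 0;                                 // clamp below-range values
            if (b >= nbins) b = nbins - 1;                    // clamp above-range values
            ++bins[b];
        }
        return bins;
    }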
diff --git a/libnd4j/include/ops/declarable/helpers/cuda/imagesHelpers.cu b/libnd4j/include/ops/declarable/helpers/cuda/imagesHelpers.cu
index c26b79ee6..749f60c11 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/imagesHelpers.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/imagesHelpers.cu
@@ -69,8 +69,8 @@ linkage void rgbToYuvCudaLauncher(const int blocksPerGrid, const int threadsPerB
///////////////////////////////////////////////////////////////////
void transformRgbYuv(sd::LaunchContext* context, const NDArray& input, NDArray& output, const int dimC) {

-    auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), { dimC });
-    auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output.shapeInfo(), { dimC });
+    auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input.shapeInfo(), { dimC });
+    auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output.shapeInfo(), { dimC });

     const Nd4jLong numOfTads = packX.numberOfTads();
@@ -124,8 +124,8 @@ linkage void yuvToRgbCudaLauncher(const int blocksPerGrid, const int threadsPerB
///////////////////////////////////////////////////////////////////
void transformYuvRgb(sd::LaunchContext* context, const NDArray& input, NDArray& output, const int dimC) {

-    auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), { dimC });
-    auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output.shapeInfo(), { dimC });
+    auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input.shapeInfo(), { dimC });
+    auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output.shapeInfo(), { dimC });

     const Nd4jLong numOfTads = packX.numberOfTads();
@@ -287,8 +287,8 @@ static _CUDA_H void rgbToHsvCudaLauncher(const int blocksPerGrid, const int thre
///////////////////////////////////////////////////////////////////
void transformHsvRgb(sd::LaunchContext* context, const NDArray* input, NDArray* output, const int dimC) {

-    auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), {dimC});
-    auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {dimC});
+    auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), {dimC});
+    auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), {dimC});

     const Nd4jLong numOfTads = packX.numberOfTads();
@@ -306,8 +306,8 @@ void transformHsvRgb(sd::LaunchContext* context, const NDArray* input, NDArray*
///////////////////////////////////////////////////////////////////
void transformRgbHsv(sd::LaunchContext* context, const NDArray* input, NDArray* output, const int dimC) {

-    auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), {dimC});
-    auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {dimC});
+    auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), {dimC});
+    auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), {dimC});

     const Nd4jLong numOfTads = packX.numberOfTads();
@@ -389,8 +389,8 @@ __global__ void tripleTransformerCuda(const void *vx, const Nd4jLong *xShapeInfo
template <typename T>
static void rgbYiq(sd::LaunchContext* context, const NDArray* input, NDArray* output, const int dimC) {

-    auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimC);
-    auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimC);
+    auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimC);
+    auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimC);

     NDArray::prepareSpecialUse({output}, {input});
     return tripleTransformerCuda<<<256, 256, 8192, *context->getCudaStream()>>>(input->specialBuffer(), input->specialShapeInfo(), packX.platformShapeInfo(), packX.platformOffsets(), output->specialBuffer(), output->specialShapeInfo(), packZ.platformShapeInfo(), packZ.platformOffsets(), dimC, 1, packZ.numberOfTads());
@@ -399,8 +399,8 @@ static void rgbYiq(sd::LaunchContext* context, const NDArray* input, NDArray* ou
template <typename T>
FORCEINLINE static void yiqRgb(sd::LaunchContext* context, const NDArray* input, NDArray* output, const int dimC) {

-    auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimC);
-    auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimC);
+    auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimC);
+    auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimC);

     NDArray::prepareSpecialUse({output}, {input});
     return tripleTransformerCuda<<<256, 256, 8192, *context->getCudaStream()>>>(input->specialBuffer(), input->specialShapeInfo(), packX.platformShapeInfo(), packX.platformOffsets(), output->specialBuffer(), output->specialShapeInfo(), packZ.platformShapeInfo(), packZ.platformOffsets(), dimC, 2, packZ.numberOfTads());

diff --git a/libnd4j/include/ops/declarable/helpers/cuda/indexReductions.cu b/libnd4j/include/ops/declarable/helpers/cuda/indexReductions.cu
index 9876417df..820a6c258 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/indexReductions.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/indexReductions.cu
@@ -32,7 +32,7 @@ namespace sd {
             NativeOpExecutioner::execIndexReduceScalar(LaunchContext::defaultContext(), indexreduce::Ops::IndexMax, input.buffer(), input.shapeInfo(), input.specialBuffer(), input.specialShapeInfo(), nullptr, output.buffer(), output.shapeInfo(), output.specialBuffer(), output.specialShapeInfo());
         } else {
-            auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), dimensions);
+            auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(input.shapeInfo(), dimensions);

             NativeOpExecutioner::execIndexReduce(LaunchContext::defaultContext(), indexreduce::Ops::IndexMax,
                                                  input.buffer(), input.shapeInfo(), input.specialBuffer(), input.specialShapeInfo(),
@@ -51,7 +51,7 @@ namespace sd {
             NativeOpExecutioner::execIndexReduceScalar(LaunchContext::defaultContext(), indexreduce::Ops::IndexMin, input.buffer(), input.shapeInfo(), input.specialBuffer(), input.specialShapeInfo(), nullptr, output.buffer(), output.shapeInfo(), output.specialBuffer(), output.specialShapeInfo());
         } else {
-            auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), dimensions);
+            auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(input.shapeInfo(), dimensions);

             NativeOpExecutioner::execIndexReduce(LaunchContext::defaultContext(), indexreduce::Ops::IndexMin,
                                                  input.buffer(), input.shapeInfo(), input.specialBuffer(), input.specialShapeInfo(),
@@ -70,7 +70,7 @@ namespace sd {
             NativeOpExecutioner::execIndexReduceScalar(LaunchContext::defaultContext(), indexreduce::Ops::IndexAbsoluteMax, input.buffer(), input.shapeInfo(), input.specialBuffer(), input.specialShapeInfo(), nullptr, output.buffer(), output.shapeInfo(), output.specialBuffer(), output.specialShapeInfo());
         } else {
-            auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), dimensions);
+            auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(input.shapeInfo(), dimensions);

             NativeOpExecutioner::execIndexReduce(LaunchContext::defaultContext(), indexreduce::Ops::IndexAbsoluteMax,
                                                  input.buffer(), input.shapeInfo(), input.specialBuffer(), input.specialShapeInfo(),
@@ -89,7 +89,7 @@ namespace sd {
             NativeOpExecutioner::execIndexReduceScalar(LaunchContext::defaultContext(), indexreduce::Ops::IndexAbsoluteMin, input.buffer(), input.shapeInfo(), input.specialBuffer(), input.specialShapeInfo(), nullptr, output.buffer(), output.shapeInfo(), output.specialBuffer(), output.specialShapeInfo());
         } else {
-            auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), dimensions);
+            auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(input.shapeInfo(), dimensions);

             NativeOpExecutioner::execIndexReduce(LaunchContext::defaultContext(), indexreduce::Ops::IndexAbsoluteMin,
                                                  input.buffer(), input.shapeInfo(), input.specialBuffer(), input.specialShapeInfo(),
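NOTE (editor): the indexReductions.cu hunks above show the dispatch shape shared by
argMax/argMin/argAbsMax/argAbsMin: a reduction over the whole array produces one scalar
index, while a reduction along dimensions produces one index per TAD. A plain C++ toy of
the per-TAD case for a row-major matrix (argMaxPerRow is illustrative only):

    #include <cstdint>
    #include <vector>

    std::vector<std::int64_t> argMaxPerRow(const float* x, int rows, int cols) {
        std::vector<std::int64_t> out(rows);
        for (int r = 0; r < rows; ++r) {
            const float* row = x + static_cast<std::size_t>(r) * cols;  // one TAD
            std::int64_t best = 0;
            for (int c = 1; c < cols; ++c)
                if (row[c] > row[best]) best = c;   // first maximum wins on ties
            out[r] = best;
        }
        return out;
    }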
diff --git a/libnd4j/include/ops/declarable/helpers/cuda/ismax.cu b/libnd4j/include/ops/declarable/helpers/cuda/ismax.cu
index 723b0f215..f6e233aab 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/ismax.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/ismax.cu
@@ -61,7 +61,7 @@ static void ismax_(sd::LaunchContext * context, const NDArray* input, NDArray* o
     int dimensionLength = dimensions.size();
     std::vector<int> copy(dimensions);

-    auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), copy.data(), copy.size());
+    auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), copy.data(), copy.size());

     // we launch legacy IndexMax op, to get indices of max values along dimension
     auto indexMaxArr = input->applyIndexReduce(indexreduce::IndexMax, dimensions);

diff --git a/libnd4j/include/ops/declarable/helpers/cuda/lrn.cu b/libnd4j/include/ops/declarable/helpers/cuda/lrn.cu
index ebc0732e2..123c06ac5 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/lrn.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/lrn.cu
@@ -116,8 +116,8 @@ namespace helpers {
    template <typename T>
    static void lrnBP_(sd::graph::Context& block, const NDArray& input, const NDArray& gradO, NDArray& gradI, const int depth, const float bias, const float alpha, const float beta) {
        auto rank = input.rankOf();
-       auto packX = ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), {rank - 1});
-       auto packZ = ConstantTadHelper::getInstance()->tadForDimensions(gradI.shapeInfo(), {rank - 1});
+       auto packX = ConstantTadHelper::getInstance().tadForDimensions(input.shapeInfo(), {rank - 1});
+       auto packZ = ConstantTadHelper::getInstance().tadForDimensions(gradI.shapeInfo(), {rank - 1});

        const auto tadLength = shape::length(packX.primaryShapeInfo());
        const int numBlocks = sd::math::nd4j_min(1024, packX.numberOfTads());
@@ -144,8 +144,8 @@ namespace helpers {
    template <typename T>
    static void lrnFunctor_(sd::graph::Context& block, NDArray* input, NDArray* output, int depth, double bias, double alpha, double beta) {
        auto rank = input->rankOf();
-       auto packX = ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), {rank - 1});
-       auto packZ = ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {rank - 1});
+       auto packX = ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), {rank - 1});
+       auto packZ = ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), {rank - 1});

        const auto tadLength = shape::length(packX.primaryShapeInfo());
        const int numBlocks = sd::math::nd4j_min(1024, packX.numberOfTads());

diff --git a/libnd4j/include/ops/declarable/helpers/cuda/lstsq.cu b/libnd4j/include/ops/declarable/helpers/cuda/lstsq.cu
index 8d8548be5..b28efff80 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/lstsq.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/lstsq.cu
@@ -48,7 +48,7 @@ namespace helpers {
    template <typename T>
    static void fillRegularizer(sd::LaunchContext* context, NDArray& ioMatrix, double const value) {
-       auto lastDimsTads = ConstantTadHelper::getInstance()->tadForDimensions(ioMatrix.shapeInfo(), {-2, -1});
+       auto lastDimsTads = ConstantTadHelper::getInstance().tadForDimensions(ioMatrix.shapeInfo(), {-2, -1});
        auto stream = context->getCudaStream();
        auto rows = ioMatrix.sizeAt(-2);
        //auto cols = ioMatrix.sizeAt(-1);
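NOTE (editor): fillRegularizer above writes a constant onto the main diagonal of each
trailing [rows, cols] matrix (the {-2, -1} TADs). A single-matrix host-side toy of that
operation, assuming row-major storage (fillDiagonal is illustrative only):

    void fillDiagonal(float* m, int rows, int cols, float value) {
        int n = rows < cols ? rows : cols;   // diagonal length of a non-square matrix
        for (int i = 0; i < n; ++i)
            m[i * cols + i] = value;
    }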
diff --git a/libnd4j/include/ops/declarable/helpers/cuda/lup.cu b/libnd4j/include/ops/declarable/helpers/cuda/lup.cu
index 682b2eee9..c59ef9489 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/lup.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/lup.cu
@@ -604,8 +604,8 @@ namespace helpers {
//        output->tickWriteDevice();
        permutationVectors->applyTrueBroadcast(sd::BroadcastOpsTuple::Assign(), iota, *permutationVectors, true, nullptr);
//        permutationVectors->tickWriteDevice();
-       auto tads = ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {-2, -1});
-       auto permutaionTads = ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {-1});
+       auto tads = ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), {-2, -1});
+       auto permutaionTads = ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), {-1});
        auto batchNum = tads.numberOfTads();
        luBatchedKernel<<>>(reinterpret_cast(output->platformBuffer()), output->specialShapeInfo(), reinterpret_cast(permutationVectors->platformBuffer()),
@@ -624,8 +624,8 @@ namespace helpers {
        Nd4jLong n = input->sizeAt(-1);
        Nd4jLong n2 = n * n;
        std::vector dims();
-       auto packX = ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), {input->rankOf() - 2, input->rankOf() - 1});
-       //auto packZ = ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {output->rankOf() - 1});
+       auto packX = ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), {input->rankOf() - 2, input->rankOf() - 1});
+       //auto packZ = ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), {output->rankOf() - 1});
//        DataType dtype = input->dataType();
//        if (dtype != DataType::DOUBLE)
//            dtype = DataType::FLOAT32;
@@ -640,7 +640,7 @@ namespace helpers {
//            if (matrix.dataType() == input->dataType())
            fillMatrix<<>>(matrix.specialBuffer(), matrix.specialShapeInfo(), input->specialBuffer(), input->specialShapeInfo(), pos, n);
//            else
-//                fillMatrix<<>>(matrix.specialBuffer(), matrix.specialShapeInfo(), input->specialBuffer(), input->specialShapeInfo(), pos, n);
+//                fillMatrix<<>>(matrix.specialBuffer(), matrix.specialShapeInfo(), input->specialBuffer(), input->specialShapeInfo(), pos, n);
            lup_(context, &matrix, nullptr, nullptr);
//            else
//                lup_(context, &matrix, nullptr, nullptr);
@@ -668,8 +668,8 @@ namespace helpers {
        Nd4jLong n = input->sizeAt(-1);
        Nd4jLong n2 = n * n;
        std::vector dims();
-       auto packX = ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), {input->rankOf() - 2, input->rankOf() - 1});
-       //auto packZ = ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {output->rankOf() - 1});
+       auto packX = ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), {input->rankOf() - 2, input->rankOf() - 1});
+       //auto packZ = ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), {output->rankOf() - 1});
        DataType dtype = input->dataType();
        if (dtype != DataType::DOUBLE)
            dtype = DataType::FLOAT32;
@@ -685,7 +685,7 @@ namespace helpers {
//            if (matrix.dataType() == input->dataType())
            fillMatrix<<>>(matrix.specialBuffer(), matrix.specialShapeInfo(), input->specialBuffer(), input->specialShapeInfo(), pos, n);
//            else
-//                fillMatrix<<>>(matrix.specialBuffer(), matrix.specialShapeInfo(), input->specialBuffer(), input->specialShapeInfo(), pos, n);
+//                fillMatrix<<>>(matrix.specialBuffer(), matrix.specialShapeInfo(), input->specialBuffer(), input->specialShapeInfo(), pos, n);
//            if (matrix.dataType() == input->dataType())
            lup_(context, &matrix, nullptr, nullptr);
@@ -759,10 +759,10 @@ namespace helpers {
            NDArray lower = NDArrayFactory::create('c', {n, n}, dtype, context);
            NDArray compound = NDArrayFactory::create('c', {n, n}, dtype, context);
            NDArray permutation = NDArrayFactory::create('c', {n, n}, dtype, context);
-           auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(),
+           auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(),
                                                                                {input->rankOf() - 2, input->rankOf() - 1});
-           auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(),
+           auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(),
                                                                                {output->rankOf() - 2, output->rankOf() - 1});
            auto stream = context->getCudaStream();
@@ -849,7 +849,7 @@ namespace helpers {
                throw cuda_exception::build("helpers::cholesky_: Cannot create solver handle", status);
            }
            F **dArrayBatch = nullptr;
-           auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempOutput.shapeInfo(),
+           auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(tempOutput.shapeInfo(),
                                                                                {tempOutput.rankOf() - 2, tempOutput.rankOf() - 1});
            const Nd4jLong batchSize = packX.numberOfTads();
@@ -980,7 +980,7 @@ namespace helpers {
            auto outputBuf = output->dataBuffer()->specialAsT(); //reinterpret_cast(output->specialBuffer()); // + e * n2; // + e * n2;
            auto inputBuf = tempOutput.dataBuffer()->specialAsT(); //reinterpret_cast(tempOutput.specialBuffer());
            output->nullify();
-           auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempOutput.shapeInfo(),
+           auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(tempOutput.shapeInfo(),
                                                                                {tempOutput.rankOf() - 2, tempOutput.rankOf() - 1});
            logDetKernel<<<128, 512, 256, *stream>>>(inputBuf, tempOutput.specialShapeInfo(),

diff --git a/libnd4j/include/ops/declarable/helpers/cuda/matrix_band.cu b/libnd4j/include/ops/declarable/helpers/cuda/matrix_band.cu
index 78249bc38..446d57b27 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/matrix_band.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/matrix_band.cu
@@ -94,8 +94,8 @@ namespace helpers {
            std::vector<int> lastDims({input->rankOf() - 2, input->rankOf() - 1});
            std::vector<int> dimsToExclude = ShapeUtils::evalDimsToExclude(input->rankOf(), lastDims);

-           auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), lastDims);
-           auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), lastDims);
+           auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), lastDims);
+           auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), lastDims);

            const Nd4jLong numTads = packX.numberOfTads();
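NOTE (editor): the matrix_band helper above processes each trailing [rows, cols] TAD
independently. A single-matrix host-side sketch, assuming the usual matrix_band_part
semantics (keep element (i, j) when i - j <= lower and j - i <= upper, with a negative
bound meaning unbounded); bandPart is illustrative, not the kernel itself:

    void bandPart(float* m, int n, int lower, int upper) {
        for (int i = 0; i < n; ++i)
            for (int j = 0; j < n; ++j) {
                bool keep = (lower < 0 || i - j <= lower) &&
                            (upper < 0 || j - i <= upper);
                if (!keep) m[i * n + j] = 0.f;   // zero everything outside the band
            }
    }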
b/libnd4j/include/ops/declarable/helpers/cuda/meshgrid.cu index 3f2ed13b5..918dca510 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/meshgrid.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/meshgrid.cu @@ -104,7 +104,7 @@ namespace helpers { hOutBuffers[i] = outArrs[i]->specialBuffer(); - auto pack = ConstantTadHelper::getInstance()->tadForDimensions(outArrs[i]->shapeInfo(), {inIndices[i]}); + auto pack = ConstantTadHelper::getInstance().tadForDimensions(outArrs[i]->shapeInfo(), {inIndices[i]}); hOutTadShapes[i] = pack.specialShapeInfo(); hOutTadOffsets[i] = pack.specialOffsets(); hNumTads[i] = pack.numberOfTads(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/nth_element.cu b/libnd4j/include/ops/declarable/helpers/cuda/nth_element.cu index c3b4abc51..c2f34f9fe 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/nth_element.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/nth_element.cu @@ -66,7 +66,7 @@ namespace helpers { else { // rank greater than 1 std::vector lastDims({input->rankOf() - 1});// = ShapeUtils::evalDimsToExclude(input->rankOf(), {input->rankOf() - 1}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(sortedVals.shapeInfo(), lastDims); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(sortedVals.shapeInfo(), lastDims); auto pTadShape = packX.specialShapeInfo(); auto pTadShapeH = packX.primaryShapeInfo(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/percentile.cu b/libnd4j/include/ops/declarable/helpers/cuda/percentile.cu index 7f2bcdcfd..1bc50fad7 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/percentile.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/percentile.cu @@ -97,7 +97,7 @@ namespace helpers { shape::checkDimensions(inputRank, axis); auto tempArray = input.dup(); - auto packX = ConstantTadHelper::getInstance()->tadForDimensions(tempArray.shapeInfo(), axis); + auto packX = ConstantTadHelper::getInstance().tadForDimensions(tempArray.shapeInfo(), axis); auto tadLength = shape::length(packX.primaryShapeInfo()); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/prefix.cu b/libnd4j/include/ops/declarable/helpers/cuda/prefix.cu index d2832ec80..959b45865 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/prefix.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/prefix.cu @@ -148,8 +148,8 @@ static void prefixPerBlockCudaLauncher(const int blocksPerGrid, const int thread /////////////////////////////////////////////////////////////////// void prefix(sd::LaunchContext * context, scalar::Ops op, const NDArray* x, NDArray* z, const std::vector& dims, bool exclusive, bool reverse) { - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), dims); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(z->shapeInfo(), dims); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(x->shapeInfo(), dims); + auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(z->shapeInfo(), dims); const Nd4jLong numTads = packX.numberOfTads(); const Nd4jLong tadLen = x->lengthOf() / numTads; diff --git a/libnd4j/include/ops/declarable/helpers/cuda/qr.cu b/libnd4j/include/ops/declarable/helpers/cuda/qr.cu index 828867b4e..e499f21d0 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/qr.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/qr.cu @@ -60,9 +60,9 @@ namespace helpers { m({col, m.rows(), col, m.columns()}).assign(in({col, m.rows(), col, m.columns()})); // auto stream = context->getCudaStream(); -// 
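The mechanical change running through every hunk above is the same one: the helper singletons' getInstance() now returns a reference rather than a pointer, so every call site switches from `->` to `.`. A minimal sketch of that pattern follows; the `Holder` class and its method are illustrative stand-ins, not the actual libnd4j declarations:

    #include <vector>

    // Hypothetical stand-in for helpers like ConstantTadHelper: a Meyers
    // singleton whose getInstance() returns a reference to a function-local
    // static, which C++11 guarantees is initialized exactly once, thread-safely,
    // and which never needs an explicit delete.
    class Holder {
    public:
        static Holder& getInstance() {
            static Holder instance;   // constructed on first use
            return instance;
        }
        int tadForDimensions(const std::vector<int>& dims) const {
            return static_cast<int>(dims.size()); // placeholder body
        }
    private:
        Holder() = default;
        Holder(const Holder&) = delete;
        Holder& operator=(const Holder&) = delete;
    };

    int main() {
        // old style: Holder::getInstance()->tadForDimensions({0, 1});
        // new style, as in the hunks above:
        return Holder::getInstance().tadForDimensions({0, 1});
    }

Returning a reference removes the null-pointer case from every call site and sidesteps the ownership question a raw-pointer singleton raises.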
diff --git a/libnd4j/include/ops/declarable/helpers/cuda/reverse.cu b/libnd4j/include/ops/declarable/helpers/cuda/reverse.cu
index 2ed45356e..6ae1b22a8 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/reverse.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/reverse.cu
@@ -212,8 +212,8 @@ namespace helpers {
 //////////////////////////////////////////////////////////////////////////
 void reverse(sd::LaunchContext * context, const NDArray* input, NDArray* output, const std::vector<int>* intArgs) {
-    auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), *intArgs);
-    auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), *intArgs);
+    auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), *intArgs);
+    auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), *intArgs);
     NDArray::prepareSpecialUse({output}, {input});
diff --git a/libnd4j/include/ops/declarable/helpers/cuda/roll.cu b/libnd4j/include/ops/declarable/helpers/cuda/roll.cu
index 773f7279d..a5149c978 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/roll.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/roll.cu
@@ -253,7 +253,7 @@ namespace helpers {
             for (int i = 0; i < dims.size(); ++i)
                 dims[i] = axe + 1 + i;
-            auto packZ = ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dims);
+            auto packZ = ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dims);
             int numTads = packZ.numberOfTads();
             int sizeAt = input->sizeAt(axe);
diff --git a/libnd4j/include/ops/declarable/helpers/cuda/scatter.cu b/libnd4j/include/ops/declarable/helpers/cuda/scatter.cu
index 94b0e0080..cbe8895b2 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/scatter.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/scatter.cu
@@ -736,8 +736,8 @@ __global__ static void scatterLockCuda(const int opCode,
         std::vector<int> yTadDims(sizeOfUpdDims);
         std::iota(yTadDims.begin(), yTadDims.end(), 0);
-        auto packY = sd::ConstantTadHelper::getInstance()->tadForDimensions(updates.shapeInfo(), ShapeUtils::evalDimsToExclude(updates.rankOf(), yTadDims));
-        auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output.shapeInfo(), zTadDims);
+        auto packY = sd::ConstantTadHelper::getInstance().tadForDimensions(updates.shapeInfo(), ShapeUtils::evalDimsToExclude(updates.rankOf(), yTadDims));
+        auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output.shapeInfo(), zTadDims);
         const Nd4jLong zTadLen = shape::length(packZ.primaryShapeInfo());
         const Nd4jLong yTadLen = shape::length(packY.primaryShapeInfo());
@@ -963,21 +963,21 @@
         std::vector<int> dims = {0};
         auto inverted = ShapeUtils::evalDimsToExclude(output.rankOf(), dims);
-        auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(output.shapeInfo(), inverted);
-        auto packY = sd::ConstantTadHelper::getInstance()->tadForDimensions(updates.shapeInfo(), inverted);
+        auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(output.shapeInfo(), inverted);
+        auto packY = sd::ConstantTadHelper::getInstance().tadForDimensions(updates.shapeInfo(), inverted);
         auto psX = packX.specialShapeInfo();
         auto psY = packY.specialShapeInfo();
 
         PointersManager manager(context, "scatter");
 
         auto poX = packX.specialOffsets();
         auto poY = packY.specialOffsets();
 
         NDArray::prepareSpecialUse({&output}, {&updates, &indices});
 
         unsigned int tadLengthX = shape::length(packX.primaryShapeInfo());
         unsigned int tadLengthY = shape::length(packY.primaryShapeInfo());
         if (tadLengthX != tadLengthY)
             throw std::runtime_error("scatter: Lengths of TADs must be equal");
@@ -1016,9 +1016,9 @@ const int xLastDim = indices.sizeAt(-1);
             zTadDims[i] = zRank - 1 - j;
         }
-        auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(indices.shapeInfo(), {xRank - 1});
-        auto packY = sd::ConstantTadHelper::getInstance()->tadForDimensions(updates.shapeInfo(), yTadDims);
-        auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output.shapeInfo(), zTadDims);
+        auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(indices.shapeInfo(), {xRank - 1});
+        auto packY = sd::ConstantTadHelper::getInstance().tadForDimensions(updates.shapeInfo(), yTadDims);
+        auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output.shapeInfo(), zTadDims);
         const int threadsPerBlock = MAX_NUM_THREADS / 4;
         const int blocksPerGrid = packZ.numberOfTads();
@@ -1152,16 +1152,16 @@ const int xLastDim = indices.sizeAt(-1);
             // PointersManager::printDevContentOnDev(zShapeInfo, 8);
             // manager.printDevContentOnHost(indices.specialBuffer(), indices.lengthOf());
             // manager.printDevContentOnHost(indices.specialShapeInfo(), shape::shapeInfoLength(indices.rankOf()));
             // manager.printDevContentOnHost(updates.specialBuffer(), updates.lengthOf());
             // manager.printDevContentOnHost(updates.specialShapeInfo(), shape::shapeInfoLength(updates.rankOf()));
             // manager.printDevContentOnHost(output.specialShapeInfo(), shape::shapeInfoLength(output.rankOf()));
             // printf("!!!!!!!\n");
             // manager.printDevContentOnHost(packX.specialShapeInfo(), 2*shape::rank(packX.primaryShapeInfo()) + 4);
             // manager.printDevContentOnHost(packX.specialOffsets(), packX.numberOfTads());
             // manager.printDevContentOnHost(packY.specialShapeInfo(), 2*shape::rank(packY.primaryShapeInfo()) + 4);
             // manager.printDevContentOnHost(packY.specialOffsets(), packY.numberOfTads());
             // manager.printDevContentOnHost(packZ.specialShapeInfo(), 2*shape::rank(packZ.primaryShapeInfo()) + 4);
             // manager.printDevContentOnHost(packZ.specialOffsets(), packZ.numberOfTads());
             // printf("dddddddd\n");
             // shape::printShapeInfoLinear(packY.primaryShapeInfo());
\ No newline at end of file
diff --git a/libnd4j/include/ops/declarable/helpers/cuda/scatter_simple.cu b/libnd4j/include/ops/declarable/helpers/cuda/scatter_simple.cu
index a17464cbd..3b422a5c2 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/scatter_simple.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/scatter_simple.cu
@@ -51,7 +51,7 @@ namespace sd {
         void scatterSimple_(sd::LaunchContext * context, const int opId, NDArray& input, const NDArray& updates, const NDArray& indices, const std::vector<int>& dimensions) {
             auto dims = ShapeUtils::evalDimsToExclude(input.rankOf(), dimensions);
-            auto packX = ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), dims);
+            auto packX = ConstantTadHelper::getInstance().tadForDimensions(input.shapeInfo(), dims);
             auto xLength = shape::length(packX.primaryShapeInfo());
             auto iLength = indices.lengthOf();
diff --git a/libnd4j/include/ops/declarable/helpers/cuda/scatter_update.cu b/libnd4j/include/ops/declarable/helpers/cuda/scatter_update.cu
index 51f917a79..3a3bfef12 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/scatter_update.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/scatter_update.cu
@@ -114,8 +114,8 @@ namespace sd {
             for (int e = 2; e < 2 + numOfDims; e++)
                 tadDimensions[e-2] = (*intArgs)[e];
-            auto packX = ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), tadDimensions);
-            auto packY = ConstantTadHelper::getInstance()->tadForDimensions(updates.shapeInfo(), tadDimensions);
+            auto packX = ConstantTadHelper::getInstance().tadForDimensions(input.shapeInfo(), tadDimensions);
+            auto packY = ConstantTadHelper::getInstance().tadForDimensions(updates.shapeInfo(), tadDimensions);
             NDArray indices(const_cast(intArgs->data()) + numOfDims + 3, 'c', {numOfInd}, sd::DataType::INT32, context);
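Nearly every hunk in these files builds a TAD (tensor-along-dimension) pack and then reads its count, shape info, and per-sub-array offsets before launching a kernel. A toy sketch of that access pattern follows; the struct and helper below are simplified stand-ins mirroring the TadPack accessor names used above, not the real ConstantTadHelper implementation:

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Toy model of a TAD pack: splitting a row-major {batch, rows, cols}
    // array into `batch` contiguous sub-arrays over dims {-2, -1}.
    struct ToyTadPack {
        int64_t numTads;                  // how many sub-arrays the split yields
        std::vector<int64_t> offsets;     // element offset of each sub-array
    };

    static ToyTadPack tadForLastTwoDims(int64_t batch, int64_t rows, int64_t cols) {
        ToyTadPack pack{batch, {}};
        for (int64_t b = 0; b < batch; b++)
            pack.offsets.push_back(b * rows * cols);  // contiguous slices
        return pack;
    }

    int main() {
        auto pack = tadForLastTwoDims(4, 3, 3);       // e.g. 4 stacked 3x3 matrices
        for (int64_t t = 0; t < pack.numTads; t++)    // one kernel block per TAD
            std::printf("tad %lld starts at element %lld\n",
                        (long long)t, (long long)pack.offsets[t]);
        return 0;
    }

The CUDA launches above typically size the grid from numberOfTads() and hand the device-side shape info and offsets (specialShapeInfo()/specialOffsets()) to the kernel.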
diff --git a/libnd4j/include/ops/declarable/helpers/cuda/segment_max.cu b/libnd4j/include/ops/declarable/helpers/cuda/segment_max.cu
index 927b1bb2f..d623c8734 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/segment_max.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/segment_max.cu
@@ -185,8 +185,8 @@ namespace sd {
         }
         else {
             std::vector<int> dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});
-            auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions);
-            auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions);
+            auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimensions);
+            auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimensions);
             auto inputTads = packX.specialShapeInfo();
             auto inputTadOffsets = packX.specialOffsets();
             auto outputTads = packZ.specialShapeInfo();
@@ -226,8 +226,8 @@ namespace sd {
         }
         else {
             std::vector<int> dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});
-            auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions);
-            auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions);
+            auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimensions);
+            auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimensions);
             auto inputTads = packX.specialShapeInfo();
             auto inputTadOffsets = packX.specialOffsets();
             auto outputTads = packZ.specialShapeInfo();
@@ -349,10 +349,10 @@ namespace sd {
         }
         else {
             std::vector<int> dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});
-            auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions);
-            auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions);
-            auto packGradIn = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempRes.shapeInfo(), dimensions);
-            auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->shapeInfo(), dimensions);
+            auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimensions);
+            auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimensions);
+            auto packGradIn = sd::ConstantTadHelper::getInstance().tadForDimensions(tempRes.shapeInfo(), dimensions);
+            auto packGradOut = sd::ConstantTadHelper::getInstance().tadForDimensions(gradOut->shapeInfo(), dimensions);
             Nd4jLong const* inputTads = packX.specialShapeInfo();
             Nd4jLong const* inputTadOffsets = packX.specialOffsets();
             Nd4jLong const* outputTads = packZ.specialShapeInfo();
@@ -397,10 +397,10 @@ namespace sd {
         }
         else {
             std::vector<int> dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});
-            auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions);
-            auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions);
-            auto packGradIn = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempRes.shapeInfo(), dimensions);
-            auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->shapeInfo(), dimensions);
+            auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimensions);
+            auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimensions);
+            auto packGradIn = sd::ConstantTadHelper::getInstance().tadForDimensions(tempRes.shapeInfo(), dimensions);
+            auto packGradOut = sd::ConstantTadHelper::getInstance().tadForDimensions(gradOut->shapeInfo(), dimensions);
             Nd4jLong const* inputTads = packX.specialShapeInfo();
             Nd4jLong const* inputTadOffsets = packX.specialOffsets();
             Nd4jLong const* outputTads = packZ.specialShapeInfo();
diff --git a/libnd4j/include/ops/declarable/helpers/cuda/segment_mean.cu b/libnd4j/include/ops/declarable/helpers/cuda/segment_mean.cu
index c75293c1d..5ccecf37c 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/segment_mean.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/segment_mean.cu
@@ -174,8 +174,8 @@ namespace helpers {
         }
         else {
             std::vector<int> dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});
-            auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions);
-            auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions);
+            auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimensions);
+            auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimensions);
             auto inputTads = packX.specialShapeInfo();
             auto inputTadOffsets = packX.specialOffsets();
             auto outputTads = packZ.specialShapeInfo();
@@ -216,8 +216,8 @@ namespace helpers {
         else {
             output->assign(0);
             std::vector<int> dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});
-            auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions);
-            auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions);
+            auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimensions);
+            auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimensions);
             Nd4jLong const* inputTads = packX.specialShapeInfo();
             Nd4jLong const* inputTadOffsets = packX.specialOffsets();
             Nd4jLong const* outputTads = packZ.specialShapeInfo();
@@ -333,10 +333,10 @@ namespace helpers {
         }
         else {
             std::vector<int> dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});
-            auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions);
-            auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions);
-//            auto packGradIn = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempRes.shapeInfo(), dimensions);
-            auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->shapeInfo(), dimensions);
+            auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimensions);
+            auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimensions);
+//            auto packGradIn = sd::ConstantTadHelper::getInstance().tadForDimensions(tempRes.shapeInfo(), dimensions);
+            auto packGradOut = sd::ConstantTadHelper::getInstance().tadForDimensions(gradOut->shapeInfo(), dimensions);
             Nd4jLong const* inputTads = packX.specialShapeInfo();
             Nd4jLong const* inputTadOffsets = packX.specialOffsets();
             Nd4jLong const* outputTads = packZ.specialShapeInfo();
@@ -386,10 +386,10 @@ namespace helpers {
         }
         else {
             std::vector<int> dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});
-            auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions);
-            auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions);
-//            auto packGradIn = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempRes.shapeInfo(), dimensions);
-            auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->shapeInfo(), dimensions);
+            auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimensions);
+            auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimensions);
+//            auto packGradIn = sd::ConstantTadHelper::getInstance().tadForDimensions(tempRes.shapeInfo(), dimensions);
+            auto packGradOut = sd::ConstantTadHelper::getInstance().tadForDimensions(gradOut->shapeInfo(), dimensions);
             Nd4jLong const* inputTads = packX.specialShapeInfo();
             Nd4jLong const* inputTadOffsets = packX.specialOffsets();
             Nd4jLong const* outputTads = packZ.specialShapeInfo();
diff --git a/libnd4j/include/ops/declarable/helpers/cuda/segment_min.cu b/libnd4j/include/ops/declarable/helpers/cuda/segment_min.cu
index c6f2d4ed2..9e825c701 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/segment_min.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/segment_min.cu
@@ -177,8 +177,8 @@ namespace helpers {
         }
         else {
             std::vector<int> dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});
-            auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions);
-            auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions);
+            auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimensions);
+            auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimensions);
             auto inputTads = packX.specialShapeInfo();
             auto inputTadOffsets = packX.specialOffsets();
             auto outputTads = packZ.specialShapeInfo();
@@ -222,8 +222,8 @@ namespace helpers {
         else {
             output->assign(DataTypeUtils::max());
             std::vector<int> dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});
-            auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions);
-            auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions);
+            auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimensions);
+            auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimensions);
             auto inputTads = packX.specialShapeInfo();
             auto inputTadOffsets = packX.specialOffsets();
             auto outputTads = packZ.specialShapeInfo();
@@ -347,10 +347,10 @@ namespace helpers {
         }
         else {
             std::vector<int> dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});
-            auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions);
-            auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions);
-            auto packGradIn = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempRes.shapeInfo(), dimensions);
-            auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->shapeInfo(), dimensions);
+            auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimensions);
+            auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimensions);
+            auto packGradIn = sd::ConstantTadHelper::getInstance().tadForDimensions(tempRes.shapeInfo(), dimensions);
+            auto packGradOut = sd::ConstantTadHelper::getInstance().tadForDimensions(gradOut->shapeInfo(), dimensions);
             auto inputTads = packX.specialShapeInfo();
             auto inputTadOffsets = packX.specialOffsets();
             auto outputTads = packZ.specialShapeInfo();
@@ -395,10 +395,10 @@ namespace helpers {
         }
         else {
             std::vector<int> dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});
-            auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions);
-            auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions);
-            auto packGradIn = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempRes.shapeInfo(), dimensions);
-            auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->shapeInfo(), dimensions);
+            auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimensions);
+            auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimensions);
+            auto packGradIn = sd::ConstantTadHelper::getInstance().tadForDimensions(tempRes.shapeInfo(), dimensions);
+            auto packGradOut = sd::ConstantTadHelper::getInstance().tadForDimensions(gradOut->shapeInfo(), dimensions);
             auto inputTads = packX.specialShapeInfo();
             auto inputTadOffsets = packX.specialOffsets();
             auto outputTads = packZ.specialShapeInfo();
diff --git a/libnd4j/include/ops/declarable/helpers/cuda/segment_prod.cu b/libnd4j/include/ops/declarable/helpers/cuda/segment_prod.cu
index 026ded3e7..44e077300 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/segment_prod.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/segment_prod.cu
@@ -138,8 +138,8 @@ namespace helpers {
         }
         else {
             std::vector<int> dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});
-            auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions);
-            auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions);
+            auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimensions);
+            auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimensions);
             auto inputTads = packX.specialShapeInfo();
             auto inputTadOffsets = packX.specialOffsets();
             auto outputTads = packZ.specialShapeInfo();
@@ -181,8 +181,8 @@ namespace helpers {
         }
         else {
             std::vector<int> dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});
-            auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions);
-            auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions);
+            auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimensions);
+            auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimensions);
             auto inputTads = packX.specialShapeInfo();
             auto inputTadOffsets = packX.specialOffsets();
             auto outputTads = packZ.specialShapeInfo();
@@ -296,10 +296,10 @@ namespace helpers {
         }
         else {
             std::vector<int> dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});
-            auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions);
-            auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions);
-            auto packGradIn = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempRes.shapeInfo(), dimensions);
-            auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->shapeInfo(), dimensions);
+            auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimensions);
+            auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimensions);
+            auto packGradIn = sd::ConstantTadHelper::getInstance().tadForDimensions(tempRes.shapeInfo(), dimensions);
+            auto packGradOut = sd::ConstantTadHelper::getInstance().tadForDimensions(gradOut->shapeInfo(), dimensions);
             auto inputTads = packX.specialShapeInfo();
             auto inputTadOffsets = packX.specialOffsets();
             auto outputTads = packZ.specialShapeInfo();
@@ -346,10 +346,10 @@ namespace helpers {
         }
         else {
             std::vector<int> dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});
-            auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions);
-            auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions);
-            auto packGradIn = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempRes.shapeInfo(), dimensions);
-            auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->shapeInfo(), dimensions);
+            auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimensions);
+            auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimensions);
+            auto packGradIn = sd::ConstantTadHelper::getInstance().tadForDimensions(tempRes.shapeInfo(), dimensions);
+            auto packGradOut = sd::ConstantTadHelper::getInstance().tadForDimensions(gradOut->shapeInfo(), dimensions);
             auto inputTads = packX.specialShapeInfo();
             auto inputTadOffsets = packX.specialOffsets();
             auto outputTads = packZ.specialShapeInfo();
diff --git a/libnd4j/include/ops/declarable/helpers/cuda/segment_sqrtn.cu b/libnd4j/include/ops/declarable/helpers/cuda/segment_sqrtn.cu
index b72abeffc..20f232332 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/segment_sqrtn.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/segment_sqrtn.cu
@@ -108,8 +108,8 @@ namespace helpers {
         else {
             output->nullify();
             std::vector<int> dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});
-            auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions);
-            auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions);
+            auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimensions);
+            auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimensions);
             auto inputTads = packX.specialShapeInfo();
             auto inputTadOffsets = packX.specialOffsets();
             auto outputTads = packZ.specialShapeInfo();
@@ -226,10 +226,10 @@ namespace helpers {
         }
         else {
             std::vector<int> dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});
-            auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions);
-            auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions);
-//            auto packGradIn = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempRes.shapeInfo(), dimensions);
-            auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->shapeInfo(), dimensions);
+            auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimensions);
+            auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimensions);
+//            auto packGradIn = sd::ConstantTadHelper::getInstance().tadForDimensions(tempRes.shapeInfo(), dimensions);
+            auto packGradOut = sd::ConstantTadHelper::getInstance().tadForDimensions(gradOut->shapeInfo(), dimensions);
             auto inputTads = packX.specialShapeInfo();
             auto inputTadOffsets = packX.specialOffsets();
             auto outputTads = packZ.specialShapeInfo();
diff --git a/libnd4j/include/ops/declarable/helpers/cuda/segment_sum.cu b/libnd4j/include/ops/declarable/helpers/cuda/segment_sum.cu
index 7a762a526..a2050d695 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/segment_sum.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/segment_sum.cu
@@ -186,8 +186,8 @@ namespace helpers {
         }
         else {
             std::vector<int> dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});
-            auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions);
-            auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions);
+            auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimensions);
+            auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimensions);
             auto inputTads = packX.specialShapeInfo();
             auto inputTadOffsets = packX.specialOffsets();
             auto outputTads = packZ.specialShapeInfo();
@@ -227,8 +227,8 @@ namespace helpers {
         else {
             output->assign(0);
             std::vector<int> dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});
-            auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions);
-            auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions);
+            auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimensions);
+            auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimensions);
             auto inputTads = packX.specialShapeInfo();
             auto inputTadOffsets = packX.specialOffsets();
             auto outputTads = packZ.specialShapeInfo();
@@ -338,9 +338,9 @@ namespace helpers {
         }
         else {
             std::vector<int> dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});
-            auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions);
-            auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions);
-            auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->shapeInfo(), dimensions);
+            auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimensions);
+            auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimensions);
+            auto packGradOut = sd::ConstantTadHelper::getInstance().tadForDimensions(gradOut->shapeInfo(), dimensions);
             auto inputTads = packX.specialShapeInfo();
             auto inputTadOffsets = packX.specialOffsets();
             auto outputTads = packZ.specialShapeInfo();
@@ -379,9 +379,9 @@ namespace helpers {
         }
         else {
             std::vector<int> dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});
-            auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions);
-            auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions);
-            auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->shapeInfo(), dimensions);
+            auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimensions);
+            auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimensions);
+            auto packGradOut = sd::ConstantTadHelper::getInstance().tadForDimensions(gradOut->shapeInfo(), dimensions);
             auto inputTads = packX.specialShapeInfo();
             auto inputTadOffsets = packX.specialOffsets();
             auto outputTads = packZ.specialShapeInfo();
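All of the segment_* hunks above compute `dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0})`, i.e. the axes the TADs should cover are everything except the segment axis 0. A toy reimplementation of what that helper computes follows, for illustration only; the real helper lives in libnd4j's shape utilities:

    #include <algorithm>
    #include <cassert>
    #include <vector>

    // Toy version of ShapeUtils::evalDimsToExclude: given a rank and the axes
    // a reduction operates on, return the complementary axes.
    static std::vector<int> evalDimsToExcludeSketch(int rank, std::vector<int> axes) {
        std::sort(axes.begin(), axes.end());
        std::vector<int> out;
        for (int d = 0; d < rank; d++)
            if (!std::binary_search(axes.begin(), axes.end(), d))
                out.push_back(d);
        return out;
    }

    int main() {
        // For a rank-3 input segmented along axis 0, TADs cover axes {1, 2}:
        auto dims = evalDimsToExcludeSketch(3, {0});
        assert(dims == (std::vector<int>{1, 2}));
        return 0;
    }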
diff --git a/libnd4j/include/ops/declarable/helpers/cuda/solve.cu b/libnd4j/include/ops/declarable/helpers/cuda/solve.cu
index cf8308bbe..43ef78c3e 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/solve.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/solve.cu
@@ -73,12 +73,12 @@ namespace sd {
             helpers::lu(context, leftInput, &leftOutput, &permutations);
             auto leftLower = leftOutput.dup();
             auto rightOutput = rightInput->ulike();
-            auto leftLowerTad = ConstantTadHelper::getInstance()->tadForDimensions(leftLower.shapeInfo(), {-2, -1});
+            auto leftLowerTad = ConstantTadHelper::getInstance().tadForDimensions(leftLower.shapeInfo(), {-2, -1});
             auto stream = context->getCudaStream();
             oneOnDiagonalKernel<<<128, 256, 256, *stream>>>(leftLower.dataBuffer()->specialAsT(), leftLower.specialShapeInfo(), leftLowerTad.specialShapeInfo(), leftLowerTad.specialOffsets(), leftLowerTad.numberOfTads(), leftLower.sizeAt(-1));
             auto P = leftOutput.ulike(); P.nullify();
-            auto PTad = ConstantTadHelper::getInstance()->tadForDimensions(P.shapeInfo(), {-2, -1});
-            auto permutationsTad = ConstantTadHelper::getInstance()->tadForDimensions(permutations.shapeInfo(), {-1});
+            auto PTad = ConstantTadHelper::getInstance().tadForDimensions(P.shapeInfo(), {-2, -1});
+            auto permutationsTad = ConstantTadHelper::getInstance().tadForDimensions(permutations.shapeInfo(), {-1});
             restorePermutationsKernel<<<128, 256, 256, *stream>>>(P.dataBuffer()->specialAsT(), P.specialShapeInfo(), permutations.dataBuffer()->specialAsT(),
                     PTad.specialShapeInfo(), PTad.specialOffsets(), permutationsTad.specialShapeInfo(), permutationsTad.specialOffsets(), permutationsTad.numberOfTads(), permutations.sizeAt(-1));
             P.tickWriteDevice();
@@ -120,8 +120,8 @@ namespace sd {
         template
         static void adjointMatrix_(sd::LaunchContext* context, NDArray const* input, NDArray* output) {
             NDArray::prepareSpecialUse({output}, {input});
-            auto inputTads = ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), {-2, -1});
-            auto outputTads = ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {-2, -1});
+            auto inputTads = ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), {-2, -1});
+            auto outputTads = ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), {-2, -1});
             auto stream = context->getCudaStream();
             auto outputBuf = reinterpret_cast(output->specialBuffer());
             auto rows = input->sizeAt(-2);
diff --git a/libnd4j/include/ops/declarable/helpers/cuda/stack.cu b/libnd4j/include/ops/declarable/helpers/cuda/stack.cu
index f0983b76c..2bb09c3b5 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/stack.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/stack.cu
@@ -91,7 +91,7 @@ static void stack_(sd::LaunchContext* context, const std::vector
     }
     else {
-        auto zTadPack = ConstantTadHelper::getInstance()->tadForDimensions(output.shapeInfo(), ShapeUtils::evalDimsToExclude(output.rankOf(), {dim}));
+        auto zTadPack = ConstantTadHelper::getInstance().tadForDimensions(output.shapeInfo(), ShapeUtils::evalDimsToExclude(output.rankOf(), {dim}));
         auto zTadShapeInfo = zTadPack.primaryShapeInfo();
         for (uint i = 0; i < numOfSubArrs; ++i) {
@@ -179,7 +179,7 @@ static void unstack_(sd::LaunchContext* context, const NDArray& input, const std
     }
     else {
-        auto xTadPack = ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), ShapeUtils::evalDimsToExclude(input.rankOf(), {dim}));
+        auto xTadPack = ConstantTadHelper::getInstance().tadForDimensions(input.shapeInfo(), ShapeUtils::evalDimsToExclude(input.rankOf(), {dim}));
         auto xTadShapeInfo = xTadPack.primaryShapeInfo();
         for (uint i = 0; i < numOfSubArrs; ++i) {
@@ -272,7 +272,7 @@ BUILD_SINGLE_TEMPLATE(template void unstack_, (sd::LaunchContext* context, const
 //            outArrs[i]->syncToDevice();
 //        input.syncToDevice();
 //        BUILD_SINGLE_SELECTOR(input.dataType(), unstackCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), input.specialBuffer(), input.specialShapeInfo(), dOutBuffers, outArrs[0]->specialShapeInfo(), axis), LIBND4J_TYPES);
 //        manager.synchronize();
@@ -350,7 +350,7 @@ BUILD_SINGLE_TEMPLATE(template void unstack_, (sd::LaunchContext* context, const
 //            inArrs[i]->syncToDevice();
 //        output.syncToDevice();
 //        BUILD_SINGLE_SELECTOR(output.dataType(), stackCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), dInBuffers, inArrs[0]->specialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), axis), LIBND4J_TYPES);
 //        manager.synchronize();
diff --git a/libnd4j/include/ops/declarable/helpers/cuda/top_k.cu b/libnd4j/include/ops/declarable/helpers/cuda/top_k.cu
index ce19d41cc..61aefa255 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/top_k.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/top_k.cu
@@ -91,7 +91,7 @@ int inTopKFunctor(sd::LaunchContext * context, const NDArray* predictions, const
     PointersManager manager(context, "in_top_k");
-    const auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(predictions->shapeInfo(), {1});
+    const auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(predictions->shapeInfo(), {1});
     const int threadsPerBlock = MAX_NUM_THREADS;
     const int blocksPerGrid = static_cast(packX.numberOfTads());
@@ -243,9 +243,9 @@ int inTopKFunctor(sd::LaunchContext * context, const NDArray* predictions, const
     template
     static int topKFunctor_(sd::LaunchContext * context, const NDArray* input, NDArray* values, NDArray* indices, const uint k, bool needSort) {
-        auto packX = ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), {input->rankOf() - 1});
-        auto packI = ConstantTadHelper::getInstance()->tadForDimensions(indices->shapeInfo(), {input->rankOf() - 1});
-        auto packZ = ConstantTadHelper::getInstance()->tadForDimensions(values->shapeInfo(), {input->rankOf() - 1});
+        auto packX = ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), {input->rankOf() - 1});
+        auto packI = ConstantTadHelper::getInstance().tadForDimensions(indices->shapeInfo(), {input->rankOf() - 1});
+        auto packZ = ConstantTadHelper::getInstance().tadForDimensions(values->shapeInfo(), {input->rankOf() - 1});
         auto tadLength = shape::length(packX.primaryShapeInfo());
diff --git a/libnd4j/include/ops/declarable/helpers/cuda/transforms.cu b/libnd4j/include/ops/declarable/helpers/cuda/transforms.cu
index f14b12e35..8d7f700dd 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/transforms.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/transforms.cu
@@ -321,7 +321,7 @@ void tileBP(sd::LaunchContext * context, const NDArray& gradO /*input*/, NDArray
 template
 static __global__ void fillShuffleKernel(T* input, Nd4jLong const* inputShape, T* output, Nd4jLong const* outputShape, Nd4jLong firstDim, int* indices, sd::graph::RandomGenerator* rng) {
-//        PRAGMA_OMP_PARALLEL_FOR_IF((firstDim-1) > Environment::getInstance()->tadThreshold())
+//        PRAGMA_OMP_PARALLEL_FOR_IF((firstDim-1) > Environment::getInstance().tadThreshold())
     auto tid = blockIdx.x * blockDim.x;
     auto step = blockDim.x * gridDim.x;
diff --git a/libnd4j/include/ops/declarable/helpers/cuda/triangular_solve.cu b/libnd4j/include/ops/declarable/helpers/cuda/triangular_solve.cu
index 6302262be..e77bb4e19 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/triangular_solve.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/triangular_solve.cu
@@ -141,9 +141,9 @@ namespace sd {
         static int triangularSolveFunctor_(sd::LaunchContext * context, NDArray* leftInput, NDArray* rightInput, bool lower, bool unitsOnDiag, NDArray* output) {
             NDArray::prepareSpecialUse({output}, {leftInput, rightInput});
-            auto leftTads = ConstantTadHelper::getInstance()->tadForDimensions(leftInput->shapeInfo(), {-2, -1});
-            auto rightTads = ConstantTadHelper::getInstance()->tadForDimensions(rightInput->shapeInfo(), {-2, -1});
-            auto outputTads = ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {-2, -1});
+            auto leftTads = ConstantTadHelper::getInstance().tadForDimensions(leftInput->shapeInfo(), {-2, -1});
+            auto rightTads = ConstantTadHelper::getInstance().tadForDimensions(rightInput->shapeInfo(), {-2, -1});
+            auto outputTads = ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), {-2, -1});
             auto stream = context->getCudaStream();
             T const* leftBuf = reinterpret_cast(leftInput->specialBuffer());
@@ -243,8 +243,8 @@ namespace sd {
         static void adjointTriangularMatrix_(sd::LaunchContext* context, NDArray const* input, bool const lower, NDArray* output) {
-            auto inputTads = ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), {-2, -1});
-            auto outputTads = ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {-2, -1});
+            auto inputTads = ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), {-2, -1});
+            auto outputTads = ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), {-2, -1});
             auto stream = context->getCudaStream();
             auto inputBuf = reinterpret_cast(input->specialBuffer());
             auto outputBuf = reinterpret_cast(output->specialBuffer());
diff --git a/libnd4j/include/ops/declarable/impl/BooleanOp.cpp b/libnd4j/include/ops/declarable/impl/BooleanOp.cpp
index 00079f9ae..07960497a 100644
--- a/libnd4j/include/ops/declarable/impl/BooleanOp.cpp
+++ b/libnd4j/include/ops/declarable/impl/BooleanOp.cpp
@@ -33,7 +33,7 @@ namespace sd {
      * Output shape of any BooleanOp is ALWAYS scalar
      */
    ShapeList *BooleanOp::calculateOutputShape(ShapeList *inputShape, sd::graph::Context &block) {
-        return SHAPELIST(ConstantShapeHelper::getInstance()->scalarShapeInfo(DataType::BOOL));
+        return SHAPELIST(ConstantShapeHelper::getInstance().scalarShapeInfo(DataType::BOOL));
     }
 
     bool BooleanOp::verify(sd::graph::Context &block) {
diff --git a/libnd4j/include/ops/declarable/impl/BroadcastableBoolOp.cpp b/libnd4j/include/ops/declarable/impl/BroadcastableBoolOp.cpp
index 8f0a6dcb8..634236d35 100644
--- a/libnd4j/include/ops/declarable/impl/BroadcastableBoolOp.cpp
+++ b/libnd4j/include/ops/declarable/impl/BroadcastableBoolOp.cpp
@@ -38,32 +38,32 @@ namespace sd {
             if(shape::isEmpty(x) || shape::isEmpty(y)) {
                 // this is edge case, [3, 4] + [] = []
                 if ((shape::isEmpty(x) && shape::rank(x) == 0) || (shape::isEmpty(y) && shape::rank(y) == 0)) {
-                    shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor::emptyDescriptor(dtype)));
+                    shapeList->push_back(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor::emptyDescriptor(dtype)));
                     return shapeList;
                 }
 
                 const Nd4jLong *newshape = nullptr;
                 ShapeUtils::evalBroadcastShapeInfo(x, y, true, newshape, block.workspace());
-                shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(newshape, dtype)));
+                shapeList->push_back(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(newshape, dtype)));
             } else if (shape::isScalar(x) && shape::isScalar(y)) {
                 if (shape::rank(x) >= shape::rank(y)) {
-                    shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(x, dtype)));
+                    shapeList->push_back(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(x, dtype)));
                 } else {
-                    shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(y, dtype)));
+                    shapeList->push_back(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(y, dtype)));
                 }
             } else if (shape::equalsSoft(x, y)) {
-                shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(x, dtype)));
+                shapeList->push_back(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(x, dtype)));
             } else if (shape::isScalar(x) && !shape::isScalar(y)) {
-                shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(y, dtype)));
+                shapeList->push_back(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(y, dtype)));
             } else if (!shape::isScalar(x) && shape::isScalar(y)) {
-                shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(x, dtype)));
+                shapeList->push_back(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(x, dtype)));
             } else if (ShapeUtils::areShapesBroadcastable(x, y)) {
                 const Nd4jLong *newshape = nullptr;
                 ShapeUtils::evalBroadcastShapeInfo(x, y, true, newshape, block.workspace());
-                shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(newshape, dtype)));
+                shapeList->push_back(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(newshape, dtype)));
             } else {
                 // in this case we'll throw exception later
-                shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(x, dtype)));
+                shapeList->push_back(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(x, dtype)));
             }
 
             return shapeList;
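The two broadcastable shape functions above walk the same decision ladder: empty operands, scalar vs scalar, softly equal shapes, scalar vs array, broadcastable shapes, and a fallback. A condensed sketch of the core broadcast step on plain shape vectors follows; the helper and types are simplified stand-ins, since the real code operates on shapeInfo buffers and carries a data type through ShapeDescriptor:

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    using Shape = std::vector<long long>;

    // Toy broadcast of two shapes, aligned from the trailing axis; mirrors the
    // "areShapesBroadcastable -> evalBroadcastShapeInfo" branch above.
    static bool tryBroadcast(const Shape& x, const Shape& y, Shape& out) {
        const size_t rank = std::max(x.size(), y.size());
        out.assign(rank, 1);
        for (size_t i = 0; i < rank; i++) {
            long long xv = i < x.size() ? x[x.size() - 1 - i] : 1;
            long long yv = i < y.size() ? y[y.size() - 1 - i] : 1;
            if (xv != yv && xv != 1 && yv != 1)
                return false;             // incompatible pair of extents
            out[rank - 1 - i] = std::max(xv, yv);
        }
        return true;
    }

    int main() {
        Shape z;
        if (tryBroadcast({3, 1, 4}, {5, 4}, z))   // -> {3, 5, 4}
            std::printf("broadcast rank %zu\n", z.size());
        return 0;
    }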
diff --git a/libnd4j/include/ops/declarable/impl/BroadcastableOp.cpp b/libnd4j/include/ops/declarable/impl/BroadcastableOp.cpp
index 7f7a14861..4611d49cb 100644
--- a/libnd4j/include/ops/declarable/impl/BroadcastableOp.cpp
+++ b/libnd4j/include/ops/declarable/impl/BroadcastableOp.cpp
@@ -36,7 +36,7 @@ namespace sd {
             auto outputs = _descriptor->getOutputTypesForOutput(0);
             sd::DataType dtype = block.dataType(0);
             if (block.dataType(0) != sd::DataType::BOOL && !(outputs.size() == 1 && outputs[0] == sd::DataType::BOOL)) {
-                if (Environment::getInstance()->isExperimentalBuild()) {
+                if (Environment::getInstance().isExperimentalBuild()) {
                     if (shape::length(y) > shape::length(x)) {
                         dtype = DataTypeUtils::pickPairwiseResultType(y, x);
                     } else {
@@ -51,33 +51,33 @@ namespace sd {
             if(shape::isEmpty(x) || shape::isEmpty(y)) {
                 // this is edge case, [3, 4] + [] = []
                 if ((shape::isEmpty(x) && shape::rank(x) == 0) || (shape::isEmpty(y) && shape::rank(y) == 0)) {
-                    shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor::emptyDescriptor(dtype)));
+                    shapeList->push_back(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor::emptyDescriptor(dtype)));
                     return shapeList;
                 }
 
                 const Nd4jLong *newshape = nullptr;
                 ShapeUtils::evalBroadcastShapeInfo(x, y, true, newshape, block.workspace());
-                shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(newshape, dtype)));
+                shapeList->push_back(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(newshape, dtype)));
             } else if (shape::isScalar(x) && shape::isScalar(y)) {
                 if (shape::rank(x) >= shape::rank(y)) {
-                    shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(x, dtype)));
+                    shapeList->push_back(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(x, dtype)));
                 } else {
-                    shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(y, dtype)));
+                    shapeList->push_back(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(y, dtype)));
                 }
             } else if (shape::equalsSoft(x, y)) {
-                shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(x, dtype)));
+                shapeList->push_back(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(x, dtype)));
             } else if (shape::isScalar(x) && !shape::isScalar(y)) {
-                shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(y, dtype)));
+                shapeList->push_back(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(y, dtype)));
             } else if (!shape::isScalar(x) && shape::isScalar(y)) {
-                shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(x, dtype)));
+                shapeList->push_back(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(x, dtype)));
             } else if (ShapeUtils::areShapesBroadcastable(x, y)) {
                 const Nd4jLong *newshape = nullptr;
                 ShapeUtils::evalBroadcastShapeInfo(x, y, true, newshape, block.workspace());
-                shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(newshape, dtype)));
+                shapeList->push_back(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(newshape, dtype)));
             } else {
                 // in this case we'll throw exception later
-                shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(x, dtype)));
+                shapeList->push_back(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(x, dtype)));
             }
 
             return shapeList;
diff --git a/libnd4j/include/ops/declarable/impl/DeclarableListOp.cpp b/libnd4j/include/ops/declarable/impl/DeclarableListOp.cpp
index 13aa763f8..d70355038 100644
--- a/libnd4j/include/ops/declarable/impl/DeclarableListOp.cpp
+++ b/libnd4j/include/ops/declarable/impl/DeclarableListOp.cpp
@@ -47,7 +47,7 @@ namespace sd {
     ShapeList* DeclarableListOp::calculateOutputShape(ShapeList* inputShape, sd::graph::Context& block) {
         // TODO: ensure this method isn't ever called
-        auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(block.dataType(), 'c', {1, 1});
+        auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(block.dataType(), 'c', {1, 1});
         return SHAPELIST(newShape);
     }
diff --git a/libnd4j/include/ops/declarable/impl/DeclarableOp.cpp b/libnd4j/include/ops/declarable/impl/DeclarableOp.cpp
index 713a02666..cd8d0bdd8 100644
--- a/libnd4j/include/ops/declarable/impl/DeclarableOp.cpp
+++ b/libnd4j/include/ops/declarable/impl/DeclarableOp.cpp
@@ -158,7 +158,7 @@ namespace sd {
         auto fp = ctx.isFastPath();
 
-        if (Environment::getInstance()->isProfiling()) {
+        if (Environment::getInstance().isProfiling()) {
             if (ctx.getVariableSpace() != nullptr && ctx.getVariableSpace()->flowPath() != nullptr) {
                 prof = ctx.getVariableSpace()->flowPath()->profile();
                 node = prof->nodeById(ctx.nodeId());
             }
@@ -166,7 +166,7 @@ namespace sd {
         if (ctx.isInplace()) {
-            if (Environment::getInstance()->isProfiling() && node != nullptr) {
+            if (Environment::getInstance().isProfiling() && node != nullptr) {
                 if (fp) {
                     //
                 } else {
@@ -227,7 +227,7 @@ namespace sd {
         ShapeList inSha;
         int results = 0;
 
-        if (Environment::getInstance()->isProfiling() && node != nullptr)
+        if (Environment::getInstance().isProfiling() && node != nullptr)
             inputStart = std::chrono::system_clock::now();
 
         int cntIn = 0;
@@ -263,7 +263,7 @@ namespace sd {
         }
 
         // optionally saving input time
-        if (Environment::getInstance()->isProfiling() && node != nullptr) {
+        if (Environment::getInstance().isProfiling() && node != nullptr) {
             inputEnd = std::chrono::system_clock::now();
             auto inputTime = std::chrono::duration_cast(inputEnd - inputStart).count();
             node->setInputTime(inputTime);
@@ -279,7 +279,7 @@ namespace sd {
         results = outSha->size();
 
         // optionally saving shapeTime
-        if (Environment::getInstance()->isProfiling() && node != nullptr) {
+        if (Environment::getInstance().isProfiling() && node != nullptr) {
             shapeEnd = std::chrono::system_clock::now();
             auto prepTime = std::chrono::duration_cast(shapeEnd - shapeStart).count();
             node->setShapeFunctionTime(prepTime);
@@ -299,7 +299,7 @@ namespace sd {
             std::pair pair(ctx.nodeId(), cnt++);
 
             if (!ctx.isValueAvailable(pair.second)) {
-                if (Environment::getInstance()->isDebugAndVerbose())
+                if (Environment::getInstance().isDebugAndVerbose())
                     shape::printShapeInfoLinear("Going to create variable with shape", out);
 
                 // we're creating non-initialized array here
@@ -367,7 +367,7 @@ namespace sd {
         delete outSha;
 
         // saving arrayTime
-        if (Environment::getInstance()->isProfiling() && node != nullptr) {
+        if (Environment::getInstance().isProfiling() && node != nullptr) {
             arrayEnd = std::chrono::system_clock::now();
             auto arrayTime = std::chrono::duration_cast(arrayEnd - arrayStart).count();
             node->setArrayTime(arrayTime);
@@ -599,7 +599,7 @@ namespace sd {
         Nd4jLong prepTime, outerTime;
 
         Nd4jLong memoryBefore = block->workspace() == nullptr ? 0L : block->workspace()->getSpilledSize() + block->workspace()->getUsedSize();
-        if (Environment::getInstance()->isProfiling())
+        if (Environment::getInstance().isProfiling())
             timeEnter = std::chrono::system_clock::now();
 
         // basic validation: ensure inputs are set
@@ -615,7 +615,7 @@ namespace sd {
         // this method will allocate output NDArrays for this op
         auto numOutputs = this->prepareOutputs(*block);
 
-        if (Environment::getInstance()->isProfiling()) {
+        if (Environment::getInstance().isProfiling()) {
             timeStart = std::chrono::system_clock::now();
             prepTime = std::chrono::duration_cast(timeStart - timeEnter).count();
         }
@@ -625,10 +625,10 @@ namespace sd {
         bool hasHelper = false;
 
         // platform helpers use might be forbidden for various reasons, so we'll check it out first
-        if (block->helpersAllowed() && sd::Environment::getInstance()->helpersAllowed()) {
+        if (block->helpersAllowed() && sd::Environment::getInstance().helpersAllowed()) {
             // if we have platform-specific helper for this op - invoke it
-            if (OpRegistrator::getInstance()->hasHelper(this->getOpHash(), block->engine())) {
-                auto helper = OpRegistrator::getInstance()->getPlatformHelper(this->getOpHash(), block->engine());
+            if (OpRegistrator::getInstance().hasHelper(this->getOpHash(), block->engine())) {
+                auto helper = OpRegistrator::getInstance().getPlatformHelper(this->getOpHash(), block->engine());
                 if (helper->isUsable(*block)) {
                     status = helper->invokeHelper(*block);
                     hasHelper = true;
@@ -641,13 +641,13 @@ namespace sd {
             status = this->validateAndExecute(*block);
 
         // optionally saving execution time
-        if (Environment::getInstance()->isProfiling()) {
+        if (Environment::getInstance().isProfiling()) {
            timeEnd = std::chrono::system_clock::now();
            outerTime = std::chrono::duration_cast(timeEnd - timeStart).count();
            block->setInnerTime(outerTime);
        }
 
-        if (Environment::getInstance()->isProfiling() && block->getVariableSpace() != nullptr) {
+        if (Environment::getInstance().isProfiling() && block->getVariableSpace() != nullptr) {
             auto fp = block->getVariableSpace()->flowPath();
             if (fp != nullptr) {
                 auto p = fp->profile();
@@ -663,7 +663,7 @@ namespace sd {
 
         // now we print out all outputs for this node
-        if (sd::Environment::getInstance()->isDebugAndVerbose()) {
+        if (sd::Environment::getInstance().isDebugAndVerbose()) {
             auto vs = block->getVariableSpace();
 
             for (int e = 0; e < numOutputs; e++) {
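The DeclarableOp hunks above bracket each execution phase (input prep, shape function, array allocation, op execution) with system_clock timestamps whenever profiling is enabled. A condensed sketch of that timing pattern follows; the flag and the template helper are stand-ins for Environment's profiling switch and the NodeProfile setters:

    #include <chrono>
    #include <cstdio>

    // Stand-in for Environment::getInstance().isProfiling().
    static bool profilingEnabled = true;

    // Take a timestamp before and after a phase and return the delta in
    // nanoseconds, as the setInputTime/setShapeFunctionTime calls above do.
    template <typename F>
    static long long timePhaseNs(F&& phase) {
        if (!profilingEnabled) { phase(); return 0; }
        auto start = std::chrono::system_clock::now();
        phase();
        auto end = std::chrono::system_clock::now();
        return std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count();
    }

    int main() {
        long long shapeTime = timePhaseNs([] { /* calculateOutputShape() would run here */ });
        std::printf("shape function took %lld ns\n", shapeTime);
        return 0;
    }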
diff --git a/libnd4j/include/ops/declarable/impl/DeclarableReductionOp.cpp b/libnd4j/include/ops/declarable/impl/DeclarableReductionOp.cpp
index 4f6646694..2dd281991 100644
--- a/libnd4j/include/ops/declarable/impl/DeclarableReductionOp.cpp
+++ b/libnd4j/include/ops/declarable/impl/DeclarableReductionOp.cpp
@@ -52,7 +52,7 @@ namespace sd {
         // special case - output is scalar
         if (dims.size() == 0 || (dims.size() == 1 && dims.at(0) == sd::DataTypeUtils::max())) {
-            auto newShape = ConstantShapeHelper::getInstance()->scalarShapeInfo(block.dataType());
+            auto newShape = ConstantShapeHelper::getInstance().scalarShapeInfo(block.dataType());
             return SHAPELIST(newShape);
         }
diff --git a/libnd4j/include/ops/declarable/impl/LegacyBroadcastBoolOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyBroadcastBoolOp.cpp
index f7cb3de92..a171ff339 100644
--- a/libnd4j/include/ops/declarable/impl/LegacyBroadcastBoolOp.cpp
+++ b/libnd4j/include/ops/declarable/impl/LegacyBroadcastBoolOp.cpp
@@ -41,11 +41,11 @@ namespace sd {
             int opNum = block.opNum() < 0 ? this->_opNum : block.opNum();
 
-            auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), dims);
+            auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(x->shapeInfo(), dims);
 
             PointersManager manager(block.launchContext(), "LegacyBroadcastBoolOp");
-            auto pTadShape = Environment::getInstance()->isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo();  //(Nd4jLong *) manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo));
-            auto pTadOffsets = Environment::getInstance()->isCPU() ? packX.primaryOffsets() : packX.specialOffsets();  //(Nd4jLong *) manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong));
+            auto pTadShape = Environment::getInstance().isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo();  //(Nd4jLong *) manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo));
+            auto pTadOffsets = Environment::getInstance().isCPU() ? packX.primaryOffsets() : packX.specialOffsets();  //(Nd4jLong *) manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong));
 
             REQUIRE_TRUE(shape::length(packX.primaryShapeInfo()) == y->lengthOf(), 0, "Length of broadcast TAD should be equal to length of Y operand, but got [%i] vs [%i]", (int) shape::length(packX.primaryShapeInfo()), (int) y->lengthOf());
@@ -57,10 +57,10 @@ namespace sd {
             else {
                 // this is rare, but possible use case - X and Z might have different shapes/strides/orders. In this case we prepare and pass separate TAD info
-                auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(z->shapeInfo(), dims);
+                auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(z->shapeInfo(), dims);
 
-                auto zTadShape = Environment::getInstance()->isCPU() ? packZ.primaryShapeInfo() : packZ.specialShapeInfo();  //(Nd4jLong *) manager.replicatePointer(tadZ.tadOnlyShapeInfo, shape::shapeInfoByteLength(tadZ.tadOnlyShapeInfo));
-                auto zTadOffsets = Environment::getInstance()->isCPU() ? packZ.primaryOffsets() : packZ.specialOffsets();  //(Nd4jLong *) manager.replicatePointer(tadZ.tadOffsets, tadZ.numTads * sizeof(Nd4jLong));
+                auto zTadShape = Environment::getInstance().isCPU() ? packZ.primaryShapeInfo() : packZ.specialShapeInfo();  //(Nd4jLong *) manager.replicatePointer(tadZ.tadOnlyShapeInfo, shape::shapeInfoByteLength(tadZ.tadOnlyShapeInfo));
+                auto zTadOffsets = Environment::getInstance().isCPU() ? packZ.primaryOffsets() : packZ.specialOffsets();  //(Nd4jLong *) manager.replicatePointer(tadZ.tadOffsets, tadZ.numTads * sizeof(Nd4jLong));
 
                 NativeOpExecutioner::execBroadcast(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(),
                                                    y->buffer(), y->shapeInfo(), y->specialBuffer(), y->specialShapeInfo(),
@@ -91,7 +91,7 @@ namespace sd {
         */
         ShapeList* LegacyBroadcastBoolOp::calculateOutputShape(ShapeList *inputShape, sd::graph::Context &block) {
             auto inShape = inputShape->at(0);
-            return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(inShape, DataType::BOOL)));
+            return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(inShape, DataType::BOOL)));
         }
     }
 }
diff --git a/libnd4j/include/ops/declarable/impl/LegacyBroadcastOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyBroadcastOp.cpp
index 82899bbdb..c47cc9040 100644
--- a/libnd4j/include/ops/declarable/impl/LegacyBroadcastOp.cpp
+++ b/libnd4j/include/ops/declarable/impl/LegacyBroadcastOp.cpp
@@ -47,14 +47,14 @@ namespace sd {
             int opNum = block.opNum() < 0 ? this->_opNum : block.opNum();
 
-            auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), dims);
+            auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(x->shapeInfo(), dims);
             auto tadLen = shape::length(packX.primaryShapeInfo());
             REQUIRE_TRUE(tadLen == y->lengthOf(), 0, "Length of broadcast TAD should be equal to length of Y operand, but got [%i] vs [%i]",tadLen, (int) y->lengthOf());
 
             PointersManager manager(block.launchContext(),"LegacyBroadcastOp");
-            auto pTadShape = Environment::getInstance()->isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo();  //(Nd4jLong *) manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo));
-            auto pTadOffsets = Environment::getInstance()->isCPU() ? packX.primaryOffsets() : packX.specialOffsets();  //(Nd4jLong *) manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong));
+            auto pTadShape = Environment::getInstance().isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo();  //(Nd4jLong *) manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo));
+            auto pTadOffsets = Environment::getInstance().isCPU() ? packX.primaryOffsets() : packX.specialOffsets();  //(Nd4jLong *) manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong));
 
             if (x == z)
                 NativeOpExecutioner::execBroadcast(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(),
@@ -62,10 +62,10 @@ namespace sd {
                                                    z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(),
                                                    dims.data(), dims.size(), pTadShape, pTadOffsets, pTadShape, pTadOffsets);
             else {
                 // this is rare, but possible use case - X and Z might have different shapes/strides/orders. In this case we prepare and pass separate TAD info
-                auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(z->shapeInfo(), dims);
+                auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(z->shapeInfo(), dims);
 
-                auto zTadShape = Environment::getInstance()->isCPU() ? packZ.primaryShapeInfo() : packZ.specialShapeInfo();  //(Nd4jLong *) manager.replicatePointer(tadZ.tadOnlyShapeInfo, shape::shapeInfoByteLength(tadZ.tadOnlyShapeInfo));
-                auto zTadOffsets = Environment::getInstance()->isCPU() ? packZ.primaryOffsets() : packZ.specialOffsets();  //(Nd4jLong *) manager.replicatePointer(tadZ.tadOffsets, tadZ.numTads * sizeof(Nd4jLong));
+                auto zTadShape = Environment::getInstance().isCPU() ? packZ.primaryShapeInfo() : packZ.specialShapeInfo();  //(Nd4jLong *) manager.replicatePointer(tadZ.tadOnlyShapeInfo, shape::shapeInfoByteLength(tadZ.tadOnlyShapeInfo));
+                auto zTadOffsets = Environment::getInstance().isCPU() ? packZ.primaryOffsets() : packZ.specialOffsets();  //(Nd4jLong *) manager.replicatePointer(tadZ.tadOffsets, tadZ.numTads * sizeof(Nd4jLong));
 
                 NativeOpExecutioner::execBroadcast(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(),
                                                    y->buffer(), y->shapeInfo(), y->specialBuffer(), y->specialShapeInfo(),
diff --git a/libnd4j/include/ops/declarable/impl/LegacyIndexReduceOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyIndexReduceOp.cpp
index 7fc6bf793..a9e8475c0 100644
--- a/libnd4j/include/ops/declarable/impl/LegacyIndexReduceOp.cpp
+++ b/libnd4j/include/ops/declarable/impl/LegacyIndexReduceOp.cpp
@@ -54,7 +54,7 @@ namespace sd {
                 newShape[6] = 1;
                 newShape[7] = 99;
 
-                auto result = ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(newShape, DataType::INT64));
+                auto result = ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(newShape, DataType::INT64));
                 RELEASE(newShape, block.getWorkspace());
                 return SHAPELIST(result);
             } else if (block.getAxis()->size()){
@@ -89,7 +89,7 @@ namespace sd {
                 newShape[6] = 1;
                 newShape[7] = 99;
 
-                auto result = ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(newShape, DataType::INT64));
+                auto result = ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(newShape, DataType::INT64));
                 RELEASE(newShape, block.getWorkspace());
                 return SHAPELIST(result);
             } else {
@@ -139,7 +139,7 @@ namespace sd {
                 if (dims.size() > 1)
                     std::sort(dims.begin(), dims.end());
 
-                auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), dims);
+                auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(x->shapeInfo(), dims);
 
                 NativeOpExecutioner::execIndexReduce(block.launchContext(), opNum,
                                                      x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(),
@@ -147,7 +147,7 @@ namespace sd {
                                                      reinterpret_cast(z->buffer()), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(),
                                                      nullptr, (int) dims.size(),
-                                                     Environment::getInstance()->isCPU() ? tadPack.primaryShapeInfo() : tadPack.specialShapeInfo(), Environment::getInstance()->isCPU() ? tadPack.primaryOffsets() : tadPack.specialOffsets());
+                                                     Environment::getInstance().isCPU() ? tadPack.primaryShapeInfo() : tadPack.specialShapeInfo(), Environment::getInstance().isCPU() ? tadPack.primaryOffsets() : tadPack.specialOffsets());
tadPack.primaryOffsets() : tadPack.specialOffsets()); } } else { // TF mode @@ -175,7 +175,7 @@ namespace sd { REQUIRE_TRUE(axis.size() > 0, 0, "Some dimensions required for reduction!"); - auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), axis); + auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(x->shapeInfo(), axis); NativeOpExecutioner::execIndexReduce(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), @@ -183,8 +183,8 @@ namespace sd { reinterpret_cast(z->buffer()), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), nullptr, (int) axis.size(), - Environment::getInstance()->isCPU() ? tadPack.primaryShapeInfo() : tadPack.specialShapeInfo(), - Environment::getInstance()->isCPU() ? tadPack.primaryOffsets() : tadPack.specialOffsets()); + Environment::getInstance().isCPU() ? tadPack.primaryShapeInfo() : tadPack.specialShapeInfo(), + Environment::getInstance().isCPU() ? tadPack.primaryOffsets() : tadPack.specialOffsets()); } } diff --git a/libnd4j/include/ops/declarable/impl/LegacyPairwiseTransformBoolOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyPairwiseTransformBoolOp.cpp index 11a05a76c..8b6e1406e 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyPairwiseTransformBoolOp.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyPairwiseTransformBoolOp.cpp @@ -67,7 +67,7 @@ namespace sd { */ ShapeList *LegacyPairwiseTransformBoolOp::calculateOutputShape(ShapeList *inputShape, sd::graph::Context &block) { auto inShape = inputShape->at(0); - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(inShape, DataType::BOOL))); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(inShape, DataType::BOOL))); } } } \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/impl/LegacyRandomOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyRandomOp.cpp index 085780c56..09c0a054a 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyRandomOp.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyRandomOp.cpp @@ -344,7 +344,7 @@ namespace sd { auto zShapeVector = zShapeArr->asVectorT(); auto dtype = block.dataType(); - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(dtype, 'c', zShapeVector)); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(dtype, 'c', zShapeVector)); } else throw std::runtime_error("LegacyRandomOp: Unknown input data type!"); } diff --git a/libnd4j/include/ops/declarable/impl/LegacyReduce3Op.cpp b/libnd4j/include/ops/declarable/impl/LegacyReduce3Op.cpp index f110c0c55..700e0dba9 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyReduce3Op.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyReduce3Op.cpp @@ -52,16 +52,16 @@ namespace sd { if (dims[e] < 0) dims[e] += x->rankOf(); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), dims); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(z->shapeInfo(), dims); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(x->shapeInfo(), dims); + auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(z->shapeInfo(), dims); REQUIRE_TRUE(dims.size() > 0, 0, "Some dimensions required for reduction!"); - auto xTadShape = Environment::getInstance()->isCPU() ?
packX.primaryShapeInfo() : packX.specialShapeInfo(); //(Nd4jLong *) manager.replicatePointer(tadX.tadOnlyShapeInfo, shape::shapeInfoByteLength(tadX.tadOnlyShapeInfo)); - auto xTadOffsets = Environment::getInstance()->isCPU() ? packX.primaryOffsets() : packX.specialOffsets(); //(Nd4jLong *) manager.replicatePointer(tadX.tadOffsets, tadX.numTads * sizeof(Nd4jLong)); + auto xTadShape = Environment::getInstance().isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo(); //(Nd4jLong *) manager.replicatePointer(tadX.tadOnlyShapeInfo, shape::shapeInfoByteLength(tadX.tadOnlyShapeInfo)); + auto xTadOffsets = Environment::getInstance().isCPU() ? packX.primaryOffsets() : packX.specialOffsets(); //(Nd4jLong *) manager.replicatePointer(tadX.tadOffsets, tadX.numTads * sizeof(Nd4jLong)); - auto yTadShape = Environment::getInstance()->isCPU() ? packZ.primaryShapeInfo() : packZ.specialOffsets(); //(Nd4jLong *) manager.replicatePointer(tadY.tadOnlyShapeInfo, shape::shapeInfoByteLength(tadY.tadOnlyShapeInfo)); - auto yTadOffsets = Environment::getInstance()->isCPU() ? packZ.primaryOffsets() : packZ.specialOffsets(); //(Nd4jLong *) manager.replicatePointer(tadY.tadOffsets, tadY.numTads * sizeof(Nd4jLong)); + auto yTadShape = Environment::getInstance().isCPU() ? packZ.primaryShapeInfo() : packZ.specialShapeInfo(); //(Nd4jLong *) manager.replicatePointer(tadY.tadOnlyShapeInfo, shape::shapeInfoByteLength(tadY.tadOnlyShapeInfo)); + auto yTadOffsets = Environment::getInstance().isCPU() ? packZ.primaryOffsets() : packZ.specialOffsets(); //(Nd4jLong *) manager.replicatePointer(tadY.tadOffsets, tadY.numTads * sizeof(Nd4jLong)); NativeOpExecutioner::execReduce3(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(z->dataType()), diff --git a/libnd4j/include/ops/declarable/impl/LegacyReduceBoolOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyReduceBoolOp.cpp index 4aced5aec..e16e71619 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyReduceBoolOp.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyReduceBoolOp.cpp @@ -75,10 +75,10 @@ namespace sd { REQUIRE_TRUE(dims.size() > 0, 0, "Some dimensions required for reduction!"); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), dims); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(x->shapeInfo(), dims); - auto pTadShape = Environment::getInstance()->isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo(); //manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); - auto pTadOffsets = Environment::getInstance()->isCPU() ? packX.primaryOffsets() : packX.specialOffsets(); //manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong)); + auto pTadShape = Environment::getInstance().isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo(); //manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); + auto pTadOffsets = Environment::getInstance().isCPU() ?
packX.primaryOffsets() : packX.specialOffsets(); //manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong)); NativeOpExecutioner::execReduceBool(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(x->dataType()), @@ -111,10 +111,10 @@ namespace sd { REQUIRE_TRUE(dims.size() > 0, 0, "Some dimensions required for reduction!"); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), dims); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(x->shapeInfo(), dims); - auto pTadShape = Environment::getInstance()->isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo(); //(Nd4jLong *) manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); - auto pTadOffsets = Environment::getInstance()->isCPU() ? packX.primaryOffsets() : packX.specialOffsets(); //(Nd4jLong *) manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong)); + auto pTadShape = Environment::getInstance().isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo(); //(Nd4jLong *) manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); + auto pTadOffsets = Environment::getInstance().isCPU() ? packX.primaryOffsets() : packX.specialOffsets(); //(Nd4jLong *) manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong)); NativeOpExecutioner::execReduceBool(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(x->dataType()), z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), dims.data(), (int) dims.size(), pTadShape, pTadOffsets); diff --git a/libnd4j/include/ops/declarable/impl/LegacyReduceFloatOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyReduceFloatOp.cpp index 55197844a..a0ff14858 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyReduceFloatOp.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyReduceFloatOp.cpp @@ -76,10 +76,10 @@ namespace sd { REQUIRE_TRUE(dims.size() > 0, 0, "Some dimensions required for reduction!"); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), dims); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(x->shapeInfo(), dims); - auto pTadShape = Environment::getInstance()->isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo(); //manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); - auto pTadOffsets = Environment::getInstance()->isCPU() ? packX.primaryOffsets() : packX.specialOffsets(); //manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong)); + auto pTadShape = Environment::getInstance().isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo(); //manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); + auto pTadOffsets = Environment::getInstance().isCPU() ? 
packX.primaryOffsets() : packX.specialOffsets(); //manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong)); NativeOpExecutioner::execReduceFloat(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(z->dataType()), z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), @@ -109,10 +109,10 @@ namespace sd { // TAD REQUIRE_TRUE(dims.size() > 0, 0, "Some dimensions required for reduction!"); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), dims); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(x->shapeInfo(), dims); - auto pTadShape = Environment::getInstance()->isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo(); //(Nd4jLong *) manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); - auto pTadOffsets = Environment::getInstance()->isCPU() ? packX.primaryOffsets() : packX.specialOffsets(); //(Nd4jLong *) manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong)); + auto pTadShape = Environment::getInstance().isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo(); //(Nd4jLong *) manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); + auto pTadOffsets = Environment::getInstance().isCPU() ? packX.primaryOffsets() : packX.specialOffsets(); //(Nd4jLong *) manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong)); NativeOpExecutioner::execReduceFloat(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(z->dataType()), z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), diff --git a/libnd4j/include/ops/declarable/impl/LegacyReduceLongOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyReduceLongOp.cpp index 628c4cb5f..f5007ff03 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyReduceLongOp.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyReduceLongOp.cpp @@ -78,10 +78,10 @@ namespace sd { REQUIRE_TRUE(dims.size() > 0, 0, "Some dimensions required for reduction!"); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), dims); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(x->shapeInfo(), dims); - auto pTadShape = Environment::getInstance()->isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo(); //(Nd4jLong *) manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); - auto pTadOffsets = Environment::getInstance()->isCPU() ? packX.primaryOffsets() : packX.specialOffsets(); //(Nd4jLong *) manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong)); + auto pTadShape = Environment::getInstance().isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo(); //(Nd4jLong *) manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); + auto pTadOffsets = Environment::getInstance().isCPU() ? 
packX.primaryOffsets() : packX.specialOffsets(); //(Nd4jLong *) manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong)); NativeOpExecutioner::execReduceLong(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(x->dataType()), @@ -111,10 +111,10 @@ namespace sd { // TAD REQUIRE_TRUE(dims.size() > 0, 0, "Some dimensions required for reduction!"); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), dims); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(x->shapeInfo(), dims); - auto pTadShape = Environment::getInstance()->isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo(); //(Nd4jLong *) manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); - auto pTadOffsets = Environment::getInstance()->isCPU() ? packX.primaryOffsets() : packX.specialOffsets(); //(Nd4jLong *) manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong)); + auto pTadShape = Environment::getInstance().isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo(); //(Nd4jLong *) manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); + auto pTadOffsets = Environment::getInstance().isCPU() ? packX.primaryOffsets() : packX.specialOffsets(); //(Nd4jLong *) manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong)); NativeOpExecutioner::execReduceLong(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(x->dataType()), z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), dims.data(), (int) dims.size(), pTadShape, pTadOffsets); diff --git a/libnd4j/include/ops/declarable/impl/LegacyReduceSameOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyReduceSameOp.cpp index e406a3a2d..299d19f14 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyReduceSameOp.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyReduceSameOp.cpp @@ -73,10 +73,10 @@ namespace sd { REQUIRE_TRUE(dims.size() > 0, 0, "Some dimensions required for reduction!"); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), dims); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(x->shapeInfo(), dims); - auto pTadShape = Environment::getInstance()->isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo(); //(Nd4jLong *) manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); - auto pTadOffsets = Environment::getInstance()->isCPU() ? packX.primaryOffsets() : packX.specialOffsets(); //(Nd4jLong *) manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong)); + auto pTadShape = Environment::getInstance().isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo(); //(Nd4jLong *) manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); + auto pTadOffsets = Environment::getInstance().isCPU() ? 
packX.primaryOffsets() : packX.specialOffsets(); //(Nd4jLong *) manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong)); NativeOpExecutioner::execReduceSame(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(z->dataType()), @@ -106,10 +106,10 @@ namespace sd { // TAD REQUIRE_TRUE(dims.size() > 0, 0, "Some dimensions required for reduction!"); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), dims); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(x->shapeInfo(), dims); - auto pTadShape = Environment::getInstance()->isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo(); //(Nd4jLong *) manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); - auto pTadOffsets = Environment::getInstance()->isCPU() ? packX.primaryOffsets() : packX.specialOffsets(); //(Nd4jLong *) manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong)); + auto pTadShape = Environment::getInstance().isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo(); //(Nd4jLong *) manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); + auto pTadOffsets = Environment::getInstance().isCPU() ? packX.primaryOffsets() : packX.specialOffsets(); //(Nd4jLong *) manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong)); NativeOpExecutioner::execReduceSame(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(z->dataType()), z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), diff --git a/libnd4j/include/ops/declarable/impl/LegacyScalarOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyScalarOp.cpp index 3e73b10f5..0c700b88b 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyScalarOp.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyScalarOp.cpp @@ -72,7 +72,7 @@ namespace sd { x->applyScalarArr(static_cast(opNum), y, *z); // NDArray::prepareSpecialUse({z}, {x, &y}); - // NativeOpExecutioner::execScalar(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), y.buffer(), y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), extras.argumentsAsT(z->dataType(), 1)); + // NativeOpExecutioner::execScalar(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), y.buffer(), y.shapeInfo(), y.specialBuffer(), y.special(), extras.argumentsAsT(z->dataType(), 1)); manager.synchronize(); } else { diff --git a/libnd4j/include/ops/declarable/impl/LegacyStatsOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyStatsOp.cpp index b8694f9ff..4a60064b5 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyStatsOp.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyStatsOp.cpp @@ -58,10 +58,10 @@ namespace sd { REQUIRE_TRUE(dims.size() > 0, 0, "Some dimensions required for reduction!"); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), dims); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(x->shapeInfo(), dims); - auto pTadShape = Environment::getInstance()->isCPU() ?
packX.primaryShapeInfo() : packX.specialShapeInfo(); //(Nd4jLong *) manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); - auto pTadOffsets = Environment::getInstance()->isCPU() ? packX.primaryOffsets() : packX.specialOffsets(); //(Nd4jLong *) manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong)); + auto pTadShape = Environment::getInstance().isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo(); //(Nd4jLong *) manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); + auto pTadOffsets = Environment::getInstance().isCPU() ? packX.primaryOffsets() : packX.specialOffsets(); //(Nd4jLong *) manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong)); NativeOpExecutioner::execSummaryStats(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(z->dataType()), z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), dims.data(), (int) dims.size(), pTadShape, pTadOffsets, biasCorrected); diff --git a/libnd4j/include/ops/declarable/impl/LegacyTransformBoolOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyTransformBoolOp.cpp index a0651d1fc..3bf4f1ff4 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyTransformBoolOp.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyTransformBoolOp.cpp @@ -65,7 +65,7 @@ namespace sd { */ ShapeList *LegacyTransformBoolOp::calculateOutputShape(ShapeList *inputShape, sd::graph::Context &block) { auto inShape = inputShape->at(0); - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(inShape, DataType::BOOL))); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(inShape, DataType::BOOL))); } } } \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/impl/OpDescriptor.cpp b/libnd4j/include/ops/declarable/impl/OpDescriptor.cpp index 398c11729..84c1bc291 100644 --- a/libnd4j/include/ops/declarable/impl/OpDescriptor.cpp +++ b/libnd4j/include/ops/declarable/impl/OpDescriptor.cpp @@ -33,7 +33,7 @@ namespace sd { _numOutputs = 1; _opName = opName; - _hash = sd::ops::HashHelper::getInstance()->getLongHash(_opName); + _hash = sd::ops::HashHelper::getInstance().getLongHash(_opName); _opClass = sd::graph::OpClass_CONDITIONAL; _scalar = isScalar; @@ -44,7 +44,7 @@ namespace sd { _numOutputs = 1; _opName = opName; - _hash = sd::ops::HashHelper::getInstance()->getLongHash(_opName); + _hash = sd::ops::HashHelper::getInstance().getLongHash(_opName); _opClass = sd::graph::OpClass_CONDITIONAL; _scalar = isScalar; @@ -77,7 +77,7 @@ namespace sd { std::string tmp(opName); _opName = tmp; _allowsInplace = allowsInplace; - _hash = sd::ops::HashHelper::getInstance()->getLongHash(tmp); + _hash = sd::ops::HashHelper::getInstance().getLongHash(tmp); _divergent = false; // just default value diff --git a/libnd4j/include/ops/declarable/impl/OpRegistrator.cpp b/libnd4j/include/ops/declarable/impl/OpRegistrator.cpp index 65d694dea..327cb0482 100644 --- a/libnd4j/include/ops/declarable/impl/OpRegistrator.cpp +++ b/libnd4j/include/ops/declarable/impl/OpRegistrator.cpp @@ -31,31 +31,29 @@ namespace sd { template __registrator::__registrator() { auto ptr = new OpName(); - OpRegistrator::getInstance()->registerOperation(ptr); + OpRegistrator::getInstance().registerOperation(ptr); } template __registratorSynonym::__registratorSynonym(const char *name, const char *oname) { - auto ptr = 
reinterpret_cast(OpRegistrator::getInstance()->getOperation(oname)); + auto ptr = reinterpret_cast(OpRegistrator::getInstance().getOperation(oname)); if (ptr == nullptr) { std::string newName(name); std::string oldName(oname); - OpRegistrator::getInstance()->updateMSVC(sd::ops::HashHelper::getInstance()->getLongHash(newName), oldName); + OpRegistrator::getInstance().updateMSVC(sd::ops::HashHelper::getInstance().getLongHash(newName), oldName); return; } - OpRegistrator::getInstance()->registerOperation(name, ptr); + OpRegistrator::getInstance().registerOperation(name, ptr); } /////////////////////////////// - OpRegistrator* OpRegistrator::getInstance() { - if (!_INSTANCE) - _INSTANCE = new sd::ops::OpRegistrator(); - - return _INSTANCE; + OpRegistrator& OpRegistrator::getInstance() { + static OpRegistrator instance; + return instance; } @@ -89,21 +87,15 @@ namespace sd { } void OpRegistrator::sigIntHandler(int sig) { -#ifndef _RELEASE - delete OpRegistrator::getInstance(); -#endif + } void OpRegistrator::exitHandler() { -#ifndef _RELEASE - delete OpRegistrator::getInstance(); -#endif + } void OpRegistrator::sigSegVHandler(int sig) { -#ifndef _RELEASE - delete OpRegistrator::getInstance(); -#endif + } OpRegistrator::~OpRegistrator() { @@ -156,7 +148,7 @@ namespace sd { std::pair pair(str, op); _declarablesD.insert(pair); - auto hash = sd::ops::HashHelper::getInstance()->getLongHash(str); + auto hash = sd::ops::HashHelper::getInstance().getLongHash(str); std::pair pair2(hash, op); _declarablesLD.insert(pair2); return true; @@ -256,8 +248,6 @@ namespace sd { return result; } - - sd::ops::OpRegistrator* sd::ops::OpRegistrator::_INSTANCE = 0; } } diff --git a/libnd4j/include/ops/declarable/impl/PlatformHelper.cpp b/libnd4j/include/ops/declarable/impl/PlatformHelper.cpp index dfc18d33b..245626c09 100644 --- a/libnd4j/include/ops/declarable/impl/PlatformHelper.cpp +++ b/libnd4j/include/ops/declarable/impl/PlatformHelper.cpp @@ -27,7 +27,7 @@ namespace sd { PlatformHelper::PlatformHelper(const char *name, samediff::Engine engine) { // we just store name/hash of target operation _name = std::string(name); - _hash = HashHelper::getInstance()->getLongHash(_name); + _hash = HashHelper::getInstance().getLongHash(_name); _engine = engine; } diff --git a/libnd4j/include/ops/impl/gemm.cpp b/libnd4j/include/ops/impl/gemm.cpp index 0c4ab167c..8632ddcb9 100644 --- a/libnd4j/include/ops/impl/gemm.cpp +++ b/libnd4j/include/ops/impl/gemm.cpp @@ -68,7 +68,7 @@ namespace sd { if (beta == 0.0) { Z z = 0.f; int length = M*N; - if (length <= Environment::getInstance()->elementwiseThreshold()) { + if (length <= Environment::getInstance().elementwiseThreshold()) { for (int r = 0; r < length; r++) C[r] = z; } else { diff --git a/libnd4j/include/ops/impl/specials_double.hpp b/libnd4j/include/ops/impl/specials_double.hpp index 1eaf3fbc0..d219220ac 100644 --- a/libnd4j/include/ops/impl/specials_double.hpp +++ b/libnd4j/include/ops/impl/specials_double.hpp @@ -224,8 +224,8 @@ PRAGMA_OMP_SINGLE_ARGS(nowait) auto x = reinterpret_cast(vx); auto y = reinterpret_cast(vy); - auto packX = ConstantTadHelper::getInstance()->tadForDimensions(xShapeInfo, dimension, dimensionLength); - auto packY = ConstantTadHelper::getInstance()->tadForDimensions(yShapeInfo, dimension, dimensionLength); + auto packX = ConstantTadHelper::getInstance().tadForDimensions(xShapeInfo, dimension, dimensionLength); + auto packY = ConstantTadHelper::getInstance().tadForDimensions(yShapeInfo, dimension, dimensionLength); auto xLength = 
shape::length(xShapeInfo); auto xTadLength = shape::length(packX.primaryShapeInfo()); @@ -248,8 +248,8 @@ PRAGMA_OMP_SINGLE_ARGS(nowait) auto x = reinterpret_cast(vx); auto y = reinterpret_cast(vy); - auto packX = ConstantTadHelper::getInstance()->tadForDimensions(xShapeInfo, dimension, dimensionLength); - auto packY = ConstantTadHelper::getInstance()->tadForDimensions(yShapeInfo, dimension, dimensionLength); + auto packX = ConstantTadHelper::getInstance().tadForDimensions(xShapeInfo, dimension, dimensionLength); + auto packY = ConstantTadHelper::getInstance().tadForDimensions(yShapeInfo, dimension, dimensionLength); auto xLength = shape::length(xShapeInfo); auto xTadLength = shape::length(packX.primaryShapeInfo()); diff --git a/libnd4j/include/ops/special_random_ops.h b/libnd4j/include/ops/special_random_ops.h index 08808e67c..f9bacf5cb 100644 --- a/libnd4j/include/ops/special_random_ops.h +++ b/libnd4j/include/ops/special_random_ops.h @@ -163,7 +163,7 @@ namespace randomOps { int elementsPerThread = zLength / TAD_THRESHOLD; int _threads = sd::math::nd4j_max(1, elementsPerThread); - _threads = sd::math::nd4j_min(_threads, sd::Environment::getInstance()->maxThreads()); + _threads = sd::math::nd4j_min(_threads, sd::Environment::getInstance().maxThreads()); if (zEWS >= 1 && xEWS >= 1 && yEWS >= 1) { auto func = PRAGMA_THREADS_FOR { @@ -315,7 +315,7 @@ namespace randomOps { int elementsPerThread = middle / TAD_THRESHOLD; int _threads = sd::math::nd4j_max(1, elementsPerThread); - _threads = sd::math::nd4j_min(_threads, sd::Environment::getInstance()->maxThreads()); + _threads = sd::math::nd4j_min(_threads, sd::Environment::getInstance().maxThreads()); int span = (middle / _threads) + 8; @@ -434,7 +434,7 @@ namespace randomOps { int elementsPerThread = zLength / TAD_THRESHOLD; int _threads = sd::math::nd4j_max(1, elementsPerThread); - _threads = sd::math::nd4j_min(_threads, sd::Environment::getInstance()->maxThreads()); + _threads = sd::math::nd4j_min(_threads, sd::Environment::getInstance().maxThreads()); T prob = extraArguments[1]; @@ -542,7 +542,7 @@ namespace randomOps { int elementsPerThread = zLength / TAD_THRESHOLD; int _threads = sd::math::nd4j_max(1, elementsPerThread); - _threads = sd::math::nd4j_min(_threads, sd::Environment::getInstance()->maxThreads()); + _threads = sd::math::nd4j_min(_threads, sd::Environment::getInstance().maxThreads()); T prob = extraArguments[1]; @@ -684,7 +684,7 @@ namespace randomOps { Nd4jLong middle = zLength / 2 + (zLength % 2); int elementsPerThread = middle / TAD_THRESHOLD; int _threads = sd::math::nd4j_max(1, elementsPerThread); - _threads = sd::math::nd4j_min(_threads, sd::Environment::getInstance()->maxThreads()); + _threads = sd::math::nd4j_min(_threads, sd::Environment::getInstance().maxThreads()); const T epsilon = static_cast(1e-5); @@ -801,7 +801,7 @@ namespace randomOps { int elementsPerThread = middle / TAD_THRESHOLD; int _threads = sd::math::nd4j_max(1, elementsPerThread); - _threads = sd::math::nd4j_min(_threads, sd::Environment::getInstance()->maxThreads()); + _threads = sd::math::nd4j_min(_threads, sd::Environment::getInstance().maxThreads()); int span = (zLength / _threads) + 8; diff --git a/libnd4j/include/system/Environment.h b/libnd4j/include/system/Environment.h index 392e70871..9b2a4b65b 100644 --- a/libnd4j/include/system/Environment.h +++ b/libnd4j/include/system/Environment.h @@ -62,11 +62,9 @@ namespace sd{ // device compute capability for CUDA std::vector _capabilities; - static Environment* _instance; - Environment(); - 
~Environment(); public: + ~Environment(); /** * These 3 fields are mostly for CUDA/cuBLAS version tracking */ @@ -74,7 +72,7 @@ namespace sd{ int _blasMinorVersion = 0; int _blasPatchVersion = 0; - static Environment* getInstance(); + static Environment& getInstance(); bool isVerbose(); void setVerbose(bool reallyVerbose); diff --git a/libnd4j/include/system/op_boilerplate.h b/libnd4j/include/system/op_boilerplate.h index 1df4f0047..0c2630f22 100644 --- a/libnd4j/include/system/op_boilerplate.h +++ b/libnd4j/include/system/op_boilerplate.h @@ -118,8 +118,8 @@ #endif -#define ELEMENT_THRESHOLD sd::Environment::getInstance()->elementwiseThreshold() -#define TAD_THRESHOLD sd::Environment::getInstance()->tadThreshold() +#define ELEMENT_THRESHOLD sd::Environment::getInstance().elementwiseThreshold() +#define TAD_THRESHOLD sd::Environment::getInstance().tadThreshold() #define SHAPELIST(...) new ShapeList({__VA_ARGS__}, block.workspace() != nullptr) @@ -129,8 +129,8 @@ #define PRINT_FIRST(...) printf(__VA_ARGS__); fflush(stdout) #endif -#define DEBUG_CALL(STREAM) if (sd::Environment::getInstance()->isDebug()) { cudaError_t tRes = cudaStreamSynchronize(*STREAM); checkCudaErrors(tRes); if (tRes != 0) { throw std::runtime_error(); }; } -#define DEBUG_KERNEL(STREAM, OP_NUM) if (sd::Environment::getInstance()->isDebug()) { cudaError_t tRes = cudaStreamSynchronize(*STREAM); checkCudaErrors(tRes); if (tRes != 0) {std::string tFile(__FILE__); std::string tOp = "Kernel OpNum failed: [" + sd::StringUtils::valueToString(OP_NUM) + std::string("]; File: ") + tFile + std::string(":") + sd::StringUtils::valueToString(__LINE__); throw std::runtime_error(tOp.c_str()); }; } +#define DEBUG_CALL(STREAM) if (sd::Environment::getInstance().isDebug()) { cudaError_t tRes = cudaStreamSynchronize(*STREAM); checkCudaErrors(tRes); if (tRes != 0) { throw std::runtime_error(); }; } +#define DEBUG_KERNEL(STREAM, OP_NUM) if (sd::Environment::getInstance().isDebug()) { cudaError_t tRes = cudaStreamSynchronize(*STREAM); checkCudaErrors(tRes); if (tRes != 0) {std::string tFile(__FILE__); std::string tOp = "Kernel OpNum failed: [" + sd::StringUtils::valueToString(OP_NUM) + std::string("]; File: ") + tFile + std::string(":") + sd::StringUtils::valueToString(__LINE__); throw std::runtime_error(tOp.c_str()); }; } #define LAUNCH(A, B, C, D) <<>> @@ -1112,7 +1112,7 @@ #define _EXPAND_OP_CALL_1(NAME, TYPE, PARAMZ, NUM_A, TYPE_A) NAME>PARAMZ; #define _EXPAND_OP_DIRECT(PARAMZ, NUM_A, TYPE_A) case NUM_A: { z = TYPE_A::op PARAMZ; break; } -#define _EXPAND_OP_CALL_T(TYPE, NUM_A, TYPE_A) OpTracker::getInstance()->storeOperation(TYPE, #TYPE_A, NUM_A); +#define _EXPAND_OP_CALL_T(TYPE, NUM_A, TYPE_A) OpTracker::getInstance().storeOperation(TYPE, #TYPE_A, NUM_A); #define _EXPAND_FACTORY_CALL(TYPE, LAYER_ID, LAYER_NAME, ACTIVATION_ID, ACTIVATION_NAME) if (activationNum == ACTIVATION_ID && layerNum == LAYER_ID) { return new LAYER_NAME>(); }; @@ -1256,7 +1256,7 @@ struct __registrator_##NAME {\ __registrator_##NAME() {\ OpName *ptr = new OpName(); \ - OpRegistrator::getInstance()->registerOperation(ptr); \ + OpRegistrator::getInstance().registerOperation(ptr); \ }\ };\ static sd::ops::__registrator_##NAME zzz_register_opd_##NAME; @@ -1269,7 +1269,7 @@ struct __registrator_##NAME {\ __registrator_##NAME() {\ OpName *ptr = new OpName(); \ - OpRegistrator::getInstance()->registerOperation(ptr); \ + OpRegistrator::getInstance().registerOperation(ptr); \ }\ };\ static sd::ops::__registrator_##NAME zzz_register_opd_##NAME; @@ -1332,7 +1332,7 @@ auto 
shapeList = SHAPELIST(); \ auto opLimit = this->getOpDescriptor()->getNumberOfOutputs() < 1 ? block.width() : this->getOpDescriptor()->getNumberOfOutputs(); \ for (int e = 0; e < opLimit; e++) { \ - auto newshape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inputShape->at(e)), shape::order(inputShape->at(e)), shape::rank(inputShape->at(e)), shape::shapeOf(inputShape->at(e))); \ + auto newshape = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inputShape->at(e)), shape::order(inputShape->at(e)), shape::rank(inputShape->at(e)), shape::shapeOf(inputShape->at(e))); \ shapeList->push_back(newshape); \ } \ return shapeList; \ @@ -1343,14 +1343,14 @@ #define DECLARE_SYN(NAME, ORIGINAL) template \ struct __registratorSynonym_##NAME {\ __registratorSynonym_##NAME(const char *name, const char *oname) {\ - auto ptr = reinterpret_cast(OpRegistrator::getInstance()->getOperation(oname)); \ + auto ptr = reinterpret_cast(OpRegistrator::getInstance().getOperation(oname)); \ if (ptr == nullptr) { \ std::string newName(name); \ std::string oldName(oname); \ - OpRegistrator::getInstance()->updateMSVC(sd::ops::HashHelper::getInstance()->getLongHash(newName), oldName);\ + OpRegistrator::getInstance().updateMSVC(sd::ops::HashHelper::getInstance().getLongHash(newName), oldName);\ return;\ }\ - OpRegistrator::getInstance()->registerOperation(name, ptr);\ + OpRegistrator::getInstance().registerOperation(name, ptr);\ }\ };\ static sd::ops::__registratorSynonym_##NAME zzz_register_opd_##NAME(#NAME, #ORIGINAL) @@ -1394,7 +1394,7 @@ auto shapeList = SHAPELIST(); \ auto opLimit = this->getOpDescriptor()->getNumberOfOutputs() < 1 ? block.width() : this->getOpDescriptor()->getNumberOfOutputs(); \ for (int e = 0; e < opLimit; e++) { \ - auto newshape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inputShape->at(e)), shape::order(inputShape->at(e)), shape::rank(inputShape->at(e)), shape::shapeOf(inputShape->at(e))); \ + auto newshape = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inputShape->at(e)), shape::order(inputShape->at(e)), shape::rank(inputShape->at(e)), shape::shapeOf(inputShape->at(e))); \ shapeList->push_back(newshape); \ } \ return shapeList; \ @@ -1484,8 +1484,8 @@ #else // we intentionally add 8 tail bytes here to avoid problems with atomic operations -#define ALLOCATE_SPECIAL(VARIABLE, WORKSPACE, LENGTH, TT) if (WORKSPACE == nullptr) {auto erc_##VARIABLE = cudaMalloc(reinterpret_cast(&VARIABLE), LENGTH * sizeof(TT) + 8); if (erc_##VARIABLE != 0) {throw cuda_exception::build("[DEVICE] allocation failed", erc_##VARIABLE);} else { sd::memory::MemoryTracker::getInstance()->countIn(sd::memory::MemoryType::DEVICE, VARIABLE, LENGTH * sizeof(TT)); }; } else {VARIABLE = reinterpret_cast(WORKSPACE->allocateBytes(sd::memory::MemoryType::DEVICE, LENGTH * sizeof(TT) + 8)); } -#define RELEASE_SPECIAL(VARIABLE, WORKSPACE) if (VARIABLE != nullptr) {if (WORKSPACE == nullptr) { sd::memory::MemoryTracker::getInstance()->countOut(VARIABLE); auto erc_##VARIABLE = cudaFree(reinterpret_cast(VARIABLE)); if (erc_##VARIABLE != 0) {throw cuda_exception::build("[DEVICE] deallocation failed", erc_##VARIABLE);}; }; }; +#define ALLOCATE_SPECIAL(VARIABLE, WORKSPACE, LENGTH, TT) if (WORKSPACE == nullptr) {auto erc_##VARIABLE = cudaMalloc(reinterpret_cast(&VARIABLE), LENGTH * sizeof(TT) + 8); if (erc_##VARIABLE != 0) {throw cuda_exception::build("[DEVICE] allocation failed", erc_##VARIABLE);} else { 
sd::memory::MemoryTracker::getInstance().countIn(sd::memory::MemoryType::DEVICE, VARIABLE, LENGTH * sizeof(TT)); }; } else {VARIABLE = reinterpret_cast(WORKSPACE->allocateBytes(sd::memory::MemoryType::DEVICE, LENGTH * sizeof(TT) + 8)); } +#define RELEASE_SPECIAL(VARIABLE, WORKSPACE) if (VARIABLE != nullptr) {if (WORKSPACE == nullptr) { sd::memory::MemoryTracker::getInstance().countOut(VARIABLE); auto erc_##VARIABLE = cudaFree(reinterpret_cast(VARIABLE)); if (erc_##VARIABLE != 0) {throw cuda_exception::build("[DEVICE] deallocation failed", erc_##VARIABLE);}; }; }; #endif @@ -1503,12 +1503,12 @@ #else -#define ALLOCATE(VARIABLE, WORKSPACE, LENGTH, TT) if (WORKSPACE == nullptr) {VARIABLE = new TT[LENGTH]; sd::memory::MemoryTracker::getInstance()->countIn(sd::memory::MemoryType::HOST, VARIABLE, LENGTH * sizeof(TT)); } else {VARIABLE = reinterpret_cast(WORKSPACE->allocateBytes(LENGTH * sizeof(TT))); }; memset(VARIABLE, 0, LENGTH * sizeof(TT)); -#define RELEASE(VARIABLE, WORKSPACE) if (WORKSPACE == nullptr) { sd::memory::MemoryTracker::getInstance()->countOut(VARIABLE); delete[] VARIABLE;}; +#define ALLOCATE(VARIABLE, WORKSPACE, LENGTH, TT) if (WORKSPACE == nullptr) {VARIABLE = new TT[LENGTH]; sd::memory::MemoryTracker::getInstance().countIn(sd::memory::MemoryType::HOST, VARIABLE, LENGTH * sizeof(TT)); } else {VARIABLE = reinterpret_cast(WORKSPACE->allocateBytes(LENGTH * sizeof(TT))); }; memset(VARIABLE, 0, LENGTH * sizeof(TT)); +#define RELEASE(VARIABLE, WORKSPACE) if (WORKSPACE == nullptr) { sd::memory::MemoryTracker::getInstance().countOut(VARIABLE); delete[] VARIABLE;}; #endif -#define CONSTANT(SHAPE) ConstantShapeHelper::getInstance()->createFromExisting(SHAPE, block.workspace()) +#define CONSTANT(SHAPE) ConstantShapeHelper::getInstance().createFromExisting(SHAPE, block.workspace()) diff --git a/libnd4j/include/system/platform_boilerplate.h b/libnd4j/include/system/platform_boilerplate.h index bdbb1a051..b74a0530f 100644 --- a/libnd4j/include/system/platform_boilerplate.h +++ b/libnd4j/include/system/platform_boilerplate.h @@ -40,7 +40,7 @@ #define PLATFORM_IMPL_F(NAME, ENGINE, CNAME) struct ND4J_EXPORT __registratorPlatformHelper_##CNAME { \ __registratorPlatformHelper_##CNAME() { \ auto helper = new PLATFORM_##CNAME(); \ - OpRegistrator::getInstance()->registerHelper(helper); \ + OpRegistrator::getInstance().registerHelper(helper); \ } \ }; \ static __registratorPlatformHelper_##CNAME platformHelper_##CNAME; \ diff --git a/libnd4j/minifier/minifier.cpp b/libnd4j/minifier/minifier.cpp index 7846c1846..043f2b696 100644 --- a/libnd4j/minifier/minifier.cpp +++ b/libnd4j/minifier/minifier.cpp @@ -92,7 +92,7 @@ main(int argc, char *argv[]) { arch_arg = opt.arch(); std::vector descriptors; - nd4j_printf("Total available operations: %i\n", OpRegistrator::getInstance()->numberOfOperations()); + nd4j_printf("Total available operations: %i\n", OpRegistrator::getInstance().numberOfOperations()); for (auto file: opt.files()) { // all files will be checked for accessibility & size diff --git a/libnd4j/server/GraphServer.cpp b/libnd4j/server/GraphServer.cpp index a9e8c3ddc..b7615dd5c 100644 --- a/libnd4j/server/GraphServer.cpp +++ b/libnd4j/server/GraphServer.cpp @@ -43,7 +43,7 @@ namespace sd { auto graph = new Graph(flat_graph); // single data type for now - GraphHolder::getInstance()->registerGraph(flat_graph->id(), graph); + GraphHolder::getInstance().registerGraph(flat_graph->id(), graph); // sending out OK response auto response_offset = CreateFlatResponse(mb_, 0); @@ -66,7 +66,7 @@ namespace sd { 
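The reworked ALLOCATE/RELEASE macros above keep the same two-path shape as before: plain heap allocations are registered with the MemoryTracker singleton, while workspace allocations bypass tracking because the workspace reclaims its arena wholesale. A rough sketch of that pattern under simplified, hypothetical Tracker and Workspace types (the real ones are sd::memory::MemoryTracker and the libnd4j workspace; nothing below is the actual API):

#include <cstddef>
#include <cstring>
#include <map>
#include <vector>

class Tracker {
public:
    static Tracker& getInstance() { static Tracker t; return t; }
    void countIn(void* ptr, std::size_t bytes) { _live[ptr] = bytes; }
    void countOut(void* ptr) { _live.erase(ptr); }
    std::size_t liveAllocations() const { return _live.size(); }
private:
    Tracker() = default;
    std::map<void*, std::size_t> _live;
};

struct Workspace {
    std::vector<char> arena = std::vector<char>(1 << 20);  // pre-sized scratch
    std::size_t offset = 0;
    void* allocateBytes(std::size_t bytes) {  // bump allocator, freed wholesale
        void* ptr = arena.data() + offset;
        offset += bytes;
        return ptr;
    }
};

// Mirrors ALLOCATE: track heap allocations; workspace ones stay untracked.
template <typename T>
T* allocate(Workspace* ws, std::size_t length) {
    T* v;
    if (ws == nullptr) {
        v = new T[length];
        Tracker::getInstance().countIn(v, length * sizeof(T));
    } else {
        v = reinterpret_cast<T*>(ws->allocateBytes(length * sizeof(T)));
    }
    std::memset(v, 0, length * sizeof(T));
    return v;
}

// Mirrors RELEASE: only heap allocations are counted out and freed one by one.
template <typename T>
void release(Workspace* ws, T* v) {
    if (ws == nullptr) {
        Tracker::getInstance().countOut(v);
        delete[] v;
    }
}

int main() {
    long long* heap = allocate<long long>(nullptr, 16);  // tracked
    Workspace ws;
    long long* scratch = allocate<long long>(&ws, 16);   // untracked
    release(&ws, scratch);
    release(nullptr, heap);
    return 0;
}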
auto graph = new Graph(flat_graph); // single data type for now - GraphHolder::getInstance()->replaceGraph(flat_graph->id(), graph); + GraphHolder::getInstance().replaceGraph(flat_graph->id(), graph); // sending out OK response auto response_offset = CreateFlatResponse(mb_, 0); @@ -91,7 +91,7 @@ namespace sd { auto request = request_msg->GetRoot(); // dropping out graph (any datatype) - GraphHolder::getInstance()->dropGraphAny(request->id()); + GraphHolder::getInstance().dropGraphAny(request->id()); // sending out OK response auto response_offset = CreateFlatResponse(mb_, 0); @@ -111,7 +111,7 @@ namespace sd { try { // GraphHolder - auto response_offset = GraphHolder::getInstance()->execute(request->id(), mb_, request); + auto response_offset = GraphHolder::getInstance().execute(request->id(), mb_, request); mb_.Finish(response_offset); *response_msg = mb_.ReleaseMessage(); @@ -181,7 +181,7 @@ int main(int argc, char *argv[]) { if(cmdOptionExists(argv, argv+argc, "-f")) { auto file = getCmdOption(argv, argv + argc, "-f"); auto graph = GraphExecutioner::importFromFlatBuffers(file); - sd::graph::GraphHolder::getInstance()->registerGraph(0L, graph); + sd::graph::GraphHolder::getInstance().registerGraph(0L, graph); } RunServer(port); diff --git a/libnd4j/tests_cpu/layers_tests/ConditionalTests.cpp b/libnd4j/tests_cpu/layers_tests/ConditionalTests.cpp index 00752ca0f..5167abcd1 100644 --- a/libnd4j/tests_cpu/layers_tests/ConditionalTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/ConditionalTests.cpp @@ -30,13 +30,13 @@ using namespace sd::graph; class ConditionalTests : public testing::Test { public: ConditionalTests(){ - //Environment::getInstance()->setVerbose(true); - //Environment::getInstance()->setDebug(true); + //Environment::getInstance().setVerbose(true); + //Environment::getInstance().setDebug(true); } ~ConditionalTests(){ - //Environment::getInstance()->setVerbose(false); - //Environment::getInstance()->setDebug(false); + //Environment::getInstance().setVerbose(false); + //Environment::getInstance().setDebug(false); } }; @@ -139,8 +139,8 @@ TEST_F(ConditionalTests, Flat_Test_1) { * Condition is True */ TEST_F(ConditionalTests, Flat_Test_2) { - Environment::getInstance()->setDebug(true); - Environment::getInstance()->setVerbose(true); + Environment::getInstance().setDebug(true); + Environment::getInstance().setVerbose(true); sd::ops::identity op0; auto graph = GraphExecutioner::importFromFlatBuffers("./resources/simpleif_0.fb"); diff --git a/libnd4j/tests_cpu/layers_tests/ConstantShapeHelperTests.cpp b/libnd4j/tests_cpu/layers_tests/ConstantShapeHelperTests.cpp index 5b747ab5b..a9a42ac88 100644 --- a/libnd4j/tests_cpu/layers_tests/ConstantShapeHelperTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/ConstantShapeHelperTests.cpp @@ -45,15 +45,15 @@ public: }; TEST_F(ConstantShapeHelperTests, test_cachedAmount_1) { - auto ttlBefore = ConstantShapeHelper::getInstance()->totalCachedEntries(); + auto ttlBefore = ConstantShapeHelper::getInstance().totalCachedEntries(); auto arrayA = NDArrayFactory::create('c', {7, 11, 17, 23, 31, 43}); - auto ttlMiddle = ConstantShapeHelper::getInstance()->totalCachedEntries(); + auto ttlMiddle = ConstantShapeHelper::getInstance().totalCachedEntries(); auto arrayB = NDArrayFactory::create('c', {7, 11, 17, 23, 31, 43}); - auto ttlAfter = ConstantShapeHelper::getInstance()->totalCachedEntries(); + auto ttlAfter = ConstantShapeHelper::getInstance().totalCachedEntries(); ASSERT_TRUE(ttlBefore <= ttlMiddle); ASSERT_EQ(ttlMiddle, ttlAfter); @@ -61,15 +61,15 @@ 
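The cachedAmount tests above and below assert the one property that makes these constant helpers safe to share: a second request for an identical shape or TAD descriptor must be served from the cache, so the entry count does not grow. A compact model of that behaviour, where ShapeCache and its methods are hypothetical stand-ins for ConstantShapeHelper's bufferForShapeInfo and totalCachedEntries:

#include <cstddef>
#include <cstdio>
#include <map>
#include <vector>

class ShapeCache {
public:
    static ShapeCache& getInstance() {
        static ShapeCache instance;
        return instance;
    }
    // First request inserts; identical later requests hit the cached entry.
    const std::vector<long long>& bufferFor(const std::vector<long long>& shape) {
        return _cache.emplace(shape, shape).first->second;
    }
    std::size_t totalCachedEntries() const { return _cache.size(); }
private:
    ShapeCache() = default;
    std::map<std::vector<long long>, std::vector<long long>> _cache;
};

int main() {
    auto& helper = ShapeCache::getInstance();
    helper.bufferFor({7, 11, 17, 23, 31, 43});
    auto middle = helper.totalCachedEntries();
    helper.bufferFor({7, 11, 17, 23, 31, 43});   // cache hit, no new entry
    auto after = helper.totalCachedEntries();
    printf("grew: %s\n", middle == after ? "no" : "yes");   // prints "no"
    return 0;
}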
TEST_F(ConstantShapeHelperTests, test_cachedAmount_1) { TEST_F(ConstantTadHelperTests, test_cachedAmount_1) { auto arrayA = NDArrayFactory::create('c', {7, 11, 17, 23, 31, 43}); - auto ttlBefore = ConstantTadHelper::getInstance()->totalCachedEntries(); + auto ttlBefore = ConstantTadHelper::getInstance().totalCachedEntries(); - auto packAA = ConstantTadHelper::getInstance()->tadForDimensions(arrayA.shapeInfo(), {3, 4}); + auto packAA = ConstantTadHelper::getInstance().tadForDimensions(arrayA.shapeInfo(), {3, 4}); - auto ttlMiddle = ConstantTadHelper::getInstance()->totalCachedEntries(); + auto ttlMiddle = ConstantTadHelper::getInstance().totalCachedEntries(); - auto packAB = ConstantTadHelper::getInstance()->tadForDimensions(arrayA.shapeInfo(), {3, 4}); + auto packAB = ConstantTadHelper::getInstance().tadForDimensions(arrayA.shapeInfo(), {3, 4}); - auto ttlAfter = ConstantTadHelper::getInstance()->totalCachedEntries(); + auto ttlAfter = ConstantTadHelper::getInstance().totalCachedEntries(); ASSERT_TRUE(ttlBefore <= ttlMiddle); ASSERT_EQ(ttlMiddle, ttlAfter); @@ -88,13 +88,13 @@ TEST_F(ConstantShapeHelperTests, basic_test_1) { ASSERT_EQ(sd::DataType::BFLOAT16, descriptor.dataType()); ASSERT_FALSE(descriptor.isEmpty()); - ASSERT_FALSE(ConstantShapeHelper::getInstance()->checkBufferExistenceForShapeInfo(descriptor)); + ASSERT_FALSE(ConstantShapeHelper::getInstance().checkBufferExistenceForShapeInfo(descriptor)); - auto buffer = ConstantShapeHelper::getInstance()->bufferForShapeInfo(descriptor); + auto buffer = ConstantShapeHelper::getInstance().bufferForShapeInfo(descriptor); - ASSERT_TRUE(ConstantShapeHelper::getInstance()->checkBufferExistenceForShapeInfo(descriptor)); + ASSERT_TRUE(ConstantShapeHelper::getInstance().checkBufferExistenceForShapeInfo(descriptor)); - auto buffer2 = ConstantShapeHelper::getInstance()->bufferForShapeInfo(descriptor2); + auto buffer2 = ConstantShapeHelper::getInstance().bufferForShapeInfo(descriptor2); ASSERT_TRUE(buffer.primary() != nullptr); @@ -109,14 +109,14 @@ TEST_F(ConstantShapeHelperTests, stress_test_1) { for (auto x = 0; x < 1000; x++) { auto ptr = ShapeBuilders::createShapeInfo(sd::DataType::FLOAT32, 'c', {5, x + 10, x + 1}); ShapeDescriptor descriptor(ptr); - ConstantShapeHelper::getInstance()->createShapeInfo(descriptor); + ConstantShapeHelper::getInstance().createShapeInfo(descriptor); delete [] ptr; } ShapeDescriptor aShape(sd::DataType::FLOAT32, 'c', {(Nd4jLong)5, (Nd4jLong)382, (Nd4jLong)373}); -// nd4j_printf("%d\n", ConstantShapeHelper::getInstance()->cachedEntriesForDevice(0)); +// nd4j_printf("%d\n", ConstantShapeHelper::getInstance().cachedEntriesForDevice(0)); auto timeStart = std::chrono::system_clock::now(); - ASSERT_TRUE(ConstantShapeHelper::getInstance()->checkBufferExistenceForShapeInfo(aShape)); + ASSERT_TRUE(ConstantShapeHelper::getInstance().checkBufferExistenceForShapeInfo(aShape)); auto timeEnd = std::chrono::system_clock::now(); auto outerTime = std::chrono::duration_cast(timeEnd - timeStart).count(); @@ -146,7 +146,7 @@ TEST_F(ConstantShapeHelperTests, basic_test_4) { #ifdef __CUDABLAS__ ASSERT_TRUE(dup->specialShapeInfo() != nullptr); PointersManager manager(sd::LaunchContext ::defaultContext(), "test"); - // manager.printDevContentOnDev(dup->specialShapeInfo(), shape::shapeInfoLength(2), 0); + // manager.printDevContentOnDev(dup->special(), shape::shapeInfoLength(2), 0); #endif delete array; @@ -195,14 +195,14 @@ TEST_F(ConstantHelperTests, basic_test_1) { ConstantDescriptor descriptor({1, 2, 3}); - ConstantDataBuffer* fBuffer 
= ConstantHelper::getInstance()->constantBuffer(descriptor, sd::DataType::FLOAT32); + ConstantDataBuffer* fBuffer = ConstantHelper::getInstance().constantBuffer(descriptor, sd::DataType::FLOAT32); auto fPtr = fBuffer->primaryAsT(); ASSERT_NEAR(1.f, fPtr[0], 1e-5); ASSERT_NEAR(2.f, fPtr[1], 1e-5); ASSERT_NEAR(3.f, fPtr[2], 1e-5); - auto iBuffer = ConstantHelper::getInstance()->constantBuffer(descriptor, sd::DataType::INT32); + auto iBuffer = ConstantHelper::getInstance().constantBuffer(descriptor, sd::DataType::INT32); auto iPtr = iBuffer->primaryAsT(); ASSERT_EQ(1, iPtr[0]); @@ -215,14 +215,14 @@ TEST_F(ConstantHelperTests, basic_test_2) { double array[] = {1., 2., 3.}; ConstantDescriptor descriptor(array, 3); - ConstantDataBuffer* fBuffer = ConstantHelper::getInstance()->constantBuffer(descriptor, sd::DataType::FLOAT32); + ConstantDataBuffer* fBuffer = ConstantHelper::getInstance().constantBuffer(descriptor, sd::DataType::FLOAT32); auto fPtr = fBuffer->primaryAsT(); ASSERT_NEAR(1.f, fPtr[0], 1e-5); ASSERT_NEAR(2.f, fPtr[1], 1e-5); ASSERT_NEAR(3.f, fPtr[2], 1e-5); - auto iBuffer = ConstantHelper::getInstance()->constantBuffer(descriptor, sd::DataType::INT32); + auto iBuffer = ConstantHelper::getInstance().constantBuffer(descriptor, sd::DataType::INT32); auto iPtr = iBuffer->primaryAsT(); ASSERT_EQ(1, iPtr[0]); diff --git a/libnd4j/tests_cpu/layers_tests/ConvolutionTests1.cpp b/libnd4j/tests_cpu/layers_tests/ConvolutionTests1.cpp index 4438e5fe6..b87985458 100644 --- a/libnd4j/tests_cpu/layers_tests/ConvolutionTests1.cpp +++ b/libnd4j/tests_cpu/layers_tests/ConvolutionTests1.cpp @@ -2013,7 +2013,6 @@ TYPED_TEST(TypedConvolutionTests1, conv3d_test9) { ASSERT_TRUE(exp.isSameShape(z)); - shapes->destroy(); delete shapes; } diff --git a/libnd4j/tests_cpu/layers_tests/CudaBasicsTests1.cu b/libnd4j/tests_cpu/layers_tests/CudaBasicsTests1.cu index cbcbe2c15..d8ed2a264 100644 --- a/libnd4j/tests_cpu/layers_tests/CudaBasicsTests1.cu +++ b/libnd4j/tests_cpu/layers_tests/CudaBasicsTests1.cu @@ -254,7 +254,7 @@ TEST_F(CudaBasicsTests1, execIndexReduceScalar_1) { //////////////////////////////////////////////////////////////////////////// TEST_F(CudaBasicsTests1, execReduce3Scalar_1) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2,2}, {1,2,3,4}, sd::DataType::INT32); @@ -970,7 +970,7 @@ TEST_F(CudaBasicsTests1, execIndexReduce_3) { //////////////////////////////////////////////////////////////////////////// TEST_F(CudaBasicsTests1, execScalar_1) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x('c', {2,3}, {0,1,2,3,4,5}, sd::DataType::INT64); @@ -1005,7 +1005,7 @@ TEST_F(CudaBasicsTests1, execScalar_1) { //////////////////////////////////////////////////////////////////////////// TEST_F(CudaBasicsTests1, execScalar_2) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x('c', {2,3}, {-1,-2,-3,-4,-5,-6}, sd::DataType::INT64); @@ -1041,7 +1041,7 @@ TEST_F(CudaBasicsTests1, execScalar_2) { //////////////////////////////////////////////////////////////////////////// TEST_F(CudaBasicsTests1, execScalar_3) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x('c', {2,3,2}, {0,1,2,3,4,5,6,7,8,9,10,11}, sd::DataType::INT64); @@ -1192,7 +1192,7 @@ TEST_F(CudaBasicsTests1, 
execScalarBool_2) { //////////////////////////////////////////////////////////////////////////// TEST_F(CudaBasicsTests1, execBroadcast_1) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x('c', {2,3,4}, {100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100}, sd::DataType::INT32); @@ -1252,7 +1252,7 @@ TEST_F(CudaBasicsTests1, execBroadcast_1) { //////////////////////////////////////////////////////////////////////////// TEST_F(CudaBasicsTests1, execBroadcast_2) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x('c', {2,3,4}, {100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100}, sd::DataType::INT32); @@ -1429,7 +1429,7 @@ TEST_F(CudaBasicsTests1, execBroadcastBool_2) { //////////////////////////////////////////////////////////////////////////// TEST_F(CudaBasicsTests1, execPairwiseTransform_1) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x('c', {2,2,2}, {1,5,3,7,2,6,4,8}, sd::DataType::INT32); @@ -2544,7 +2544,7 @@ TEST_F(CudaBasicsTests1, execReduce3TAD_1) { NDArray z('c', {3}, {100,100,100}, sd::DataType::DOUBLE); std::vector dimensions = {0,1}; - auto packX = ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), dimensions); + auto packX = ConstantTadHelper::getInstance().tadForDimensions(x.shapeInfo(), dimensions); LaunchContext* context = x.getContext(); x.syncToDevice(); @@ -2858,7 +2858,7 @@ TEST_F(CudaBasicsTests1, execSummaryStats_3) { NativeOpExecutioner::execSummaryStats(&lc, sd::variance::SummaryStatsStandardDeviation, nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.special(), (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2], true); @@ -2941,13 +2941,13 @@ TEST_F(CudaBasicsTests1, execRandom_1) { // cudaResult = cudaStreamCreate(&stream); ASSERT_EQ(0, cudaResult); // LaunchContext lc(&stream); // -// // ::execRandom(extraPointers, random::GaussianDistribution, &gen, z.buffer(), z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), &extra); +// // ::execRandom(extraPointers, random::GaussianDistribution, &gen, z.buffer(), z.shapeInfo(), z.specialBuffer(), z.special(), &extra); // // call cuda kernel which calculates result // NativeOpExecutioner::execRandom(&lc, sd::random::GaussianDistribution, // &gen, -// nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), -// nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), -// nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), +// nullptr, z.shapeInfo(), z.specialBuffer(), z.special(), +// nullptr, z.shapeInfo(), z.specialBuffer(), z.special(), +// nullptr, z.shapeInfo(), z.specialBuffer(), z.special(), // extraArguments.argumentsAsT(z.dataType())); // // cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); diff --git a/libnd4j/tests_cpu/layers_tests/CudaBasicsTests2.cu b/libnd4j/tests_cpu/layers_tests/CudaBasicsTests2.cu index b425ffcbb..28102cad5 100644 --- a/libnd4j/tests_cpu/layers_tests/CudaBasicsTests2.cu +++ b/libnd4j/tests_cpu/layers_tests/CudaBasicsTests2.cu @@ -40,7 +40,7 @@ public: }; TEST_F(CudaBasicsTests2, test_devices_1) { - auto caps = 
Environment::getInstance()->capabilities(); + auto caps = Environment::getInstance().capabilities(); ASSERT_FALSE(caps.empty()); } @@ -259,7 +259,7 @@ TEST_F(CudaBasicsTests2, mmulMxM_12) { int devCnt = 0; cudaGetDevice(&devCnt); - if(Environment::getInstance()->capabilities()[devCnt].first() < 5) return; + if(Environment::getInstance().capabilities()[devCnt].first() < 5) return; const Nd4jLong M = 4; const Nd4jLong K = 4; @@ -282,7 +282,7 @@ TEST_F(CudaBasicsTests2, mmulMxM_13) { int devCnt = 0; cudaGetDevice(&devCnt); - if(Environment::getInstance()->capabilities()[devCnt].first() < 5) return; + if(Environment::getInstance().capabilities()[devCnt].first() < 5) return; const Nd4jLong M = 3; const Nd4jLong K = 4; @@ -304,7 +304,7 @@ TEST_F(CudaBasicsTests2, mmulMxM_14) { int devCnt = 0; cudaGetDevice(&devCnt); - if(Environment::getInstance()->capabilities()[devCnt].first() < 5) return; + if(Environment::getInstance().capabilities()[devCnt].first() < 5) return; const Nd4jLong M = 3; const Nd4jLong K = 4; @@ -326,7 +326,7 @@ TEST_F(CudaBasicsTests2, mmulMxM_15) { int devCnt = 0; cudaGetDevice(&devCnt); - if(Environment::getInstance()->capabilities()[devCnt].first() < 5) return; + if(Environment::getInstance().capabilities()[devCnt].first() < 5) return; const Nd4jLong M = 3; const Nd4jLong K = 4; @@ -349,7 +349,7 @@ TEST_F(CudaBasicsTests2, mmulMxM_16) { int devCnt = 0; cudaGetDevice(&devCnt); - if(Environment::getInstance()->capabilities()[devCnt].first() < 5) return; + if(Environment::getInstance().capabilities()[devCnt].first() < 5) return; const Nd4jLong M = 3; const Nd4jLong K = 4; @@ -371,7 +371,7 @@ TEST_F(CudaBasicsTests2, mmulMxM_17) { int devCnt = 0; cudaGetDevice(&devCnt); - if(Environment::getInstance()->capabilities()[devCnt].first() < 5) return; + if(Environment::getInstance().capabilities()[devCnt].first() < 5) return; const Nd4jLong M = 3; const Nd4jLong K = 4; @@ -393,7 +393,7 @@ TEST_F(CudaBasicsTests2, mmulMxM_18) { int devCnt = 0; cudaGetDevice(&devCnt); - if(Environment::getInstance()->capabilities()[devCnt].first() < 5.3) return; + if(Environment::getInstance().capabilities()[devCnt].first() < 5.3) return; const Nd4jLong M = 3; const Nd4jLong K = 4; @@ -415,7 +415,7 @@ TEST_F(CudaBasicsTests2, mmulMxM_19) { int devCnt = 0; cudaGetDevice(&devCnt); - if(Environment::getInstance()->capabilities()[devCnt].first() < 5.3) return; + if(Environment::getInstance().capabilities()[devCnt].first() < 5.3) return; const Nd4jLong M = 3; const Nd4jLong K = 4; @@ -437,7 +437,7 @@ TEST_F(CudaBasicsTests2, mmulMxM_20) { int devCnt = 0; cudaGetDevice(&devCnt); - if(Environment::getInstance()->capabilities()[devCnt].first() < 5.3) return; + if(Environment::getInstance().capabilities()[devCnt].first() < 5.3) return; const Nd4jLong M = 3; const Nd4jLong K = 4; diff --git a/libnd4j/tests_cpu/layers_tests/DataBufferTests.cpp b/libnd4j/tests_cpu/layers_tests/DataBufferTests.cpp index 42ab543b1..b22f9e765 100644 --- a/libnd4j/tests_cpu/layers_tests/DataBufferTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/DataBufferTests.cpp @@ -39,31 +39,31 @@ public: }; TEST_F(DataBufferTests, test_alloc_limit_1) { - if (!Environment::getInstance()->isCPU()) + if (!Environment::getInstance().isCPU()) return; auto deviceId = AffinityManager::currentDeviceId(); - auto odLimit = MemoryCounter::getInstance()->deviceLimit(deviceId); - auto ogLimit = MemoryCounter::getInstance()->groupLimit(MemoryType::HOST); - auto odUse = MemoryCounter::getInstance()->allocatedDevice(deviceId); - auto ogUse = 
MemoryCounter::getInstance()->allocatedGroup(MemoryType::HOST); + auto odLimit = MemoryCounter::getInstance().deviceLimit(deviceId); + auto ogLimit = MemoryCounter::getInstance().groupLimit(MemoryType::HOST); + auto odUse = MemoryCounter::getInstance().allocatedDevice(deviceId); + auto ogUse = MemoryCounter::getInstance().allocatedGroup(MemoryType::HOST); auto limitSize = odUse + (150 * 1024 * 1024); auto allocSize = 100000000; - MemoryCounter::getInstance()->setDeviceLimit(deviceId, odLimit + limitSize); - MemoryCounter::getInstance()->setGroupLimit(MemoryType::HOST, odLimit + limitSize); + MemoryCounter::getInstance().setDeviceLimit(deviceId, odLimit + limitSize); + MemoryCounter::getInstance().setGroupLimit(MemoryType::HOST, odLimit + limitSize); DataBuffer buffer(allocSize, DataType::INT32); // separately testing per-device limits and group limits - ASSERT_EQ(odUse + allocSize, MemoryCounter::getInstance()->allocatedDevice(deviceId)); - ASSERT_EQ(ogUse + allocSize, MemoryCounter::getInstance()->allocatedGroup(MemoryType::HOST)); + ASSERT_EQ(odUse + allocSize, MemoryCounter::getInstance().allocatedDevice(deviceId)); + ASSERT_EQ(ogUse + allocSize, MemoryCounter::getInstance().allocatedGroup(MemoryType::HOST)); // setting smaller limits, to make sure next allocation fails with OOM exception - MemoryCounter::getInstance()->setDeviceLimit(deviceId, allocSize - 100); - MemoryCounter::getInstance()->setGroupLimit(MemoryType::HOST, allocSize - 100); + MemoryCounter::getInstance().setDeviceLimit(deviceId, allocSize - 100); + MemoryCounter::getInstance().setGroupLimit(MemoryType::HOST, allocSize - 100); try { DataBuffer bufferFailed(allocSize, DataType::INT32); @@ -73,6 +73,6 @@ TEST_F(DataBufferTests, test_alloc_limit_1) { } // restore original limits, so subsequent tests do not fail - MemoryCounter::getInstance()->setDeviceLimit(deviceId, odLimit); - MemoryCounter::getInstance()->setGroupLimit(MemoryType::HOST, odLimit); + MemoryCounter::getInstance().setDeviceLimit(deviceId, odLimit); + MemoryCounter::getInstance().setGroupLimit(MemoryType::HOST, odLimit); } \ No newline at end of file diff --git a/libnd4j/tests_cpu/layers_tests/DataBufferTestsCuda.cu b/libnd4j/tests_cpu/layers_tests/DataBufferTestsCuda.cu index 730ade824..6f7d38ede 100644 --- a/libnd4j/tests_cpu/layers_tests/DataBufferTestsCuda.cu +++ b/libnd4j/tests_cpu/layers_tests/DataBufferTestsCuda.cu @@ -42,33 +42,33 @@ public: TEST_F(DataBufferTestsCuda, test_alloc_limit_1) { auto deviceId = AffinityManager::currentDeviceId(); - auto odLimit = MemoryCounter::getInstance()->deviceLimit(deviceId); + auto odLimit = MemoryCounter::getInstance().deviceLimit(deviceId); - auto opLimit = MemoryCounter::getInstance()->groupLimit(MemoryType::HOST); - auto osLimit = MemoryCounter::getInstance()->groupLimit(MemoryType::DEVICE); + auto opLimit = MemoryCounter::getInstance().groupLimit(MemoryType::HOST); + auto osLimit = MemoryCounter::getInstance().groupLimit(MemoryType::DEVICE); - auto odUse = MemoryCounter::getInstance()->allocatedDevice(deviceId); + auto odUse = MemoryCounter::getInstance().allocatedDevice(deviceId); - auto opUse = MemoryCounter::getInstance()->allocatedGroup(MemoryType::HOST); - auto osUse = MemoryCounter::getInstance()->allocatedGroup(MemoryType::DEVICE); + auto opUse = MemoryCounter::getInstance().allocatedGroup(MemoryType::HOST); + auto osUse = MemoryCounter::getInstance().allocatedGroup(MemoryType::DEVICE); auto limitSize = odUse + 150000000; auto allocSize = 100000000; - 
MemoryCounter::getInstance()->setDeviceLimit(deviceId, odLimit + limitSize); - MemoryCounter::getInstance()->setGroupLimit(MemoryType::HOST, opLimit + limitSize); - MemoryCounter::getInstance()->setGroupLimit(MemoryType::DEVICE, osLimit + limitSize); + MemoryCounter::getInstance().setDeviceLimit(deviceId, odLimit + limitSize); + MemoryCounter::getInstance().setGroupLimit(MemoryType::HOST, opLimit + limitSize); + MemoryCounter::getInstance().setGroupLimit(MemoryType::DEVICE, osLimit + limitSize); DataBuffer buffer(allocSize, DataType::INT32, nullptr, true); // separately testing per-device limits and group limits - ASSERT_EQ(odUse + allocSize, MemoryCounter::getInstance()->allocatedDevice(deviceId)); - ASSERT_EQ(opUse + allocSize, MemoryCounter::getInstance()->allocatedGroup(MemoryType::HOST)); - ASSERT_EQ(osUse + allocSize, MemoryCounter::getInstance()->allocatedGroup(MemoryType::DEVICE)); + ASSERT_EQ(odUse + allocSize, MemoryCounter::getInstance().allocatedDevice(deviceId)); + ASSERT_EQ(opUse + allocSize, MemoryCounter::getInstance().allocatedGroup(MemoryType::HOST)); + ASSERT_EQ(osUse + allocSize, MemoryCounter::getInstance().allocatedGroup(MemoryType::DEVICE)); // setting smaller limits, to make sure next allocation fails with OOM exception - MemoryCounter::getInstance()->setDeviceLimit(deviceId, allocSize - 100); - MemoryCounter::getInstance()->setGroupLimit(MemoryType::DEVICE, allocSize - 100); + MemoryCounter::getInstance().setDeviceLimit(deviceId, allocSize - 100); + MemoryCounter::getInstance().setGroupLimit(MemoryType::DEVICE, allocSize - 100); // this allocation should fail, since we're allocating too much @@ -82,8 +82,8 @@ TEST_F(DataBufferTestsCuda, test_alloc_limit_1) { // // restore original limits, so subsequent tests do not fail - MemoryCounter::getInstance()->setDeviceLimit(deviceId, odLimit); - MemoryCounter::getInstance()->setGroupLimit(MemoryType::HOST, opLimit); - MemoryCounter::getInstance()->setGroupLimit(MemoryType::DEVICE, osLimit); + MemoryCounter::getInstance().setDeviceLimit(deviceId, odLimit); + MemoryCounter::getInstance().setGroupLimit(MemoryType::HOST, opLimit); + MemoryCounter::getInstance().setGroupLimit(MemoryType::DEVICE, osLimit); } */ \ No newline at end of file diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests1.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests1.cpp index 959362c4d..a5715fd01 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests1.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests1.cpp @@ -55,11 +55,11 @@ public: const int oW = (iW - kW - (kW - 1) * (dW - 1) + 2 * pW) / sW + 1; // output width DeclarableOpsTests1() { - sd::memory::MemoryTracker::getInstance()->reset(); + sd::memory::MemoryTracker::getInstance().reset(); } ~DeclarableOpsTests1() { - sd::memory::MemoryTracker::getInstance()->summarize(); + sd::memory::MemoryTracker::getInstance().summarize(); } }; @@ -144,7 +144,7 @@ TEST_F(DeclarableOpsTests1, BasicInitialization1) { ////////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests1, BasicInitialization2) { - auto op = sd::ops::OpRegistrator::getInstance()->getOperation("concat"); + auto op = sd::ops::OpRegistrator::getInstance().getOperation("concat"); ASSERT_TRUE(op != nullptr); std::string expName("concat"); @@ -217,19 +217,19 @@ TEST_F(DeclarableOpsTests1, AXpY_Test_1) { } TEST_F(DeclarableOpsTests1, BasicInitialization3) { - auto op1 = sd::ops::OpRegistrator::getInstance()->getOperation("concat"); + auto op1 = 
sd::ops::OpRegistrator::getInstance().getOperation("concat"); std::string expName("concat"); - auto hash = sd::ops::HashHelper::getInstance()->getLongHash(expName); + auto hash = sd::ops::HashHelper::getInstance().getLongHash(expName); - auto op2 = sd::ops::OpRegistrator::getInstance()->getOperation(hash); + auto op2 = sd::ops::OpRegistrator::getInstance().getOperation(hash); ASSERT_TRUE(op1 == op2); } TEST_F(DeclarableOpsTests1, SynonymInitialization2) { - auto op = sd::ops::OpRegistrator::getInstance()->getOperation("Mul"); - auto op2 = sd::ops::OpRegistrator::getInstance()->getOperation("multiply"); + auto op = sd::ops::OpRegistrator::getInstance().getOperation("Mul"); + auto op2 = sd::ops::OpRegistrator::getInstance().getOperation("multiply"); ASSERT_TRUE(op != nullptr); std::string expName("multiply"); @@ -597,7 +597,7 @@ TEST_F(DeclarableOpsTests1, TestTensorDot17) { ////////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests1, DivergentCheck1) { - auto op = sd::ops::OpRegistrator::getInstance()->getOperation("switch"); + auto op = sd::ops::OpRegistrator::getInstance().getOperation("switch"); ASSERT_TRUE(op != nullptr); std::string expName("Switch"); @@ -1695,7 +1695,7 @@ TEST_F(DeclarableOpsTests1, Test_Cast_1) { ////////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests1, TestRegistrator1) { - auto res = sd::ops::OpRegistrator::getInstance()->getAllCustomOperations(); + auto res = sd::ops::OpRegistrator::getInstance().getAllCustomOperations(); } // ////////////////////////////////////////////////////////////////////// @@ -1713,7 +1713,7 @@ TEST_F(DeclarableOpsTests1, TestRegistrator1) { // z->assign(120.0f); // std::string opName("add"); -// auto hash = sd::ops::HashHelper::getInstance()->getInstance()->getLongHash(opName); +// auto hash = sd::ops::HashHelper::getInstance().getInstance()->getLongHash(opName); // auto inputBuffers = new Nd4jPointer[2]; // auto inputShapes = new Nd4jPointer[2]; @@ -1763,7 +1763,7 @@ TEST_F(DeclarableOpsTests1, TestRegistrator1) { // std::string opName("add"); -// auto hash = sd::ops::HashHelper::getInstance()->getInstance()->getLongHash(opName); +// auto hash = sd::ops::HashHelper::getInstance().getInstance()->getLongHash(opName); // auto inputBuffers = new Nd4jPointer[2]; // auto inputShapes = new Nd4jPointer[2]; diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests12.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests12.cpp index 66762f79d..9e5281afe 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests12.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests12.cpp @@ -780,8 +780,8 @@ TEST_F(DeclarableOpsTests12, pullRows_1) { std::vector dims = {1}; - auto xTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), dims); - auto zTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(z.shapeInfo(), dims); + auto xTadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(x.shapeInfo(), dims); + auto zTadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(z.shapeInfo(), dims); Nd4jPointer nativeStart[2]; @@ -816,8 +816,8 @@ TEST_F(DeclarableOpsTests12, pullRows_2) { std::vector dims = {1}; - auto xTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), dims); - auto zTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(z.shapeInfo(), dims); + auto xTadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(x.shapeInfo(), dims); + auto zTadPack = 
sd::ConstantTadHelper::getInstance().tadForDimensions(z.shapeInfo(), dims); Nd4jPointer nativeStart[2]; #ifdef __CUDABLAS__ diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests13.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests13.cpp index c7222e6f7..639d90389 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests13.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests13.cpp @@ -2717,7 +2717,7 @@ TEST_F(DeclarableOpsTests13, batchnorm_bp_test9) { int* dims = reinterpret_cast(manager.replicatePointer(dimensions.data(), dimensions.size() * sizeof(int))); input.reduceAlongDimension(sd::reduce::Mean, mean, dimensions); NDArray::prepareSpecialUse({&variance}, {&input}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), dimensions); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input.shapeInfo(), dimensions); NativeOpExecutioner::execSummaryStats(input.getContext(), 0,input.buffer(), input.shapeInfo(),input.specialBuffer(), input.specialShapeInfo(),nullptr,variance.buffer(), variance.shapeInfo(),variance.specialBuffer(), variance.specialShapeInfo(), dims, dimensions.size(),packX.platformShapeInfo(), packX.platformOffsets(),false); manager.synchronize(); NDArray::registerSpecialUse({&variance}, {&input}); @@ -2768,7 +2768,7 @@ TEST_F(DeclarableOpsTests13, batchnorm_bp_test10) { int* dims = reinterpret_cast(manager.replicatePointer(dimensions.data(), dimensions.size() * sizeof(int))); input.reduceAlongDimension(sd::reduce::Mean, mean, dimensions); NDArray::prepareSpecialUse({&variance}, {&input}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), dimensions); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input.shapeInfo(), dimensions); NativeOpExecutioner::execSummaryStats(input.getContext(), 0,input.buffer(), input.shapeInfo(),input.specialBuffer(), input.specialShapeInfo(),nullptr,variance.buffer(), variance.shapeInfo(),variance.specialBuffer(), variance.specialShapeInfo(), dims, dimensions.size(),packX.platformShapeInfo(), packX.platformOffsets(),false); manager.synchronize(); NDArray::registerSpecialUse({&variance}, {&input}); @@ -2831,7 +2831,7 @@ TEST_F(DeclarableOpsTests13, batchnorm_bp_test11) { int* dims = reinterpret_cast(manager.replicatePointer(dimensions.data(), dimensions.size() * sizeof(int))); input.reduceAlongDimension(sd::reduce::Mean, mean, dimensions, true); NDArray::prepareSpecialUse({&variance}, {&input}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), dimensions); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input.shapeInfo(), dimensions); NativeOpExecutioner::execSummaryStats(input.getContext(), 0,input.buffer(), input.shapeInfo(),input.specialBuffer(), input.specialShapeInfo(),nullptr,variance.buffer(), variance.shapeInfo(),variance.specialBuffer(), variance.specialShapeInfo(), dims, dimensions.size(),packX.platformShapeInfo(), packX.platformOffsets(),false); manager.synchronize(); NDArray::registerSpecialUse({&variance}, {&input}); diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests14.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests14.cpp index b4c9839ab..ef35bfa72 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests14.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests14.cpp @@ -40,7 +40,7 @@ public: TEST_F(DeclarableOpsTests14, Test_Validation_Edge_1) { auto x = NDArrayFactory::create('c', {2}, {2, 2}); - auto exp = 
NDArrayFactory::create('c', {2, 2}, Environment::getInstance()->defaultFloatDataType()); + auto exp = NDArrayFactory::create('c', {2, 2}, Environment::getInstance().defaultFloatDataType()); exp.assign(4.0f); sd::ops::fill op; diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests3.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests3.cpp index 38006dd50..2a099230e 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests3.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests3.cpp @@ -1945,7 +1945,7 @@ TEST_F(DeclarableOpsTests3, svd_test1) { ASSERT_TRUE(expS.equalsTo(s)); - if(sd::Environment::getInstance()->isCPU()) { + if(sd::Environment::getInstance().isCPU()) { ASSERT_TRUE(expU.equalsTo(u)); ASSERT_TRUE(expV.equalsTo(v)); } @@ -1981,7 +1981,7 @@ TEST_F(DeclarableOpsTests3, svd_test2) { ASSERT_TRUE(expS.equalsTo(s)); - if(sd::Environment::getInstance()->isCPU()) { + if(sd::Environment::getInstance().isCPU()) { ASSERT_TRUE(expU.equalsTo(u)); ASSERT_TRUE(expV.equalsTo(v)); } @@ -2017,7 +2017,7 @@ TEST_F(DeclarableOpsTests3, svd_test3) { ASSERT_TRUE(expS.equalsTo(s)); - if(sd::Environment::getInstance()->isCPU()) { + if(sd::Environment::getInstance().isCPU()) { ASSERT_TRUE(expU.equalsTo(u)); ASSERT_TRUE(expV.equalsTo(v)); } @@ -2053,7 +2053,7 @@ TEST_F(DeclarableOpsTests3, svd_test4) { ASSERT_TRUE(expS.equalsTo(s)); - if(sd::Environment::getInstance()->isCPU()) { + if(sd::Environment::getInstance().isCPU()) { ASSERT_TRUE(expU.equalsTo(u)); ASSERT_TRUE(expV.equalsTo(v)); } @@ -2089,7 +2089,7 @@ TEST_F(DeclarableOpsTests3, svd_test5) { ASSERT_TRUE(expS.equalsTo(s)); - if(sd::Environment::getInstance()->isCPU()) { + if(sd::Environment::getInstance().isCPU()) { ASSERT_TRUE(expU.equalsTo(u)); ASSERT_TRUE(expV.equalsTo(v)); } @@ -2143,7 +2143,7 @@ TEST_F(DeclarableOpsTests3, svd_test6) { ASSERT_TRUE(expS.equalsTo(s)); - if(sd::Environment::getInstance()->isCPU()) { + if(sd::Environment::getInstance().isCPU()) { ASSERT_TRUE(expU.equalsTo(u)); ASSERT_TRUE(expV.equalsTo(v)); } @@ -2317,7 +2317,7 @@ TEST_F(DeclarableOpsTests3, svd_test7) { // ASSERT_TRUE(expS.equalsTo(s)); - // if(sd::Environment::getInstance()->isCPU()) { + // if(sd::Environment::getInstance().isCPU()) { // ASSERT_TRUE(expU.equalsTo(u)); // ASSERT_TRUE(expV.equalsTo(v)); // } @@ -2380,7 +2380,7 @@ TEST_F(DeclarableOpsTests3, svd_test9) { ASSERT_TRUE(expS.equalsTo(s)); - if(sd::Environment::getInstance()->isCPU()) { + if(sd::Environment::getInstance().isCPU()) { ASSERT_TRUE(expU.equalsTo(u)); ASSERT_TRUE(expV.equalsTo(v)); } @@ -2438,7 +2438,7 @@ TEST_F(DeclarableOpsTests3, svd_test10) { ASSERT_TRUE(expS.equalsTo(s)); - if(sd::Environment::getInstance()->isCPU()) { + if(sd::Environment::getInstance().isCPU()) { ASSERT_TRUE(expU.equalsTo(u)); ASSERT_TRUE(expV.equalsTo(v)); } @@ -2482,7 +2482,7 @@ TEST_F(DeclarableOpsTests3, svd_test11) { ASSERT_TRUE(expS.equalsTo(s)); - if(sd::Environment::getInstance()->isCPU()) { + if(sd::Environment::getInstance().isCPU()) { ASSERT_TRUE(expU.equalsTo(u)); ASSERT_TRUE(expV.equalsTo(v)); } diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests4.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests4.cpp index 1e877ecc6..56e5e213a 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests4.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests4.cpp @@ -641,7 +641,7 @@ TEST_F(DeclarableOpsTests4, biasadd_bp_2) { } TEST_F(DeclarableOpsTests4, biasadd_4) { - if (!Environment::getInstance()->isExperimentalBuild()) + if 
(!Environment::getInstance().isExperimentalBuild()) return; auto x = NDArrayFactory::create('c', {2, 3}); diff --git a/libnd4j/tests_cpu/layers_tests/EmptyTests.cpp b/libnd4j/tests_cpu/layers_tests/EmptyTests.cpp index 81040185d..c142fb9aa 100644 --- a/libnd4j/tests_cpu/layers_tests/EmptyTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/EmptyTests.cpp @@ -216,7 +216,7 @@ TEST_F(EmptyTests, test_shaped_empty_3) { } TEST_F(EmptyTests, test_shaped_empty_4) { - const auto shape = ConstantShapeHelper::getInstance()->vectorShapeInfo(0, sd::DataType::FLOAT32); + const auto shape = ConstantShapeHelper::getInstance().vectorShapeInfo(0, sd::DataType::FLOAT32); NDArray array(shape, true, sd::LaunchContext::defaultContext()); std::vector shapeOf({0}); diff --git a/libnd4j/tests_cpu/layers_tests/ExtraArgumentsTests.cpp b/libnd4j/tests_cpu/layers_tests/ExtraArgumentsTests.cpp index 87ac750b2..aa4a72f70 100644 --- a/libnd4j/tests_cpu/layers_tests/ExtraArgumentsTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/ExtraArgumentsTests.cpp @@ -34,7 +34,7 @@ public: }; TEST_F(ExtraArgumentsTests, Basic_Test_1) { - if (!Environment::getInstance()->isCPU()) + if (!Environment::getInstance().isCPU()) return; ExtraArguments args({1.0, 2.0, 3.0}); diff --git a/libnd4j/tests_cpu/layers_tests/FlatBuffersTests.cpp b/libnd4j/tests_cpu/layers_tests/FlatBuffersTests.cpp index bdb8bde68..437edb525 100644 --- a/libnd4j/tests_cpu/layers_tests/FlatBuffersTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/FlatBuffersTests.cpp @@ -39,15 +39,15 @@ public: Nd4jLong *fShape = new Nd4jLong[8]{2, 2, 2, 1, 2, 8192, 1, 102}; FlatBuffersTest() { - Environment::getInstance()->setDebug(false); - Environment::getInstance()->setVerbose(false); - Environment::getInstance()->setProfiling(false); + Environment::getInstance().setDebug(false); + Environment::getInstance().setVerbose(false); + Environment::getInstance().setProfiling(false); } ~FlatBuffersTest() { - Environment::getInstance()->setDebug(false); - Environment::getInstance()->setVerbose(false); - Environment::getInstance()->setProfiling(false); + Environment::getInstance().setDebug(false); + Environment::getInstance().setVerbose(false); + Environment::getInstance().setProfiling(false); delete[] cShape; delete[] fShape; @@ -676,8 +676,8 @@ TEST_F(FlatBuffersTest, Test_Stitches) { } TEST_F(FlatBuffersTest, Test_GruDynamicMnist) { - sd::Environment::getInstance()->setDebug(false); - sd::Environment::getInstance()->setVerbose(false); + sd::Environment::getInstance().setDebug(false); + sd::Environment::getInstance().setVerbose(false); auto graph = GraphExecutioner::importFromFlatBuffers("./resources/gru_dynamic_mnist.fb"); //graph->printOut(); @@ -696,8 +696,8 @@ TEST_F(FlatBuffersTest, Test_GruDynamicMnist) { } TEST_F(FlatBuffersTest, Test_Non2D_2) { - sd::Environment::getInstance()->setDebug(false); - sd::Environment::getInstance()->setVerbose(false); + sd::Environment::getInstance().setDebug(false); + sd::Environment::getInstance().setVerbose(false); sd::ops::realdiv op0; auto graph = GraphExecutioner::importFromFlatBuffers("./resources/non2d_2.fb"); @@ -711,8 +711,8 @@ TEST_F(FlatBuffersTest, Test_Non2D_2) { TEST_F(FlatBuffersTest, Test_TensorDotMisc) { - Environment::getInstance()->setVerbose(false); - Environment::getInstance()->setDebug(false); + Environment::getInstance().setVerbose(false); + Environment::getInstance().setDebug(false); auto e = NDArrayFactory::create('c', {1, 3, 16, 20}, {4.f, 6.f, 6.f, 5.f, 6.f, 4.f, 2.f, 3.f, 5.f, 5.f, 1.f, 4.f, 6.f, 3.f, 2.f, 1.f, 5.f, 4.f, 4.f, 4.f, 
4.f, 4.f, 3.f, 4.f, 2.f, 3.f, 3.f, 5.f, 3.f, 6.f, 5.f, 4.f, 4.f, 3.f, 6.f, 1.f, 2.f, 4.f, 2.f, 6.f, 4.f, 2.f, 3.f, 2.f, 3.f, 1.f, 2.f, 4.f, 3.f, 5.f, 3.f, 3.f, 5.f, 2.f, 6.f, 3.f, 4.f, 4.f, 4.f, 4.f, 6.f, 4.f, 5.f, 2.f, 5.f, 5.f, 5.f, 5.f, 2.f, 4.f, 4.f, 4.f, 5.f, 4.f, 3.f, 6.f, 3.f, 4.f, 5.f, 2.f, 5.f, 4.f, 4.f, 5.f, 4.f, 3.f, 4.f, 5.f, 5.f, 3.f, 5.f, 6.f, 6.f, 3.f, 4.f, 5.f, 7.f, 6.f, 5.f, 2.f, 4.f, 5.f, 5.f, 4.f, 5.f, 4.f, 4.f, 6.f, 3.f, 4.f, 5.f, 4.f, 6.f, 2.f, 3.f, 4.f, 3.f, 3.f, 2.f, 2.f, 3.f, 4.f, 7.f, 3.f, 5.f, 4.f, 5.f, 4.f, 4.f, 4.f, 4.f, 6.f, 2.f, 3.f, 2.f, 5.f, 5.f, 4.f, 5.f, 2.f, 2.f, 1.f, 6.f, 2.f, 2.f, 3.f, 4.f, 5.f, 5.f, 3.f, 6.f, 6.f, 4.f, 3.f, 3.f, 3.f, 3.f, 3.f, 4.f, 5.f, 4.f, 4.f, 3.f, 5.f, 2.f, 3.f, 4.f, 5.f, 3.f, 4.f, 5.f, 5.f, 8.f, 4.f, 5.f, 3.f, 3.f, 4.f, 4.f, 5.f, 4.f, 5.f, 3.f, 3.f, 7.f, 2.f, 3.f, 2.f, 6.f, 6.f, 4.f, 4.f, 3.f, 5.f, 6.f, 2.f, 4.f, 3.f, 3.f, 4.f, 5.f, 3.f, 3.f, 6.f, 5.f, 3.f, 2.f, 5.f, 4.f, 4.f, 3.f, 5.f, 5.f, 6.f, 7.f, 3.f, 4.f, 3.f, 5.f, 6.f, 7.f, 5.f, 6.f, 5.f, 7.f, 4.f, 6.f, 5.f, 5.f, 6.f, 4.f, 2.f, 5.f, 4.f, 3.f, 4.f, 1.f, 5.f, 5.f, 3.f, 2.f, 2.f, 6.f, 5.f, 5.f, 2.f, 5.f, 2.f, 4.f, 4.f, 5.f, 5.f, 4.f, 3.f, 7.f, 4.f, 5.f, 3.f, 3.f, 3.f, 2.f, 3.f, 2.f, 3.f, 3.f, 4.f, 4.f, 2.f, 4.f, 5.f, 3.f, 4.f, 5.f, 3.f, 7.f, 2.f, 1.f, 3.f, 2.f, 3.f, 2.f, 3.f, 3.f, 4.f, 3.f, 4.f, 2.f, 4.f, 4.f, 4.f, 5.f, 3.f, 5.f, 3.f, 6.f, 6.f, 5.f, 3.f, 5.f, 3.f, 4.f, 3.f, 5.f, 3.f, 5.f, 6.f, 5.f, 3.f, 4.f, 5.f, 5.f, 3.f, 3.f, 3.f, 4.f, 6.f, 4.f, 3.f, 7.f, 4.f, 4.f, 6.f, 7.f, 5.f, 5.f, 3.f, 1.f, 2.f, 5.f, 5.f, 2.f, 5.f, 7.f, 5.f, 3.f, 1.f, 4.f, 6.f, 5.f, 7.f, 5.f, 6.f, 5.f, 6.f, 4.f, 3.f, 3.f, 4.f, 3.f, 4.f, 4.f, 4.f, 4.f, 3.f, 5.f, 2.f, 4.f, 5.f, 2.f, 5.f, 5.f, 4.f, 5.f, 4.f, 5.f, 2.f, 3.f, 5.f, 3.f, 6.f, 3.f, 4.f, 5.f, 3.f, 6.f, 5.f, 5.f, 6.f, 4.f, 6.f, 7.f, 4.f, 5.f, 3.f, 5.f, 4.f, 4.f, 4.f, 2.f, 2.f, 5.f, 3.f, 5.f, 3.f, 4.f, 6.f, 3.f, 5.f, 5.f, 3.f, 5.f, 4.f, 4.f, 4.f, 5.f, 2.f, 3.f, 5.f, 4.f, 2.f, 4.f, 5.f, 4.f, 2.f, 3.f, 4.f, 4.f, 5.f, 5.f, 1.f, 4.f, 4.f, 4.f, 3.f, 4.f, 5.f, 5.f, 8.f, 4.f, 4.f, 4.f, 3.f, 6.f, 2.f, 3.f, 4.f, 4.f, 4.f, 3.f, 2.f, 3.f, 4.f, 8.f, 3.f, 5.f, 5.f, 5.f, 3.f, 3.f, 4.f, 5.f, 7.f, 3.f, 3.f, 3.f, 6.f, 6.f, 5.f, 5.f, 3.f, 4.f, 3.f, 8.f, 3.f, 4.f, 2.f, 3.f, 4.f, 4.f, 3.f, 5.f, 5.f, 3.f, 2.f, 3.f, 3.f, 3.f, 4.f, 4.f, 4.f, 6.f, 6.f, 5.f, 6.f, 4.f, 5.f, 4.f, 6.f, 4.f, 5.f, 5.f, 4.f, 7.f, 3.f, 5.f, 5.f, 3.f, 5.f, 5.f, 6.f, 4.f, 5.f, 4.f, 2.f, 7.f, 2.f, 3.f, 1.f, 4.f, 5.f, 5.f, 4.f, 4.f, 5.f, 7.f, 2.f, 3.f, 3.f, 4.f, 4.f, 5.f, 3.f, 3.f, 6.f, 6.f, 3.f, 2.f, 4.f, 3.f, 3.f, 3.f, 3.f, 4.f, 4.f, 5.f, 1.f, 2.f, 3.f, 3.f, 4.f, 5.f, 4.f, 5.f, 4.f, 5.f, 6.f, 6.f, 6.f, 6.f, 7.f, 4.f, 3.f, 4.f, 5.f, 4.f, 4.f, 2.f, 5.f, 6.f, 4.f, 2.f, 2.f, 6.f, 5.f, 5.f, 1.f, 4.f, 2.f, 3.f, 4.f, 5.f, 5.f, 4.f, 5.f, 9.f, 4.f, 6.f, 4.f, 5.f, 5.f, 3.f, 4.f, 5.f, 5.f, 5.f, 4.f, 3.f, 1.f, 3.f, 4.f, 3.f, 4.f, 4.f, 3.f, 6.f, 2.f, 3.f, 3.f, 2.f, 3.f, 3.f, 4.f, 5.f, 6.f, 5.f, 5.f, 3.f, 4.f, 5.f, 5.f, 4.f, 3.f, 4.f, 3.f, 6.f, 7.f, 6.f, 4.f, 6.f, 4.f, 3.f, 3.f, 4.f, 3.f, 5.f, 5.f, 4.f, 2.f, 3.f, 4.f, 5.f, 3.f, 4.f, 2.f, 4.f, 5.f, 3.f, 3.f, 7.f, 4.f, 2.f, 5.f, 6.f, 5.f, 5.f, 3.f, 1.f, 2.f, 4.f, 4.f, 1.f, 3.f, 6.f, 3.f, 3.f, 1.f, 4.f, 4.f, 4.f, 5.f, 3.f, 4.f, 3.f, 4.f, 2.f, 3.f, 3.f, 4.f, 3.f, 4.f, 3.f, 3.f, 4.f, 2.f, 5.f, 1.f, 3.f, 4.f, 2.f, 6.f, 4.f, 3.f, 4.f, 3.f, 3.f, 1.f, 2.f, 5.f, 2.f, 6.f, 4.f, 5.f, 6.f, 3.f, 6.f, 4.f, 4.f, 5.f, 3.f, 5.f, 6.f, 3.f, 4.f, 2.f, 4.f, 5.f, 5.f, 5.f, 2.f, 3.f, 4.f, 3.f, 5.f, 3.f, 3.f, 9.f, 6.f, 7.f, 7.f, 4.f, 4.f, 3.f, 3.f, 4.f, 4.f, 3.f, 4.f, 6.f, 5.f, 3.f, 5.f, 
5.f, 5.f, 2.f, 4.f, 6.f, 7.f, 7.f, 5.f, 3.f, 4.f, 5.f, 4.f, 4.f, 5.f, 5.f, 5.f, 8.f, 4.f, 4.f, 4.f, 3.f, 5.f, 3.f, 3.f, 4.f, 4.f, 5.f, 3.f, 3.f, 2.f, 3.f, 6.f, 2.f, 5.f, 4.f, 4.f, 3.f, 3.f, 3.f, 5.f, 7.f, 2.f, 3.f, 2.f, 5.f, 5.f, 4.f, 4.f, 2.f, 2.f, 1.f, 6.f, 1.f, 2.f, 2.f, 3.f, 5.f, 4.f, 3.f, 5.f, 5.f, 3.f, 2.f, 2.f, 2.f, 2.f, 4.f, 3.f, 4.f, 4.f, 4.f, 4.f, 5.f, 2.f, 4.f, 4.f, 5.f, 2.f, 4.f, 4.f, 5.f, 9.f, 4.f, 5.f, 4.f, 3.f, 5.f, 5.f, 6.f, 4.f, 4.f, 3.f, 3.f, 6.f, 2.f, 3.f, 2.f, 5.f, 6.f, 4.f, 4.f, 3.f, 5.f, 6.f, 4.f, 5.f, 5.f, 6.f, 7.f, 4.f, 2.f, 3.f, 5.f, 4.f, 4.f, 3.f, 5.f, 5.f, 4.f, 3.f, 4.f, 5.f, 4.f, 6.f, 3.f, 4.f, 4.f, 5.f, 6.f, 6.f, 4.f, 6.f, 6.f, 6.f, 5.f, 6.f, 6.f, 7.f, 7.f, 4.f, 3.f, 4.f, 4.f, 4.f, 5.f, 2.f, 5.f, 7.f, 5.f, 2.f, 1.f, 5.f, 5.f, 4.f, 1.f, 4.f, 1.f, 3.f, 3.f, 5.f, 4.f, 4.f, 3.f, 7.f, 3.f, 6.f, 3.f, 3.f, 4.f, 1.f, 3.f, 2.f, 3.f, 3.f, 4.f, 3.f, 1.f, 3.f, 4.f, 2.f, 4.f, 4.f, 2.f, 6.f, 1.f, 2.f, 2.f, 2.f, 3.f, 2.f, 3.f, 3.f, 4.f, 4.f, 4.f, 2.f, 4.f, 4.f, 4.f, 5.f, 5.f, 5.f, 4.f, 8.f, 5.f, 5.f, 3.f, 5.f, 3.f, 3.f, 2.f, 4.f, 3.f, 5.f, 6.f, 5.f, 3.f, 4.f, 5.f, 5.f, 3.f, 4.f, 3.f, 4.f, 8.f, 6.f, 5.f, 9.f, 6.f}); diff --git a/libnd4j/tests_cpu/layers_tests/GraphHolderTests.cpp b/libnd4j/tests_cpu/layers_tests/GraphHolderTests.cpp index f1f7195e7..a50091840 100644 --- a/libnd4j/tests_cpu/layers_tests/GraphHolderTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/GraphHolderTests.cpp @@ -33,13 +33,13 @@ public: TEST_F(GraphHolderTests, SimpleTests_1) { Graph graph; Nd4jLong graphId = 119; - GraphHolder::getInstance()->registerGraph(graphId, &graph); + GraphHolder::getInstance().registerGraph(graphId, &graph); - ASSERT_TRUE(GraphHolder::getInstance()->hasGraph(graphId)); + ASSERT_TRUE(GraphHolder::getInstance().hasGraph(graphId)); - GraphHolder::getInstance()->forgetGraph(graphId); + GraphHolder::getInstance().forgetGraph(graphId); - ASSERT_FALSE(GraphHolder::getInstance()->hasGraph(graphId)); + ASSERT_FALSE(GraphHolder::getInstance().hasGraph(graphId)); } @@ -47,18 +47,18 @@ TEST_F(GraphHolderTests, SimpleTests_1) { TEST_F(GraphHolderTests, SimpleTests_2) { auto graph = new Graph; Nd4jLong graphId = 117; - GraphHolder::getInstance()->registerGraph(graphId, graph); + GraphHolder::getInstance().registerGraph(graphId, graph); - ASSERT_TRUE(GraphHolder::getInstance()->hasGraph(graphId)); + ASSERT_TRUE(GraphHolder::getInstance().hasGraph(graphId)); - auto graph2 = GraphHolder::getInstance()->cloneGraph(graphId); + auto graph2 = GraphHolder::getInstance().cloneGraph(graphId); ASSERT_TRUE(graph != graph2); ASSERT_TRUE(graph2 != nullptr); - GraphHolder::getInstance()->forgetGraph(graphId); + GraphHolder::getInstance().forgetGraph(graphId); - ASSERT_FALSE(GraphHolder::getInstance()->hasGraph(graphId)); + ASSERT_FALSE(GraphHolder::getInstance().hasGraph(graphId)); delete graph; delete graph2; @@ -68,18 +68,18 @@ TEST_F(GraphHolderTests, SimpleTests_2) { TEST_F(GraphHolderTests, SimpleTests_3) { auto graph = new Graph; Nd4jLong graphId = 117; - GraphHolder::getInstance()->registerGraph(graphId, graph); + GraphHolder::getInstance().registerGraph(graphId, graph); - ASSERT_TRUE(GraphHolder::getInstance()->hasGraph(graphId)); + ASSERT_TRUE(GraphHolder::getInstance().hasGraph(graphId)); - auto graph2 = GraphHolder::getInstance()->cloneGraph(graphId); + auto graph2 = GraphHolder::getInstance().cloneGraph(graphId); ASSERT_TRUE(graph != graph2); ASSERT_TRUE(graph2 != nullptr); - GraphHolder::getInstance()->dropGraph(graphId); + GraphHolder::getInstance().dropGraph(graphId); - 
ASSERT_FALSE(GraphHolder::getInstance()->hasGraph(graphId)); + ASSERT_FALSE(GraphHolder::getInstance().hasGraph(graphId)); delete graph2; diff --git a/libnd4j/tests_cpu/layers_tests/GraphStateTests.cpp b/libnd4j/tests_cpu/layers_tests/GraphStateTests.cpp index 878b05712..16c1ed623 100644 --- a/libnd4j/tests_cpu/layers_tests/GraphStateTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/GraphStateTests.cpp @@ -31,13 +31,13 @@ using namespace sd::graph; class GraphStateTests : public testing::Test { public: GraphStateTests() { - Environment::getInstance()->setDebug(false); - Environment::getInstance()->setVerbose(false); + Environment::getInstance().setDebug(false); + Environment::getInstance().setVerbose(false); }; ~GraphStateTests() { - Environment::getInstance()->setDebug(false); - Environment::getInstance()->setVerbose(false); + Environment::getInstance().setDebug(false); + Environment::getInstance().setVerbose(false); } }; diff --git a/libnd4j/tests_cpu/layers_tests/GraphTests.cpp b/libnd4j/tests_cpu/layers_tests/GraphTests.cpp index 73aac9c3b..6d21b00f2 100644 --- a/libnd4j/tests_cpu/layers_tests/GraphTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/GraphTests.cpp @@ -39,8 +39,8 @@ public: int fShape[] = {2, 2, 2, 1, 2, 0, 1, 102}; */ GraphTests() { - //Environment::getInstance()->setDebug(true); - //Environment::getInstance()->setVerbose(true); + //Environment::getInstance().setDebug(true); + //Environment::getInstance().setVerbose(true); } }; @@ -910,7 +910,7 @@ TEST_F(GraphTests, TestMultiOutput1) { auto nodeB0 = new Node(OpType_TRANSFORM_SAME, transform::Abs, 2, {-2}, {11}); nodeB0->markInplace(false); - auto op = sd::ops::OpRegistrator::getInstance()->getOperation("testop2i2o"); + auto op = sd::ops::OpRegistrator::getInstance().getOperation("testop2i2o"); // this op will add 1.0 to first input, and 2.0 for second input auto nodeT = new Node(op, 11, {1, 2}, {21, 31}, {}, 0.0f); @@ -951,7 +951,7 @@ TEST_F(GraphTests, TestMultiOutput1) { } TEST_F(GraphTests, TestDivergentNode1) { - auto op = sd::ops::OpRegistrator::getInstance()->getOperation("Switch"); + auto op = sd::ops::OpRegistrator::getInstance().getOperation("Switch"); auto nodeY = new Node(op, 1); ASSERT_TRUE(nodeY->isDivergencePoint()); diff --git a/libnd4j/tests_cpu/layers_tests/HashUtilsTests.cpp b/libnd4j/tests_cpu/layers_tests/HashUtilsTests.cpp index da513f7d4..431a4bc14 100644 --- a/libnd4j/tests_cpu/layers_tests/HashUtilsTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/HashUtilsTests.cpp @@ -29,7 +29,7 @@ class HashUtilsTests : public testing::Test { TEST_F(HashUtilsTests, TestEquality1) { std::string str("Conv2D"); - Nd4jLong hash1 = sd::ops::HashHelper::getInstance()->getLongHash(str); + Nd4jLong hash1 = sd::ops::HashHelper::getInstance().getLongHash(str); ASSERT_EQ(-1637140380760460323L, hash1); } @@ -38,6 +38,6 @@ TEST_F(HashUtilsTests, TestEquality1) { TEST_F(HashUtilsTests, TestEquality2) { std::string str("switch"); - Nd4jLong hash1 = sd::ops::HashHelper::getInstance()->getLongHash(str); + Nd4jLong hash1 = sd::ops::HashHelper::getInstance().getLongHash(str); ASSERT_EQ(-1988317239813741487L, hash1); } \ No newline at end of file diff --git a/libnd4j/tests_cpu/layers_tests/JavaInteropTests.cpp b/libnd4j/tests_cpu/layers_tests/JavaInteropTests.cpp index e6992d7a2..23080161a 100644 --- a/libnd4j/tests_cpu/layers_tests/JavaInteropTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/JavaInteropTests.cpp @@ -356,8 +356,8 @@ TEST_F(JavaInteropTests, TestInplace_1) { } TEST_F(JavaInteropTests, Test_Synonyms_1) { - auto op = 
OpRegistrator::getInstance()->getOperation("RDiv"); - auto opRef = OpRegistrator::getInstance()->getOperation("reversedivide"); + auto op = OpRegistrator::getInstance().getOperation("RDiv"); + auto opRef = OpRegistrator::getInstance().getOperation("reversedivide"); std::string nameExp("reversedivide"); ASSERT_TRUE(op != nullptr); @@ -371,8 +371,8 @@ TEST_F(JavaInteropTests, Test_Synonyms_1) { } TEST_F(JavaInteropTests, Test_Synonyms_2) { - auto op = OpRegistrator::getInstance()->getOperation("RDiv"); - auto opRef = OpRegistrator::getInstance()->getOperation("reversedivide"); + auto op = OpRegistrator::getInstance().getOperation("RDiv"); + auto opRef = OpRegistrator::getInstance().getOperation("reversedivide"); std::string nameExp("reversedivide"); ASSERT_TRUE(op != nullptr); @@ -386,8 +386,8 @@ TEST_F(JavaInteropTests, Test_Synonyms_2) { } TEST_F(JavaInteropTests, Test_Synonyms_3) { - auto op = OpRegistrator::getInstance()->getOperation("RDiv"); - auto opRef = OpRegistrator::getInstance()->getOperation("reversedivide"); + auto op = OpRegistrator::getInstance().getOperation("RDiv"); + auto opRef = OpRegistrator::getInstance().getOperation("reversedivide"); std::string nameExp("reversedivide"); ASSERT_TRUE(op != nullptr); @@ -486,7 +486,7 @@ TEST_F(JavaInteropTests, test_avgpooling_edge_1) { Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) x.shapeInfo(), x.specialShapeInfo()}; Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) z.buffer(), z.specialBuffer()}; - Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) z.shapeInfo(), z.specialShapeInfo()}; + Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) z.shapeInfo(), z.special()}; auto result = execCustomOp(nullptr, op.getOpHash(), ptrsInBuffer, ptrsInShapes, 1, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, exp, 11, nullptr, 0, false); @@ -563,19 +563,19 @@ TEST_F(JavaInteropTests, Test_GraphReuse_1) { registerGraph(nullptr, 119, (Nd4jPointer) data); - ASSERT_TRUE(GraphHolder::getInstance()->hasGraph(119)); + ASSERT_TRUE(GraphHolder::getInstance().hasGraph(119)); unregisterGraph(nullptr, 119); - ASSERT_FALSE(GraphHolder::getInstance()->hasGraph(119)); + ASSERT_FALSE(GraphHolder::getInstance().hasGraph(119)); delete[] data; } TEST_F(JavaInteropTests, Test_GraphReuse_2) { - //Environment::getInstance()->setDebug(true); - //Environment::getInstance()->setVerbose(true); + //Environment::getInstance().setDebug(true); + //Environment::getInstance().setVerbose(true); auto exp0 = NDArrayFactory::create('c', {3}, {3, 3, 3}); auto exp1 = NDArrayFactory::create('c', {3}, {6, 6, 6}); @@ -585,13 +585,13 @@ TEST_F(JavaInteropTests, Test_GraphReuse_2) { uint8_t* data = sd::graph::readFlatBuffers("./resources/reduce_dim_false.fb"); // we ensure that there's no such a graph stored earlier - ASSERT_FALSE(GraphHolder::getInstance()->hasGraph(119)); + ASSERT_FALSE(GraphHolder::getInstance().hasGraph(119)); // register the graph, to call for it later registerGraph(nullptr, 119, (Nd4jPointer) data); // and ensure we're ok - ASSERT_TRUE(GraphHolder::getInstance()->hasGraph(119)); + ASSERT_TRUE(GraphHolder::getInstance().hasGraph(119)); @@ -647,7 +647,7 @@ TEST_F(JavaInteropTests, Test_GraphReuse_2) { //////// clean out unregisterGraph(nullptr, 119); - ASSERT_FALSE(GraphHolder::getInstance()->hasGraph(119)); + ASSERT_FALSE(GraphHolder::getInstance().hasGraph(119)); delete[] data; @@ -830,8 +830,8 @@ TEST_F(JavaInteropTests, Test_Reduce3_EdgeCase) { extraPointers = new Nd4jPointer[6] {nullptr, context->getCudaStream(), context->getScalarPointer(), nullptr, context->getCudaSpecialStream(), 
context->getReductionPointer()}; #endif - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), {0,1}); - auto packY = sd::ConstantTadHelper::getInstance()->tadForDimensions(y.shapeInfo(), {0,1}); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(x.shapeInfo(), {0,1}); + auto packY = sd::ConstantTadHelper::getInstance().tadForDimensions(y.shapeInfo(), {0,1}); NDArray::prepareSpecialUse({&z}, {&x, &y, &dims}); OpaqueDataBuffer xBuf(x.dataBuffer()); @@ -853,14 +853,14 @@ TEST_F(JavaInteropTests, Test_Reduce3_EdgeCase) { /* TEST_F(JavaInteropTests, Test_SimpleIf_Output) { - Environment::getInstance()->setDebug(true); - Environment::getInstance()->setVerbose(false); + Environment::getInstance().setDebug(true); + Environment::getInstance().setVerbose(false); auto pl = sd::graph::readFlatBuffers("./resources/simpleif_0_1.fb"); auto ptr = executeFlatGraph(nullptr, pl); - Environment::getInstance()->setDebug(false); - Environment::getInstance()->setVerbose(false); + Environment::getInstance().setDebug(false); + Environment::getInstance().setVerbose(false); delete[] pl; delete ptr; @@ -979,7 +979,7 @@ TEST_F(JavaInteropTests, Test_AveragePooling_FF_TF_float) { } TEST_F(JavaInteropTests, Test_Mixed_Add_1) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; auto arrayX = NDArrayFactory::create({1, 2, 3, 4}); @@ -1226,7 +1226,7 @@ TEST_F(JavaInteropTests, Test_Fastpath_7) { } TEST_F(JavaInteropTests, test_bfloat16_rng) { - if (!Environment::getInstance()->isCPU()) + if (!Environment::getInstance().isCPU()) return; auto z = NDArrayFactory::create('c', {10}); @@ -1307,7 +1307,7 @@ TEST_F(JavaInteropTests, test_expandable_array_op_1) { } TEST_F(JavaInteropTests, test_workspace_backed_arrays_1) { - if (!Environment::getInstance()->isCPU()) + if (!Environment::getInstance().isCPU()) return; auto x = NDArrayFactory::create('c', {4, 3, 4, 4}); @@ -1338,7 +1338,7 @@ TEST_F(JavaInteropTests, test_workspace_backed_arrays_1) { } TEST_F(JavaInteropTests, test_linspace_shape_1) { - if (!Environment::getInstance()->isCPU()) + if (!Environment::getInstance().isCPU()) return; sd::ops::lin_space op; diff --git a/libnd4j/tests_cpu/layers_tests/LegacyOpsCudaTests.cu b/libnd4j/tests_cpu/layers_tests/LegacyOpsCudaTests.cu index 622ce9fbb..922d94afd 100644 --- a/libnd4j/tests_cpu/layers_tests/LegacyOpsCudaTests.cu +++ b/libnd4j/tests_cpu/layers_tests/LegacyOpsCudaTests.cu @@ -48,7 +48,7 @@ TEST_F(LegacyOpsCudaTests, test_sortTad_1) { auto e = NDArrayFactory::create('c', {3, 5}, {0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, 13.f, 14.f}); int axis = 1; - auto packX = ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), axis); + auto packX = ConstantTadHelper::getInstance().tadForDimensions(x.shapeInfo(), axis); Nd4jPointer extras[2] = {nullptr, LaunchContext::defaultContext()->getCudaStream()}; diff --git a/libnd4j/tests_cpu/layers_tests/LegacyOpsTests.cpp b/libnd4j/tests_cpu/layers_tests/LegacyOpsTests.cpp index 7c7734b38..fe9c5a7a0 100644 --- a/libnd4j/tests_cpu/layers_tests/LegacyOpsTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/LegacyOpsTests.cpp @@ -394,7 +394,7 @@ TEST_F(LegacyOpsTests, BroadcastingTests_2) { int axis = 1; // shape::printShapeInfoLinear("tad shape", tad.tadOnlyShapeInfo); - auto packY = sd::ConstantTadHelper::getInstance()->tadForDimensions(y.shapeInfo(), {axis}); + auto packY = sd::ConstantTadHelper::getInstance().tadForDimensions(y.shapeInfo(), 
{axis}); NDArray::prepareSpecialUse({&y}, {&x}); @@ -466,8 +466,8 @@ TEST_F(LegacyOpsTests, Reduce3_2) { extraPointers = new Nd4jPointer[7] {nullptr, context->getCudaStream(), context->getScalarPointer(), nullptr, context->getCudaSpecialStream(), context->getReductionPointer(), context->getAllocationPointer()}; #endif - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), {1}); - auto packY = sd::ConstantTadHelper::getInstance()->tadForDimensions(y.shapeInfo(), {1}); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(x.shapeInfo(), {1}); + auto packY = sd::ConstantTadHelper::getInstance().tadForDimensions(y.shapeInfo(), {1}); NDArray::prepareSpecialUse({&z}, {&x, &y, &dim}); OpaqueDataBuffer xBuf(x.dataBuffer()); @@ -506,8 +506,8 @@ TEST_F(LegacyOpsTests, Reduce3_3) { extraPointers = new Nd4jPointer[7] {nullptr, context->getCudaStream(), context->getScalarPointer(), nullptr, context->getCudaSpecialStream(), context->getReductionPointer(), context->getAllocationPointer()}; #endif - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), {1}); - auto packY = sd::ConstantTadHelper::getInstance()->tadForDimensions(y.shapeInfo(), {1}); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(x.shapeInfo(), {1}); + auto packY = sd::ConstantTadHelper::getInstance().tadForDimensions(y.shapeInfo(), {1}); NDArray::prepareSpecialUse({&z}, {&x, &y, &dim}); OpaqueDataBuffer xBuf(x.dataBuffer()); @@ -546,8 +546,8 @@ TEST_F(LegacyOpsTests, Reduce3_4) { extraPointers = new Nd4jPointer[7] {nullptr, context->getCudaStream(), context->getScalarPointer(), nullptr, context->getCudaSpecialStream(), context->getReductionPointer(), context->getAllocationPointer()}; #endif - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), {1}); - auto packY = sd::ConstantTadHelper::getInstance()->tadForDimensions(y.shapeInfo(), {1}); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(x.shapeInfo(), {1}); + auto packY = sd::ConstantTadHelper::getInstance().tadForDimensions(y.shapeInfo(), {1}); NDArray::prepareSpecialUse({&z}, {&x, &y, &dim}); OpaqueDataBuffer xBuf(x.dataBuffer()); @@ -588,8 +588,8 @@ TEST_F(LegacyOpsTests, Reduce3_5) { extraPointers = new Nd4jPointer[7] {nullptr, context->getCudaStream(), context->getScalarPointer(), nullptr, context->getCudaSpecialStream(), context->getReductionPointer(), context->getAllocationPointer()}; #endif - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), {1}); - auto packY = sd::ConstantTadHelper::getInstance()->tadForDimensions(y.shapeInfo(), {1}); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(x.shapeInfo(), {1}); + auto packY = sd::ConstantTadHelper::getInstance().tadForDimensions(y.shapeInfo(), {1}); NDArray::prepareSpecialUse({&z}, {&x, &y, &dim}); @@ -616,8 +616,8 @@ TEST_F(LegacyOpsTests, test_Reduce3_All_1) { auto z = NDArrayFactory::create('c', {1000, 1}); auto dim = NDArrayFactory::create('c', {1}, {-1}); - auto tadPackX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), -1); - auto tadPackY = sd::ConstantTadHelper::getInstance()->tadForDimensions(y.shapeInfo(), -1); + auto tadPackX = sd::ConstantTadHelper::getInstance().tadForDimensions(x.shapeInfo(), -1); + auto tadPackY = sd::ConstantTadHelper::getInstance().tadForDimensions(y.shapeInfo(), -1); sd::LaunchContext* context = sd::LaunchContext::defaultContext(); @@ -652,7 +652,7 @@ TEST_F(LegacyOpsTests, 
test_inverse_broadcast_1) { auto e = NDArrayFactory::create('c', {3, 4}); e.assign(2.0f); - auto tadPackY = sd::ConstantTadHelper::getInstance()->tadForDimensions(y.shapeInfo(), 1); + auto tadPackY = sd::ConstantTadHelper::getInstance().tadForDimensions(y.shapeInfo(), 1); y.tickWriteDevice(); @@ -680,7 +680,7 @@ TEST_F(LegacyOpsTests, test_inverse_broadcast_2) { auto erow = e(1, {0}); erow.assign(true); - auto tadPackY = sd::ConstantTadHelper::getInstance()->tadForDimensions(y.shapeInfo(), 1); + auto tadPackY = sd::ConstantTadHelper::getInstance().tadForDimensions(y.shapeInfo(), 1); z.tickWriteDevice(); @@ -739,7 +739,7 @@ TEST_F(LegacyOpsTests, test_legacy_reduce_empty_3) { } TEST_F(LegacyOpsTests, test_legacy_reduce_empty_4) { - if (!Environment::getInstance()->isCPU()) + if (!Environment::getInstance().isCPU()) return; int a = 0; diff --git a/libnd4j/tests_cpu/layers_tests/MmapTests.cpp b/libnd4j/tests_cpu/layers_tests/MmapTests.cpp index c1df42fd1..7200dc034 100644 --- a/libnd4j/tests_cpu/layers_tests/MmapTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/MmapTests.cpp @@ -34,7 +34,7 @@ public: TEST_F(MmapTests, Test_Basic_Mmap_1) { // FIXME: we must adopt this for CUDA as well - if (!Environment::getInstance()->isCPU()) + if (!Environment::getInstance().isCPU()) return; // just 10GB diff --git a/libnd4j/tests_cpu/layers_tests/MultiDataTypeTests.cpp b/libnd4j/tests_cpu/layers_tests/MultiDataTypeTests.cpp index 803029216..79f2ffa1e 100644 --- a/libnd4j/tests_cpu/layers_tests/MultiDataTypeTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/MultiDataTypeTests.cpp @@ -56,7 +56,7 @@ TEST_F(MultiDataTypeTests, DataTypeUtils_Test_3) { //////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, Basic_Test_1) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; auto x = NDArrayFactory::create('c', {2, 3}, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f}); @@ -70,7 +70,7 @@ TEST_F(MultiDataTypeTests, Basic_Test_1) { //////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, Basic_Test_2) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; auto x = NDArrayFactory::create('c', {2, 3}, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f}); @@ -84,7 +84,7 @@ TEST_F(MultiDataTypeTests, Basic_Test_2) { //////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, Basic_Test_3) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; auto x = NDArrayFactory::create('c', {2, 3}, {0, 1, 2, 3, 4, 5}); @@ -98,7 +98,7 @@ TEST_F(MultiDataTypeTests, Basic_Test_3) { //////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, Basic_Test_4) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; auto x = NDArrayFactory::create('c', {2, 3}, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f}); @@ -112,7 +112,7 @@ TEST_F(MultiDataTypeTests, Basic_Test_4) { //////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, Basic_Test_5) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; auto x = NDArrayFactory::create('c', {2, 3}, {0, 1, 2, 3, 4, 5}); @@ -125,7 +125,7 @@ 
TEST_F(MultiDataTypeTests, Basic_Test_5) { } TEST_F(MultiDataTypeTests, Basic_Test_7) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; auto x = NDArrayFactory::create('c', {2, 3}, {0, 1, 2, 3, 4, 5}); @@ -143,7 +143,7 @@ TEST_F(MultiDataTypeTests, Basic_Test_7) { //////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, Basic_Test_6) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; auto x = NDArrayFactory::create('c', {2, 3}, {0, 1, 2, 3, 4, 5}); @@ -301,7 +301,7 @@ TEST_F(MultiDataTypeTests, ndarray_varianceNumber_test1) { //////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, ndarray_operatorPlus_test1) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2, 2}, {0, 1, 2, 3}, sd::DataType::INT64); @@ -316,7 +316,7 @@ TEST_F(MultiDataTypeTests, ndarray_operatorPlus_test1) { //////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, ndarray_operatorPlus_test2) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2, 2}, {0, 1, 2, 3}, sd::DataType::INT64); @@ -340,7 +340,7 @@ TEST_F(MultiDataTypeTests, ndarray_operatorPlus_test2) { //////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, ndarray_operatorMinus_test1) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2, 2}, {0, 1, 2, 3}, sd::DataType::INT64); @@ -355,7 +355,7 @@ TEST_F(MultiDataTypeTests, ndarray_operatorMinus_test1) { //////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, ndarray_operatorMinus_test2) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2, 2}, {0, 1, 2, 3}, sd::DataType::INT64); @@ -382,7 +382,7 @@ TEST_F(MultiDataTypeTests, ndarray_operatorMinus_test2) { //////////////////////////////////////////////////////////////////////////////// multiply TEST_F(MultiDataTypeTests, ndarray_operatorMultiply_test1) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2, 2}, {0, 1, 2, 3}, sd::DataType::INT64); @@ -397,7 +397,7 @@ TEST_F(MultiDataTypeTests, ndarray_operatorMultiply_test1) { //////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, ndarray_operatorMultiply_test2) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2, 2}, {0, 1, 2, 3}, sd::DataType::INT64); @@ -422,7 +422,7 @@ TEST_F(MultiDataTypeTests, ndarray_operatorMultiply_test2) { //////////////////////////////////////////////////////////////////////////////// multiply TEST_F(MultiDataTypeTests, ndarray_operatorDivide_test1) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2, 2}, {4, 1, 2, 3}, sd::DataType::HALF); @@ -438,7 +438,7 @@ TEST_F(MultiDataTypeTests, 
ndarray_operatorDivide_test1) { //////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, ndarray_operatorDivide_test2) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2, 2}, {1, 2, 3, 4}, sd::DataType::INT64); @@ -470,7 +470,7 @@ TEST_F(MultiDataTypeTests, ndarray_operatorDivide_test2) { //////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, ndarray_operatorPlusEqual_test1) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray scalar1('c', {0}, std::vector{4}, sd::DataType::INT32); @@ -510,7 +510,7 @@ TEST_F(MultiDataTypeTests, ndarray_operatorPlusEqual_test1) { //////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, ndarray_operatorPlusEqual_test2) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2,2}, {0, 1, 2, 3}, sd::DataType::FLOAT32); @@ -548,7 +548,7 @@ TEST_F(MultiDataTypeTests, ndarray_operatorPlusEqual_test2) { //////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, ndarray_operatorMinusEqual_test1) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray scalar1('c', {0}, std::vector{4}, sd::DataType::INT32); @@ -588,7 +588,7 @@ TEST_F(MultiDataTypeTests, ndarray_operatorMinusEqual_test1) { //////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, ndarray_operatorMinusEqual_test2) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2,2}, {0, 1, 2, 3}, sd::DataType::FLOAT32); @@ -626,7 +626,7 @@ TEST_F(MultiDataTypeTests, ndarray_operatorMinusEqual_test2) { //////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, ndarray_operatorMultiplyEqual_test1) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray scalar1('c', {0}, std::vector{3}, sd::DataType::INT32); @@ -666,7 +666,7 @@ TEST_F(MultiDataTypeTests, ndarray_operatorMultiplyEqual_test1) { //////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, ndarray_operatorMultiplyEqual_test2) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2,2}, {0, 1, 2, 3}, sd::DataType::FLOAT32); @@ -704,7 +704,7 @@ TEST_F(MultiDataTypeTests, ndarray_operatorMultiplyEqual_test2) { //////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, ndarray_operatorDivideEqual_test1) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray scalar1('c', {0}, std::vector{3}, sd::DataType::INT32); @@ -744,7 +744,7 @@ TEST_F(MultiDataTypeTests, ndarray_operatorDivideEqual_test1) { //////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, ndarray_operatorDivideEqual_test2) { - if (!Environment::getInstance()->isExperimentalBuild()) + if 
(!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2,2}, {0, 2, 4, 6}, sd::DataType::FLOAT32); @@ -782,7 +782,7 @@ TEST_F(MultiDataTypeTests, ndarray_operatorDivideEqual_test2) { //////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, ndarray_reduceNumberFloat_test1) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2,2}, {0, 1, 2, 3}, sd::DataType::INT64); @@ -819,7 +819,7 @@ TEST_F(MultiDataTypeTests, ndarray_reduceNumberFloat_test1) { //////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, ndarray_reduceNumberSame_test1) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2,2}, {0, 1, 2, 3}, sd::DataType::INT64); @@ -856,7 +856,7 @@ TEST_F(MultiDataTypeTests, ndarray_reduceNumberSame_test1) { //////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, ndarray_reduceNumberBool_test1) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2,2}, {0, -1, 2, -3}, sd::DataType::INT64); @@ -889,7 +889,7 @@ TEST_F(MultiDataTypeTests, ndarray_reduceNumberBool_test1) { ////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, ndarray_reduceNumberLong_test1) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2,2}, {0, 1, 2, 3}, sd::DataType::INT64); @@ -925,7 +925,7 @@ TEST_F(MultiDataTypeTests, ndarray_reduceNumberLong_test1) { ////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, ndarray_indexReduceNumber_test1) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2,2}, {0, 1, 2, 3}, sd::DataType::INT32); @@ -948,7 +948,7 @@ TEST_F(MultiDataTypeTests, ndarray_indexReduceNumber_test1) { ////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, ndarray_applyTransformFloat_test1) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2,2}, {0, 4, 9, 16}, sd::DataType::INT64); @@ -986,7 +986,7 @@ TEST_F(MultiDataTypeTests, ndarray_applyTransformFloat_test1) { ////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, ndarray_applyTransformSame_test1) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2,2}, {0, 1, 2, 3}, sd::DataType::INT64); @@ -1031,7 +1031,7 @@ TEST_F(MultiDataTypeTests, ndarray_applyTransformSame_test1) { ////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, ndarray_applyTransformBool_test1) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2,2}, {0, 1, 2, 3}, sd::DataType::INT64); @@ -1067,7 +1067,7 @@ TEST_F(MultiDataTypeTests, ndarray_applyTransformBool_test1) { ////////////////////////////////////////////////////////////////////////////// 
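Every hunk in this suite is the same mechanical rewrite: Environment::getInstance() now returns a reference instead of a pointer, so each -> member access becomes a plain dot. A minimal sketch of the new API shape, assuming a Meyers-style singleton (the class body is illustrative, not the actual sd::Environment):

class EnvironmentSketch {
public:
    // old form returned a pointer:  static EnvironmentSketch* getInstance();
    // new form returns a reference, so callers use '.' and never hold null:
    static EnvironmentSketch& getInstance() {
        static EnvironmentSketch instance;  // constructed once; thread-safe since C++11
        return instance;
    }

    bool isExperimentalBuild() const { return _experimental; }

private:
    EnvironmentSketch() = default;
    bool _experimental = false;
};

// the guard pattern used throughout these tests then reads:
//   if (!EnvironmentSketch::getInstance().isExperimentalBuild()) return;

A reference also cannot be deleted or reseated by callers, which is presumably part of the point of the migration.
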
TEST_F(MultiDataTypeTests, ndarray_applyTransformStrict_test1) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2,2}, {0, 1, 2, 3}, sd::DataType::HALF); @@ -1113,7 +1113,7 @@ TEST_F(MultiDataTypeTests, ndarray_applyTransformStrict_test1) { ////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, ndarray_applyPairwiseTransform_test1) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2,3}, {0, 1, 2, 3, 4, 5}, sd::DataType::INT32); @@ -1147,7 +1147,7 @@ TEST_F(MultiDataTypeTests, ndarray_applyPairwiseTransform_test1) { ////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, ndarray_applyPairwiseTransform_test2) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2,3}, {1, 1, 2, 3, 4, 5}, sd::DataType::INT32); @@ -1176,7 +1176,7 @@ TEST_F(MultiDataTypeTests, ndarray_applyPairwiseTransform_test2) { ////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, ndarray_applyBroadcast_test1) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2,3}, {10, 20, 30, 40, 50, 60}, sd::DataType::INT32); @@ -1222,7 +1222,7 @@ TEST_F(MultiDataTypeTests, ndarray_applyBroadcast_test2) { ////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, ndarray_applyTrueBroadcast_test1) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2,2}, {10, 20, 30, 40}, sd::DataType::INT32); @@ -1281,7 +1281,7 @@ TEST_F(MultiDataTypeTests, ndarray_applyTrueBroadcast_test1) { ////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, ndarray_applyTrueBroadcast_test2) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2,2}, {10, 20, 30, 40}, sd::DataType::HALF); @@ -1310,7 +1310,7 @@ TEST_F(MultiDataTypeTests, ndarray_applyTrueBroadcast_test2) { ////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, ndarray_applyScalar_test1) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2,2}, {0, 1, 2, 3}, sd::DataType::INT64); @@ -1697,7 +1697,7 @@ TEST_F(MultiDataTypeTests, applyAllReduce3_test1) { ////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, RowCol_test1) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2,3}, {1,2,3,4,5,6}, sd::DataType::INT32); @@ -1726,7 +1726,7 @@ TEST_F(MultiDataTypeTests, RowCol_test1) { ////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, RowCol_test2) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2,3}, {1,2,3,4,5,6}, sd::DataType::INT32); diff --git a/libnd4j/tests_cpu/layers_tests/NDArrayCudaBasicsTests.cu 
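The CUDA and NativeOps tests that follow migrate sd::ConstantTadHelper to the same reference-returning getInstance(), with tadForDimensions(shapeInfo, dims) handing back a cached TadPack of sub-array shapes and buffer offsets. As a rough, self-contained illustration of what a TAD ("tensor along dimension") split produces for a dense C-ordered array, restricted to trailing dimensions (stand-in types; not the real helper, which handles arbitrary dimension sets and caches the result):

#include <cstdint>
#include <vector>

struct TadView {
    std::vector<int64_t> shape;  // shape of one sub-array
    int64_t offset;              // element offset of that sub-array in the source
};

// Splitting a C-ordered, contiguous array over its trailing dimensions
// starting at `axis` yields one view per index combination of the leading axes.
std::vector<TadView> tadsForTrailingDims(const std::vector<int64_t>& shape, int axis) {
    int64_t numTads = 1;
    for (int i = 0; i < axis; i++)
        numTads *= shape[i];

    int64_t tadLength = 1;
    for (size_t i = axis; i < shape.size(); i++)
        tadLength *= shape[i];

    std::vector<TadView> tads(numTads);
    for (int64_t t = 0; t < numTads; t++) {
        tads[t].shape.assign(shape.begin() + axis, shape.end());
        tads[t].offset = t * tadLength;  // contiguous C-order layout assumed
    }
    return tads;
}

Caching matters here because reductions repeatedly request the same dimension split; that cache is exactly what the ConstantTadHelper singleton guards.
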
b/libnd4j/tests_cpu/layers_tests/NDArrayCudaBasicsTests.cu index f95705f08..01510dc91 100644 --- a/libnd4j/tests_cpu/layers_tests/NDArrayCudaBasicsTests.cu +++ b/libnd4j/tests_cpu/layers_tests/NDArrayCudaBasicsTests.cu @@ -686,7 +686,7 @@ TEST_F(NDArrayCudaBasicsTests, Test_PrimitiveCosine_3) { TEST_F(NDArrayCudaBasicsTests, TestRawBroadcast_2) { - //if (!Environment::getInstance()->isExperimentalBuild()) + //if (!Environment::getInstance().isExperimentalBuild()) // return; NDArray x = NDArrayFactory::create('c', {2,3,4}); @@ -746,7 +746,7 @@ TEST_F(NDArrayCudaBasicsTests, TestRawBroadcast_2) { TEST_F(NDArrayCudaBasicsTests, TestRawBroadcast_3) { - //if (!Environment::getInstance()->isExperimentalBuild()) + //if (!Environment::getInstance().isExperimentalBuild()) // return; NDArray x('c', {2,3,4}, sd::DataType::DOUBLE); @@ -944,7 +944,7 @@ TEST_F(NDArrayCudaBasicsTests, TestBroadcastMultiply_002) { //////////////////////////////////////////////////////////////////////////// TEST_F(NDArrayCudaBasicsTests, TestBroadcastRaw_1) { - //if (!Environment::getInstance()->isExperimentalBuild()) + //if (!Environment::getInstance().isExperimentalBuild()) // return; NDArray x('c', {2,3,4}, {100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100}, sd::DataType::INT32); diff --git a/libnd4j/tests_cpu/layers_tests/NativeOpsTests.cpp b/libnd4j/tests_cpu/layers_tests/NativeOpsTests.cpp index 3d0df208f..2f87b5099 100644 --- a/libnd4j/tests_cpu/layers_tests/NativeOpsTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/NativeOpsTests.cpp @@ -95,7 +95,7 @@ TEST_F(NativeOpsTests, ThresholdTests_1) { printf("Unsupported for cuda now.\n"); #else ::setElementThreshold(4); - ASSERT_TRUE(4 == sd::Environment::getInstance()->elementwiseThreshold()); + ASSERT_TRUE(4 == sd::Environment::getInstance().elementwiseThreshold()); #endif } @@ -107,7 +107,7 @@ TEST_F(NativeOpsTests, ThresholdTests_2) { printf("Unsupported for cuda now.\n"); #else ::setTADThreshold(4); - ASSERT_TRUE(4 == sd::Environment::getInstance()->tadThreshold()); + ASSERT_TRUE(4 == sd::Environment::getInstance().tadThreshold()); #endif } @@ -644,8 +644,8 @@ TEST_F(NativeOpsTests, Reduce3Test_4) { x.syncToDevice(); dimension.syncToHost(); int* dimensions = reinterpret_cast(dimension.buffer()); - auto tadPackX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), dimensions, dimension.lengthOf()); - auto tadPackY = sd::ConstantTadHelper::getInstance()->tadForDimensions(y.shapeInfo(), dimensions, dimension.lengthOf()); + auto tadPackX = sd::ConstantTadHelper::getInstance().tadForDimensions(x.shapeInfo(), dimensions, dimension.lengthOf()); + auto tadPackY = sd::ConstantTadHelper::getInstance().tadForDimensions(y.shapeInfo(), dimensions, dimension.lengthOf()); auto hTADShapeInfoX = tadPackX.primaryShapeInfo(); auto hTADOffsetsX = tadPackX.primaryOffsets(); @@ -963,8 +963,8 @@ TEST_F(NativeOpsTests, ScalarTadTest_1) { z.syncToDevice(); auto dimension = NDArrayFactory::create({0, 1}); auto dimensions = reinterpret_cast(dimension.buffer()); - auto tadPackX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), dimensions, dimension.lengthOf()); - auto tadPackZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(z.shapeInfo(), dimensions, dimension.lengthOf()); + auto tadPackX = sd::ConstantTadHelper::getInstance().tadForDimensions(x.shapeInfo(), dimensions, dimension.lengthOf()); + auto tadPackZ = sd::ConstantTadHelper::getInstance().tadForDimensions(z.shapeInfo(), dimensions, 
dimension.lengthOf()); OpaqueDataBuffer xBuf(x.dataBuffer()); OpaqueDataBuffer yBuf(y.dataBuffer()); @@ -1008,8 +1008,8 @@ TEST_F(NativeOpsTests, ScalarTadTest_2) { z.syncToDevice(); auto dimension = NDArrayFactory::create({0, 1}); auto dimensions = reinterpret_cast(dimension.buffer()); - auto tadPackX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), dimensions, dimension.lengthOf()); - auto tadPackZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(z.shapeInfo(), dimensions, dimension.lengthOf()); + auto tadPackX = sd::ConstantTadHelper::getInstance().tadForDimensions(x.shapeInfo(), dimensions, dimension.lengthOf()); + auto tadPackZ = sd::ConstantTadHelper::getInstance().tadForDimensions(z.shapeInfo(), dimensions, dimension.lengthOf()); z.assign(true); OpaqueDataBuffer xBuf(x.dataBuffer()); @@ -1057,8 +1057,8 @@ TEST_F(NativeOpsTests, ConcatTest_2) { int d = 0; auto dimension = NDArrayFactory::create('c', {1}, {d}); auto dimensions = reinterpret_cast(dimension.buffer()); - //auto tadPackX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), dimensions, dimension.lengthOf()); - auto tadPackZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(z.shapeInfo(), dimensions, dimension.lengthOf()); + //auto tadPackX = sd::ConstantTadHelper::getInstance().tadForDimensions(x.shapeInfo(), dimensions, dimension.lengthOf()); + auto tadPackZ = sd::ConstantTadHelper::getInstance().tadForDimensions(z.shapeInfo(), dimensions, dimension.lengthOf()); exp.linspace(1); Nd4jPointer datas[] = {x.buffer(), y.buffer()}; Nd4jPointer shapes[] = {(Nd4jPointer)x.shapeInfo(), (Nd4jPointer)y.shapeInfo()}; @@ -1125,8 +1125,8 @@ TEST_F(NativeOpsTests, PullRowsTest_1) { std::vector dims = {1}; - auto xTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), dims); - auto zTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(z.shapeInfo(), dims); + auto xTadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(x.shapeInfo(), dims); + auto zTadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(z.shapeInfo(), dims); Nd4jPointer nativeStart[2]; @@ -1230,7 +1230,7 @@ TEST_F(NativeOpsTests, ShuffleTest_1) { Nd4jPointer zShapeList[] = {(Nd4jPointer)z.shapeInfo(), (Nd4jPointer)z.shapeInfo()}; Nd4jPointer dzShapeList[] = {(Nd4jPointer)z.specialShapeInfo(), (Nd4jPointer)z.specialShapeInfo()}; int shuffleMap[] = {1, 0, 4, 3, 2}; - auto zTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), {1}); + auto zTadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(x.shapeInfo(), {1}); Nd4jPointer zListOffset[] = {(Nd4jPointer)zTadPack.platformOffsets(), (Nd4jPointer)zTadPack.platformOffsets()}; Nd4jPointer zListTADs[] = {(Nd4jPointer)zTadPack.platformShapeInfo(), (Nd4jPointer)zTadPack.platformShapeInfo()}; ::shuffle(nullptr, @@ -1411,7 +1411,7 @@ TEST_F(NativeOpsTests, SortTest_4) { auto exp = NDArrayFactory::create('c', {3, 6}, {1, 5, 5, 10, 34, 120, 3, 29, 78, 111, 138, 331, 4, 50, 56, 71, 73, 91}); std::vector dims({1}); - auto packX = ConstantTadHelper::getInstance()->tadForDimensions(sortedVals.shapeInfo(), {1}); + auto packX = ConstantTadHelper::getInstance().tadForDimensions(sortedVals.shapeInfo(), {1}); ::sortTad(nullptr, sortedVals.buffer(), sortedVals.shapeInfo(), sortedVals.specialBuffer(), sortedVals.specialShapeInfo(), dims.data(), dims.size(), packX.platformShapeInfo(), packX.platformOffsets(), false); // sortedVals.printBuffer("OUT"); diff --git 
a/libnd4j/tests_cpu/layers_tests/OmpLaunchHelperTests.cpp b/libnd4j/tests_cpu/layers_tests/OmpLaunchHelperTests.cpp index a7c7eae24..af327d653 100644 --- a/libnd4j/tests_cpu/layers_tests/OmpLaunchHelperTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/OmpLaunchHelperTests.cpp @@ -31,12 +31,12 @@ private: int ewt = 0; public: OmpLaunchHelperTests() { - this->ewt = Environment::getInstance()->elementwiseThreshold(); - Environment::getInstance()->setElementwiseThreshold(1000); + this->ewt = Environment::getInstance().elementwiseThreshold(); + Environment::getInstance().setElementwiseThreshold(1000); }; ~OmpLaunchHelperTests() { - Environment::getInstance()->setElementwiseThreshold(this->ewt); + Environment::getInstance().setElementwiseThreshold(this->ewt); } }; @@ -85,7 +85,7 @@ TEST_F(OmpLaunchHelperTests, test_tad_threads_1) { Nd4jLong numTads = 16; Nd4jLong tadLength = 16; -// nd4j_printf("TT: [%i]; ET: [%i];\n", Environment::getInstance()->tadThreshold(), Environment::getInstance()->elementwiseThreshold()); +// nd4j_printf("TT: [%i]; ET: [%i];\n", Environment::getInstance().tadThreshold(), Environment::getInstance().elementwiseThreshold()); ASSERT_EQ(1, OmpLaunchHelper::tadThreads(tadLength, numTads)); } @@ -94,7 +94,7 @@ TEST_F(OmpLaunchHelperTests, test_tad_threads_2) { return; Nd4jLong numTads = 2; - Nd4jLong tadLength = Environment::getInstance()->elementwiseThreshold(); + Nd4jLong tadLength = Environment::getInstance().elementwiseThreshold(); ASSERT_EQ(2, OmpLaunchHelper::tadThreads(tadLength, numTads)); } @@ -117,7 +117,7 @@ TEST_F(OmpLaunchHelperTests, test_tad_threads_5) { auto exp = omp_get_max_threads(); Nd4jLong numTads = exp; - Nd4jLong tadLength = Environment::getInstance()->elementwiseThreshold(); + Nd4jLong tadLength = Environment::getInstance().elementwiseThreshold(); ASSERT_EQ(exp, OmpLaunchHelper::tadThreads(tadLength, numTads)); } \ No newline at end of file diff --git a/libnd4j/tests_cpu/layers_tests/OpTrackerTests.cpp b/libnd4j/tests_cpu/layers_tests/OpTrackerTests.cpp index fe581e09e..a14971ad5 100644 --- a/libnd4j/tests_cpu/layers_tests/OpTrackerTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/OpTrackerTests.cpp @@ -42,24 +42,24 @@ public: TEST_F(OpTrackerTests, Test_Existence_1) { sd::_loader loader; - // nd4j_printf("Groups: %i; Operations: %i\n", OpTracker::getInstance()->totalGroups(), OpTracker::getInstance()->totalOperations()); + // nd4j_printf("Groups: %i; Operations: %i\n", OpTracker::getInstance().totalGroups(), OpTracker::getInstance().totalOperations()); - ASSERT_TRUE(OpTracker::getInstance()->totalGroups() > 0); - ASSERT_TRUE(OpTracker::getInstance()->totalOperations() > 0); + ASSERT_TRUE(OpTracker::getInstance().totalGroups() > 0); + ASSERT_TRUE(OpTracker::getInstance().totalOperations() > 0); - OpTracker::getInstance()->exportOperations(); + OpTracker::getInstance().exportOperations(); } TEST_F(OpTrackerTests, Test_Ops_List_1) { sd::ops::less op; - auto vec = OpRegistrator::getInstance()->getAllHashes(); + auto vec = OpRegistrator::getInstance().getAllHashes(); // nd4j_printf("Total ops: %lld\n", vec.size()); // nd4j_printf("Less hash: %lld\n", op.getOpHash()); for (const auto &v: vec) { if (v == 5484196977525668316L) { - auto op = OpRegistrator::getInstance()->getOperation(v); + auto op = OpRegistrator::getInstance().getOperation(v); // nd4j_printf("OpName: %s\n", op->getOpName()->c_str()); } } diff --git a/libnd4j/tests_cpu/layers_tests/PlaygroundTests.cpp b/libnd4j/tests_cpu/layers_tests/PlaygroundTests.cpp index 91ddcbd30..a8f45cc48 100644 --- 
a/libnd4j/tests_cpu/layers_tests/PlaygroundTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/PlaygroundTests.cpp @@ -124,12 +124,12 @@ TEST_F(PlaygroundTests, test_bert_full_1) { */ - sd::Environment::getInstance()->setProfiling(true); + sd::Environment::getInstance().setProfiling(true); auto profile = GraphProfilingHelper::profile(graph, 1); profile->printOut(); - sd::Environment::getInstance()->setProfiling(false); + sd::Environment::getInstance().setProfiling(false); delete profile; /* @@ -185,12 +185,12 @@ TEST_F(PlaygroundTests, test_bert_1) { ASSERT_EQ(z, *array); */ - sd::Environment::getInstance()->setProfiling(true); + sd::Environment::getInstance().setProfiling(true); auto profile = GraphProfilingHelper::profile(graph, 1); profile->printOut(); - sd::Environment::getInstance()->setProfiling(false); + sd::Environment::getInstance().setProfiling(false); delete profile; /* @@ -237,12 +237,12 @@ TEST_F(PlaygroundTests, test_bert_2) { ASSERT_EQ(z, *array); */ - sd::Environment::getInstance()->setProfiling(true); + sd::Environment::getInstance().setProfiling(true); auto profile = GraphProfilingHelper::profile(graph, 1); profile->printOut(); - sd::Environment::getInstance()->setProfiling(false); + sd::Environment::getInstance().setProfiling(false); delete profile; /* @@ -631,7 +631,7 @@ TEST_F(PlaygroundTests, test_s_0) { for (auto shape: shapes) { for (auto t: threads) { - sd::Environment::getInstance()->setMaxMasterThreads(t); + sd::Environment::getInstance().setMaxMasterThreads(t); auto x = NDArrayFactory::create('c', shape); auto y = NDArrayFactory::create('c', {shape[3]}); @@ -670,7 +670,7 @@ TEST_F(PlaygroundTests, test_s_1) { for (auto shape: shapes) { for (auto t: threads) { - sd::Environment::getInstance()->setMaxMasterThreads(t); + sd::Environment::getInstance().setMaxMasterThreads(t); auto x = NDArrayFactory::create('c', shape); auto y = NDArrayFactory::create('c', {shape[1]}); diff --git a/libnd4j/tests_cpu/layers_tests/ServerRelatedTests.cpp b/libnd4j/tests_cpu/layers_tests/ServerRelatedTests.cpp index e0d03731b..50c1f4b19 100644 --- a/libnd4j/tests_cpu/layers_tests/ServerRelatedTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/ServerRelatedTests.cpp @@ -29,13 +29,13 @@ using namespace sd::graph; class ServerRelatedTests : public testing::Test { public: ServerRelatedTests() { - Environment::getInstance()->setDebug(true); - Environment::getInstance()->setVerbose(true); + Environment::getInstance().setDebug(true); + Environment::getInstance().setVerbose(true); } ~ServerRelatedTests() { - Environment::getInstance()->setDebug(false); - Environment::getInstance()->setVerbose(false); + Environment::getInstance().setDebug(false); + Environment::getInstance().setVerbose(false); } }; /* @@ -89,9 +89,9 @@ TEST_F(ServerRelatedTests, Basic_Execution_Test_1) { auto exp = NDArrayFactory::create('c', {3}, {3.f, 3.f, 3.f}); - GraphHolder::getInstance()->registerGraph(11901L, oGraph); + GraphHolder::getInstance().registerGraph(11901L, oGraph); - auto cGraph = GraphHolder::getInstance()->cloneGraph(11901L); + auto cGraph = GraphHolder::getInstance().cloneGraph(11901L); ASSERT_TRUE(oGraph != cGraph); @@ -108,7 +108,7 @@ TEST_F(ServerRelatedTests, Basic_Execution_Test_1) { delete cGraph; - GraphHolder::getInstance()->dropGraphAny(11901L); + GraphHolder::getInstance().dropGraphAny(11901L); } TEST_F(ServerRelatedTests, Basic_Execution_Test_2) { @@ -120,9 +120,9 @@ TEST_F(ServerRelatedTests, Basic_Execution_Test_2) { auto input0 = NDArrayFactory::create('c', {3, 3}, {2.f,2.f,2.f, 2.f,2.f,2.f, 
2.f,2.f,2.f}); auto exp = NDArrayFactory::create('c', {3}, {6.f, 6.f, 6.f}); - GraphHolder::getInstance()->registerGraph(11902L, oGraph); + GraphHolder::getInstance().registerGraph(11902L, oGraph); - auto cGraph = GraphHolder::getInstance()->cloneGraph(11902L); + auto cGraph = GraphHolder::getInstance().cloneGraph(11902L); ASSERT_TRUE(oGraph != cGraph); @@ -148,7 +148,7 @@ TEST_F(ServerRelatedTests, Basic_Execution_Test_2) { delete cGraph; - GraphHolder::getInstance()->dropGraphAny(11902L); + GraphHolder::getInstance().dropGraphAny(11902L); } TEST_F(ServerRelatedTests, BasicExecutionTests_3) { @@ -160,7 +160,7 @@ TEST_F(ServerRelatedTests, BasicExecutionTests_3) { auto input0 = NDArrayFactory::create('c', {3, 3}, {2.f,2.f,2.f, 2.f,2.f,2.f, 2.f,2.f,2.f}); auto exp = NDArrayFactory::create('c', {3}, {6.f, 6.f, 6.f}); - GraphHolder::getInstance()->registerGraph(11903L, oGraph); + GraphHolder::getInstance().registerGraph(11903L, oGraph); // mastering InferenceRequest InferenceRequest ir(11903L); @@ -172,7 +172,7 @@ TEST_F(ServerRelatedTests, BasicExecutionTests_3) { auto fir = GetFlatInferenceRequest(fptr); - auto flatResult = GraphHolder::getInstance()->execute(fir->id(), builder, fir); + auto flatResult = GraphHolder::getInstance().execute(fir->id(), builder, fir); builder.Finish(flatResult); auto ptr = builder.GetBufferPointer(); @@ -183,6 +183,6 @@ TEST_F(ServerRelatedTests, BasicExecutionTests_3) { ASSERT_EQ(exp, *restored.at(0)->getNDArray()); - GraphHolder::getInstance()->dropGraphAny(11903L); + GraphHolder::getInstance().dropGraphAny(11903L); } #endif diff --git a/libnd4j/tests_cpu/layers_tests/SortCpuTests.cpp b/libnd4j/tests_cpu/layers_tests/SortCpuTests.cpp index 4dcedf035..a31547561 100644 --- a/libnd4j/tests_cpu/layers_tests/SortCpuTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/SortCpuTests.cpp @@ -34,7 +34,7 @@ public: TEST_F(SortCpuTests, test_linear_sort_by_key_1) { - if (!Environment::getInstance()->isCPU()) + if (!Environment::getInstance().isCPU()) return; auto k = NDArrayFactory::create('c', {10}, {1, 3, 5, 9, 0, 2, 4, 6, 7, 8}); @@ -51,7 +51,7 @@ TEST_F(SortCpuTests, test_linear_sort_by_key_1) { } TEST_F(SortCpuTests, test_linear_sort_by_val_1) { - if (!Environment::getInstance()->isCPU()) + if (!Environment::getInstance().isCPU()) return; auto k = NDArrayFactory::create('c', {10}, {1, 3, 5, 9, 0, 2, 4, 6, 7, 8}); @@ -68,7 +68,7 @@ TEST_F(SortCpuTests, test_linear_sort_by_val_1) { } TEST_F(SortCpuTests, test_tad_sort_by_key_1) { - if (!Environment::getInstance()->isCPU()) + if (!Environment::getInstance().isCPU()) return; auto k = NDArrayFactory::create('c', {2, 10}, {1, 3, 5, 9, 0, 2, 4, 6, 7, 8, 1, 3, 5, 9, 0, 2, 4, 6, 7, 8}); @@ -86,7 +86,7 @@ TEST_F(SortCpuTests, test_tad_sort_by_key_1) { } TEST_F(SortCpuTests, test_tad_sort_by_val_1) { - if (!Environment::getInstance()->isCPU()) + if (!Environment::getInstance().isCPU()) return; auto k = NDArrayFactory::create('c', {2, 10}, {1, 3, 5, 9, 0, 2, 4, 6, 7, 8, 1, 3, 5, 9, 0, 2, 4, 6, 7, 8}); diff --git a/libnd4j/tests_cpu/layers_tests/TadTests.cpp b/libnd4j/tests_cpu/layers_tests/TadTests.cpp index a2cdec003..947927bfb 100644 --- a/libnd4j/tests_cpu/layers_tests/TadTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/TadTests.cpp @@ -245,13 +245,13 @@ TEST_F(TadTests, test_tad_order_4) { TEST_F(TadTests, test_column_1) { auto x = NDArrayFactory::create('c', {5, 2}); - auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), 0); + auto tadPack = 
sd::ConstantTadHelper::getInstance().tadForDimensions(x.shapeInfo(), 0);
 
     ASSERT_EQ(1, shape::rank(tadPack.primaryShapeInfo()));
     ASSERT_EQ(5, shape::length(tadPack.primaryShapeInfo()));
     ASSERT_TRUE(shape::isVector(tadPack.primaryShapeInfo()));
 
-    auto scalarViewPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(tadPack.primaryShapeInfo(), 0);
+    auto scalarViewPack = sd::ConstantTadHelper::getInstance().tadForDimensions(tadPack.primaryShapeInfo(), 0);
 
     ASSERT_TRUE(shape::equalsStrict(tadPack.primaryShapeInfo(), scalarViewPack.primaryShapeInfo()));
 }
diff --git a/libnd4j/tests_cpu/layers_tests/ThreadsTests.cpp b/libnd4j/tests_cpu/layers_tests/ThreadsTests.cpp
index a9450e9d0..71957bc59 100644
--- a/libnd4j/tests_cpu/layers_tests/ThreadsTests.cpp
+++ b/libnd4j/tests_cpu/layers_tests/ThreadsTests.cpp
@@ -208,9 +208,31 @@ TEST_F(ThreadsTests, reduction_test_1) {
     ASSERT_EQ(8192, sum);
 }
 
+static void _code(int thread_id) {
+    auto x = NDArrayFactory::create<float>('c', {65536 * 16});
+    x.assign(1.1f);
+}
+
+TEST_F(ThreadsTests, crash_test_1) {
+    if (!Environment::getInstance().isCPU())
+        return;
+
+    for (int e = 0; e < 3; e++) {
+        std::vector<std::thread> threads(std::thread::hardware_concurrency());
+
+        // creating some threads
+        for (int t = 0; t < threads.size(); t++)
+            threads[t] = std::thread(_code, t);
+
+        // blocking until everything is finished
+        for (auto &t:threads)
+            t.join();
+    }
+}
+
 /*
 TEST_F(ThreadsTests, basic_test_1) {
-    if (!Environment::getInstance()->isCPU())
+    if (!Environment::getInstance().isCPU())
         return;
 
     auto instance = samediff::ThreadPool::getInstance();
diff --git a/libnd4j/tests_cpu/layers_tests/WorkspaceTests.cpp b/libnd4j/tests_cpu/layers_tests/WorkspaceTests.cpp
index 571db71f3..b291e5fbb 100644
--- a/libnd4j/tests_cpu/layers_tests/WorkspaceTests.cpp
+++ b/libnd4j/tests_cpu/layers_tests/WorkspaceTests.cpp
@@ -112,7 +112,7 @@ TEST_F(WorkspaceTests, ResetTest1) {
 
 TEST_F(WorkspaceTests, StretchTest1) {
-    if (!Environment::getInstance()->isCPU())
+    if (!Environment::getInstance().isCPU())
         return;
 
     Workspace workspace(128);
@@ -147,7 +147,7 @@ TEST_F(WorkspaceTests, StretchTest1) {
 }
 
 TEST_F(WorkspaceTests, NewInWorkspaceTest1) {
-    if (!Environment::getInstance()->isCPU())
+    if (!Environment::getInstance().isCPU())
         return;
 
     Workspace ws(65536);
@@ -155,11 +155,11 @@ TEST_F(WorkspaceTests, NewInWorkspaceTest1) {
     ASSERT_EQ(65536, ws.getCurrentSize());
     ASSERT_EQ(0, ws.getCurrentOffset());
 
-    ASSERT_FALSE(MemoryRegistrator::getInstance()->hasWorkspaceAttached());
+    ASSERT_FALSE(MemoryRegistrator::getInstance().hasWorkspaceAttached());
 
-    MemoryRegistrator::getInstance()->attachWorkspace(&ws);
+    MemoryRegistrator::getInstance().attachWorkspace(&ws);
 
-    ASSERT_TRUE(MemoryRegistrator::getInstance()->hasWorkspaceAttached());
+    ASSERT_TRUE(MemoryRegistrator::getInstance().hasWorkspaceAttached());
 
     auto ast = NDArrayFactory::create_('c', {5, 5});
@@ -167,10 +167,10 @@
     delete ast;
 
-    MemoryRegistrator::getInstance()->forgetWorkspace();
+    MemoryRegistrator::getInstance().forgetWorkspace();
 
-    ASSERT_FALSE(MemoryRegistrator::getInstance()->hasWorkspaceAttached());
-    ASSERT_TRUE(MemoryRegistrator::getInstance()->getWorkspace() == nullptr);
+    ASSERT_FALSE(MemoryRegistrator::getInstance().hasWorkspaceAttached());
+    ASSERT_TRUE(MemoryRegistrator::getInstance().getWorkspace() == nullptr);
 }
 
@@ -182,7 +182,7 @@ TEST_F(WorkspaceTests, NewInWorkspaceTest2) {
     ASSERT_EQ(65536, ws.getCurrentSize());
     ASSERT_EQ(0, ws.getCurrentOffset());
 
-
MemoryRegistrator::getInstance()->attachWorkspace(&ws); + MemoryRegistrator::getInstance().attachWorkspace(&ws); auto ast = NDArrayFactory::create_('c', {5, 5}, &ctx); @@ -190,11 +190,11 @@ TEST_F(WorkspaceTests, NewInWorkspaceTest2) { delete ast; - MemoryRegistrator::getInstance()->forgetWorkspace(); + MemoryRegistrator::getInstance().forgetWorkspace(); } TEST_F(WorkspaceTests, CloneTest1) { - if (!Environment::getInstance()->isCPU()) + if (!Environment::getInstance().isCPU()) return; Workspace ws(65536); @@ -250,7 +250,7 @@ TEST_F(WorkspaceTests, Test_Graph_1) { #endif TEST_F(WorkspaceTests, Test_Externalized_1) { - if (!Environment::getInstance()->isCPU()) + if (!Environment::getInstance().isCPU()) return; char buffer[10000]; diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-native-api/src/main/java/org/nd4j/nativeblas/NativeOps.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-native-api/src/main/java/org/nd4j/nativeblas/NativeOps.java index ae9ff1e94..b4bd62096 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-native-api/src/main/java/org/nd4j/nativeblas/NativeOps.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-native-api/src/main/java/org/nd4j/nativeblas/NativeOps.java @@ -1119,7 +1119,7 @@ public interface NativeOps { */ int dataTypeFromNpyHeader(Pointer numpyHeader); - OpaqueConstantDataBuffer shapeBuffer(int rank, LongPointer shape, LongPointer strides, int dtype, char order, long ews, boolean empty); + OpaqueConstantShapeBuffer shapeBuffer(int rank, LongPointer shape, LongPointer strides, int dtype, char order, long ews, boolean empty); OpaqueConstantDataBuffer constantBufferDouble(int dtype, DoublePointer data, int length); @@ -1128,9 +1128,12 @@ public interface NativeOps { Pointer getConstantDataBufferPrimary(OpaqueConstantDataBuffer dbf); Pointer getConstantDataBufferSpecial(OpaqueConstantDataBuffer dbf); long getConstantDataBufferLength(OpaqueConstantDataBuffer dbf); - long getConstantDataBufferSizeOf(OpaqueConstantDataBuffer dbf); - void deleteShapeBuffer(OpaqueConstantDataBuffer state); + Pointer getConstantShapeBufferPrimary(OpaqueConstantShapeBuffer dbf); + Pointer getConstantShapeBufferSpecial(OpaqueConstantShapeBuffer dbf); + + void deleteConstantShapeBuffer(OpaqueConstantShapeBuffer state); + void deleteConstantDataBuffer(OpaqueConstantDataBuffer state); OpaqueContext createGraphContext(int nodeId); OpaqueRandomGenerator getGraphContextRandomGenerator(OpaqueContext ptr); diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-native-api/src/main/java/org/nd4j/nativeblas/OpaqueConstantShapeBuffer.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-native-api/src/main/java/org/nd4j/nativeblas/OpaqueConstantShapeBuffer.java new file mode 100644 index 000000000..977747fb6 --- /dev/null +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-native-api/src/main/java/org/nd4j/nativeblas/OpaqueConstantShapeBuffer.java @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2015-2019 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
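On the interface side, shape buffers now come back as their own opaque handle: shapeBuffer(...) returns OpaqueConstantShapeBuffer, its host/device pointers are read through getConstantShapeBufferPrimary/getConstantShapeBufferSpecial, and the handle is released with deleteConstantShapeBuffer (the old getConstantDataBufferSizeOf/deleteShapeBuffer pair disappears with the type split). A compact sketch of that create/read/delete lifecycle with stand-in types; the real handle is opaque, and unlike this stub, its shape data is owned by libnd4j's constant cache rather than by the handle:

#include <cstdint>
#include <cstring>

struct ShapeBufferStub {      // stand-in for OpaqueConstantShapeBuffer
    int64_t* primary;         // host-side shapeInfo
    int64_t* special;         // device-side copy; null on pure CPU
};

ShapeBufferStub* makeShapeBufferStub(int rank, const int64_t* shape) {
    // the real shapeInfo packs rank, shape, strides, ews and order; the stub
    // fills only rank and shape to keep the lifecycle visible
    auto* b = new ShapeBufferStub{new int64_t[2 * rank + 4](), nullptr};
    b->primary[0] = rank;
    std::memcpy(b->primary + 1, shape, rank * sizeof(int64_t));
    return b;
}

void deleteShapeBufferStub(ShapeBufferStub* b) {
    delete[] b->primary;
    delete b;
}

Judging by the executioner changes further down, callers wrap or copy the primary pointer and then delete only the opaque handle; the cached shape data itself stays alive natively.
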
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +package org.nd4j.nativeblas; + +import org.bytedeco.javacpp.Pointer; + +/** + * + * @author saudet + */ +public class OpaqueConstantShapeBuffer extends Pointer { + public OpaqueConstantShapeBuffer(Pointer p) { super(p); } +} diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaExecutioner.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaExecutioner.java index afca1daa5..65bfa24fc 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaExecutioner.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaExecutioner.java @@ -2156,14 +2156,14 @@ public class CudaExecutioner extends DefaultOpExecutioner { if (nativeOps.lastErrorCode() != 0) throw new RuntimeException(nativeOps.lastErrorMessage()); - OpaqueConstantDataBuffer dbf = nativeOps.shapeBuffer(shape.length, new LongPointer(shape), new LongPointer(stride), dtype.toInt(), order, elementWiseStride, empty); + val dbf = nativeOps.shapeBuffer(shape.length, new LongPointer(shape), new LongPointer(stride), dtype.toInt(), order, elementWiseStride, empty); if (nativeOps.lastErrorCode() != 0) throw new RuntimeException(nativeOps.lastErrorMessage()); - val result = new CudaLongDataBuffer(nativeOps.getConstantDataBufferPrimary(dbf), nativeOps.getConstantDataBufferSpecial(dbf), Shape.shapeInfoLength(shape.length)); + val result = new CudaLongDataBuffer(nativeOps.getConstantShapeBufferPrimary(dbf), nativeOps.getConstantShapeBufferSpecial(dbf), Shape.shapeInfoLength(shape.length)); - nativeOps.deleteShapeBuffer(dbf); + nativeOps.deleteConstantShapeBuffer(dbf); return result; } @@ -2191,7 +2191,7 @@ public class CudaExecutioner extends DefaultOpExecutioner { if (nativeOps.lastErrorCode() != 0) throw new RuntimeException(nativeOps.lastErrorMessage()); - OpaqueConstantDataBuffer dbf = nativeOps.constantBufferLong(desiredType.toInt(), new LongPointer(values), values.length); + val dbf = nativeOps.constantBufferLong(desiredType.toInt(), new LongPointer(values), values.length); if (nativeOps.lastErrorCode() != 0) throw new RuntimeException(nativeOps.lastErrorMessage()); @@ -2207,7 +2207,7 @@ public class CudaExecutioner extends DefaultOpExecutioner { if (nativeOps.lastErrorCode() != 0) throw new RuntimeException(nativeOps.lastErrorMessage()); - OpaqueConstantDataBuffer dbf = nativeOps.constantBufferDouble(desiredType.toInt(), new DoublePointer(values), values.length); + val dbf = nativeOps.constantBufferDouble(desiredType.toInt(), new DoublePointer(values), values.length); if (nativeOps.lastErrorCode() != 0) throw new RuntimeException(nativeOps.lastErrorMessage()); diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCuda.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCuda.java index cc6ffc19a..38c7188f1 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCuda.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCuda.java @@ -469,6 +469,73 @@ public class Nd4jCuda extends org.nd4j.nativeblas.Nd4jCudaHelper { // #endif //DEV_TESTS_DATABUFFER_H +// Parsed from array/PointerDeallocator.h + 
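Two new headers, array/PointerDeallocator.h and array/PointerWrapper.h, join the parsed set here, and the presets later skip sd::PointerWrapper, sd::PointerDeallocator and std::shared_ptr outright, so none of this surfaces in Java: it is native ownership plumbing. A guess at the underlying idea, sketched as a minimal RAII pair (names derived from the header names, not the actual classes):

#include <memory>
#include <utility>

class DeallocatorSketch {
public:
    virtual ~DeallocatorSketch() = default;
    // default: do nothing, appropriate for cache-owned constant memory
    virtual void release(void* ptr) const { (void)ptr; }
};

class PointerWrapperSketch {
public:
    PointerWrapperSketch(void* ptr, std::shared_ptr<DeallocatorSketch> dealloc)
        : _ptr(ptr), _dealloc(std::move(dealloc)) {}
    ~PointerWrapperSketch() {
        if (_dealloc) _dealloc->release(_ptr);
    }

    void* pointer() const { return _ptr; }

private:
    void* _ptr;
    std::shared_ptr<DeallocatorSketch> _dealloc;
};

Pairing a pointer with its deallocation policy would let buffers share cleanup logic without raw-pointer constructors, which fits the ConstantDataBuffer changes below (its raw four-argument constructor disappears).
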
+/******************************************************************************* + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +// #ifndef SD_POINTERDEALLOCATOR_H_ +// #define SD_POINTERDEALLOCATOR_H_ + +// #include +// #include + + + +// #endif //SD_POINTERDEALLOCATOR_H_ + + +// Parsed from array/PointerWrapper.h + +/******************************************************************************* + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +// #ifndef SD_ARRAY_POINTER_H_ +// #define SD_ARRAY_POINTER_H_ + +// #include +// #include +// #include +// #include + // namespace sd + +// #endif //SD_ARRAY_POINTER_H_ + + // Parsed from array/ConstantDescriptor.h /******************************************************************************* @@ -581,6 +648,9 @@ public class Nd4jCuda extends org.nd4j.nativeblas.Nd4jCudaHelper { // #include // #include +// #include +// #include +// #include @Namespace("sd") @NoOffset public static class ConstantDataBuffer extends Pointer { static { Loader.load(); } /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ @@ -592,18 +662,16 @@ public class Nd4jCuda extends org.nd4j.nativeblas.Nd4jCudaHelper { return (ConstantDataBuffer)super.position(position); } - public ConstantDataBuffer(@Cast("Nd4jPointer") Pointer primary, @Cast("Nd4jPointer") Pointer special, @Cast("Nd4jLong") long numEelements, @Cast("Nd4jLong") long sizeOf) { super((Pointer)null); allocate(primary, special, numEelements, sizeOf); } - private native void allocate(@Cast("Nd4jPointer") Pointer primary, @Cast("Nd4jPointer") Pointer special, @Cast("Nd4jLong") long numEelements, @Cast("Nd4jLong") long sizeOf); public ConstantDataBuffer(@Const @ByRef ConstantDataBuffer other) { super((Pointer)null); allocate(other); } private native void allocate(@Const @ByRef ConstantDataBuffer other); public ConstantDataBuffer() { super((Pointer)null); allocate(); } private native void allocate(); - public native @Cast("Nd4jLong") long sizeOf(); - public native @Cast("Nd4jLong") long length(); + public native @Cast("uint8_t") byte sizeOf(); + public native @Cast("uint64_t") long length(); - public native @Cast("Nd4jPointer") Pointer primary(); - public native @Cast("Nd4jPointer") Pointer special(); + public native Pointer primary(); + public native Pointer special(); public native @ByRef @Name("operator =") ConstantDataBuffer put(@Const @ByRef ConstantDataBuffer other); } @@ -612,6 +680,114 @@ public class Nd4jCuda extends org.nd4j.nativeblas.Nd4jCudaHelper { // #endif //DEV_TESTS_CONSTANTDATABUFFER_H +// Parsed from array/ConstantShapeBuffer.h + +/******************************************************************************* + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +// #ifndef SD_ARRAY_CONSTANTSHAPEBUFFER_H_ +// #define SD_ARRAY_CONSTANTSHAPEBUFFER_H_ + +// #include +// #include +// #include +// #include + +@Namespace("sd") public static class ConstantShapeBuffer extends Pointer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public ConstantShapeBuffer(Pointer p) { super(p); } + /** Native array allocator. Access with {@link Pointer#position(long)}. 
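In the regenerated ConstantDataBuffer binding above, length() becomes an unsigned 64-bit count, sizeOf() a single byte, and primary()/special() plain Pointers rather than Nd4jPointer casts. The ConstantShapeBuffer class being introduced here adds a third accessor, platform(). A stub of that trio, under the plausible reading that platform() resolves to whichever pointer the current backend executes against (not the real class):

#include <cstdint>

class ConstantShapeBufferSketch {
public:
    const int64_t* primary() const { return _primary; }  // host shapeInfo
    const int64_t* special() const { return _special; }  // device copy; may be null
    const int64_t* platform() const {
#ifdef __CUDABLAS__
        return _special;   // CUDA backend executes against the device copy
#else
        return _primary;   // CPU backend uses host memory directly
#endif
    }

private:
    const int64_t* _primary = nullptr;
    const int64_t* _special = nullptr;
};

The __CUDABLAS__ macro is the same backend switch the presets define for the CUDA build.
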
*/ + public ConstantShapeBuffer(long size) { super((Pointer)null); allocateArray(size); } + private native void allocateArray(long size); + @Override public ConstantShapeBuffer position(long position) { + return (ConstantShapeBuffer)super.position(position); + } + + public ConstantShapeBuffer() { super((Pointer)null); allocate(); } + private native void allocate(); + + public native @Cast("const Nd4jLong*") LongPointer primary(); + public native @Cast("const Nd4jLong*") LongPointer special(); + public native @Cast("const Nd4jLong*") LongPointer platform(); +} + + // namespace sd + +// #endif //SD_ARRAY_CONSTANTSHAPEBUFFER_H_ + + +// Parsed from array/ConstantOffsetsBuffer.h + +/******************************************************************************* + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +// #ifndef SD_ARRAY_CONSTANTOFFSETSBUFFER_H_ +// #define SD_ARRAY_CONSTANTOFFSETSBUFFER_H_ + +// #include +// #include +// #include +// #include + +@Namespace("sd") public static class ConstantOffsetsBuffer extends Pointer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public ConstantOffsetsBuffer(Pointer p) { super(p); } + /** Native array allocator. Access with {@link Pointer#position(long)}. */ + public ConstantOffsetsBuffer(long size) { super((Pointer)null); allocateArray(size); } + private native void allocateArray(long size); + @Override public ConstantOffsetsBuffer position(long position) { + return (ConstantOffsetsBuffer)super.position(position); + } + + public ConstantOffsetsBuffer() { super((Pointer)null); allocate(); } + private native void allocate(); + + public native @Cast("const Nd4jLong*") LongPointer primary(); + public native @Cast("const Nd4jLong*") LongPointer special(); + public native @Cast("const Nd4jLong*") LongPointer platform(); +} + + // namespace sd + +// #endif //SD_ARRAY_CONSTANTOFFSETSBUFFER_H_ + + // Parsed from array/TadPack.h /******************************************************************************* @@ -637,7 +813,8 @@ public class Nd4jCuda extends org.nd4j.nativeblas.Nd4jCudaHelper { // #ifndef DEV_TESTS_TADPACK_H // #define DEV_TESTS_TADPACK_H -// #include "ConstantDataBuffer.h" +// #include +// #include @Namespace("sd") @NoOffset public static class TadPack extends Pointer { static { Loader.load(); } /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
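ConstantOffsetsBuffer, next, is the same three-accessor pattern applied to TAD offset arrays, and immediately after it TadPack's constructor trades its two untyped ConstantDataBuffer arguments for a ConstantShapeBuffer plus a ConstantOffsetsBuffer, both now taken by const reference (the generated parameter name "offets" is a typo carried over from the native signature). Sketched with stand-ins:

#include <cstdint>

struct ConstantShapeBufferStub {};    // cached shapeInfo for one TAD
struct ConstantOffsetsBufferStub {};  // cached per-TAD buffer offsets

class TadPackSketch {
public:
    // old: TadPack(ConstantDataBuffer&, ConstantDataBuffer&, Nd4jLong)
    // new: dedicated, const-correct buffer types
    TadPackSketch(const ConstantShapeBufferStub& shapes,
                  const ConstantOffsetsBufferStub& offsets,
                  int64_t numTads)
        : _shapes(shapes), _offsets(offsets), _numTads(numTads) {}

    int64_t numberOfTads() const { return _numTads; }

private:
    ConstantShapeBufferStub _shapes;
    ConstantOffsetsBufferStub _offsets;
    int64_t _numTads;
};

Typed buffers make it impossible to pass offsets where shapes are expected, which the old two-ConstantDataBuffer signature allowed.
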
*/ @@ -649,8 +826,8 @@ public class Nd4jCuda extends org.nd4j.nativeblas.Nd4jCudaHelper { return (TadPack)super.position(position); } - public TadPack(@ByRef ConstantDataBuffer shapes, @ByRef ConstantDataBuffer offets, @Cast("Nd4jLong") long numTads) { super((Pointer)null); allocate(shapes, offets, numTads); } - private native void allocate(@ByRef ConstantDataBuffer shapes, @ByRef ConstantDataBuffer offets, @Cast("Nd4jLong") long numTads); + public TadPack(@Const @ByRef ConstantShapeBuffer shapes, @Const @ByRef ConstantOffsetsBuffer offets, @Cast("Nd4jLong") long numTads) { super((Pointer)null); allocate(shapes, offets, numTads); } + private native void allocate(@Const @ByRef ConstantShapeBuffer shapes, @Const @ByRef ConstantOffsetsBuffer offets, @Cast("Nd4jLong") long numTads); public TadPack() { super((Pointer)null); allocate(); } private native void allocate(); @@ -859,7 +1036,7 @@ public class Nd4jCuda extends org.nd4j.nativeblas.Nd4jCudaHelper { public native int _blasMinorVersion(); public native Environment _blasMinorVersion(int setter); public native int _blasPatchVersion(); public native Environment _blasPatchVersion(int setter); - public static native Environment getInstance(); + public static native @ByRef Environment getInstance(); public native @Cast("bool") boolean isVerbose(); public native void setVerbose(@Cast("bool") boolean reallyVerbose); @@ -3048,9 +3225,9 @@ public native void inspectArray(@Cast("Nd4jPointer*") PointerPointer extraPointe public native void inspectArray(@Cast("Nd4jPointer*") PointerPointer extraPointers, @Cast("Nd4jPointer") Pointer buffer, @Cast("Nd4jLong*") LongBuffer shapeInfo, @Cast("Nd4jPointer") Pointer specialBuffer, @Cast("Nd4jLong*") LongBuffer specialShapeInfo, @Cast("Nd4jPointer") Pointer debugInfo); public native void inspectArray(@Cast("Nd4jPointer*") PointerPointer extraPointers, @Cast("Nd4jPointer") Pointer buffer, @Cast("Nd4jLong*") long[] shapeInfo, @Cast("Nd4jPointer") Pointer specialBuffer, @Cast("Nd4jLong*") long[] specialShapeInfo, @Cast("Nd4jPointer") Pointer debugInfo); -public native OpaqueConstantDataBuffer shapeBuffer(int rank, @Cast("Nd4jLong*") LongPointer shape, @Cast("Nd4jLong*") LongPointer strides, @Cast("sd::DataType") int dtype, char order, @Cast("Nd4jLong") long ews, @Cast("bool") boolean empty); -public native OpaqueConstantDataBuffer shapeBuffer(int rank, @Cast("Nd4jLong*") LongBuffer shape, @Cast("Nd4jLong*") LongBuffer strides, @Cast("sd::DataType") int dtype, char order, @Cast("Nd4jLong") long ews, @Cast("bool") boolean empty); -public native OpaqueConstantDataBuffer shapeBuffer(int rank, @Cast("Nd4jLong*") long[] shape, @Cast("Nd4jLong*") long[] strides, @Cast("sd::DataType") int dtype, char order, @Cast("Nd4jLong") long ews, @Cast("bool") boolean empty); +public native OpaqueConstantShapeBuffer shapeBuffer(int rank, @Cast("Nd4jLong*") LongPointer shape, @Cast("Nd4jLong*") LongPointer strides, @Cast("sd::DataType") int dtype, char order, @Cast("Nd4jLong") long ews, @Cast("bool") boolean empty); +public native OpaqueConstantShapeBuffer shapeBuffer(int rank, @Cast("Nd4jLong*") LongBuffer shape, @Cast("Nd4jLong*") LongBuffer strides, @Cast("sd::DataType") int dtype, char order, @Cast("Nd4jLong") long ews, @Cast("bool") boolean empty); +public native OpaqueConstantShapeBuffer shapeBuffer(int rank, @Cast("Nd4jLong*") long[] shape, @Cast("Nd4jLong*") long[] strides, @Cast("sd::DataType") int dtype, char order, @Cast("Nd4jLong") long ews, @Cast("bool") boolean empty); public native OpaqueConstantDataBuffer 
constantBufferLong(@Cast("sd::DataType") int dtype, @Cast("const Nd4jLong*") LongPointer data, int length); public native OpaqueConstantDataBuffer constantBufferLong(@Cast("sd::DataType") int dtype, @Cast("const Nd4jLong*") LongBuffer data, int length); @@ -3063,9 +3240,12 @@ public native OpaqueConstantDataBuffer constantBuffer(@Cast("sd::DataType") int public native @Cast("Nd4jPointer") Pointer getConstantDataBufferPrimary(OpaqueConstantDataBuffer dbf); public native @Cast("Nd4jPointer") Pointer getConstantDataBufferSpecial(OpaqueConstantDataBuffer dbf); public native @Cast("Nd4jLong") long getConstantDataBufferLength(OpaqueConstantDataBuffer dbf); -public native @Cast("Nd4jLong") long getConstantDataBufferSizeOf(OpaqueConstantDataBuffer dbf); -public native void deleteShapeBuffer(OpaqueConstantDataBuffer ptr); +public native @Cast("Nd4jPointer") Pointer getConstantShapeBufferPrimary(OpaqueConstantShapeBuffer dbf); +public native @Cast("Nd4jPointer") Pointer getConstantShapeBufferSpecial(OpaqueConstantShapeBuffer dbf); + +public native void deleteConstantShapeBuffer(OpaqueConstantShapeBuffer ptr); +public native void deleteConstantDataBuffer(OpaqueConstantDataBuffer ptr); public native OpaqueContext createGraphContext(int nodeId); public native OpaqueRandomGenerator getGraphContextRandomGenerator(OpaqueContext ptr); @@ -3639,6 +3819,7 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); // #include // #include // #include +// #include @@ -4478,7 +4659,7 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); public native void setShapeInfo(@Cast("const Nd4jLong*") LongBuffer shapeInfo, @Cast("const sd::DataType") int dtype); public native void setShapeInfo(@Cast("const Nd4jLong*") long[] shapeInfo, @Cast("const sd::DataType") int dtype); public native void setShapeInfo(@Const @ByRef ShapeDescriptor descriptor); - public native void setShapeInfo(@Const @ByRef ConstantDataBuffer shapeBuffer); + public native void setShapeInfo(@Const @ByRef ConstantShapeBuffer shapeBuffer); /** * returns absolute offset which corresponds to given sequential index @@ -7202,13 +7383,13 @@ public static final int PREALLOC_SIZE = 33554432; * @param rank the rank of the shape */ - @Namespace("shape") public static native int isMatrix(@Cast("Nd4jLong*") LongPointer shape, int rank); - @Namespace("shape") public static native int isMatrix(@Cast("Nd4jLong*") LongBuffer shape, int rank); - @Namespace("shape") public static native int isMatrix(@Cast("Nd4jLong*") long[] shape, int rank); + @Namespace("shape") public static native int isMatrix(@Cast("const Nd4jLong*") LongPointer shape, int rank); + @Namespace("shape") public static native int isMatrix(@Cast("const Nd4jLong*") LongBuffer shape, int rank); + @Namespace("shape") public static native int isMatrix(@Cast("const Nd4jLong*") long[] shape, int rank); - @Namespace("shape") public static native int isMatrix(@Cast("Nd4jLong*") LongPointer shapeInfo); - @Namespace("shape") public static native int isMatrix(@Cast("Nd4jLong*") LongBuffer shapeInfo); - @Namespace("shape") public static native int isMatrix(@Cast("Nd4jLong*") long[] shapeInfo); + @Namespace("shape") public static native int isMatrix(@Cast("const Nd4jLong*") LongPointer shapeInfo); + @Namespace("shape") public static native int isMatrix(@Cast("const Nd4jLong*") LongBuffer shapeInfo); + @Namespace("shape") public static native int isMatrix(@Cast("const Nd4jLong*") long[] shapeInfo); /** * Returns the shape portion of an information * buffer @@ -10143,7 +10324,7 @@ public static 
final int PREALLOC_SIZE = 33554432; public OpRegistrator(Pointer p) { super(p); } - public static native OpRegistrator getInstance(); + public static native @ByRef OpRegistrator getInstance(); public static native void exitHandler(); public static native void sigIntHandler(int sig); diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCudaPresets.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCudaPresets.java index 05b335c87..6ac8e133a 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCudaPresets.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCudaPresets.java @@ -34,8 +34,12 @@ import org.bytedeco.javacpp.tools.InfoMapper; value = {@Platform(define = "LIBND4J_ALL_OPS", include = { "array/DataType.h", "array/DataBuffer.h", + "array/PointerDeallocator.h", + "array/PointerWrapper.h", "array/ConstantDescriptor.h", "array/ConstantDataBuffer.h", + "array/ConstantShapeBuffer.h", + "array/ConstantOffsetsBuffer.h", "array/TadPack.h", "execution/ErrorReference.h", "execution/Engine.h", @@ -166,6 +170,8 @@ public class Nd4jCudaPresets implements LoadEnabled, InfoMapper { .put(new Info("OpaqueVariablesSet").pointerTypes("OpaqueVariablesSet")) .put(new Info("OpaqueVariable").pointerTypes("OpaqueVariable")) .put(new Info("OpaqueConstantDataBuffer").pointerTypes("OpaqueConstantDataBuffer")) + .put(new Info("OpaqueConstantShapeBuffer").pointerTypes("OpaqueConstantShapeBuffer")) + .put(new Info("OpaqueConstantOffsetsBuffer").pointerTypes("OpaqueConstantOffsetsBuffer")) .put(new Info("OpaqueContext").pointerTypes("OpaqueContext")) .put(new Info("OpaqueRandomGenerator").pointerTypes("OpaqueRandomGenerator")) .put(new Info("OpaqueLaunchContext").pointerTypes("OpaqueLaunchContext")) @@ -187,7 +193,7 @@ public class Nd4jCudaPresets implements LoadEnabled, InfoMapper { infoMap.put(new Info("__CUDACC__", "MAX_UINT", "HAVE_MKLDNN").define(false)) .put(new Info("__JAVACPP_HACK__", "LIBND4J_ALL_OPS","__CUDABLAS__").define(true)) .put(new Info("std::initializer_list", "cnpy::NpyArray", "sd::NDArray::applyLambda", "sd::NDArray::applyPairwiseLambda", - "sd::graph::FlatResult", "sd::graph::FlatVariable", "sd::NDArray::subarray").skip()) + "sd::graph::FlatResult", "sd::graph::FlatVariable", "sd::NDArray::subarray", "std::shared_ptr", "sd::PointerWrapper", "sd::PointerDeallocator").skip()) .put(new Info("std::string").annotations("@StdString").valueTypes("BytePointer", "String") .pointerTypes("@Cast({\"char*\", \"std::string*\"}) BytePointer")) .put(new Info("std::pair").pointerTypes("IntIntPair").define()) diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/linalg/cpu/nativecpu/ops/NativeOpExecutioner.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/linalg/cpu/nativecpu/ops/NativeOpExecutioner.java index 508144f26..5e12f1dfd 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/linalg/cpu/nativecpu/ops/NativeOpExecutioner.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/linalg/cpu/nativecpu/ops/NativeOpExecutioner.java @@ -2018,13 +2018,13 @@ public class NativeOpExecutioner extends DefaultOpExecutioner { @Override public DataBuffer createShapeInfo(long[] shape, long[] stride, long elementWiseStride, char order, DataType dtype, boolean empty) { - OpaqueConstantDataBuffer dbf = 
loop.shapeBuffer(shape.length, new LongPointer(shape), new LongPointer(stride), dtype.toInt(), order, elementWiseStride, empty); + val dbf = loop.shapeBuffer(shape.length, new LongPointer(shape), new LongPointer(stride), dtype.toInt(), order, elementWiseStride, empty); if (loop.lastErrorCode() != 0) throw new RuntimeException(loop.lastErrorMessage()); - val result = new LongBuffer(loop.getConstantDataBufferPrimary(dbf), Shape.shapeInfoLength(shape.length)); + val result = new LongBuffer(loop.getConstantShapeBufferPrimary(dbf), Shape.shapeInfoLength(shape.length)); - loop.deleteShapeBuffer(dbf); + loop.deleteConstantShapeBuffer(dbf); return result; } diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java index f17f11093..2926c06b9 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java @@ -491,6 +491,73 @@ public class Nd4jCpu extends org.nd4j.nativeblas.Nd4jCpuHelper { // #endif //DEV_TESTS_DATABUFFER_H +// Parsed from array/PointerDeallocator.h + +/******************************************************************************* + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +// #ifndef SD_POINTERDEALLOCATOR_H_ +// #define SD_POINTERDEALLOCATOR_H_ + +// #include +// #include + + + +// #endif //SD_POINTERDEALLOCATOR_H_ + + +// Parsed from array/PointerWrapper.h + +/******************************************************************************* + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +// #ifndef SD_ARRAY_POINTER_H_ +// #define SD_ARRAY_POINTER_H_ + +// #include +// #include +// #include +// #include + // namespace sd + +// #endif //SD_ARRAY_POINTER_H_ + + // Parsed from array/ConstantDataBuffer.h /******************************************************************************* @@ -517,6 +584,9 @@ public class Nd4jCpu extends org.nd4j.nativeblas.Nd4jCpuHelper { // #include // #include +// #include +// #include +// #include @Namespace("sd") @NoOffset public static class ConstantDataBuffer extends Pointer { static { Loader.load(); } /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ @@ -528,18 +598,16 @@ public class Nd4jCpu extends org.nd4j.nativeblas.Nd4jCpuHelper { return (ConstantDataBuffer)super.position(position); } - public ConstantDataBuffer(@Cast("Nd4jPointer") Pointer primary, @Cast("Nd4jPointer") Pointer special, @Cast("Nd4jLong") long numEelements, @Cast("Nd4jLong") long sizeOf) { super((Pointer)null); allocate(primary, special, numEelements, sizeOf); } - private native void allocate(@Cast("Nd4jPointer") Pointer primary, @Cast("Nd4jPointer") Pointer special, @Cast("Nd4jLong") long numEelements, @Cast("Nd4jLong") long sizeOf); public ConstantDataBuffer(@Const @ByRef ConstantDataBuffer other) { super((Pointer)null); allocate(other); } private native void allocate(@Const @ByRef ConstantDataBuffer other); public ConstantDataBuffer() { super((Pointer)null); allocate(); } private native void allocate(); - public native @Cast("Nd4jLong") long sizeOf(); - public native @Cast("Nd4jLong") long length(); + public native @Cast("uint8_t") byte sizeOf(); + public native @Cast("uint64_t") long length(); - public native @Cast("Nd4jPointer") Pointer primary(); - public native @Cast("Nd4jPointer") Pointer special(); + public native Pointer primary(); + public native Pointer special(); public native @ByRef @Name("operator =") ConstantDataBuffer put(@Const @ByRef ConstantDataBuffer other); } @@ -548,6 +616,114 @@ public class Nd4jCpu extends org.nd4j.nativeblas.Nd4jCpuHelper { // #endif //DEV_TESTS_CONSTANTDATABUFFER_H +// Parsed from array/ConstantShapeBuffer.h + +/******************************************************************************* + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +// #ifndef SD_ARRAY_CONSTANTSHAPEBUFFER_H_ +// #define SD_ARRAY_CONSTANTSHAPEBUFFER_H_ + +// #include +// #include +// #include +// #include + +@Namespace("sd") public static class ConstantShapeBuffer extends Pointer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public ConstantShapeBuffer(Pointer p) { super(p); } + /** Native array allocator. 
Access with {@link Pointer#position(long)}. */ + public ConstantShapeBuffer(long size) { super((Pointer)null); allocateArray(size); } + private native void allocateArray(long size); + @Override public ConstantShapeBuffer position(long position) { + return (ConstantShapeBuffer)super.position(position); + } + + public ConstantShapeBuffer() { super((Pointer)null); allocate(); } + private native void allocate(); + + public native @Cast("const Nd4jLong*") LongPointer primary(); + public native @Cast("const Nd4jLong*") LongPointer special(); + public native @Cast("const Nd4jLong*") LongPointer platform(); +} + + // namespace sd + +// #endif //SD_ARRAY_CONSTANTSHAPEBUFFER_H_ + + +// Parsed from array/ConstantOffsetsBuffer.h + +/******************************************************************************* + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +// #ifndef SD_ARRAY_CONSTANTOFFSETSBUFFER_H_ +// #define SD_ARRAY_CONSTANTOFFSETSBUFFER_H_ + +// #include +// #include +// #include +// #include + +@Namespace("sd") public static class ConstantOffsetsBuffer extends Pointer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public ConstantOffsetsBuffer(Pointer p) { super(p); } + /** Native array allocator. Access with {@link Pointer#position(long)}. */ + public ConstantOffsetsBuffer(long size) { super((Pointer)null); allocateArray(size); } + private native void allocateArray(long size); + @Override public ConstantOffsetsBuffer position(long position) { + return (ConstantOffsetsBuffer)super.position(position); + } + + public ConstantOffsetsBuffer() { super((Pointer)null); allocate(); } + private native void allocate(); + + public native @Cast("const Nd4jLong*") LongPointer primary(); + public native @Cast("const Nd4jLong*") LongPointer special(); + public native @Cast("const Nd4jLong*") LongPointer platform(); +} + + // namespace sd + +// #endif //SD_ARRAY_CONSTANTOFFSETSBUFFER_H_ + + // Parsed from array/ConstantDescriptor.h /******************************************************************************* @@ -659,7 +835,8 @@ public class Nd4jCpu extends org.nd4j.nativeblas.Nd4jCpuHelper { // #ifndef DEV_TESTS_TADPACK_H // #define DEV_TESTS_TADPACK_H -// #include "ConstantDataBuffer.h" +// #include +// #include @Namespace("sd") @NoOffset public static class TadPack extends Pointer { static { Loader.load(); } /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ @@ -671,8 +848,8 @@ public class Nd4jCpu extends org.nd4j.nativeblas.Nd4jCpuHelper { return (TadPack)super.position(position); } - public TadPack(@ByRef ConstantDataBuffer shapes, @ByRef ConstantDataBuffer offets, @Cast("Nd4jLong") long numTads) { super((Pointer)null); allocate(shapes, offets, numTads); } - private native void allocate(@ByRef ConstantDataBuffer shapes, @ByRef ConstantDataBuffer offets, @Cast("Nd4jLong") long numTads); + public TadPack(@Const @ByRef ConstantShapeBuffer shapes, @Const @ByRef ConstantOffsetsBuffer offets, @Cast("Nd4jLong") long numTads) { super((Pointer)null); allocate(shapes, offets, numTads); } + private native void allocate(@Const @ByRef ConstantShapeBuffer shapes, @Const @ByRef ConstantOffsetsBuffer offets, @Cast("Nd4jLong") long numTads); public TadPack() { super((Pointer)null); allocate(); } private native void allocate(); @@ -863,7 +1040,7 @@ public class Nd4jCpu extends org.nd4j.nativeblas.Nd4jCpuHelper { public native int _blasMinorVersion(); public native Environment _blasMinorVersion(int setter); public native int _blasPatchVersion(); public native Environment _blasPatchVersion(int setter); - public static native Environment getInstance(); + public static native @ByRef Environment getInstance(); public native @Cast("bool") boolean isVerbose(); public native void setVerbose(@Cast("bool") boolean reallyVerbose); @@ -3052,9 +3229,9 @@ public native void inspectArray(@Cast("Nd4jPointer*") PointerPointer extraPointe public native void inspectArray(@Cast("Nd4jPointer*") PointerPointer extraPointers, @Cast("Nd4jPointer") Pointer buffer, @Cast("Nd4jLong*") LongBuffer shapeInfo, @Cast("Nd4jPointer") Pointer specialBuffer, @Cast("Nd4jLong*") LongBuffer specialShapeInfo, @Cast("Nd4jPointer") Pointer debugInfo); public native void inspectArray(@Cast("Nd4jPointer*") PointerPointer extraPointers, @Cast("Nd4jPointer") Pointer buffer, @Cast("Nd4jLong*") long[] shapeInfo, @Cast("Nd4jPointer") Pointer specialBuffer, @Cast("Nd4jLong*") long[] specialShapeInfo, @Cast("Nd4jPointer") Pointer debugInfo); -public native OpaqueConstantDataBuffer shapeBuffer(int rank, @Cast("Nd4jLong*") LongPointer shape, @Cast("Nd4jLong*") LongPointer strides, @Cast("sd::DataType") int dtype, char order, @Cast("Nd4jLong") long ews, @Cast("bool") boolean empty); -public native OpaqueConstantDataBuffer shapeBuffer(int rank, @Cast("Nd4jLong*") LongBuffer shape, @Cast("Nd4jLong*") LongBuffer strides, @Cast("sd::DataType") int dtype, char order, @Cast("Nd4jLong") long ews, @Cast("bool") boolean empty); -public native OpaqueConstantDataBuffer shapeBuffer(int rank, @Cast("Nd4jLong*") long[] shape, @Cast("Nd4jLong*") long[] strides, @Cast("sd::DataType") int dtype, char order, @Cast("Nd4jLong") long ews, @Cast("bool") boolean empty); +public native OpaqueConstantShapeBuffer shapeBuffer(int rank, @Cast("Nd4jLong*") LongPointer shape, @Cast("Nd4jLong*") LongPointer strides, @Cast("sd::DataType") int dtype, char order, @Cast("Nd4jLong") long ews, @Cast("bool") boolean empty); +public native OpaqueConstantShapeBuffer shapeBuffer(int rank, @Cast("Nd4jLong*") LongBuffer shape, @Cast("Nd4jLong*") LongBuffer strides, @Cast("sd::DataType") int dtype, char order, @Cast("Nd4jLong") long ews, @Cast("bool") boolean empty); +public native OpaqueConstantShapeBuffer shapeBuffer(int rank, @Cast("Nd4jLong*") long[] shape, @Cast("Nd4jLong*") long[] strides, @Cast("sd::DataType") int dtype, char order, @Cast("Nd4jLong") long ews, @Cast("bool") boolean empty); public native OpaqueConstantDataBuffer 
constantBufferLong(@Cast("sd::DataType") int dtype, @Cast("const Nd4jLong*") LongPointer data, int length); public native OpaqueConstantDataBuffer constantBufferLong(@Cast("sd::DataType") int dtype, @Cast("const Nd4jLong*") LongBuffer data, int length); @@ -3067,9 +3244,12 @@ public native OpaqueConstantDataBuffer constantBuffer(@Cast("sd::DataType") int public native @Cast("Nd4jPointer") Pointer getConstantDataBufferPrimary(OpaqueConstantDataBuffer dbf); public native @Cast("Nd4jPointer") Pointer getConstantDataBufferSpecial(OpaqueConstantDataBuffer dbf); public native @Cast("Nd4jLong") long getConstantDataBufferLength(OpaqueConstantDataBuffer dbf); -public native @Cast("Nd4jLong") long getConstantDataBufferSizeOf(OpaqueConstantDataBuffer dbf); -public native void deleteShapeBuffer(OpaqueConstantDataBuffer ptr); +public native @Cast("Nd4jPointer") Pointer getConstantShapeBufferPrimary(OpaqueConstantShapeBuffer dbf); +public native @Cast("Nd4jPointer") Pointer getConstantShapeBufferSpecial(OpaqueConstantShapeBuffer dbf); + +public native void deleteConstantShapeBuffer(OpaqueConstantShapeBuffer ptr); +public native void deleteConstantDataBuffer(OpaqueConstantDataBuffer ptr); public native OpaqueContext createGraphContext(int nodeId); public native OpaqueRandomGenerator getGraphContextRandomGenerator(OpaqueContext ptr); @@ -3643,6 +3823,7 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); // #include // #include // #include +// #include @@ -4482,7 +4663,7 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); public native void setShapeInfo(@Cast("const Nd4jLong*") LongBuffer shapeInfo, @Cast("const sd::DataType") int dtype); public native void setShapeInfo(@Cast("const Nd4jLong*") long[] shapeInfo, @Cast("const sd::DataType") int dtype); public native void setShapeInfo(@Const @ByRef ShapeDescriptor descriptor); - public native void setShapeInfo(@Const @ByRef ConstantDataBuffer shapeBuffer); + public native void setShapeInfo(@Const @ByRef ConstantShapeBuffer shapeBuffer); /** * returns absolute offset which corresponds to given sequential index @@ -7206,13 +7387,13 @@ public static final int PREALLOC_SIZE = 33554432; * @param rank the rank of the shape */ - @Namespace("shape") public static native int isMatrix(@Cast("Nd4jLong*") LongPointer shape, int rank); - @Namespace("shape") public static native int isMatrix(@Cast("Nd4jLong*") LongBuffer shape, int rank); - @Namespace("shape") public static native int isMatrix(@Cast("Nd4jLong*") long[] shape, int rank); + @Namespace("shape") public static native int isMatrix(@Cast("const Nd4jLong*") LongPointer shape, int rank); + @Namespace("shape") public static native int isMatrix(@Cast("const Nd4jLong*") LongBuffer shape, int rank); + @Namespace("shape") public static native int isMatrix(@Cast("const Nd4jLong*") long[] shape, int rank); - @Namespace("shape") public static native int isMatrix(@Cast("Nd4jLong*") LongPointer shapeInfo); - @Namespace("shape") public static native int isMatrix(@Cast("Nd4jLong*") LongBuffer shapeInfo); - @Namespace("shape") public static native int isMatrix(@Cast("Nd4jLong*") long[] shapeInfo); + @Namespace("shape") public static native int isMatrix(@Cast("const Nd4jLong*") LongPointer shapeInfo); + @Namespace("shape") public static native int isMatrix(@Cast("const Nd4jLong*") LongBuffer shapeInfo); + @Namespace("shape") public static native int isMatrix(@Cast("const Nd4jLong*") long[] shapeInfo); /** * Returns the shape portion of an information * buffer @@ -10072,10 +10253,10 @@ public static 
final int ALL_FLOATS =BFLOAT16; // #endif -public static native @MemberGetter int ELEMENT_THRESHOLD(); -public static final int ELEMENT_THRESHOLD = ELEMENT_THRESHOLD(); -public static native @MemberGetter int TAD_THRESHOLD(); -public static final int TAD_THRESHOLD = TAD_THRESHOLD(); +public static native @MemberGetter double ELEMENT_THRESHOLD(); +public static final double ELEMENT_THRESHOLD = ELEMENT_THRESHOLD(); +public static native @MemberGetter double TAD_THRESHOLD(); +public static final double TAD_THRESHOLD = TAD_THRESHOLD(); // #define SHAPELIST(...) new ShapeList({__VA_ARGS__}, block.workspace() != nullptr) @@ -10085,8 +10266,8 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD(); // #define PRINT_FIRST(...) printf(__VA_ARGS__); fflush(stdout) // #endif -// #define DEBUG_CALL(STREAM) if (sd::Environment::getInstance()->isDebug()) { cudaError_t tRes = cudaStreamSynchronize(*STREAM); checkCudaErrors(tRes); if (tRes != 0) { throw std::runtime_error(); }; } -// #define DEBUG_KERNEL(STREAM, OP_NUM) if (sd::Environment::getInstance()->isDebug()) { cudaError_t tRes = cudaStreamSynchronize(*STREAM); checkCudaErrors(tRes); if (tRes != 0) {std::string tFile(__FILE__); std::string tOp = "Kernel OpNum failed: [" + sd::StringUtils::valueToString(OP_NUM) + std::string("]; File: ") + tFile + std::string(":") + sd::StringUtils::valueToString(__LINE__); throw std::runtime_error(tOp.c_str()); }; } +// #define DEBUG_CALL(STREAM) if (sd::Environment::getInstance().isDebug()) { cudaError_t tRes = cudaStreamSynchronize(*STREAM); checkCudaErrors(tRes); if (tRes != 0) { throw std::runtime_error(); }; } +// #define DEBUG_KERNEL(STREAM, OP_NUM) if (sd::Environment::getInstance().isDebug()) { cudaError_t tRes = cudaStreamSynchronize(*STREAM); checkCudaErrors(tRes); if (tRes != 0) {std::string tFile(__FILE__); std::string tOp = "Kernel OpNum failed: [" + sd::StringUtils::valueToString(OP_NUM) + std::string("]; File: ") + tFile + std::string(":") + sd::StringUtils::valueToString(__LINE__); throw std::runtime_error(tOp.c_str()); }; } // #define LAUNCH(A, B, C, D) <<>> @@ -11067,7 +11248,7 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD(); // #define _EXPAND_OP_CALL_1(NAME, TYPE, PARAMZ, NUM_A, TYPE_A) NAME>PARAMZ; // #define _EXPAND_OP_DIRECT(PARAMZ, NUM_A, TYPE_A) case NUM_A: { z = TYPE_A::op PARAMZ; break; } -// #define _EXPAND_OP_CALL_T(TYPE, NUM_A, TYPE_A) OpTracker::getInstance()->storeOperation(TYPE, #TYPE_A, NUM_A); +// #define _EXPAND_OP_CALL_T(TYPE, NUM_A, TYPE_A) OpTracker::getInstance().storeOperation(TYPE, #TYPE_A, NUM_A); // #define _EXPAND_FACTORY_CALL(TYPE, LAYER_ID, LAYER_NAME, ACTIVATION_ID, ACTIVATION_NAME) if (activationNum == ACTIVATION_ID && layerNum == LAYER_ID) { return new LAYER_NAME>(); }; @@ -11209,7 +11390,7 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD(); // struct __registrator_##NAME { // __registrator_##NAME() { // OpName *ptr = new OpName(); -// OpRegistrator::getInstance()->registerOperation(ptr); +// OpRegistrator::getInstance().registerOperation(ptr); // } // }; // static sd::ops::__registrator_##NAME zzz_register_opd_##NAME; @@ -11277,7 +11458,7 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD(); // auto shapeList = SHAPELIST(); // auto opLimit = this->getOpDescriptor()->getNumberOfOutputs() < 1 ? 
block.width() : this->getOpDescriptor()->getNumberOfOutputs(); // for (int e = 0; e < opLimit; e++) { -// auto newshape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inputShape->at(e)), shape::order(inputShape->at(e)), shape::rank(inputShape->at(e)), shape::shapeOf(inputShape->at(e))); +// auto newshape = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inputShape->at(e)), shape::order(inputShape->at(e)), shape::rank(inputShape->at(e)), shape::shapeOf(inputShape->at(e))); // shapeList->push_back(newshape); // } // return shapeList; @@ -11288,14 +11469,14 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD(); // #define DECLARE_SYN(NAME, ORIGINAL) template // struct __registratorSynonym_##NAME { // __registratorSynonym_##NAME(const char *name, const char *oname) { -// auto ptr = reinterpret_cast(OpRegistrator::getInstance()->getOperation(oname)); +// auto ptr = reinterpret_cast(OpRegistrator::getInstance().getOperation(oname)); // if (ptr == nullptr) { // std::string newName(name); // std::string oldName(oname); -// OpRegistrator::getInstance()->updateMSVC(sd::ops::HashHelper::getInstance()->getLongHash(newName), oldName); +// OpRegistrator::getInstance().updateMSVC(sd::ops::HashHelper::getInstance().getLongHash(newName), oldName); // return; // } -// OpRegistrator::getInstance()->registerOperation(name, ptr); +// OpRegistrator::getInstance().registerOperation(name, ptr); // } // }; // static sd::ops::__registratorSynonym_##NAME zzz_register_opd_##NAME(#NAME, #ORIGINAL) @@ -11339,7 +11520,7 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD(); // auto shapeList = SHAPELIST(); // auto opLimit = this->getOpDescriptor()->getNumberOfOutputs() < 1 ? block.width() : this->getOpDescriptor()->getNumberOfOutputs(); // for (int e = 0; e < opLimit; e++) { -// auto newshape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inputShape->at(e)), shape::order(inputShape->at(e)), shape::rank(inputShape->at(e)), shape::shapeOf(inputShape->at(e))); +// auto newshape = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inputShape->at(e)), shape::order(inputShape->at(e)), shape::rank(inputShape->at(e)), shape::shapeOf(inputShape->at(e))); // shapeList->push_back(newshape); // } // return shapeList; @@ -11434,12 +11615,12 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD(); // #else -// #define ALLOCATE(VARIABLE, WORKSPACE, LENGTH, TT) if (WORKSPACE == nullptr) {VARIABLE = new TT[LENGTH]; sd::memory::MemoryTracker::getInstance()->countIn(sd::memory::MemoryType::HOST, VARIABLE, LENGTH * sizeof(TT)); } else {VARIABLE = reinterpret_cast(WORKSPACE->allocateBytes(LENGTH * sizeof(TT))); }; memset(VARIABLE, 0, LENGTH * sizeof(TT)); -// #define RELEASE(VARIABLE, WORKSPACE) if (WORKSPACE == nullptr) { sd::memory::MemoryTracker::getInstance()->countOut(VARIABLE); delete[] VARIABLE;}; +// #define ALLOCATE(VARIABLE, WORKSPACE, LENGTH, TT) if (WORKSPACE == nullptr) {VARIABLE = new TT[LENGTH]; sd::memory::MemoryTracker::getInstance().countIn(sd::memory::MemoryType::HOST, VARIABLE, LENGTH * sizeof(TT)); } else {VARIABLE = reinterpret_cast(WORKSPACE->allocateBytes(LENGTH * sizeof(TT))); }; memset(VARIABLE, 0, LENGTH * sizeof(TT)); +// #define RELEASE(VARIABLE, WORKSPACE) if (WORKSPACE == nullptr) { sd::memory::MemoryTracker::getInstance().countOut(VARIABLE); delete[] VARIABLE;}; // #endif -// #define CONSTANT(SHAPE) ConstantShapeHelper::getInstance()->createFromExisting(SHAPE, block.workspace()) +// #define
CONSTANT(SHAPE) ConstantShapeHelper::getInstance().createFromExisting(SHAPE, block.workspace()) @@ -12372,7 +12553,7 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD(); public OpRegistrator(Pointer p) { super(p); } - public static native OpRegistrator getInstance(); + public static native @ByRef OpRegistrator getInstance(); public static native void exitHandler(); public static native void sigIntHandler(int sig); diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpuPresets.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpuPresets.java index c6e57e876..f10410314 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpuPresets.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpuPresets.java @@ -37,7 +37,11 @@ import java.util.Scanner; "memory/MemoryType.h", "array/DataType.h", "array/DataBuffer.h", + "array/PointerDeallocator.h", + "array/PointerWrapper.h", "array/ConstantDataBuffer.h", + "array/ConstantShapeBuffer.h", + "array/ConstantOffsetsBuffer.h", "array/ConstantDescriptor.h", "array/TadPack.h", "execution/ErrorReference.h", @@ -164,6 +168,8 @@ public class Nd4jCpuPresets implements InfoMapper, BuildEnabled { .put(new Info("OpaqueVariablesSet").pointerTypes("OpaqueVariablesSet")) .put(new Info("OpaqueVariable").pointerTypes("OpaqueVariable")) .put(new Info("OpaqueConstantDataBuffer").pointerTypes("OpaqueConstantDataBuffer")) + .put(new Info("OpaqueConstantShapeBuffer").pointerTypes("OpaqueConstantShapeBuffer")) + .put(new Info("OpaqueConstantOffsetsBuffer").pointerTypes("OpaqueConstantOffsetsBuffer")) .put(new Info("OpaqueDataBuffer").pointerTypes("OpaqueDataBuffer")) .put(new Info("OpaqueContext").pointerTypes("OpaqueContext")) .put(new Info("OpaqueRandomGenerator").pointerTypes("OpaqueRandomGenerator")) @@ -185,7 +191,7 @@ public class Nd4jCpuPresets implements InfoMapper, BuildEnabled { infoMap.put(new Info("__CUDACC__", "MAX_UINT", "HAVE_MKLDNN", "__CUDABLAS__").define(false)) .put(new Info("__JAVACPP_HACK__", "LIBND4J_ALL_OPS").define(true)) .put(new Info("std::initializer_list", "cnpy::NpyArray", "sd::NDArray::applyLambda", "sd::NDArray::applyPairwiseLambda", - "sd::graph::FlatResult", "sd::graph::FlatVariable", "sd::NDArray::subarray").skip()) + "sd::graph::FlatResult", "sd::graph::FlatVariable", "sd::NDArray::subarray", "std::shared_ptr", "sd::PointerWrapper", "sd::PointerDeallocator").skip()) .put(new Info("std::string").annotations("@StdString").valueTypes("BytePointer", "String") .pointerTypes("@Cast({\"char*\", \"std::string*\"}) BytePointer")) .put(new Info("std::pair").pointerTypes("IntIntPair").define()) diff --git a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/profiling/PerformanceTrackerTests.java b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/profiling/PerformanceTrackerTests.java index b4dfe31f1..24ba20057 100644 --- a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/profiling/PerformanceTrackerTests.java +++ b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/profiling/PerformanceTrackerTests.java @@ -124,6 +124,7 @@ public class PerformanceTrackerTests extends BaseNd4jTest { } @Test + @Ignore("useless these days") public void testTrackerGpu_1() { if (!Nd4j.getExecutioner().getClass().getCanonicalName().toLowerCase().contains("cuda")) return;
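
Note on the binding changes above: shape descriptors now travel through the new OpaqueConstantShapeBuffer type instead of OpaqueConstantDataBuffer, with getConstantShapeBufferPrimary(...) replacing getConstantDataBufferPrimary(...) for reads and deleteConstantShapeBuffer(...) replacing deleteShapeBuffer(...) for cleanup. A minimal Java sketch of that lifecycle, assuming a Nd4jCpu handle named `loop` and the same imports NativeOpExecutioner already uses; the shape/stride/dtype values here are illustrative only, not part of the patch:

    // Sketch of the new constant-shape-buffer calling convention from this patch.
    long[] shape  = {2, 3};
    long[] stride = {3, 1};
    // shapeBuffer(...) now returns an OpaqueConstantShapeBuffer
    val dbf = loop.shapeBuffer(shape.length, new LongPointer(shape), new LongPointer(stride),
            DataType.FLOAT.toInt(), 'c', 1, false);
    if (loop.lastErrorCode() != 0)
        throw new RuntimeException(loop.lastErrorMessage());
    // read the host-side shapeInfo through the buffer's primary pointer
    val shapeInfo = new LongBuffer(loop.getConstantShapeBufferPrimary(dbf),
            Shape.shapeInfoLength(shape.length));
    // release with the renamed deleter
    loop.deleteConstantShapeBuffer(dbf);

The getInstance() signature changes scattered through the generated code follow the same native-side refactoring: singletons such as Environment and OpRegistrator are now returned by reference (hence @ByRef) rather than by pointer, which is why the commented macro bodies switch from getInstance()-> to getInstance().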