From 69d91e272a5df21b763d80b5e11fe9f10d178fac Mon Sep 17 00:00:00 2001 From: Abdelrauf Date: Thu, 14 May 2020 14:41:55 +0400 Subject: [PATCH 01/21] - new implementations for Index Reductions (#421) * - new implementations for Index Reductions - small fix in the legacy reduction - disabled index reduction bench tests inside Playground Signed-off-by: Abdelrauf * Allow LIBND4J_TYPES Signed-off-by: Abdelrauf * index reduction stuff split into bunch of units * meh * IMax switched to new impl Signed-off-by: raver119@gmail.com * minor fix + test * minor fix * index range fix Signed-off-by: Abdelrauf * noop on empty outputs * minor fix * minor fix Signed-off-by: Abdelrauf * ArgMax replaces IMax Signed-off-by: raver119@gmail.com * argmax/argmin/argamax/argamin shape functions updated * ArgAmax/ArgAmin/ArgMin replaces IAMax/IAMin/IMin Signed-off-by: raver119@gmail.com * argmax/argmin/argamax/argamin CUDA * IMax replaced in dl4j Signed-off-by: raver119@gmail.com * Codegen output * imports fixed Signed-off-by: raver119@gmail.com * fix compilation issue Signed-off-by: Abdelrauf * Auto-generate compilation units Signed-off-by: Abdelrauf * Should fix NDArray refactored function calls in indexReductions.cu Signed-off-by: Abdelrauf Co-authored-by: raver119@gmail.com Co-authored-by: Alexander Stoyakin --- .../clustering/cluster/CentersHolder.java | 8 +- .../vectorizer/BagOfWordsVectorizerTest.java | 6 +- libnd4j/blas/CMakeLists.txt | 14 + libnd4j/include/helpers/LoopsCoordsHelper.h | 508 +++++++++- libnd4j/include/loops/cpu/indexreduce.hpp | 2 +- libnd4j/include/loops/cuda/indexreduce.cu | 15 +- .../ops/declarable/generic/reduce/argamax.cpp | 95 ++ .../ops/declarable/generic/reduce/argamin.cpp | 95 ++ .../ops/declarable/generic/reduce/argmax.cpp | 37 +- .../ops/declarable/generic/reduce/argmin.cpp | 43 +- .../ops/declarable/headers/parity_ops.h | 26 + .../cpu/compilation_units/argamax.cpp.in | 28 + .../cpu/compilation_units/argamin.cpp.in | 28 + .../cpu/compilation_units/argmax.cpp.in | 28 + .../cpu/compilation_units/argmin.cpp.in | 28 + .../crop_and_resize_0.cpp | 2 +- .../crop_and_resize_1.cpp | 2 +- .../crop_and_resize_2.cpp | 2 +- .../crop_and_resize_3.cpp | 2 +- .../crop_and_resize_4.cpp | 2 +- .../crop_and_resize_5.cpp | 2 +- .../crop_and_resize_6.cpp | 2 +- .../crop_and_resize_7.cpp | 2 +- .../crop_and_resize_8.cpp | 2 +- .../crop_and_resize_9.cpp | 2 +- .../helpers/cpu/indexReductions.cpp | 56 ++ .../helpers/cpu/indexReductions.hpp | 900 ++++++++++++++++++ .../helpers/cuda/indexReductions.cu | 106 +++ .../ops/declarable/helpers/reductions.h | 41 + .../layers_tests/DeclarableOpsTests19.cpp | 14 + .../layers_tests/PlaygroundTests.cpp | 255 ++++- .../nd4j/autodiff/samediff/ops/SDBaseOps.java | 16 +- .../nd4j/autodiff/samediff/ops/SDMath.java | 16 +- .../samediff/serde/LegacyOpMapper.java | 8 - .../autodiff/validation/OpValidation.java | 4 - .../converters/ImportClassMapping.java | 6 +- .../linalg/api/ops/impl/indexaccum/IAMax.java | 78 -- .../linalg/api/ops/impl/indexaccum/IAMin.java | 80 -- .../linalg/api/ops/impl/indexaccum/IMax.java | 87 -- .../linalg/api/ops/impl/indexaccum/IMin.java | 83 -- .../ops/impl/indexaccum/custom/ArgAmax.java | 111 +++ .../ops/impl/indexaccum/custom/ArgAmin.java | 111 +++ .../ops/impl/indexaccum/custom/ArgMax.java | 49 +- .../ops/impl/indexaccum/custom/ArgMin.java | 49 +- .../java/org/nd4j/linalg/factory/Nd4j.java | 12 +- .../org/nd4j/linalg/factory/ops/NDBase.java | 8 +- .../org/nd4j/linalg/factory/ops/NDMath.java | 8 +- .../java/org/nd4j/nativeblas/Nd4jCpu.java | 54 ++ 
.../opvalidation/ReductionOpValidation.java | 8 +- .../autodiff/samediff/NameScopeTests.java | 2 +- .../test/java/org/nd4j/linalg/Nd4jTestsC.java | 34 +- .../java/org/nd4j/linalg/crash/CrashTest.java | 4 +- .../nd4j/linalg/ops/OpExecutionerTests.java | 26 +- .../nd4j/linalg/ops/OpExecutionerTestsC.java | 20 +- .../org/nd4j/linalg/shape/EmptyTests.java | 3 +- 55 files changed, 2742 insertions(+), 488 deletions(-) create mode 100644 libnd4j/include/ops/declarable/generic/reduce/argamax.cpp create mode 100644 libnd4j/include/ops/declarable/generic/reduce/argamin.cpp create mode 100644 libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argamax.cpp.in create mode 100644 libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argamin.cpp.in create mode 100644 libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argmax.cpp.in create mode 100644 libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argmin.cpp.in rename libnd4j/include/ops/declarable/helpers/cpu/compilation_units/{ => crop_and_resize}/crop_and_resize_0.cpp (95%) rename libnd4j/include/ops/declarable/helpers/cpu/compilation_units/{ => crop_and_resize}/crop_and_resize_1.cpp (95%) rename libnd4j/include/ops/declarable/helpers/cpu/compilation_units/{ => crop_and_resize}/crop_and_resize_2.cpp (95%) rename libnd4j/include/ops/declarable/helpers/cpu/compilation_units/{ => crop_and_resize}/crop_and_resize_3.cpp (95%) rename libnd4j/include/ops/declarable/helpers/cpu/compilation_units/{ => crop_and_resize}/crop_and_resize_4.cpp (95%) rename libnd4j/include/ops/declarable/helpers/cpu/compilation_units/{ => crop_and_resize}/crop_and_resize_5.cpp (95%) rename libnd4j/include/ops/declarable/helpers/cpu/compilation_units/{ => crop_and_resize}/crop_and_resize_6.cpp (95%) rename libnd4j/include/ops/declarable/helpers/cpu/compilation_units/{ => crop_and_resize}/crop_and_resize_7.cpp (95%) rename libnd4j/include/ops/declarable/helpers/cpu/compilation_units/{ => crop_and_resize}/crop_and_resize_8.cpp (95%) rename libnd4j/include/ops/declarable/helpers/cpu/compilation_units/{ => crop_and_resize}/crop_and_resize_9.cpp (95%) create mode 100644 libnd4j/include/ops/declarable/helpers/cpu/indexReductions.cpp create mode 100644 libnd4j/include/ops/declarable/helpers/cpu/indexReductions.hpp create mode 100644 libnd4j/include/ops/declarable/helpers/cuda/indexReductions.cu create mode 100644 libnd4j/include/ops/declarable/helpers/reductions.h delete mode 100644 nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/IAMax.java delete mode 100644 nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/IAMin.java delete mode 100644 nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/IMax.java delete mode 100644 nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/IMin.java create mode 100644 nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/custom/ArgAmax.java create mode 100644 nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/custom/ArgAmin.java diff --git a/deeplearning4j/deeplearning4j-nearestneighbors-parent/nearestneighbor-core/src/main/java/org/deeplearning4j/clustering/cluster/CentersHolder.java b/deeplearning4j/deeplearning4j-nearestneighbors-parent/nearestneighbor-core/src/main/java/org/deeplearning4j/clustering/cluster/CentersHolder.java index 
e692f9bd0..25542dc8f 100644 --- a/deeplearning4j/deeplearning4j-nearestneighbors-parent/nearestneighbor-core/src/main/java/org/deeplearning4j/clustering/cluster/CentersHolder.java +++ b/deeplearning4j/deeplearning4j-nearestneighbors-parent/nearestneighbor-core/src/main/java/org/deeplearning4j/clustering/cluster/CentersHolder.java @@ -20,7 +20,7 @@ import org.deeplearning4j.clustering.algorithm.Distance; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ops.ReduceOp; -import org.nd4j.linalg.api.ops.impl.indexaccum.IMin; +import org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMin; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.common.primitives.Pair; @@ -29,7 +29,7 @@ public class CentersHolder { private long index = 0; protected transient ReduceOp op; - protected IMin imin; + protected ArgMin imin; protected transient INDArray distances; protected transient INDArray argMin; @@ -60,7 +60,7 @@ public class CentersHolder { if (op == null) { op = ClusterUtils.createDistanceFunctionOp(distanceFunction, centers, point.getArray(), 1); - imin = new IMin(distances, argMin); + imin = new ArgMin(distances, argMin); op.setZ(distances); } @@ -84,7 +84,7 @@ public class CentersHolder { if (op == null) { op = ClusterUtils.createDistanceFunctionOp(distanceFunction, centers, point.getArray(), 1); - imin = new IMin(distances, argMin); + imin = new ArgMin(distances, argMin); op.setZ(distances); } diff --git a/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/test/java/org/deeplearning4j/bagofwords/vectorizer/BagOfWordsVectorizerTest.java b/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/test/java/org/deeplearning4j/bagofwords/vectorizer/BagOfWordsVectorizerTest.java index 368b48ee9..e450e6095 100755 --- a/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/test/java/org/deeplearning4j/bagofwords/vectorizer/BagOfWordsVectorizerTest.java +++ b/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/test/java/org/deeplearning4j/bagofwords/vectorizer/BagOfWordsVectorizerTest.java @@ -23,6 +23,7 @@ import org.deeplearning4j.BaseDL4JTest; import org.junit.Rule; import org.junit.rules.TemporaryFolder; import org.nd4j.common.io.ClassPathResource; +import org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMax; import org.deeplearning4j.models.word2vec.VocabWord; import org.deeplearning4j.models.word2vec.wordstore.VocabCache; import org.deeplearning4j.text.sentenceiterator.labelaware.LabelAwareFileSentenceIterator; @@ -31,7 +32,6 @@ import org.deeplearning4j.text.tokenization.tokenizerfactory.DefaultTokenizerFac import org.deeplearning4j.text.tokenization.tokenizerfactory.TokenizerFactory; import org.junit.Test; import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.api.ops.impl.indexaccum.IMax; import org.nd4j.linalg.dataset.DataSet; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.common.util.SerializationUtils; @@ -111,7 +111,7 @@ public class BagOfWordsVectorizerTest extends BaseDL4JTest { INDArray labelz = dataSet.getLabels(); log.info("Labels array: " + labelz); - int idx2 = Nd4j.getExecutioner().exec(new IMax(labelz)).getInt(0); + int idx2 = Nd4j.getExecutioner().exec(new ArgMax(labelz))[0].getInt(0); //int idx2 = ((IndexAccumulation) Nd4j.getExecutioner().exec(new IMax(labelz))).getFinalResult().intValue(); // assertEquals(1.0, dataSet.getLabels().getDouble(0), 0.1); @@ -125,7 +125,7 @@ public class BagOfWordsVectorizerTest extends BaseDL4JTest { assertEquals(1, 
dataSet.getFeatures().getDouble(vocabCache.tokenFor("1").getIndex()), 0.1); assertEquals(0, dataSet.getFeatures().getDouble(vocabCache.tokenFor("2").getIndex()), 0.1); - int idx1 = Nd4j.getExecutioner().exec(new IMax(dataSet.getLabels())).getInt(0); + int idx1 = Nd4j.getExecutioner().exec(new ArgMax(dataSet.getLabels()))[0].getInt(0); //int idx1 = ((IndexAccumulation) Nd4j.getExecutioner().exec(new IMax(dataSet.getLabels()))).getFinalResult().intValue(); //assertEquals(0.0, dataSet.getLabels().getDouble(0), 0.1); diff --git a/libnd4j/blas/CMakeLists.txt b/libnd4j/blas/CMakeLists.txt index 8c8d5fb22..9902649f8 100755 --- a/libnd4j/blas/CMakeLists.txt +++ b/libnd4j/blas/CMakeLists.txt @@ -294,12 +294,26 @@ elseif(SD_CPU) file(GLOB_RECURSE LEGACY_SOURCES false ../include/legacy/impl/*.cpp ../include/legacy/cpu/*.cpp ../include/legacy/*.h) file(GLOB_RECURSE LOOPS_SOURCES false ../include/loops/*.cpp ../include/loops/*.h) + + file(GLOB_RECURSE COMPILATION_UNITS false ../include/ops/declarable/helpers/cpu/compilation_units/*.cpp.in) + foreach(FL_ITEM ${COMPILATION_UNITS}) + string(REGEX MATCH "^(.*)\\.cpp\.in$" dummy ${FL_ITEM}) + set(FL_ITEM_WLE ${CMAKE_MATCH_1}) + foreach(FL_TYPE_INDEX RANGE 0 9) + message( "${FL_ITEM_WLE}_${FL_TYPE_INDEX}.cpp") + configure_file( "${FL_ITEM}" "${FL_ITEM_WLE}_${FL_TYPE_INDEX}.cpp" @ONLY) + LIST(APPEND CUSTOMOPS_GENERIC_SOURCES ${FL_ITEM_WLE}_${FL_TYPE_INDEX}.cpp ) + endforeach() + endforeach() + if (SD_X86_BUILD) # we disable platform optimizations for certain files on linux/macos set_source_files_properties(cpu/NativeOps.cpp PROPERTIES COMPILE_FLAGS "-march=x86-64 -mtune=generic") set_source_files_properties(../include/helpers/impl/OpTracker.cpp PROPERTIES COMPILE_FLAGS "-march=x86-64 -mtune=generic") endif() + + if(SD_CHECK_VECTORIZATION) set(VECT_FILES cpu/NativeOps.cpp ${OPS_SOURCES} ${HELPERS_SOURCES} ${CUSTOMOPS_GENERIC_SOURCES} ${LOOPS_SOURCES}) if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
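The foreach/configure_file block above is what auto-generates the per-type compilation units: configure_file with @ONLY substitutes only @VAR@ references, and FL_TYPE_INDEX ranges over 0..9, so each *.cpp.in template under compilation_units is emitted ten times. For the argmax template shown later in this patch, a line such as

    BUILD_DOUBLE_TEMPLATE(template void argMax_, (const NDArray& input, NDArray& output, const std::vector& dimensions), LIBND4J_TYPES_@FL_TYPE_INDEX@, INDEXING_TYPES);

becomes, in the generated argmax_3.cpp for example,

    BUILD_DOUBLE_TEMPLATE(template void argMax_, (const NDArray& input, NDArray& output, const std::vector& dimensions), LIBND4J_TYPES_3, INDEXING_TYPES);

splitting the heavy type-dispatch instantiations across ten smaller translation units that compile in parallel with less memory per file.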
diff --git a/libnd4j/include/helpers/LoopsCoordsHelper.h b/libnd4j/include/helpers/LoopsCoordsHelper.h index cd578b62a..8a1160aea 100644 --- a/libnd4j/include/helpers/LoopsCoordsHelper.h +++ b/libnd4j/include/helpers/LoopsCoordsHelper.h @@ -19,12 +19,13 @@ // #ifndef LIBND4J_LOOPCOORDSHELPER_H #define LIBND4J_LOOPCOORDSHELPER_H - +#include #include #include #include #include #include +#include namespace sd { #if defined(__GNUC__) @@ -125,7 +126,7 @@ namespace sd { } - FORCEINLINE zip_size_t offset_from_coords(const Nd4jLong*& x_strides, const Nd4jLong*& z_strides, const Nd4jLong* coords, const Nd4jLong& rank) { + FORCEINLINE zip_size_t offset_from_coords(const Nd4jLong* x_strides, const Nd4jLong* z_strides, const Nd4jLong* coords, const Nd4jLong& rank) { zip_size_t offset = { 0,0 }; size_t rank_4 = rank & -4; @@ -435,6 +436,509 @@ namespace sd { return last_offset; } + + struct triple_size_t { + size_t first; + size_t second; + size_t third; + }; + + + template + FORCEINLINE triple_size_t inc_coords(const Nd4jLong* bases, const Nd4jLong* x_strides, const Nd4jLong* y_strides, const Nd4jLong* z_strides, Nd4jLong* coords, triple_size_t last_offset, const size_t rank, const size_t skip = 0) { + + Nd4jLong val = 0; + for (int i = rank - skip - 1; i >= 0; i--) { + val = coords[i] + 1; + if (likely(val < bases[i])) { + coords[i] = val; + last_offset.first += x_strides[i]; + last_offset.second += y_strides[i]; + last_offset.third += z_strides[i]; + break; + } + else { + last_offset.first -= coords[i] * x_strides[i]; + last_offset.second -= coords[i] * y_strides[i]; + last_offset.third -= coords[i] * z_strides[i]; + coords[i] = 0; + } + } + return last_offset; + } + + template<> + FORCEINLINE triple_size_t inc_coords(const Nd4jLong* bases, const Nd4jLong* x_strides, const Nd4jLong* y_strides, const Nd4jLong* z_strides, Nd4jLong* coords, triple_size_t last_offset, const size_t rank, const size_t skip) { + + Nd4jLong val = 0; + for (int i = skip; i < rank; i++) { + val = coords[i] + 1; + if (likely(val < bases[i])) { + coords[i] = val; + + last_offset.first += x_strides[i]; + last_offset.second += y_strides[i]; + last_offset.third += z_strides[i]; + break; + } + else { + last_offset.first -= coords[i] * x_strides[i]; + last_offset.second -= coords[i] * y_strides[i]; + last_offset.third -= coords[i] * z_strides[i]; + coords[i] = 0; + } + } + return last_offset; + } + + FORCEINLINE triple_size_t offset_from_coords(const Nd4jLong* x_strides, const Nd4jLong* y_strides, const Nd4jLong* z_strides, const Nd4jLong* coords, const Nd4jLong& rank) { + + triple_size_t offset = { 0,0 ,0 }; + size_t rank_4 = rank & -4; + for (int i = 0; i < rank_4; i += 4) { + offset.first = offset.first + + coords[i] * x_strides[i] + + coords[i + 1] * x_strides[i + 1] + + coords[i + 2] * x_strides[i + 2] + + coords[i + 3] * x_strides[i + 3]; + offset.second = offset.second + + coords[i] * y_strides[i] + + coords[i + 1] * y_strides[i + 1] + + coords[i + 2] * y_strides[i + 2] + + coords[i + 3] * y_strides[i + 3]; + offset.third = offset.third + + coords[i] * z_strides[i] + + coords[i + 1] * z_strides[i + 1] + + coords[i + 2] * z_strides[i + 2] + + coords[i + 3] * z_strides[i + 3]; + } + for (int i = rank_4; i < rank; i++) { + offset.first += coords[i] * x_strides[i]; + offset.second += coords[i] * y_strides[i]; + offset.third += coords[i] * z_strides[i]; + } + return offset; + } + + + template + FORCEINLINE Nd4jLong getLength(const Nd4jLong* bases, int rank, int skip = 0) + { + if (skip < 0 || skip >= rank) skip = 0; + Nd4jLong total = 1; + for (int i = 0; i < rank - skip; i++) { + total *= bases[i]; + } + return total; + } + + + template<> + FORCEINLINE Nd4jLong getLength(const Nd4jLong* bases, int rank, int skip) + { + if (skip < 0 || skip >= rank) skip = 0; + Nd4jLong total = 1; + for (int i = skip; i < rank; i++) { + total *= bases[i]; + } + + return total; + } + + + template + FORCEINLINE Nd4jLong getLength(const Nd4jLong* bases, int rank, int skip, Nd4jLong& outSkippedLength) + { + if (skip < 0 || skip >= rank) skip = 0; + Nd4jLong total = 1; + for (int i = 0; i < rank - skip; i++) { + total *= bases[i]; + } + if (skip > 0) { + outSkippedLength = 1; + for (int i = rank - skip; i < rank; i++) { + outSkippedLength *= bases[i]; + } + } + else { + outSkippedLength = 0; + } + return total; + } + + + template<> + FORCEINLINE Nd4jLong getLength(const Nd4jLong* bases, int rank, int skip, Nd4jLong& outSkippedLength) + { + if (skip < 0 || skip >= rank) skip = 0; + if (skip > 0) { + outSkippedLength = 1; + for (int i = 0; i < skip; i++) { + outSkippedLength *= bases[i]; + } + } + else { + outSkippedLength = 0; + } + Nd4jLong total = 1; + for (int i = skip; i < rank; i++) { + total *= bases[i]; + } + + return total; + } + + /* + to satisfy the ODR it will be declared inline + rePartition for reductions and the like + Indices mentioned in the dimension list will be moved to the tail + This way the shape is split into two parts: + the first part describes the output, the second (tail) part is used for reductions and other purposes + if squash is true then it will attempt to minimize the output (for both orders) and the tail +*/
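+ // A minimal usage sketch (illustrative only; the local names below are
+ // hypothetical), reducing a rank-4 array over axes {1, 3}:
+ // Nd4jLong new_bases[MAX_RANK], new_strides[MAX_RANK];
+ // int outB, outE, tailB, tailE;
+ // rePartition('c', {1, 3}, 4, bases, strides, new_bases, new_strides,
+ //              outB, outE, tailB, tailE, false, true);
+ // leaves the kept (output) axes in [outB, outE) and the reduced axes in [tailB, tailE).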
+ + FORCEINLINE void rePartition(char order, const std::vector& dimensions, const size_t rank, const Nd4jLong* bases, const Nd4jLong* strides, Nd4jLong(&new_bases)[MAX_RANK], Nd4jLong(&new_strides)[MAX_RANK], int& first_begin, int& first_end, int& second_begin, int& second_end, bool first_squash = false, bool second_squash = true) { + + bool indices[MAX_RANK] = {}; + int ind = 0; + size_t second_rank; + if (dimensions.size() == 0 || (dimensions.size() == 1 && dimensions.at(0) == sd::DataTypeUtils::max())){ + first_end = 0; + first_begin = 0; + //treat it as the whole + for (int i = 0; i < rank; i++) { + new_bases[i] = bases[i]; + new_strides[i] = strides[i]; + } + second_rank = rank; + second_end = rank; + second_begin = 0; + + } + else { + for (int index : dimensions) { + if (index < 0) index = rank + index; + if (index >= 0 && index < rank) { + indices[index] = true; + } + } + + + //move the output (kept) dimensions to the front + for (int i = 0; i < rank; i++) { + + if (!indices[i]) { + + new_bases[ind] = bases[i]; + new_strides[ind] = strides[i]; + ind++; + } + } + + + int first_rank = ind; + + first_end = ind; + first_begin = 0; + //nd4j_printf("rffrr ss & %d ind-- %d %d\n", first_rank, first_begin, first_end); + //squash output rank + if (first_squash && first_rank > 1) { + + if (order == 'c') { + int uniq_ind = first_end-1; + for (int i = first_end - 2; i >= first_begin; i--) { + if (new_strides[i] == new_bases[uniq_ind] * new_strides[uniq_ind]) { + new_bases[uniq_ind] = new_bases[i] * new_bases[uniq_ind]; + new_strides[uniq_ind] = new_strides[uniq_ind]; + --first_rank; + } + else { + --uniq_ind; + new_bases[uniq_ind] = new_bases[i]; + new_strides[uniq_ind] = new_strides[i]; + } + } + first_begin = first_end - first_rank; + } + else { + //squash fortran + int uniq_ind = 0; + for (int i = 1; i < first_end; i++) { + if (new_strides[i] == new_bases[uniq_ind] * new_strides[uniq_ind]) { + new_bases[uniq_ind] = new_bases[i] * new_bases[uniq_ind]; + new_strides[uniq_ind] = new_strides[uniq_ind]; + --first_rank; + } + else { + uniq_ind++; + new_bases[uniq_ind] = new_bases[i]; + new_strides[uniq_ind] = new_strides[i]; + } + } + first_end = first_begin + first_rank; + + } + ind = first_end; + } + + //nd4j_printf("rffrr ss & %d ind-- %d %d\n", first_rank, first_begin, first_end); + //move the reduced (processed) indices to the tail + for (int i = 0; i < rank; i++) { + if (indices[i]) { + new_bases[ind] = bases[i]; + new_strides[ind] = strides[i]; + ind++; + } + } + + second_rank = ind - first_end; + second_end = ind; + second_begin = first_end; + + } + + + if (second_squash && second_rank > 1) { + + if (order == 'c') { + int uniq_ind = second_end - 1; + for (int i = second_end - 2; i >= second_begin; i--) { + if (new_strides[i] == new_bases[uniq_ind] * new_strides[uniq_ind]) { + new_bases[uniq_ind] = new_bases[i] * new_bases[uniq_ind]; + new_strides[uniq_ind] = new_strides[uniq_ind]; + --second_rank; + } + else { + --uniq_ind; + new_bases[uniq_ind] = new_bases[i]; + new_strides[uniq_ind] = new_strides[i]; + } + } + second_begin = second_end - second_rank; + } + else { + int uniq_ind = second_begin; + for (int i = second_begin+1; i < second_end; i++) { + if (new_strides[i] == new_bases[uniq_ind] * new_strides[uniq_ind]) { + new_bases[uniq_ind] = new_bases[i] * new_bases[uniq_ind]; + new_strides[uniq_ind] = new_strides[uniq_ind]; + --second_rank; + } + else { + uniq_ind++; + new_bases[uniq_ind] = new_bases[i]; + new_strides[uniq_ind] = new_strides[i]; + } + } + second_end = second_begin + second_rank; + + } + + }
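+ // Worked example (illustrative): for a c-order shape {2, 3, 4, 5} with
+ // strides {60, 20, 5, 1} and dimensions {1, 3}, the kept axes become the
+ // first part {2, 4} / {60, 5} and the tail becomes {3, 5} / {20, 1}; that
+ // tail is not squashed because 20 != 5 * 1, whereas a contiguous tail such
+ // as {4, 5} / {5, 1} satisfies stride[i] == base[i + 1] * stride[i + 1] and
+ // collapses into the single axis {20} / {1}.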
+ + return; + } + + //basic CRTP static polymorphism classes for offset increments + + template + struct CoordsBaseMovement { + void init(const Nd4jLong* bases, const Nd4jLong* strides1, const Nd4jLong* strides2, int rank, int start = 0) { + static_cast(this)->initImpl(bases, strides1, strides2, rank, start); + } + + void increment(int skipRank = 0) { + static_cast(this)->incrementImpl(skipRank); + } + + Nd4jLong First() { return static_cast(this)->FirstImpl(); }; + Nd4jLong Second() { return static_cast(this)->SecondImpl(); }; + }; + + + struct ZipGenericCoordsRank1Stride1 : CoordsBaseMovement { + + size_t offset1; + size_t offset2; + + + void initImpl(const Nd4jLong* bases, const Nd4jLong* strides1, const Nd4jLong* strides2, int rank, int start = 0) { + offset1 = start; + offset2 = start; + } + + void incrementImpl(int skipRank = 0) { + offset1 += 1; + offset2 += 1; + } + + Nd4jLong FirstImpl() { return offset1; }; + Nd4jLong SecondImpl() { return offset2; }; + + }; + + struct ZipGenericCoordsRank1BothStrideN : CoordsBaseMovement { + size_t stride1; + size_t stride2; + size_t offset1; + size_t offset2; + + + void initImpl(const Nd4jLong* bases, const Nd4jLong* strides1, const Nd4jLong* strides2, int rank, int start = 0) { + stride1 = strides1[0]; + stride2 = strides2[0]; + offset1 = start * stride1; + offset2 = start * stride2; + } + + void incrementImpl(int skipRank = 0) { + offset1 += stride1; + offset2 += stride2; + } + + Nd4jLong FirstImpl() { return offset1; }; + Nd4jLong SecondImpl() { return offset2; }; + + }; + + template + struct ZipGenericCoordsConstMovementSecondStride1 : CoordsBaseMovement> { + sd::CoordsState cst; + Nd4jLong coords[MAX_RANK]; + size_t offset1; + size_t offset2; + int _rank; + + void initImpl(const Nd4jLong* bases, const Nd4jLong* strides1, const Nd4jLong* strides2, int rank, int start = 0) { + offset1 = sd::init_coords(cst, start, bases, strides1); + offset2 = start * 1; + } + + void incrementImpl(int skipRank = 0) { + offset1 = sd::inc_coords(cst, offset1); + offset2 += 1; + } + + Nd4jLong FirstImpl() { return offset1; }; + Nd4jLong SecondImpl() { return offset2; }; + + }; + + template + struct ZipGenericCoordsConstMovementSecondStrideN : CoordsBaseMovement> { + sd::CoordsState cst; + Nd4jLong _stride2; + Nd4jLong coords[MAX_RANK]; + size_t offset1; + size_t offset2; + int _rank; + + void initImpl(const Nd4jLong* bases, const Nd4jLong* strides1, const Nd4jLong* strides2, int rank, int start = 0) { + _stride2 = strides2[0]; + offset1 = sd::init_coords(cst, start, bases, strides1); + offset2 = start * _stride2; + } + + void incrementImpl(int skipRank = 0) { + offset1 = sd::inc_coords(cst, offset1); + offset2 += _stride2; + } + + Nd4jLong FirstImpl() { return offset1; }; + Nd4jLong SecondImpl() { return offset2; }; + + }; + + template + struct ZipGenericCoordsMovementSecondStrideN : CoordsBaseMovement> { + const Nd4jLong* _bases; + const Nd4jLong* _strides1; + Nd4jLong _stride2; + Nd4jLong coords[MAX_RANK]; + zip_size_t offset; + int _rank; + + void initImpl(const Nd4jLong* bases, const Nd4jLong* strides1, const Nd4jLong* strides2, int rank, int start = 0) { + + _bases = bases; + _strides1 = strides1; + _stride2 = strides2[0]; + _rank = rank; + if (start == 0) { + for (int i = 0; i < MAX_RANK; i++) { + coords[i] = 0; + } + offset = { 0,0 }; + + } + else { + if (LastIndexFaster) { + sd::index2coords_C(start, rank, bases, (Nd4jLong*)&coords); + } + else { + sd::index2coords_F(start, rank, bases, (Nd4jLong*)&coords); + } + offset.first = 
sd::offset_from_coords(strides1, (Nd4jLong*)&coords, rank); + offset.second = start * _stride2; + } + + } + + void incrementImpl(int skipRank = 0) { + offset.first = inc_coords(_bases, _strides1, (Nd4jLong*)&coords, offset.first, _rank, skipRank); + offset.second += _stride2; + } + + Nd4jLong FirstImpl() { return offset.first; }; + Nd4jLong SecondImpl() { return offset.second; }; + + }; + + template + struct ZipGenericCoordsMovement : CoordsBaseMovement> { + const Nd4jLong* _bases; + const Nd4jLong* _strides1; + const Nd4jLong* _strides2; + Nd4jLong coords[MAX_RANK]; + zip_size_t offset; + int _rank; + + void initImpl(const Nd4jLong* bases, const Nd4jLong* strides1, const Nd4jLong* strides2, int rank, int start = 0) { + + _bases = bases; + _strides1 = strides1; + _strides2 = strides2; + _rank = rank; + if (start == 0) { + for (int i = 0; i < MAX_RANK; i++) { + coords[i] = 0; + } + offset = { 0,0 }; + + } + else { + if (LastIndexFaster) { + sd::index2coords_C(start, rank, bases, (Nd4jLong*)&coords); + } + else { + sd::index2coords_F(start, rank, bases, (Nd4jLong*)&coords); + } + offset = sd::offset_from_coords(strides1, strides2, (Nd4jLong*)&coords, rank); + } + + } + + void incrementImpl(int skipRank = 0) { + offset = inc_coords(_bases, _strides1, _strides2, (Nd4jLong*)&coords, offset, _rank, skipRank); + } + + Nd4jLong FirstImpl() { return offset.first; }; + Nd4jLong SecondImpl() { return offset.second; }; + + }; + } + + #endif \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/indexreduce.hpp b/libnd4j/include/loops/cpu/indexreduce.hpp index 296fbcdef..9373e3feb 100644 --- a/libnd4j/include/loops/cpu/indexreduce.hpp +++ b/libnd4j/include/loops/cpu/indexreduce.hpp @@ -69,7 +69,7 @@ Nd4jLong IndexReduce::execScalar(const void *vx, const Nd4jLong *xShapeInf for (int e = 0; e < maxThreads; e++) intermediatery[e].index = -1; - if (xEws == 1) { + if (xEws == 1 && shape::order(xShapeInfo) == 'c') { auto func = PRAGMA_THREADS_FOR { intermediatery[thread_id] = OpType::startingIndexValue(x); diff --git a/libnd4j/include/loops/cuda/indexreduce.cu b/libnd4j/include/loops/cuda/indexreduce.cu index e6a52b16a..dbe03a9bf 100644 --- a/libnd4j/include/loops/cuda/indexreduce.cu +++ b/libnd4j/include/loops/cuda/indexreduce.cu @@ -188,7 +188,7 @@ namespace functions { auto reductionBuffer = static_cast(vreductionBuffer); auto order = shape::order(xShapeInfo); int tid = blockIdx.x * blockDim.x + threadIdx.x; - __shared__ volatile int resultScalar; + __shared__ volatile bool resultScalar; //shared memory space for storing intermediate results __shared__ IndexValue* sPartials; @@ -214,17 +214,10 @@ namespace functions { zLen = shape::length(zShapeInfo); else zLen = 1; - if (dimensionLength == 1) { - if (zLen == 1 && (dimension == nullptr || dimension[0] == MAX_DIMENSION)) - resultScalar = 1; - else - resultScalar = 0; - } - else - resultScalar = 0; - if (zLen == 1) - resultScalar = 1; + resultScalar = true; + else + resultScalar = false; xLength = shape::length(xShapeInfo); } diff --git a/libnd4j/include/ops/declarable/generic/reduce/argamax.cpp b/libnd4j/include/ops/declarable/generic/reduce/argamax.cpp new file mode 100644 index 000000000..5fb452227 --- /dev/null +++ b/libnd4j/include/ops/declarable/generic/reduce/argamax.cpp @@ -0,0 +1,95 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. 
+ * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + + // Created by Abdelrauf 2020 (based on argmax) + +#include +#if NOT_EXCLUDED(OP_argamax) + +#include +#include +#include +#include + +namespace sd { + namespace ops { + DECLARE_TYPES(argamax) { + getOpDescriptor() + ->setAllowedInputTypes({ ALL_FLOATS,ALL_INTS }) + ->setAllowedOutputTypes({ ALL_INTS }); + } + + CUSTOM_OP_IMPL(argamax, 1, 1, false, 0, -2) { + auto input = INPUT_VARIABLE(0); + auto output = OUTPUT_VARIABLE(0); + + if (output->isEmpty()) + return Status::OK(); + + auto axis = *block.getIArguments(); + + // axis might be dynamic (i.e. tf mode) + if (block.width() > 1 && axis.size() == 0) { + auto axisVector = INPUT_VARIABLE(1); + helpers::adjustAxis(input->rankOf(), axisVector, axis); + helpers::argAbsMax(*input, *output, axis); + } + else { + helpers::argAbsMax(*input, *output, axis); + } + + STORE_RESULT(output); + + return Status::OK(); + } + + DECLARE_SHAPE_FN(argamax) { + std::vector dims; + + if (block.width() == 1) { + dims = *block.getIArguments(); + } else { + auto y = INPUT_VARIABLE(1); + dims = y->template asVectorT(); + } + + auto keepDims = block.numB() ? B_ARG(0) : false; + auto dtype = block.numD() ? D_ARG(0) : DataType::INT64; + + // we're resolving negative axis here + helpers::adjustAxis(shape::rank(inputShape->at(0)), dims); + + auto in = inputShape->at(0); + for (auto d : dims) { + // we have special case here + if (d == sd::DataTypeUtils::max()) + continue; + + REQUIRE_TRUE(d < shape::rank(in), 0, "ArgAmax: axis can't be above rank") + REQUIRE_TRUE(in[d + 1] != 0, 0, "ArgAmax: you can't reduce along axis with 0 in shape"); + } + + // special case - output is scalar + if (dims.empty() || (dims.size() == 1 && dims.at(0) == sd::DataTypeUtils::max())) { + return SHAPELIST(ConstantShapeHelper::getInstance()->scalarShapeInfo(dtype)); + } + + return SHAPELIST(ShapeUtils::evalReduceShapeInfo('c', dims, inputShape->at(0), dtype, keepDims, false, block.getWorkspace())); + } + } +} + +#endif diff --git a/libnd4j/include/ops/declarable/generic/reduce/argamin.cpp b/libnd4j/include/ops/declarable/generic/reduce/argamin.cpp new file mode 100644 index 000000000..4f590aae8 --- /dev/null +++ b/libnd4j/include/ops/declarable/generic/reduce/argamin.cpp @@ -0,0 +1,95 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + + // Created by Abdelrauf 2020 (based on argmax) + +#include +#if NOT_EXCLUDED(OP_argamin) + +#include +#include +#include +#include + +namespace sd { + namespace ops { + DECLARE_TYPES(argamin) { + getOpDescriptor() + ->setAllowedInputTypes({ ALL_FLOATS,ALL_INTS }) + ->setAllowedOutputTypes({ ALL_INTS }); + } + + CUSTOM_OP_IMPL(argamin, 1, 1, false, 0, -2) { + auto input = INPUT_VARIABLE(0); + auto output = OUTPUT_VARIABLE(0); + + if (output->isEmpty()) + return Status::OK(); + + auto axis = *block.getIArguments(); + + // axis might be dynamic (i.e. tf mode) + if (block.width() > 1 && axis.size() == 0) { + auto axisVector = INPUT_VARIABLE(1); + helpers::adjustAxis(input->rankOf(), axisVector, axis); + helpers::argAbsMin(*input, *output, axis); + } + else { + helpers::argAbsMin(*input, *output, axis); + } + + STORE_RESULT(output); + + return Status::OK(); + } + + DECLARE_SHAPE_FN(argamin) { + std::vector dims; + + if (block.width() == 1) { + dims = *block.getIArguments(); + } else { + auto y = INPUT_VARIABLE(1); + dims = y->template asVectorT(); + } + + auto keepDims = block.numB() ? B_ARG(0) : false; + auto dtype = block.numD() ? D_ARG(0) : DataType::INT64; + + // we're resolving negative axis here + helpers::adjustAxis(shape::rank(inputShape->at(0)), dims); + + auto in = inputShape->at(0); + for (auto d : dims) { + // we have special case here + if (d == sd::DataTypeUtils::max()) + continue; + + REQUIRE_TRUE(d < shape::rank(in), 0, "ArgAmin: axis can't be above rank") + REQUIRE_TRUE(in[d + 1] != 0, 0, "ArgAmin: you can't reduce along axis with 0 in shape"); + } + + // special case - output is scalar + if (dims.empty() || (dims.size() == 1 && dims.at(0) == sd::DataTypeUtils::max())) { + return SHAPELIST(ConstantShapeHelper::getInstance()->scalarShapeInfo(dtype)); + } + + return SHAPELIST(ShapeUtils::evalReduceShapeInfo('c', dims, inputShape->at(0), dtype, keepDims, false, block.getWorkspace())); + } + } +} + +#endif diff --git a/libnd4j/include/ops/declarable/generic/reduce/argmax.cpp b/libnd4j/include/ops/declarable/generic/reduce/argmax.cpp index 928a0f7d0..9c45b4c37 100644 --- a/libnd4j/include/ops/declarable/generic/reduce/argmax.cpp +++ b/libnd4j/include/ops/declarable/generic/reduce/argmax.cpp @@ -1,6 +1,6 @@ /******************************************************************************* * Copyright (c) 2015-2018 Skymind, Inc. - * + * Copyright (c) 2019 Konduit K.K. * This program and the accompanying materials are made available under the * terms of the Apache License, Version 2.0 which is available at * https://www.apache.org/licenses/LICENSE-2.0. @@ -22,6 +22,7 @@ #if NOT_EXCLUDED(OP_argmax) #include +#include #include #include @@ -29,7 +30,7 @@ namespace sd { namespace ops { DECLARE_TYPES(argmax) { getOpDescriptor() - ->setAllowedInputTypes(sd::DataType::ANY) + ->setAllowedInputTypes({ ALL_FLOATS,ALL_INTS }) ->setAllowedOutputTypes({ALL_INTS}); } @@ -37,18 +38,19 @@ namespace sd { auto input = INPUT_VARIABLE(0); auto output = OUTPUT_VARIABLE(0); - auto axis = *block.getIArguments(); + if (output->isEmpty()) + return Status::OK(); + auto axis = *block.getIArguments(); + // axis might be dynamic (i.e. 
tf mode) if (block.width() > 1 && axis.size() == 0) { auto axisVector = INPUT_VARIABLE(1); helpers::adjustAxis(input->rankOf(), axisVector, axis); - - input->applyIndexReduce(indexreduce::IndexMax, *output, axis); + helpers::argMax(*input, *output, axis); } else { - helpers::adjustAxis(input->rankOf(), axis); + helpers::argMax(*input, *output, axis); - input->applyIndexReduce(indexreduce::IndexMax, *output, axis); } STORE_RESULT(output); @@ -66,23 +68,28 @@ namespace sd { dims = y->template asVectorT(); } + auto keepDims = block.numB() ? B_ARG(0) : false; + auto dtype = block.numD() ? D_ARG(0) : DataType::INT64; + // we're resolving negative axis here helpers::adjustAxis(shape::rank(inputShape->at(0)), dims); - if (dims.size() > 1) - std::sort(dims.begin(), dims.end()); + auto in = inputShape->at(0); + for (auto d : dims) { + // we have special case here + if (d == sd::DataTypeUtils::max()) + continue; - - for (auto d:dims) { - REQUIRE_TRUE(inputShape->at(0)[d+1] != 0, 0, "ArgMax: you can't reduce along axis with 0 in shape"); + REQUIRE_TRUE(d < shape::rank(in), 0, "ArgMax: axis can't be above rank") + REQUIRE_TRUE(in[d + 1] != 0, 0, "ArgMax: you can't reduce along axis with 0 in shape"); } // special case - output is scalar - if (dims.size() == 0 || (dims.size() == 1 && dims.at(0) == sd::DataTypeUtils::max())) { - return SHAPELIST(ConstantShapeHelper::getInstance()->scalarShapeInfo(sd::DataType::INT64)); + if (dims.empty() || (dims.size() == 1 && dims.at(0) == sd::DataTypeUtils::max())) { + return SHAPELIST(ConstantShapeHelper::getInstance()->scalarShapeInfo(dtype)); } - return SHAPELIST(ShapeUtils::evalReduceShapeInfo('c', dims, inputShape->at(0), DataType::INT64, false, false, block.getWorkspace())); + return SHAPELIST(ShapeUtils::evalReduceShapeInfo('c', dims, inputShape->at(0), dtype, keepDims, false, block.getWorkspace())); } } } diff --git a/libnd4j/include/ops/declarable/generic/reduce/argmin.cpp b/libnd4j/include/ops/declarable/generic/reduce/argmin.cpp index f4fb25daa..97430a24f 100644 --- a/libnd4j/include/ops/declarable/generic/reduce/argmin.cpp +++ b/libnd4j/include/ops/declarable/generic/reduce/argmin.cpp @@ -21,15 +21,17 @@ #include #if NOT_EXCLUDED(OP_argmin) -#include #include +#include +#include +#include namespace sd { namespace ops { DECLARE_TYPES(argmin) { getOpDescriptor() - ->setAllowedInputTypes(sd::DataType::ANY) + ->setAllowedInputTypes({ ALL_FLOATS,ALL_INTS }) ->setAllowedOutputTypes({ALL_INTS}); } @@ -39,16 +41,18 @@ namespace sd { auto output = OUTPUT_VARIABLE(0); + if (output->isEmpty()) + return Status::OK(); + // axis might be dynamic (i.e. tf mode) if (block.width() > 1 && axis.size() == 0) { auto axisVector = INPUT_VARIABLE(1); helpers::adjustAxis(input->rankOf(), axisVector, axis); + helpers::argMin(*input, *output, axis); + } + else { + helpers::argMin(*input, *output, axis); - input->applyIndexReduce(indexreduce::IndexMin, *output, axis); - } else { - helpers::adjustAxis(input->rankOf(), axis); - - input->applyIndexReduce(indexreduce::IndexMin, *output, axis); } STORE_RESULT(output); @@ -58,7 +62,7 @@ namespace sd { DECLARE_SHAPE_FN(argmin) { std::vector dims; - auto in = inputShape->at(0); + if (block.width() == 1) { dims = *block.getIArguments(); } else { @@ -66,23 +70,28 @@ namespace sd { dims = y->template asVectorT(); } + auto keepDims = block.numB() ? B_ARG(0) : false; + auto dtype = block.numD() ? 
D_ARG(0) : DataType::INT64; + // we're resolving negative axis here - helpers::adjustAxis(shape::rank(in), dims); + helpers::adjustAxis(shape::rank(inputShape->at(0)), dims); - if (dims.size() > 1) - std::sort(dims.begin(), dims.end()); + auto in = inputShape->at(0); + for (auto d : dims) { + // we have special case here + if (d == sd::DataTypeUtils::max()) + continue; - for (auto d:dims) { - REQUIRE_TRUE(inputShape->at(0)[d+1] != 0, 0, "ArgMin: you can't reduce along axis with 0 in shape"); + REQUIRE_TRUE(d < shape::rank(in), 0, "ArgMin: axis can't be above rank") + REQUIRE_TRUE(in[d + 1] != 0, 0, "ArgMin: you can't reduce along axis with 0 in shape"); } // special case - output is scalar - if (dims.size() == 0 || (dims.size() == 1 && dims.at(0) == sd::DataTypeUtils::max())) { - return SHAPELIST(ConstantShapeHelper::getInstance()->scalarShapeInfo(DataType::INT64)); + if (dims.empty() || (dims.size() == 1 && dims.at(0) == sd::DataTypeUtils::max())) { + return SHAPELIST(ConstantShapeHelper::getInstance()->scalarShapeInfo(dtype)); } - auto newShape = ShapeUtils::evalReduceShapeInfo('c', dims, in, DataType::INT64, false, false, block.getWorkspace()); - return SHAPELIST(newShape); + return SHAPELIST(ShapeUtils::evalReduceShapeInfo('c', dims, inputShape->at(0), dtype, keepDims, false, block.getWorkspace())); } } diff --git a/libnd4j/include/ops/declarable/headers/parity_ops.h b/libnd4j/include/ops/declarable/headers/parity_ops.h index 8fae1b63c..74221133c 100644 --- a/libnd4j/include/ops/declarable/headers/parity_ops.h +++ b/libnd4j/include/ops/declarable/headers/parity_ops.h @@ -52,6 +52,32 @@ namespace sd { DECLARE_CUSTOM_OP(argmin, 1, 1, false, 0, -2); #endif + /** + * This operation returns index of absolute max element in a given NDArray (optionally: along given dimension(s)) + * Expected input: + * 0: N-dimensional array + * 1: optional axis vector + * + * Int args: + * 0: optional axis + */ + #if NOT_EXCLUDED(OP_argamax) + DECLARE_CUSTOM_OP(argamax, 1, 1, false, 0, -2); + #endif + + /** + * This operation returns index of absolute min element in a given NDArray (optionally: along given dimension(s)) + * Expected input: + * 0: N-dimensional array + * 1: optional axis vector + * + * Int args: + * 0: optional axis + */ + #if NOT_EXCLUDED(OP_argamin) + DECLARE_CUSTOM_OP(argamin, 1, 1, false, 0, -2); + #endif + /** * This operation provides various normalization modes: * 0: frobenius diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argamax.cpp.in b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argamax.cpp.in new file mode 100644 index 000000000..533a94aab --- /dev/null +++ b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argamax.cpp.in @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ +// +// @author AbdelRauf +// + +#include + +namespace sd { + namespace ops { + namespace helpers { + BUILD_DOUBLE_TEMPLATE(template void argAbsMax_, (const NDArray& input, NDArray& output, const std::vector& dimensions), LIBND4J_TYPES_@FL_TYPE_INDEX@, INDEXING_TYPES); + } + } +} \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argamin.cpp.in b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argamin.cpp.in new file mode 100644 index 000000000..4f7c78505 --- /dev/null +++ b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argamin.cpp.in @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ +// +// @author AbdelRauf +// + +#include + +namespace sd { + namespace ops { + namespace helpers { + BUILD_DOUBLE_TEMPLATE(template void argAbsMin_, (const NDArray& input, NDArray& output, const std::vector& dimensions), LIBND4J_TYPES_@FL_TYPE_INDEX@, INDEXING_TYPES); + } + } +} \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argmax.cpp.in b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argmax.cpp.in new file mode 100644 index 000000000..770f155f4 --- /dev/null +++ b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argmax.cpp.in @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ +// +// @author AbdelRauf +// + +#include + +namespace sd { + namespace ops { + namespace helpers { + BUILD_DOUBLE_TEMPLATE(template void argMax_, (const NDArray& input, NDArray& output, const std::vector& dimensions), LIBND4J_TYPES_@FL_TYPE_INDEX@, INDEXING_TYPES); + } + } +} \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argmin.cpp.in b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argmin.cpp.in new file mode 100644 index 000000000..0149b890e --- /dev/null +++ b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argmin.cpp.in @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ +// +// @author AbdelRauf +// + +#include + +namespace sd { + namespace ops { + namespace helpers { + BUILD_DOUBLE_TEMPLATE(template void argMin_, (const NDArray& input, NDArray& output, const std::vector& dimensions), LIBND4J_TYPES_@FL_TYPE_INDEX@, INDEXING_TYPES); + } + } +} \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_0.cpp b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_0.cpp similarity index 95% rename from libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_0.cpp rename to libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_0.cpp index 94e74cd84..22258266b 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_0.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_0.cpp @@ -19,7 +19,7 @@ // #include -#include "../crop_and_resize.hpp" +#include "ops/declarable/helpers/cpu/crop_and_resize.hpp" namespace sd { namespace ops { diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_1.cpp b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_1.cpp similarity index 95% rename from libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_1.cpp rename to libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_1.cpp index 9820c1392..f2b891d5e 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_1.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_1.cpp @@ -19,7 +19,7 @@ // #include -#include "../crop_and_resize.hpp" +#include "ops/declarable/helpers/cpu/crop_and_resize.hpp" namespace sd { namespace ops { diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_2.cpp 
b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_2.cpp similarity index 95% rename from libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_2.cpp rename to libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_2.cpp index 2a78f285f..c475d994c 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_2.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_2.cpp @@ -19,7 +19,7 @@ // #include -#include "../crop_and_resize.hpp" +#include "ops/declarable/helpers/cpu/crop_and_resize.hpp" namespace sd { namespace ops { diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_3.cpp b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_3.cpp similarity index 95% rename from libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_3.cpp rename to libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_3.cpp index 13757997a..11175a02d 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_3.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_3.cpp @@ -19,7 +19,7 @@ // #include -#include "../crop_and_resize.hpp" +#include "ops/declarable/helpers/cpu/crop_and_resize.hpp" namespace sd { namespace ops { diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_4.cpp b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_4.cpp similarity index 95% rename from libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_4.cpp rename to libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_4.cpp index ea3043eeb..cea328084 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_4.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_4.cpp @@ -19,7 +19,7 @@ // #include -#include "../crop_and_resize.hpp" +#include "ops/declarable/helpers/cpu/crop_and_resize.hpp" namespace sd { namespace ops { diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_5.cpp b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_5.cpp similarity index 95% rename from libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_5.cpp rename to libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_5.cpp index 60c1ae906..81bb8e897 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_5.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_5.cpp @@ -19,7 +19,7 @@ // #include -#include "../crop_and_resize.hpp" +#include "ops/declarable/helpers/cpu/crop_and_resize.hpp" namespace sd { namespace ops { diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_6.cpp b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_6.cpp similarity index 95% rename from libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_6.cpp rename to libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_6.cpp index 
6e33d5546..415ab39e2 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_6.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_6.cpp @@ -19,7 +19,7 @@ // #include -#include "../crop_and_resize.hpp" +#include "ops/declarable/helpers/cpu/crop_and_resize.hpp" namespace sd { namespace ops { diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_7.cpp b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_7.cpp similarity index 95% rename from libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_7.cpp rename to libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_7.cpp index ef4a199fd..47d16e6db 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_7.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_7.cpp @@ -19,7 +19,7 @@ // #include -#include "../crop_and_resize.hpp" +#include "ops/declarable/helpers/cpu/crop_and_resize.hpp" namespace sd { namespace ops { diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_8.cpp b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_8.cpp similarity index 95% rename from libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_8.cpp rename to libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_8.cpp index 71cd2ebb8..902ade68c 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_8.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_8.cpp @@ -19,7 +19,7 @@ // #include -#include "../crop_and_resize.hpp" +#include "ops/declarable/helpers/cpu/crop_and_resize.hpp" namespace sd { namespace ops { diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_9.cpp b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_9.cpp similarity index 95% rename from libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_9.cpp rename to libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_9.cpp index e9db5c303..559564903 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize_9.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_9.cpp @@ -19,7 +19,7 @@ // #include -#include "../crop_and_resize.hpp" +#include "ops/declarable/helpers/cpu/crop_and_resize.hpp" namespace sd { namespace ops { diff --git a/libnd4j/include/ops/declarable/helpers/cpu/indexReductions.cpp b/libnd4j/include/ops/declarable/helpers/cpu/indexReductions.cpp new file mode 100644 index 000000000..4665a7b6f --- /dev/null +++ b/libnd4j/include/ops/declarable/helpers/cpu/indexReductions.cpp @@ -0,0 +1,56 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. 
+ * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ +// +// @author AbdelRauf +// + +#include + +namespace sd { + namespace ops { + namespace helpers { + ////////////////////////////////////////////////////////////////////////// + template + void argMax_(const NDArray& input, NDArray& output, const std::vector& dimensions); + + template + void argMin_(const NDArray& input, NDArray& output, const std::vector& dimensions); + + template + void argAbsMax_(const NDArray& input, NDArray& output, const std::vector& dimensions); + + template + void argAbsMin_(const NDArray& input, NDArray& output, const std::vector& dimensions); + + ////////////////////////////////////////////////////////////////////////// + void argMax(const NDArray& input, NDArray& output, const std::vector& dimensions) { + BUILD_DOUBLE_SELECTOR(input.dataType(), output.dataType(), argMax_, (input, output, dimensions), LIBND4J_TYPES, INDEXING_TYPES); + } + + void argMin(const NDArray& input, NDArray& output, const std::vector& dimensions) { + BUILD_DOUBLE_SELECTOR(input.dataType(), output.dataType(), argMin_, (input, output, dimensions), LIBND4J_TYPES, INDEXING_TYPES); + } + + void argAbsMax(const NDArray& input, NDArray& output, const std::vector& dimensions) { + BUILD_DOUBLE_SELECTOR(input.dataType(), output.dataType(), argAbsMax_, (input, output, dimensions), LIBND4J_TYPES, INDEXING_TYPES); + } + + void argAbsMin(const NDArray& input, NDArray& output, const std::vector& dimensions) { + BUILD_DOUBLE_SELECTOR(input.dataType(), output.dataType(), argAbsMin_, (input, output, dimensions), LIBND4J_TYPES, INDEXING_TYPES); + } + } + } +} \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/helpers/cpu/indexReductions.hpp b/libnd4j/include/ops/declarable/helpers/cpu/indexReductions.hpp new file mode 100644 index 000000000..7d376e012 --- /dev/null +++ b/libnd4j/include/ops/declarable/helpers/cpu/indexReductions.hpp @@ -0,0 +1,900 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + // + // @author AbdelRauf + // +#include +#include +#include +#include +#include +#include +#include +#include +#if 1 +#define LOG_CALLS(X) +#else + +#define LOG_CALLS(X) nd4j_printf("___%s_________%d+\n", __PRETTY_FUNCTION__, X); +#endif +namespace sd { + namespace ops { + namespace helpers { + constexpr int threadingThreshold = 4096; + template + FORCEINLINE void indexInnerReductionRank1(const X* buffer, X& current, Z& argCurrent, const Nd4jLong& loopCount) + { + argCurrent = 0; + current = buffer[0]; + LOG_CALLS(0) + Nd4jLong j_offset = 0; + for (Z j = 0; j < loopCount; j++) { + ReductionOp::update(current, argCurrent, buffer[j], j); + } + } + + template + FORCEINLINE void indexInnerReductionRank1(const X* buffer, X& current, Z& argCurrent, const Nd4jLong& loopCount, const Nd4jLong& inner_stride) + { + argCurrent = 0; + current = buffer[0]; + LOG_CALLS(0) + Nd4jLong j_offset = 0; + for (Z j = 0; j < loopCount; j++) { + ReductionOp::update(current, argCurrent, buffer[j_offset], j); + j_offset += inner_stride; + } + } + + template + FORCEINLINE void indexInnerReductionConstRank(const X* buffer, X& current, Z& argCurrent, const Nd4jLong* bases, const Nd4jLong* strides, const Nd4jLong outerLoopCount, const Nd4jLong& innerLoopCount) + { + //skip 1 from the beginning or end depending the Order + constexpr size_t updated_index = LastIndexFaster ? 0 : 1; + constexpr size_t updated_rank = constRank - 1; + sd::CoordsState cst; + //we skip 1 + size_t offset = sd::init_coords(cst, 0, bases + updated_index, strides + updated_index); + Z startIndex = 0; + argCurrent = 0; + current = buffer[offset]; + LOG_CALLS(0) + for (Z i = 0; i < outerLoopCount; i++) { + const X* inner_buffer = &(buffer[offset]); + //typename std::make_signed::type iArgMax = -1; + for (Z j = 0; j < innerLoopCount; j++) { + ReductionOp::update(current, argCurrent, inner_buffer[j], j + startIndex); + } + //we skip 1 + offset = sd::inc_coords(cst, offset); + startIndex += innerLoopCount; + } + } + + template + FORCEINLINE void indexInnerReductionConstRank(const X* buffer, X& current, Z& argCurrent, const Nd4jLong* bases, const Nd4jLong* strides, const Nd4jLong outerLoopCount, const Nd4jLong& innerLoopCount, const Nd4jLong& inner_stride) + { + //skip 1 from the beginning or end depending the Order + constexpr size_t updated_index = LastIndexFaster ? 
0 : 1; + constexpr size_t updated_rank = constRank - 1; + sd::CoordsState cst; + //we skip 1 + size_t offset = sd::init_coords(cst, 0, bases + updated_index, strides + updated_index); + Z startIndex = 0; + argCurrent = 0; + current = buffer[offset]; + LOG_CALLS(0) + for (Z i = 0; i < outerLoopCount; i++) { + const X* inner_buffer = &(buffer[offset]); + for (Z j = 0; j < innerLoopCount; j++) { + ReductionOp::update(current, argCurrent, *inner_buffer, j + startIndex); + inner_buffer += inner_stride; + } + //we alreaddy skiped + offset = sd::inc_coords(cst, offset); + startIndex += innerLoopCount; + } + } + + template + FORCEINLINE void indexInnerReduction(const int& rank, const X* buffer, X& current, Z& argCurrent, const Nd4jLong* bases, const Nd4jLong* strides, const Nd4jLong& outerLoopStart, const Nd4jLong& outerLoopStop, const Nd4jLong& innerLoopCount) + { + size_t offset = 0; + Nd4jLong outerLoopCount = outerLoopStop - outerLoopStart; + Nd4jLong coords[MAX_RANK] = {}; + Nd4jLong* ptr_coords = (Nd4jLong*)&coords; + if (outerLoopStart > 0) { + sd::index2coords_C(outerLoopStart, rank - 1, bases, ptr_coords); + offset = sd::offset_from_coords(strides, ptr_coords, rank); + } + Z startIndex = outerLoopStart * innerLoopCount; + argCurrent = startIndex; + current = buffer[offset]; + LOG_CALLS(0) + for (Z i = 0; i < outerLoopCount; i++) { + const X* inner_buffer = &(buffer[offset]); + //typename std::make_signed::type iArgMax = -1; + for (Z j = 0; j < innerLoopCount; j++) { + //nd4j_printf("%f\n", inner_buffer[j]); + ReductionOp::update(current, argCurrent, inner_buffer[j], j + startIndex); + } + offset = inc_coords(bases, strides, ptr_coords, offset, rank, 1); + //if (iArgMax >= 0) argCurrent = startIndex + iArgMax; + startIndex += innerLoopCount; + } + } + + template + FORCEINLINE void indexInnerReduction(const int& rank, const X* buffer, X& current, Z& argCurrent, const Nd4jLong* bases, const Nd4jLong* strides, const Nd4jLong& outerLoopStart, const Nd4jLong& outerLoopStop, const Nd4jLong& innerLoopCount, const Nd4jLong& inner_stride) + { + size_t offset = 0; + Nd4jLong outerLoopCount = outerLoopStop - outerLoopStart; + Nd4jLong coords[MAX_RANK] = {}; + Nd4jLong* ptr_coords = (Nd4jLong*)&coords; + if (outerLoopStart > 0) { + sd::index2coords_C(outerLoopStart, rank - 1, bases, ptr_coords); + offset = sd::offset_from_coords(strides, ptr_coords, rank); + } + Z startIndex = outerLoopStart * innerLoopCount; + argCurrent = startIndex; + current = buffer[offset]; + LOG_CALLS(0) + for (Z i = 0; i < outerLoopCount; i++) { + const X* inner_buffer = &(buffer[offset]); + //typename std::make_signed::type iArgMax = -1; + for (Z j = 0; j < innerLoopCount; j++) { + ReductionOp::update(current, argCurrent, inner_buffer[j * inner_stride], startIndex + j); + } + offset = inc_coords(bases, strides, ptr_coords, offset, rank, 1); + //offset = inc_coords(bases, strides, ptr_coords, offset, rank, 1); + //if (iArgMax >= 0) argCurrent = startIndex + iArgMax; + startIndex += innerLoopCount; + } + } + + template + FORCEINLINE void indexInnerReductionRank1Block4WithMerge(const X* buffer, X& current, Z& argCurrent, const Nd4jLong& loopCount) + { + argCurrent = 0; + current = buffer[0]; + LOG_CALLS(0) + Nd4jLong loopCount4 = loopCount / 4; + Nd4jLong loopCountEnd = loopCount4 + (loopCount & 3); + const X* buffer1 = buffer + 1 * loopCount4; + const X* buffer2 = buffer1 + 1 * loopCount4; + const X* buffer3 = buffer2 + 1 * loopCount4; + X current1 = *buffer1; + X current2 = *buffer2; + X current3 = *buffer3; + Z argCurrent1 = 
0; + Z argCurrent2 = 0; + Z argCurrent3 = 0; + for (Z j = 0; j < loopCount4; j++) { + ReductionOp::update(current, argCurrent, buffer[j], j); + ReductionOp::update(current1, argCurrent1, buffer1[j], j); + ReductionOp::update(current2, argCurrent2, buffer2[j], j); + ReductionOp::update(current3, argCurrent3, buffer3[j], j); + } + //tail + for (Z j = loopCount4; j < loopCountEnd; j++) { + ReductionOp::update(current3, argCurrent3, buffer3[j], j); + } + //merge + argCurrent1 += loopCount4; + argCurrent2 += 2 * loopCount4; + argCurrent3 += 3 * loopCount4; + ReductionOp::update(current, argCurrent, current1, argCurrent1); + ReductionOp::update(current, argCurrent, current2, argCurrent2); + ReductionOp::update(current, argCurrent, current3, argCurrent3); + } + + template + FORCEINLINE void indexInnerReductionRank1Block4WithMerge(const X* buffer, X& current, Z& argCurrent, const Nd4jLong& loopCount, const Nd4jLong& inner_stride) + { + argCurrent = 0; + current = buffer[0]; + LOG_CALLS(0) + Nd4jLong loopCount4 = loopCount / 4; + Nd4jLong loopCountEnd = loopCount4 + (loopCount & 3); + const X* buffer1 = buffer + inner_stride * loopCount4; + const X* buffer2 = buffer1 + inner_stride * loopCount4; + const X* buffer3 = buffer2 + inner_stride * loopCount4; + X current1 = *buffer1; + X current2 = *buffer2; + X current3 = *buffer3; + Z argCurrent1 = 0; + Z argCurrent2 = 0; + Z argCurrent3 = 0; + Nd4jLong j_offset = 0; + for (Z j = 0; j < loopCount4; j++) { + ReductionOp::update(current, argCurrent, buffer[j_offset], j); + ReductionOp::update(current1, argCurrent1, buffer1[j_offset], j); + ReductionOp::update(current2, argCurrent2, buffer2[j_offset], j); + ReductionOp::update(current3, argCurrent3, buffer3[j_offset], j); + j_offset += inner_stride; + } + //tail + for (Z j = loopCount4; j < loopCountEnd; j++) { + ReductionOp::update(current3, argCurrent3, buffer3[j_offset], j); + j_offset += inner_stride; + } + //merge + argCurrent1 += loopCount4; + argCurrent2 += 2 * loopCount4; + argCurrent3 += 3 * loopCount4; + ReductionOp::update(current, argCurrent, current1, argCurrent1); + ReductionOp::update(current, argCurrent, current2, argCurrent2); + ReductionOp::update(current, argCurrent, current3, argCurrent3); + } + + template + FORCEINLINE void indexInnerReductionRank1Block4(const X* buffer, const X* buffer1, const X* buffer2, const X* buffer3, Z* output, Z* output1, Z* output2, Z* output3, const Nd4jLong& loopCount) + { + LOG_CALLS(0) + Z argCurrent = 0; + Z argCurrent1 = 0; + Z argCurrent2 = 0; + Z argCurrent3 = 0; + X current = buffer[0]; + X current1 = buffer1[0]; + X current2 = buffer2[0]; + X current3 = buffer3[0]; + for (Z j = 0; j < loopCount; j++) { + ReductionOp::update(current, argCurrent, buffer[j], j); + ReductionOp::update(current1, argCurrent1, buffer1[j], j); + ReductionOp::update(current2, argCurrent2, buffer2[j], j); + ReductionOp::update(current3, argCurrent3, buffer3[j], j); + } + *output = argCurrent; + *output1 = argCurrent1; + *output2 = argCurrent2; + *output3 = argCurrent3; + return; + } + + template + FORCEINLINE void indexInnerReductionRank1Block4(const X* buffer, const X* buffer1, const X* buffer2, const X* buffer3, Z* output, Z* output1, Z* output2, Z* output3, const Nd4jLong& loopCount, const Nd4jLong& inner_stride) + { + LOG_CALLS(0) + Z argCurrent = 0; + Z argCurrent1 = 0; + Z argCurrent2 = 0; + Z argCurrent3 = 0; + X current = buffer[0]; + X current1 = buffer1[0]; + X current2 = buffer2[0]; + X current3 = buffer3[0]; + Nd4jLong j_offset = 0; + for (Z j = 0; j < loopCount; 
j++) { + ReductionOp::update(current, argCurrent, buffer[j_offset], j); + ReductionOp::update(current1, argCurrent1, buffer1[j_offset], j); + ReductionOp::update(current2, argCurrent2, buffer2[j_offset], j); + ReductionOp::update(current3, argCurrent3, buffer3[j_offset], j); + j_offset += inner_stride; + } + *output = argCurrent; + *output1 = argCurrent1; + *output2 = argCurrent2; + *output3 = argCurrent3; + return; + } + + template + FORCEINLINE void indexInnerReductionConstRankBlock4(const X* buffer, const X* buffer1, const X* buffer2, const X* buffer3, + Z* output, Z* output1, Z* output2, Z* output3, const Nd4jLong* bases, const Nd4jLong* strides, + const Nd4jLong& outerLoopCount, const Nd4jLong& innerLoopCount) + { + LOG_CALLS(0) + //skip 1 from the beginning or end depending the Order + constexpr size_t updated_index = LastIndexFaster ? 0 : 1; + constexpr size_t updated_rank = constRank - 1; + sd::CoordsState cst; + //we skip 1 + size_t offset = sd::init_coords(cst, 0, bases + updated_index, strides + updated_index); + Z startIndex = 0; + Z argCurrent = 0; + Z argCurrent1 = 0; + Z argCurrent2 = 0; + Z argCurrent3 = 0; + X current = buffer[0]; + X current1 = buffer1[0]; + X current2 = buffer2[0]; + X current3 = buffer3[0]; + //LOG_CALLS(0) + for (Z i = 0; i < outerLoopCount; i++) { + const X* inner_buffer = &(buffer[offset]); + const X* inner_buffer1 = &(buffer1[offset]); + const X* inner_buffer2 = &(buffer2[offset]); + const X* inner_buffer3 = &(buffer3[offset]); + //typename std::make_signed::type iArgMax = -1; + for (Z j = 0; j < innerLoopCount; j++) { + ReductionOp::update(current, argCurrent, inner_buffer[j], j + startIndex); + ReductionOp::update(current1, argCurrent1, inner_buffer1[j], j + startIndex); + ReductionOp::update(current2, argCurrent2, inner_buffer2[j], j + startIndex); + ReductionOp::update(current3, argCurrent3, inner_buffer3[j], j + startIndex); + } + //we skip 1 + offset = sd::inc_coords(cst, offset); + startIndex += innerLoopCount; + } + *output = argCurrent; + *output1 = argCurrent1; + *output2 = argCurrent2; + *output3 = argCurrent3; + return; + } + + template + FORCEINLINE void indexInnerReductionConstRankBlock4(const X* buffer, const X* buffer1, const X* buffer2, const X* buffer3, + Z* output, Z* output1, Z* output2, Z* output3, const Nd4jLong* bases, const Nd4jLong* strides, + const Nd4jLong& outerLoopCount, const Nd4jLong& innerLoopCount, const Nd4jLong& inner_stride) + { + LOG_CALLS(0) + //skip 1 from the beginning or end depending the Order + constexpr size_t updated_index = LastIndexFaster ? 
0 : 1; + constexpr size_t updated_rank = constRank - 1; + sd::CoordsState cst; + //we skip 1 + size_t offset = sd::init_coords(cst, 0, bases + updated_index, strides + updated_index); + Z startIndex = 0; + Z argCurrent = 0; + Z argCurrent1 = 0; + Z argCurrent2 = 0; + Z argCurrent3 = 0; + X current = buffer[0]; + X current1 = buffer1[0]; + X current2 = buffer2[0]; + X current3 = buffer3[0]; + //LOG_CALLS(0) + for (Z i = 0; i < outerLoopCount; i++) { + const X* inner_buffer = &(buffer[offset]); + const X* inner_buffer1 = &(buffer1[offset]); + const X* inner_buffer2 = &(buffer2[offset]); + const X* inner_buffer3 = &(buffer3[offset]); + //typename std::make_signed::type iArgMax = -1; + Nd4jLong inner_offset = 0; + for (Z j = 0; j < innerLoopCount; j++) { + ReductionOp::update(current, argCurrent, inner_buffer[inner_offset], j + startIndex); + ReductionOp::update(current1, argCurrent1, inner_buffer1[inner_offset], j + startIndex); + ReductionOp::update(current2, argCurrent2, inner_buffer2[inner_offset], j + startIndex); + ReductionOp::update(current3, argCurrent3, inner_buffer3[inner_offset], j + startIndex); + inner_offset += inner_stride; + } + //we skip 1 + offset = sd::inc_coords(cst, offset); + startIndex += innerLoopCount; + } + *output = argCurrent; + *output1 = argCurrent1; + *output2 = argCurrent2; + *output3 = argCurrent3; + return; + } + + template + void argIndexCase1Scalar(const int& second_rank,const Nd4jLong* inner_bases,const Nd4jLong* inner_strides, const X* bufferX, Z* outputZ) + { + Nd4jLong inner_total; + Nd4jLong inner_last = 0; + int maxThreads = sd::Environment::getInstance()->maxMasterThreads(); + if (second_rank == 1) { + inner_total = inner_bases[0]; + if (inner_total < threadingThreshold) { + maxThreads = 1; + } + } + else { + inner_total = getLength(inner_bases, second_rank, 1, inner_last); + if (inner_total * inner_last < threadingThreshold) { + maxThreads = 1; + } + } + + + + std::unique_ptr maxValues(new X[maxThreads]); + std::unique_ptr maxIndices(new Z[maxThreads]); + X* ptrMaxValues = maxValues.get(); + Z* ptrMaxIndices = maxIndices.get(); + auto func = [ptrMaxValues, ptrMaxIndices, inner_last, second_rank, inner_bases, inner_strides, bufferX](uint64_t thread_id, int64_t start, int64_t stop, int64_t increment) -> void { + //LOG_CALLS(0) + const Nd4jLong inner_stride = LastIndexFaster ? 
inner_strides[second_rank - 1] : inner_strides[0]; + Z argCurrent; X current; + if (second_rank == 1) { + const Nd4jLong loopTotal = stop - start; + if (inner_stride == 1) { + indexInnerReductionRank1Block4WithMerge(&(bufferX[start]), current, argCurrent, loopTotal); + } + else { + indexInnerReductionRank1Block4WithMerge(&(bufferX[start * inner_stride]), current, argCurrent, loopTotal, inner_stride); + } + ptrMaxIndices[thread_id] = argCurrent + start; + } + else { + if (inner_stride == 1) { + indexInnerReduction(second_rank, bufferX, current, argCurrent, inner_bases, inner_strides, start, stop, inner_last, inner_stride); + } + else { + indexInnerReduction(second_rank, bufferX, current, argCurrent, inner_bases, inner_strides, start, stop, inner_last, inner_stride); + } + ptrMaxIndices[thread_id] = argCurrent; + } + ptrMaxValues[thread_id] = current; + }; +#if 0 + int Count = 0; + func(0, 0, inner_total, 1); +#else + int Count = samediff::Threads::parallel_tad(func, 0, inner_total, 1, maxThreads); +#endif + Z arg = 0; + X current = ptrMaxValues[0]; + + for (Z i = 1; i < Count; i++) { + ReductionOp::update(current, arg, ptrMaxValues[i], i); + } + + *outputZ = ptrMaxIndices[arg]; + } + + + template + void argReductionInnerCases(Movement& movement, Nd4jLong loopTotal, const int& second_rank,const Nd4jLong* inner_bases,const Nd4jLong* inner_strides, const X* bufferX, Z* outputZ) + { + + Nd4jLong inner_stride = true /*LastIndexFaster*/ ? inner_strides[second_rank - 1] : inner_strides[0]; + + Nd4jLong loopTotal_K = loopTotal / 4; + Nd4jLong loopTotal_Tail = loopTotal & 3; + if (inner_stride == 1) { + if (second_rank == 1) { + LOG_CALLS(0) + Nd4jLong inner_total = getLength(inner_bases, second_rank); + for (Nd4jLong i = 0; i < loopTotal_K; i++) { + const X* buffer0 = &(bufferX[movement.First()]); + Z* output0 = &(outputZ[movement.Second()]); + movement.increment(); + const X* buffer1 = &(bufferX[movement.First()]); + Z* output1 = &(outputZ[movement.Second()]); + movement.increment(); + const X* buffer2 = &(bufferX[movement.First()]); + Z* output2 = &(outputZ[movement.Second()]); + movement.increment(); + const X* buffer3 = &(bufferX[movement.First()]); + Z* output3 = &(outputZ[movement.Second()]); + movement.increment(); + indexInnerReductionRank1Block4(buffer0, buffer1, buffer2, buffer3, output0, output1, output2, output3, inner_total); + + } + if (inner_total >= 2048) { + for (Nd4jLong i = 0; i < loopTotal_Tail; i++) { + X current; + const X* buffer0 = &(bufferX[movement.First()]); + indexInnerReductionRank1Block4WithMerge(buffer0, current, outputZ[movement.Second()], inner_total); + movement.increment(); + } + } + else { + for (Nd4jLong i = 0; i < loopTotal_Tail; i++) { + X current; + const X* buffer0 = &(bufferX[movement.First()]); + indexInnerReductionRank1(buffer0, current, outputZ[movement.Second()], inner_total); + movement.increment(); + } + } + + } + else { + Nd4jLong inner_last; + Nd4jLong inner_loop = getLength(inner_bases, second_rank, 1, inner_last); + if (second_rank == 2) { + LOG_CALLS(1) + for (Nd4jLong i = 0; i < loopTotal_K; i++) { + const X* buffer0 = &(bufferX[movement.First()]); + Z* output0 = &(outputZ[movement.Second()]); + movement.increment(); + const X* buffer1 = &(bufferX[movement.First()]); + Z* output1 = &(outputZ[movement.Second()]); + movement.increment(); + const X* buffer2 = &(bufferX[movement.First()]); + Z* output2 = &(outputZ[movement.Second()]); + movement.increment(); + const X* buffer3 = &(bufferX[movement.First()]); + Z* output3 = 
&(outputZ[movement.Second()]); + movement.increment(); + indexInnerReductionConstRankBlock4(buffer0, buffer1, buffer2, buffer3, output0, output1, output2, output3, inner_bases, inner_strides, + inner_loop, inner_last); + + } + for (Nd4jLong i = 0; i < loopTotal_Tail; i++) { + X current; + const X* buffer0 = &(bufferX[movement.First()]); + indexInnerReductionConstRank(buffer0, current, outputZ[movement.Second()], inner_bases, inner_strides, inner_loop, inner_last); + movement.increment(); + } + + } + else if (second_rank == 3) { + LOG_CALLS(2) + for (Nd4jLong i = 0; i < loopTotal_K; i++) { + const X* buffer0 = &(bufferX[movement.First()]); + Z* output0 = &(outputZ[movement.Second()]); + movement.increment(); + const X* buffer1 = &(bufferX[movement.First()]); + Z* output1 = &(outputZ[movement.Second()]); + movement.increment(); + const X* buffer2 = &(bufferX[movement.First()]); + Z* output2 = &(outputZ[movement.Second()]); + movement.increment(); + const X* buffer3 = &(bufferX[movement.First()]); + Z* output3 = &(outputZ[movement.Second()]); + movement.increment(); + indexInnerReductionConstRankBlock4(buffer0, buffer1, buffer2, buffer3, output0, output1, output2, output3, inner_bases, inner_strides, + inner_loop, inner_last); + + } + for (Nd4jLong i = 0; i < loopTotal_Tail; i++) { + X current; + const X* buffer0 = &(bufferX[movement.First()]); + indexInnerReductionConstRank(buffer0, current, outputZ[movement.Second()], inner_bases, inner_strides, + inner_loop, inner_last); + movement.increment(); + } + + } + else { + LOG_CALLS(3) + //nd4j_printf("-----%d \n", loopTotal); + for (Nd4jLong i = 0; i < loopTotal; i++) { + X current; + const X* buffer0 = &(bufferX[movement.First()]); + indexInnerReduction(second_rank, buffer0, current, outputZ[movement.Second()], inner_bases, inner_strides, 0, + inner_loop, inner_last); + movement.increment(); + } + + } + } + + } + else { + if (second_rank == 1) { + LOG_CALLS(10) + Nd4jLong inner_total = getLength(inner_bases, second_rank); + for (Nd4jLong i = 0; i < loopTotal_K; i++) { + const X* buffer0 = &(bufferX[movement.First()]); + Z* output0 = &(outputZ[movement.Second()]); + movement.increment(); + const X* buffer1 = &(bufferX[movement.First()]); + Z* output1 = &(outputZ[movement.Second()]); + movement.increment(); + const X* buffer2 = &(bufferX[movement.First()]); + Z* output2 = &(outputZ[movement.Second()]); + movement.increment(); + const X* buffer3 = &(bufferX[movement.First()]); + Z* output3 = &(outputZ[movement.Second()]); + movement.increment(); + indexInnerReductionRank1Block4(buffer0, buffer1, buffer2, buffer3, output0, output1, output2, output3, inner_total, inner_stride); + + } + if (inner_total >= 2048) { + for (Nd4jLong i = 0; i < loopTotal_Tail; i++) { + X current; + const X* buffer0 = &(bufferX[movement.First()]); + indexInnerReductionRank1Block4WithMerge(buffer0, current, outputZ[movement.Second()], inner_total, inner_stride); + movement.increment(); + } + } + else { + for (Nd4jLong i = 0; i < loopTotal_Tail; i++) { + X current; + const X* buffer0 = &(bufferX[movement.First()]); + indexInnerReductionRank1(buffer0, current, outputZ[movement.Second()], inner_total, inner_stride); + movement.increment(); + } + } + + } + else { + Nd4jLong inner_last; + Nd4jLong inner_loop = getLength(inner_bases, second_rank, 1, inner_last); + if (second_rank == 2) { + LOG_CALLS(11) + for (Nd4jLong i = 0; i < loopTotal_K; i++) { + const X* buffer0 = &(bufferX[movement.First()]); + Z* output0 = &(outputZ[movement.Second()]); + movement.increment(); + const X* 
buffer1 = &(bufferX[movement.First()]); + Z* output1 = &(outputZ[movement.Second()]); + movement.increment(); + const X* buffer2 = &(bufferX[movement.First()]); + Z* output2 = &(outputZ[movement.Second()]); + movement.increment(); + const X* buffer3 = &(bufferX[movement.First()]); + Z* output3 = &(outputZ[movement.Second()]); + movement.increment(); + indexInnerReductionConstRankBlock4(buffer0, buffer1, buffer2, buffer3, output0, output1, output2, output3, inner_bases, inner_strides, + inner_loop, inner_last, inner_stride); + + } + for (Nd4jLong i = 0; i < loopTotal_Tail; i++) { + X current; + const X* buffer0 = &(bufferX[movement.First()]); + indexInnerReductionConstRank(buffer0, current, outputZ[movement.Second()], inner_bases, inner_strides, + inner_loop, inner_last, inner_stride); + movement.increment(); + } + + } + else if (second_rank == 3) { + LOG_CALLS(12) + for (Nd4jLong i = 0; i < loopTotal_K; i++) { + const X* buffer0 = &(bufferX[movement.First()]); + Z* output0 = &(outputZ[movement.Second()]); + movement.increment(); + const X* buffer1 = &(bufferX[movement.First()]); + Z* output1 = &(outputZ[movement.Second()]); + movement.increment(); + const X* buffer2 = &(bufferX[movement.First()]); + Z* output2 = &(outputZ[movement.Second()]); + movement.increment(); + const X* buffer3 = &(bufferX[movement.First()]); + Z* output3 = &(outputZ[movement.Second()]); + movement.increment(); + indexInnerReductionConstRankBlock4(buffer0, buffer1, buffer2, buffer3, output0, output1, output2, output3, inner_bases, inner_strides, + inner_loop, inner_last, inner_stride); + + } + for (Nd4jLong i = 0; i < loopTotal_Tail; i++) { + X current; + const X* buffer0 = &(bufferX[movement.First()]); + indexInnerReductionConstRank(buffer0, current, outputZ[movement.Second()], inner_bases, inner_strides, + inner_loop, inner_last, inner_stride); + movement.increment(); + } + + } + else { + LOG_CALLS(13) + //nd4j_printf("-------%d inner loop %d inner_last %d\n", loopTotal, inner_loop,inner_last); + for (Nd4jLong i = 0; i < loopTotal; i++) { + X current; + const X* buffer0 = &(bufferX[movement.First()]); + indexInnerReduction(second_rank, buffer0, current, outputZ[movement.Second()], inner_bases, inner_strides, 0, + inner_loop, inner_last, inner_stride); + movement.increment(); + } + + } + } + + } + + } + + template + void argIndexCaseNonScalar(const int& first_rank, const int& output_rank, bool squashed, const int& second_rank, + const Nd4jLong*& outer_bases,const Nd4jLong* outer_strides,const Nd4jLong* output_strides, const Nd4jLong &output_stride, + const Nd4jLong*& inner_bases,const Nd4jLong* inner_strides, const X* bufferX, Z* outputZ) + { + + Nd4jLong total = getLength(outer_bases, first_rank); + Nd4jLong inner_stride = true /*LastIndexFaster*/ ? inner_strides[second_rank - 1] : inner_strides[0]; + Nd4jLong outer_stride = LastIndexFaster ? outer_strides[second_rank - 1] : outer_strides[0]; + auto func = [first_rank, output_rank, squashed, outer_bases, outer_strides, output_strides, output_stride, second_rank, inner_bases, inner_strides, bufferX, outputZ](uint64_t thread_id, int64_t start, int64_t stop, int64_t increment) -> void { + + Nd4jLong loopTotal = stop - start; + Nd4jLong stride = LastIndexFaster ? 
outer_strides[first_rank - 1] : outer_strides[0]; + if (first_rank == 1) { + + if (stride == 1) { + ZipGenericCoordsRank1Stride1 movement; + movement.init(nullptr, nullptr, nullptr, 0, start); + argReductionInnerCases(movement, loopTotal, second_rank, inner_bases, inner_strides, bufferX, outputZ); + } + else { + ZipGenericCoordsRank1BothStrideN movement; + movement.init(nullptr, &stride, &output_stride, 0, start); + argReductionInnerCases(movement, loopTotal, second_rank, inner_bases, inner_strides, bufferX, outputZ); + + } + + } + else if (squashed && first_rank <= output_rank) { + if (first_rank == 2) { + if (output_stride == 1) { + ZipGenericCoordsConstMovementSecondStride1<2, LastIndexFaster> movement; + movement.init(outer_bases, outer_strides, nullptr, first_rank, start); + argReductionInnerCases(movement, loopTotal, second_rank, inner_bases, inner_strides, bufferX, outputZ); + + } + else { + ZipGenericCoordsConstMovementSecondStrideN<2, LastIndexFaster> movement; + movement.init(outer_bases, outer_strides, &output_stride, first_rank, start); + argReductionInnerCases(movement, loopTotal, second_rank, inner_bases, inner_strides, bufferX, outputZ); + + } + } + else if (first_rank == 3) { + if (output_stride == 1) { + ZipGenericCoordsConstMovementSecondStride1<3, LastIndexFaster> movement; + movement.init(outer_bases, outer_strides, nullptr, first_rank, start); + argReductionInnerCases(movement, loopTotal, second_rank, inner_bases, inner_strides, bufferX, outputZ); + + } + else { + ZipGenericCoordsConstMovementSecondStrideN<3, LastIndexFaster> movement; + movement.init(outer_bases, outer_strides, &output_stride, first_rank, start); + argReductionInnerCases(movement, loopTotal, second_rank, inner_bases, inner_strides, bufferX, outputZ); + + } + } + else { + ZipGenericCoordsMovementSecondStrideN< LastIndexFaster> movement; + movement.init(outer_bases, outer_strides, &output_stride, first_rank, start); + + argReductionInnerCases(movement, loopTotal, second_rank, inner_bases, inner_strides, bufferX, outputZ); + + } + + } + else { + ZipGenericCoordsMovement movement; + movement.init(outer_bases, outer_strides, output_strides, first_rank, start); + + argReductionInnerCases(movement, loopTotal, second_rank, inner_bases, inner_strides, bufferX, outputZ); + + } + + }; +#if 0 + func(0, 0, total, 1); +#else + // + uint32_t numThreads = sd::Environment::getInstance()->maxMasterThreads(); + Nd4jLong inner_total = getLength(inner_bases, second_rank); + if (total * inner_total <= threadingThreshold) { + numThreads = 1; + } + else { + if (inner_stride > outer_stride && total <= 256) { + auto desired = total > 4 ? (total / 4) : 1; + numThreads = numThreads > desired ? 
desired : numThreads; + } + } + + samediff::Threads::parallel_tad(func, 0, total, 1, numThreads); +#endif + } + + template + void argIndex_(const NDArray& input, NDArray& output, const std::vector& dimensions) { + char input_order = input.ordering(); + bool try_squash_outer = (input_order == output.ordering()) && output.ews() != 0; + const Nd4jLong* input_shapeInfo = input.shapeInfo(); + const Nd4jLong* output_shapeInfo = output.shapeInfo(); + const Nd4jLong rank = input_shapeInfo[0]; + const Nd4jLong* input_bases = &(input_shapeInfo[1]); + const Nd4jLong* input_strides = &(input_shapeInfo[rank + 1]); + const Nd4jLong output_rank = output_shapeInfo[0]; + const Nd4jLong* output_strides = &(output_shapeInfo[output_rank + 1]); + Nd4jLong new_bases[MAX_RANK]; + Nd4jLong new_strides[MAX_RANK]; + int first_begin, first_end, second_begin, second_end; + //rePartition into two parts based on the selection + rePartition(input_order, dimensions, rank, input_bases, input_strides, new_bases, new_strides, first_begin, first_end, second_begin, second_end, try_squash_outer, input_order == 'c'); + int first_rank = first_end - first_begin; //the first rank can be 0 for scalar cases + int second_rank = second_end - second_begin; + auto bufferX = input.bufferAsT(); + auto outputZ = output.bufferAsT(); + const Nd4jLong* outer_bases = &(new_bases[first_begin]); + const Nd4jLong* outer_strides = &(new_strides[first_begin]); + const Nd4jLong* inner_bases = &(new_bases[second_begin]); + const Nd4jLong* inner_strides = &(new_strides[second_begin]); + const Nd4jLong output_stride = output.ordering() == 'c' ? output_strides[output_rank-1]:output_strides[0]; + if (input_order == 'c') { + if (first_rank == 0) { + argIndexCase1Scalar(second_rank, inner_bases, inner_strides, bufferX, outputZ); + } + else { + argIndexCaseNonScalar(first_rank, output_rank, try_squash_outer, second_rank, outer_bases, outer_strides, output_strides, + output_stride,inner_bases, inner_strides, bufferX, outputZ); + } + } + else { + if (first_rank == 0) { + LOG_CALLS(0); + if (second_rank == 1) { + argIndexCase1Scalar(second_rank, inner_bases, inner_strides, bufferX, outputZ); + } + else { + argIndexCase1Scalar(second_rank, inner_bases, inner_strides, bufferX, outputZ); + } + } + else { + LOG_CALLS(1); + argIndexCaseNonScalar(first_rank, output_rank, try_squash_outer, second_rank, outer_bases, outer_strides, output_strides, + output_stride, inner_bases, inner_strides, bufferX, outputZ); + } + } + } + + template + struct IndexMax { + static FORCEINLINE void update(X& current, Z& currentIndex, const X& candidate, const Z& candidateIndex) { + if (candidate > current) { + current = candidate; + currentIndex = candidateIndex; + } + } + }; + + template + struct IndexMin { + static FORCEINLINE void update(X& current, Z& currentIndex, const X& candidate, const Z& candidateIndex) { + if (candidate < current) { + current = candidate; + currentIndex = candidateIndex; + } + } + }; + + template + struct IndexAbsMax { + static FORCEINLINE void update(X& current, Z& currentIndex, const X& candidate, const Z& candidateIndex) { + auto absCandidate = sd::math::nd4j_abs(candidate); + if (absCandidate > current) { + current = absCandidate; + currentIndex = candidateIndex; + } + } + }; + + template + struct IndexAbsMin { + static FORCEINLINE void update(X& current, Z& currentIndex, const X& candidate, const Z& candidateIndex) { + auto absCandidate = sd::math::nd4j_abs(candidate); + if (absCandidate < current) { + current = absCandidate; + currentIndex = 
candidateIndex; + } + } + }; + + + ////////////////////////////////////////////////////////////////////////// + template + void argMax_(const NDArray& input, NDArray& output, const std::vector& dimensions) { + return argIndex_>(input, output, dimensions); + } + + template + void argMin_(const NDArray& input, NDArray& output, const std::vector& dimensions) { + return argIndex_>(input, output, dimensions); + } + + template + void argAbsMax_(const NDArray& input, NDArray& output, const std::vector& dimensions) { + return argIndex_>(input, output, dimensions); + } + + template + void argAbsMin_(const NDArray& input, NDArray& output, const std::vector& dimensions) { + return argIndex_>(input, output, dimensions); + } + } + } +} diff --git a/libnd4j/include/ops/declarable/helpers/cuda/indexReductions.cu b/libnd4j/include/ops/declarable/helpers/cuda/indexReductions.cu new file mode 100644 index 000000000..9876417df --- /dev/null +++ b/libnd4j/include/ops/declarable/helpers/cuda/indexReductions.cu @@ -0,0 +1,106 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include +#include +#include + +namespace sd { + namespace ops { + namespace helpers { + ////////////////////////////////////////////////////////////////////////// + void argMax(const NDArray& input, NDArray& output, const std::vector& dimensions) { + NDArray::prepareSpecialUse({&output}, {&input}); + if (output.isScalar()) { + NativeOpExecutioner::execIndexReduceScalar(LaunchContext::defaultContext(), indexreduce::Ops::IndexMax, input.buffer(), input.shapeInfo(), input.specialBuffer(), input.specialShapeInfo(), nullptr, output.buffer(), output.shapeInfo(), output.specialBuffer(), output.specialShapeInfo()); + } + else { + auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), dimensions); + + NativeOpExecutioner::execIndexReduce(LaunchContext::defaultContext(), indexreduce::Ops::IndexMax, + input.buffer(), input.shapeInfo(), input.specialBuffer(), input.specialShapeInfo(), + nullptr, + output.buffer(), output.shapeInfo(), output.specialBuffer(), output.specialShapeInfo(), + (int*) nullptr, dimensions.size(), + tadPack.specialShapeInfo(), tadPack.specialOffsets()); + } + + NDArray::registerSpecialUse({ &output }, { &input }); + } + + void argMin(const NDArray& input, NDArray& output, const std::vector& dimensions) { + NDArray::prepareSpecialUse({ &output }, { &input }); + if (output.isScalar()) { + NativeOpExecutioner::execIndexReduceScalar(LaunchContext::defaultContext(), indexreduce::Ops::IndexMin, input.buffer(), input.shapeInfo(), input.specialBuffer(), input.specialShapeInfo(), nullptr, output.buffer(), output.shapeInfo(), output.specialBuffer(), output.specialShapeInfo()); + } + else { + auto tadPack = 
sd::ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), dimensions); + + NativeOpExecutioner::execIndexReduce(LaunchContext::defaultContext(), indexreduce::Ops::IndexMin, + input.buffer(), input.shapeInfo(), input.specialBuffer(), input.specialShapeInfo(), + nullptr, + output.buffer(), output.shapeInfo(), output.specialBuffer(), output.specialShapeInfo(), + (int*) nullptr, dimensions.size(), + tadPack.specialShapeInfo(), tadPack.specialOffsets()); + } + + NDArray::registerSpecialUse({ &output }, { &input }); + } + + void argAbsMax(const NDArray& input, NDArray& output, const std::vector& dimensions) { + NDArray::prepareSpecialUse({ &output }, { &input }); + if (output.isScalar()) { + NativeOpExecutioner::execIndexReduceScalar(LaunchContext::defaultContext(), indexreduce::Ops::IndexAbsoluteMax, input.buffer(), input.shapeInfo(), input.specialBuffer(), input.specialShapeInfo(), nullptr, output.buffer(), output.shapeInfo(), output.specialBuffer(), output.specialShapeInfo()); + } + else { + auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), dimensions); + + NativeOpExecutioner::execIndexReduce(LaunchContext::defaultContext(), indexreduce::Ops::IndexAbsoluteMax, + input.buffer(), input.shapeInfo(), input.specialBuffer(), input.specialShapeInfo(), + nullptr, + output.buffer(), output.shapeInfo(), output.specialBuffer(), output.specialShapeInfo(), + (int*) nullptr, dimensions.size(), + tadPack.specialShapeInfo(), tadPack.specialOffsets()); + } + + NDArray::registerSpecialUse({ &output }, { &input }); + } + + void argAbsMin(const NDArray& input, NDArray& output, const std::vector& dimensions) { + NDArray::prepareSpecialUse({ &output }, { &input }); + if (output.isScalar()) { + NativeOpExecutioner::execIndexReduceScalar(LaunchContext::defaultContext(), indexreduce::Ops::IndexAbsoluteMin, input.buffer(), input.shapeInfo(), input.specialBuffer(), input.specialShapeInfo(), nullptr, output.buffer(), output.shapeInfo(), output.specialBuffer(), output.specialShapeInfo()); + } + else { + auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), dimensions); + + NativeOpExecutioner::execIndexReduce(LaunchContext::defaultContext(), indexreduce::Ops::IndexAbsoluteMin, + input.buffer(), input.shapeInfo(), input.specialBuffer(), input.specialShapeInfo(), + nullptr, + output.buffer(), output.shapeInfo(), output.specialBuffer(), output.specialShapeInfo(), + (int *) nullptr, dimensions.size(), + tadPack.specialShapeInfo(), tadPack.specialOffsets()); + } + + NDArray::registerSpecialUse({&output}, {&input}); + } + } + } +} \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/helpers/reductions.h b/libnd4j/include/ops/declarable/helpers/reductions.h new file mode 100644 index 000000000..ee199fd16 --- /dev/null +++ b/libnd4j/include/ops/declarable/helpers/reductions.h @@ -0,0 +1,41 @@ + +/******************************************************************************* + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + // + // @author AbdelRauf (rauf@konduit.ai) + // + +#ifndef LIBND4J_HELPERS_REDUCTIONS_H +#define LIBND4J_HELPERS_REDUCTIONS_H + +#include +#include +#include + +namespace sd { + namespace ops { + namespace helpers { + + void argMax(const NDArray& input, NDArray& output, const std::vector& dimensions); + void argAbsMax(const NDArray& input, NDArray& output, const std::vector& dimensions); + void argMin(const NDArray& input, NDArray& output, const std::vector& dimensions); + void argAbsMin(const NDArray& input, NDArray& output, const std::vector& dimensions); + + } + } +} + +#endif \ No newline at end of file diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests19.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests19.cpp index ce5038020..f111a888a 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests19.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests19.cpp @@ -40,6 +40,19 @@ public: } }; + +TEST_F(DeclarableOpsTests19, test_argmax_maxint_vector_1) { + auto x = NDArrayFactory::create('c', {3}, {0.1f, 0.5f, 0.7f}); + auto z = NDArrayFactory::create(0); + auto e = NDArrayFactory::create(2); + + sd::ops::argmax op; + auto status = op.execute({&x}, {&z}, {DataTypeUtils::max()}); + ASSERT_EQ(Status::OK(), status); + ASSERT_EQ(e, z); +} + + TEST_F(DeclarableOpsTests19, test_threshold_encode_1) { auto x = NDArrayFactory::create('c', {3}, {1.5, 2.5, -3.5}); auto exp_encoded = NDArrayFactory::create('c', {7}, {3, 3, 1056964608, 0, 1, 2, -3}); @@ -276,6 +289,7 @@ TEST_F(DeclarableOpsTests19, test_threshold_encode_decode_2) { } + TEST_F(DeclarableOpsTests19, test_matmul_ccc) { auto x = NDArrayFactory::create('c', {10, 10}); auto y = NDArrayFactory::create('c', {10, 10}); diff --git a/libnd4j/tests_cpu/layers_tests/PlaygroundTests.cpp b/libnd4j/tests_cpu/layers_tests/PlaygroundTests.cpp index 166ba058f..f8086c9fe 100644 --- a/libnd4j/tests_cpu/layers_tests/PlaygroundTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/PlaygroundTests.cpp @@ -43,9 +43,12 @@ #include #include #include - +#include #include #include +#include +#include +#include using namespace sd; using namespace sd::graph; @@ -275,6 +278,256 @@ TEST_F(PlaygroundTests, test_one_off_ops_1) { op.execute({&x, &y}, {&z}); } +#if defined(INDEX_REDUCTIONS_BENCH_TESTS) +//temporarly, testing against the original one +void original_argmax(const NDArray& input, std::vector& axis, NDArray& output) { + sd::ops::helpers::adjustAxis(input.rankOf(), axis); + input.applyIndexReduce(sd::indexreduce::IndexMax, output, axis); +} + +template +void fill_random(sd::NDArray& arr) { + Nd4jLong coords[MAX_RANK] = {}; + std::random_device rd; + std::mt19937 gen(rd()); + //for floats + std::uniform_real_distribution dis((T)-10.0, (T)22.9); + T* x = arr.bufferAsT(); + Nd4jLong* shapeInfo = arr.getShapeInfo(); + Nd4jLong* strides = arr.stridesOf(); + Nd4jLong rank = shapeInfo[0]; + Nd4jLong* bases = &(shapeInfo[1]); + size_t t = 1; + for (size_t i = 0; i < rank ; i++) { + t *= bases[i]; + } + size_t offset = 0; + if (arr.ordering() == 'c') { + + for (size_t i = 0; i < t; i++) { + x[offset] = dis(gen) ; + offset = sd::inc_coords(bases, strides, coords, offset, rank); + } + + } + else { + + for (size_t i = 0; i < t; i++) { + x[offset] = dis(gen) ; + offset = sd::inc_coords(bases, strides, coords, offset, rank); 
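+                // inc_coords bumps the coordinate tuple by one position and returns the
+                // updated flat offset, letting this fill loop walk every element of a
+                // strided array without recomputing the offset from scratch each step.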
+ } + + } +} + +void testLegacy(bool random) { +#if 0 + int bases[] = { 3, 2, 4, 5, 7 }; + constexpr int Loop = 1; +#else + int bases[] = { 8, 32, 64, 32, 64 }; + constexpr int Loop = 10; +#endif + constexpr int N = 5; + + auto x = NDArrayFactory::create('c', { bases[0], bases[1], bases[2], bases[3], bases[4] }); + if (!random) { + x.linspace(1); + } + else{ + fill_random(x); + } + +#define COMBINATIONS 1 +#if COMBINATIONS +//https://www.rosettacode.org/wiki/Combinations#C.2B.2B +for (int k = N; k >= 1; k--) { + + std::string bitmask(k, 1); // K leading 1's + bitmask.resize(N, 0); // N-K trailing 0's + + do { + + + std::vector dimension; + + std::vector output_bases; + + for (int i = 0; i < N; ++i) // [0..N-1] integers + { + if (bitmask[i]) dimension.push_back(i); + else { + output_bases.push_back(bases[i]); + } + } +#else +std::vector dimension = { 0,1,2,3 }; +int k = 4; +#endif +auto dim = NDArrayFactory::create(dimension); + +#if 1 +nd4j_printf("C(N:%d K:%d) \n", N, k); +dim.printIndexedBuffer("Dimension"); +for (int xind : dimension) { + nd4j_printf(" %d ,", bases[xind]); +} +nd4j_printf("%s", "\n"); +#endif + + + +std::vector values; +sd::ResultSet result; +for (int e = 0; e < Loop; e++) { + auto timeStart = std::chrono::system_clock::now(); + NDArray exp = output_bases.size() > 0 ? NDArrayFactory::create('c', output_bases) : NDArrayFactory::create(0); + original_argmax(x, dimension, exp); + auto timeEnd = std::chrono::system_clock::now(); + auto outerTime = std::chrono::duration_cast(timeEnd - timeStart).count(); + values.emplace_back(outerTime); +} + +std::sort(values.begin(), values.end()); + +nd4j_printf("Time: %lld us;\n", values[values.size() / 2]); +#if COMBINATIONS + + } while (std::prev_permutation(bitmask.begin(), bitmask.end())); + +} +#endif +} + +#define DEBUG 1 + +void testNewReduction(bool random, bool checkCorrectness = false , char order ='c') { + std::vector arr_dimensions; +#if defined(DEBUG) + int bases[] = { 3, 2, 3, 3, 5 ,4,7,4,7,7 }; + constexpr int Loop = 1; + constexpr int N = 10; +#else + int bases[] = { 8, 32, 64, 32, 64 }; + constexpr int Loop = 10; + constexpr int N = 5; + +#endif + + for (int i = 0; i < N; i++) { + arr_dimensions.push_back(bases[i]); + } + auto x = NDArrayFactory::create(order,arr_dimensions); + if (!random) { + x.linspace(1); + } + else { + fill_random(x); + } + +#define COMBINATIONS 1 +#if COMBINATIONS + //https://www.rosettacode.org/wiki/Combinations#C.2B.2B + for (int k = N; k >= 1; k--) { + + std::string bitmask(k, 1); // K leading 1's + bitmask.resize(N, 0); // N-K trailing 0's + + do { + + + std::vector dimension; + + std::vector output_bases; + + for (int i = 0; i < N; ++i) // [0..N-1] integers + { + if (bitmask[i]) dimension.push_back(i); + else { + output_bases.push_back(bases[i]); + } + } +#else + std::vector dimension = { 0,1,2,3 }; + int k = 4; +#endif + auto dim = NDArrayFactory::create(dimension); + +#if 1 + nd4j_printf("C(N:%d K:%d) \n", N, k); + dim.printIndexedBuffer("Dimension"); + for (int xind : dimension) { + nd4j_printf(" %d ,", bases[xind]); + } + nd4j_printf("%s", "\n"); +#endif + + + sd::ops::argmax op; + std::vector values; + sd::ResultSet result; + for (int e = 0; e < Loop; e++) { + auto timeStart = std::chrono::system_clock::now(); + result = op.evaluate({ &x, &dim }, {}, {}); + auto timeEnd = std::chrono::system_clock::now(); + auto outerTime = std::chrono::duration_cast(timeEnd - timeStart).count(); + values.emplace_back(outerTime); + } + auto z = result.at(0); + + if (checkCorrectness) { + //check for the 
correctness + NDArray exp = output_bases.size() > 0 ? NDArrayFactory::create('c', output_bases) : NDArrayFactory::create(0); + original_argmax(x, dimension, exp); + + +#if 0// defined(DEBUG) + x.printIndexedBuffer("X"); + exp.printIndexedBuffer("Expected"); + z->printIndexedBuffer("Z"); +#endif + + ASSERT_TRUE(exp.isSameShape(z)); + ASSERT_TRUE(exp.equalsTo(z)); + } + std::sort(values.begin(), values.end()); + + nd4j_printf("Time: %lld us;\n", values[values.size() / 2]); +#if COMBINATIONS + + } while (std::prev_permutation(bitmask.begin(), bitmask.end())); + + } +#endif +} + +constexpr bool test_corr = true; +#if !defined(DEBUG) +TEST_F(PlaygroundTests, ArgMaxPerfLinspace) { + testNewReduction(false, test_corr); +} +#endif + +TEST_F(PlaygroundTests, ArgMaxPerfRandom) { + testNewReduction(true, test_corr); +} + +TEST_F(PlaygroundTests, ArgMaxPerfRandomOrderF) { + testNewReduction(true, test_corr, 'f'); +} + +#if !defined(DEBUG) +TEST_F(PlaygroundTests, ArgMaxPerfLegacyLinspace) { + testLegacy(false); +} + +TEST_F(PlaygroundTests, ArgMaxPerfLegacyRandom) { + testLegacy(true); +} + +#endif + +#endif /* diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/samediff/ops/SDBaseOps.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/samediff/ops/SDBaseOps.java index 79bd82ad3..8190c4849 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/samediff/ops/SDBaseOps.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/samediff/ops/SDBaseOps.java @@ -106,7 +106,7 @@ public class SDBaseOps { public SDVariable argmax(SDVariable in, boolean keepDims, int... dimensions) { SDValidation.validateNumerical("argmax", "in", in); Preconditions.checkArgument(dimensions.length >= 0, "dimensions has incorrect size/length. Expected: dimensions.length >= 0, got %s", dimensions.length); - return new org.nd4j.linalg.api.ops.impl.indexaccum.IMax(sd,in, keepDims, dimensions).outputVariable(); + return new org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMax(sd,in, keepDims, dimensions).outputVariable(); } /** @@ -130,7 +130,7 @@ public class SDBaseOps { public SDVariable argmax(String name, SDVariable in, boolean keepDims, int... dimensions) { SDValidation.validateNumerical("argmax", "in", in); Preconditions.checkArgument(dimensions.length >= 0, "dimensions has incorrect size/length. Expected: dimensions.length >= 0, got %s", dimensions.length); - SDVariable out = new org.nd4j.linalg.api.ops.impl.indexaccum.IMax(sd,in, keepDims, dimensions).outputVariable(); + SDVariable out = new org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMax(sd,in, keepDims, dimensions).outputVariable(); return sd.updateVariableNameAndReference(out, name); } @@ -153,7 +153,7 @@ public class SDBaseOps { public SDVariable argmax(SDVariable in, int... dimensions) { SDValidation.validateNumerical("argmax", "in", in); Preconditions.checkArgument(dimensions.length >= 0, "dimensions has incorrect size/length. Expected: dimensions.length >= 0, got %s", dimensions.length); - return new org.nd4j.linalg.api.ops.impl.indexaccum.IMax(sd,in, false, dimensions).outputVariable(); + return new org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMax(sd,in, false, dimensions).outputVariable(); } /** @@ -176,7 +176,7 @@ public class SDBaseOps { public SDVariable argmax(String name, SDVariable in, int... 
dimensions) { SDValidation.validateNumerical("argmax", "in", in); Preconditions.checkArgument(dimensions.length >= 0, "dimensions has incorrect size/length. Expected: dimensions.length >= 0, got %s", dimensions.length); - SDVariable out = new org.nd4j.linalg.api.ops.impl.indexaccum.IMax(sd,in, false, dimensions).outputVariable(); + SDVariable out = new org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMax(sd,in, false, dimensions).outputVariable(); return sd.updateVariableNameAndReference(out, name); } @@ -203,7 +203,7 @@ public class SDBaseOps { public SDVariable argmin(SDVariable in, boolean keepDims, int... dimensions) { SDValidation.validateNumerical("argmin", "in", in); Preconditions.checkArgument(dimensions.length >= 0, "dimensions has incorrect size/length. Expected: dimensions.length >= 0, got %s", dimensions.length); - return new org.nd4j.linalg.api.ops.impl.indexaccum.IMin(sd,in, keepDims, dimensions).outputVariable(); + return new org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMin(sd,in, keepDims, dimensions).outputVariable(); } /** @@ -230,7 +230,7 @@ public class SDBaseOps { public SDVariable argmin(String name, SDVariable in, boolean keepDims, int... dimensions) { SDValidation.validateNumerical("argmin", "in", in); Preconditions.checkArgument(dimensions.length >= 0, "dimensions has incorrect size/length. Expected: dimensions.length >= 0, got %s", dimensions.length); - SDVariable out = new org.nd4j.linalg.api.ops.impl.indexaccum.IMin(sd,in, keepDims, dimensions).outputVariable(); + SDVariable out = new org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMin(sd,in, keepDims, dimensions).outputVariable(); return sd.updateVariableNameAndReference(out, name); } @@ -256,7 +256,7 @@ public class SDBaseOps { public SDVariable argmin(SDVariable in, int... dimensions) { SDValidation.validateNumerical("argmin", "in", in); Preconditions.checkArgument(dimensions.length >= 0, "dimensions has incorrect size/length. Expected: dimensions.length >= 0, got %s", dimensions.length); - return new org.nd4j.linalg.api.ops.impl.indexaccum.IMin(sd,in, false, dimensions).outputVariable(); + return new org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMin(sd,in, false, dimensions).outputVariable(); } /** @@ -282,7 +282,7 @@ public class SDBaseOps { public SDVariable argmin(String name, SDVariable in, int... dimensions) { SDValidation.validateNumerical("argmin", "in", in); Preconditions.checkArgument(dimensions.length >= 0, "dimensions has incorrect size/length. Expected: dimensions.length >= 0, got %s", dimensions.length); - SDVariable out = new org.nd4j.linalg.api.ops.impl.indexaccum.IMin(sd,in, false, dimensions).outputVariable(); + SDVariable out = new org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMin(sd,in, false, dimensions).outputVariable(); return sd.updateVariableNameAndReference(out, name); } diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/samediff/ops/SDMath.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/samediff/ops/SDMath.java index 4d42b2295..15a26059f 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/samediff/ops/SDMath.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/samediff/ops/SDMath.java @@ -1875,7 +1875,7 @@ public class SDMath extends SDOps { public SDVariable iamax(SDVariable in, int... 
dimensions) { SDValidation.validateNumerical("iamax", "in", in); Preconditions.checkArgument(dimensions.length >= 1, "dimensions has incorrect size/length. Expected: dimensions.length >= 1, got %s", dimensions.length); - return new org.nd4j.linalg.api.ops.impl.indexaccum.IAMax(sd,in, false, dimensions).outputVariable(); + return new org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMax(sd,in, false, dimensions).outputVariable(); } /** @@ -1890,7 +1890,7 @@ public class SDMath extends SDOps { public SDVariable iamax(String name, SDVariable in, int... dimensions) { SDValidation.validateNumerical("iamax", "in", in); Preconditions.checkArgument(dimensions.length >= 1, "dimensions has incorrect size/length. Expected: dimensions.length >= 1, got %s", dimensions.length); - SDVariable out = new org.nd4j.linalg.api.ops.impl.indexaccum.IAMax(sd,in, false, dimensions).outputVariable(); + SDVariable out = new org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMax(sd,in, false, dimensions).outputVariable(); return sd.updateVariableNameAndReference(out, name); } @@ -1906,7 +1906,7 @@ public class SDMath extends SDOps { public SDVariable iamax(SDVariable in, boolean keepDims, int... dimensions) { SDValidation.validateNumerical("iamax", "in", in); Preconditions.checkArgument(dimensions.length >= 1, "dimensions has incorrect size/length. Expected: dimensions.length >= 1, got %s", dimensions.length); - return new org.nd4j.linalg.api.ops.impl.indexaccum.IAMax(sd,in, keepDims, dimensions).outputVariable(); + return new org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMax(sd,in, keepDims, dimensions).outputVariable(); } /** @@ -1922,7 +1922,7 @@ public class SDMath extends SDOps { public SDVariable iamax(String name, SDVariable in, boolean keepDims, int... dimensions) { SDValidation.validateNumerical("iamax", "in", in); Preconditions.checkArgument(dimensions.length >= 1, "dimensions has incorrect size/length. Expected: dimensions.length >= 1, got %s", dimensions.length); - SDVariable out = new org.nd4j.linalg.api.ops.impl.indexaccum.IAMax(sd,in, keepDims, dimensions).outputVariable(); + SDVariable out = new org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMax(sd,in, keepDims, dimensions).outputVariable(); return sd.updateVariableNameAndReference(out, name); } @@ -1937,7 +1937,7 @@ public class SDMath extends SDOps { public SDVariable iamin(SDVariable in, int... dimensions) { SDValidation.validateNumerical("iamin", "in", in); Preconditions.checkArgument(dimensions.length >= 1, "dimensions has incorrect size/length. Expected: dimensions.length >= 1, got %s", dimensions.length); - return new org.nd4j.linalg.api.ops.impl.indexaccum.IAMin(sd,in, false, dimensions).outputVariable(); + return new org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMin(sd,in, false, dimensions).outputVariable(); } /** @@ -1952,7 +1952,7 @@ public class SDMath extends SDOps { public SDVariable iamin(String name, SDVariable in, int... dimensions) { SDValidation.validateNumerical("iamin", "in", in); Preconditions.checkArgument(dimensions.length >= 1, "dimensions has incorrect size/length. 
Expected: dimensions.length >= 1, got %s", dimensions.length); - SDVariable out = new org.nd4j.linalg.api.ops.impl.indexaccum.IAMin(sd,in, false, dimensions).outputVariable(); + SDVariable out = new org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMin(sd,in, false, dimensions).outputVariable(); return sd.updateVariableNameAndReference(out, name); } @@ -1968,7 +1968,7 @@ public class SDMath extends SDOps { public SDVariable iamin(SDVariable in, boolean keepDims, int... dimensions) { SDValidation.validateNumerical("iamin", "in", in); Preconditions.checkArgument(dimensions.length >= 1, "dimensions has incorrect size/length. Expected: dimensions.length >= 1, got %s", dimensions.length); - return new org.nd4j.linalg.api.ops.impl.indexaccum.IAMin(sd,in, keepDims, dimensions).outputVariable(); + return new org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMin(sd,in, keepDims, dimensions).outputVariable(); } /** @@ -1984,7 +1984,7 @@ public class SDMath extends SDOps { public SDVariable iamin(String name, SDVariable in, boolean keepDims, int... dimensions) { SDValidation.validateNumerical("iamin", "in", in); Preconditions.checkArgument(dimensions.length >= 1, "dimensions has incorrect size/length. Expected: dimensions.length >= 1, got %s", dimensions.length); - SDVariable out = new org.nd4j.linalg.api.ops.impl.indexaccum.IAMin(sd,in, keepDims, dimensions).outputVariable(); + SDVariable out = new org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMin(sd,in, keepDims, dimensions).outputVariable(); return sd.updateVariableNameAndReference(out, name); } diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/samediff/serde/LegacyOpMapper.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/samediff/serde/LegacyOpMapper.java index 33d983f23..52f39982b 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/samediff/serde/LegacyOpMapper.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/samediff/serde/LegacyOpMapper.java @@ -682,14 +682,6 @@ public class LegacyOpMapper { public static Class indexReduceClass(int opNum){ switch (opNum){ - case 0: - return IMax.class; - case 1: - return IMin.class; - case 2: - return IAMax.class; - case 3: - return IAMin.class; case 4: return FirstIndex.class; case 5: diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/validation/OpValidation.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/validation/OpValidation.java index 756052851..386ead0b3 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/validation/OpValidation.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/validation/OpValidation.java @@ -1055,10 +1055,6 @@ public class OpValidation { IsNumericTensor.class, //Exclude index accumulations (index out, not real-valued) FirstIndex.class, - IAMax.class, - IAMin.class, - IMax.class, - IMin.class, LastIndex.class, ArgMax.class, ArgMin.class, diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/imports/converters/ImportClassMapping.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/imports/converters/ImportClassMapping.java index a053a40ab..63138719c 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/imports/converters/ImportClassMapping.java +++ 
b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/imports/converters/ImportClassMapping.java @@ -105,13 +105,11 @@ public class ImportClassMapping { org.nd4j.linalg.api.ops.impl.image.ResizeNearestNeighbor.class, org.nd4j.linalg.api.ops.impl.image.ResizeArea.class, org.nd4j.linalg.api.ops.impl.indexaccum.FirstIndex.class, - org.nd4j.linalg.api.ops.impl.indexaccum.IAMax.class, - org.nd4j.linalg.api.ops.impl.indexaccum.IAMin.class, - org.nd4j.linalg.api.ops.impl.indexaccum.IMax.class, - org.nd4j.linalg.api.ops.impl.indexaccum.IMin.class, org.nd4j.linalg.api.ops.impl.indexaccum.LastIndex.class, org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMax.class, org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMin.class, + org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgAmax.class, + org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgAmin.class, org.nd4j.linalg.api.ops.impl.layers.ExternalErrorsFunction.class, org.nd4j.linalg.api.ops.impl.layers.convolution.AvgPooling2D.class, org.nd4j.linalg.api.ops.impl.layers.convolution.AvgPooling3D.class, diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/IAMax.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/IAMax.java deleted file mode 100644 index b2e0d1192..000000000 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/IAMax.java +++ /dev/null @@ -1,78 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -package org.nd4j.linalg.api.ops.impl.indexaccum; - -import lombok.Data; -import org.nd4j.autodiff.samediff.SDVariable; -import org.nd4j.autodiff.samediff.SameDiff; -import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.api.ops.BaseIndexAccumulation; - -import java.util.Collections; -import java.util.List; - -/** - * Calculate the index of the max absolute value over a vector - * - * @author Adam Gibson - */ -@Data -public class IAMax extends BaseIndexAccumulation { - public IAMax(SameDiff sameDiff, SDVariable i_v, boolean keepDims, int[] dimensions) { - super(sameDiff, i_v, keepDims, dimensions); - } - - public IAMax() {} - - public IAMax(INDArray x, int... dimensions) { - this(x, false, dimensions); - } - - public IAMax(INDArray x, boolean keepDims, int... dimensions) { - this(x, null, dimensions); - this.keepDims = keepDims; - } - - public IAMax(INDArray x, INDArray z, int... 
dimensions) { - super(x, z, dimensions); - } - - @Override - public int opNum() { - return 2; - } - - @Override - public String opName() { - return "iamax"; - } - - @Override - public String onnxName() { - return "AbsArgMax"; - } - - @Override - public String tensorflowName() { - return "absargmax"; - } - - @Override - public List<SDVariable> doDiff(List<SDVariable> grad){ - return Collections.singletonList(sameDiff.zerosLike(arg())); - } -}
diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/IAMin.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/IAMin.java deleted file mode 100644 index f20547c1d..000000000 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/IAMin.java +++ /dev/null @@ -1,80 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -package org.nd4j.linalg.api.ops.impl.indexaccum; - -import lombok.Data; -import org.nd4j.autodiff.samediff.SDVariable; -import org.nd4j.autodiff.samediff.SameDiff; -import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.api.ops.BaseIndexAccumulation; - -import java.util.Collections; -import java.util.List; - -/** - * Calculate the index of the max absolute value over a vector - * - * @author Adam Gibson - */ -@Data -public class IAMin extends BaseIndexAccumulation { - public IAMin(SameDiff sameDiff, SDVariable i_v, boolean keepDims, int[] dimensions) { - super(sameDiff, i_v, keepDims, dimensions); - } - - public IAMin() {} - - public IAMin(INDArray x, int... dimensions) { - super(x, dimensions); - } - - public IAMin(INDArray in, boolean keepDims, int... dimnesions){ - super(in, null, dimnesions); - this.keepDims = keepDims; - } - - public IAMin(INDArray x, INDArray z, int... dimensions) { - super(x, z, dimensions); - } - - - - @Override - public int opNum() { - return 3; - } - - @Override - public String opName() { - return "iamin"; - } - - @Override - public String onnxName() { - return "AbsArgMin"; - } - - @Override - public String tensorflowName() { - return "absargmin"; - } - - @Override - public List<SDVariable> doDiff(List<SDVariable> grad){ - return Collections.singletonList(sameDiff.zerosLike(arg())); - } -}
diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/IMax.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/IMax.java deleted file mode 100644 index 127239bc7..000000000 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/IMax.java +++ /dev/null @@ -1,87 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc.
- * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -package org.nd4j.linalg.api.ops.impl.indexaccum; - -import lombok.Data; -import org.nd4j.autodiff.samediff.SDVariable; -import org.nd4j.autodiff.samediff.SameDiff; -import org.nd4j.imports.NoOpNameFoundException; -import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.api.ops.BaseIndexAccumulation; - -import java.util.Collections; -import java.util.List; - -/** - * Calculate the index - * of max value over a vector - * - * @author Alex Black - */ -@Data -public class IMax extends BaseIndexAccumulation { - public IMax(SameDiff sameDiff, SDVariable i_v, boolean keepDims, int[] dimensions) { - super(sameDiff, i_v, keepDims, dimensions); - } - - public IMax() { - } - - public IMax(INDArray x, INDArray z, int... dimensions) { - super(x, z, dimensions); - } - - public IMax(INDArray x, int... dimensions) { - super(x, null, dimensions); - } - - public IMax(INDArray x, boolean keepDims, int... dimensions) { - super(x, null, dimensions); - this.keepDims = keepDims; - } - - @Override - public int opNum() { - return 0; - } - - @Override - public String opName() { - return "imax"; - } - - @Override - public String onnxName() { - return "arg_max"; - } - - @Override - public String tensorflowName() { - throw new NoOpNameFoundException("No tensorflow op opName found for " + opName()); - } - - @Override - public Type opType() { - return Type.INDEXREDUCE; - } - - @Override - public List<SDVariable> doDiff(List<SDVariable> f1) { - //Not differentiable, but (assuming no ties) output does not change for a given infinitesimal change in the input - return Collections.singletonList(sameDiff.zerosLike(arg())); - } -}
diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/IMin.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/IMin.java deleted file mode 100644 index a459e8c9c..000000000 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/IMin.java +++ /dev/null @@ -1,83 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License.
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -package org.nd4j.linalg.api.ops.impl.indexaccum; - -import lombok.Data; -import org.nd4j.autodiff.samediff.SDVariable; -import org.nd4j.autodiff.samediff.SameDiff; -import org.nd4j.imports.NoOpNameFoundException; -import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.api.ops.BaseIndexAccumulation; - -import java.util.Collections; -import java.util.List; - -/** - * Calculate the index of min value over a vector - * - * @author Alex Black - */ -@Data -public class IMin extends BaseIndexAccumulation { - public IMin(SameDiff sameDiff, SDVariable i_v, boolean keepDims, int[] dimensions) { - super(sameDiff, i_v, keepDims, dimensions); - } - - public IMin() { - } - - public IMin(INDArray x, int... dimensions) { - super(x, dimensions); - } - - public IMin(INDArray x, boolean keepDims, int... dimensions) { - super(x, keepDims, dimensions); - } - - public IMin(INDArray x, INDArray z, int... dimensions) { - super(x, z, dimensions); - } - - - - @Override - public int opNum() { - return 1; - } - - @Override - public String opName() { - return "imin"; - } - - @Override - public String onnxName() { - return "ArgMin"; - } - - @Override - public String tensorflowName() { - throw new NoOpNameFoundException("No tensorflow op opName found for " + opName()); - } - - - @Override - public List<SDVariable> doDiff(List<SDVariable> f1) { - //Not differentiable, but (assuming no ties) output does not change for a given infinitesimal change in the input - return Collections.singletonList(sameDiff.zerosLike(arg())); - } -}
diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/custom/ArgAmax.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/custom/ArgAmax.java new file mode 100644 index 000000000..b4d74d3be --- /dev/null +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/custom/ArgAmax.java @@ -0,0 +1,111 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc.
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +package org.nd4j.linalg.api.ops.impl.indexaccum.custom; + +import lombok.Data; +import org.nd4j.autodiff.samediff.SDVariable; +import org.nd4j.autodiff.samediff.SameDiff; +import org.nd4j.common.base.Preconditions; +import org.nd4j.imports.NoOpNameFoundException; +import org.nd4j.imports.graphmapper.tf.TFGraphMapper; +import org.nd4j.linalg.api.buffer.DataType; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.api.ops.DynamicCustomOp; +import org.tensorflow.framework.AttrValue; +import org.tensorflow.framework.GraphDef; +import org.tensorflow.framework.NodeDef; + +import java.util.Collections; +import java.util.List; +import java.util.Map; + +@Data +public class ArgAmax extends DynamicCustomOp { + protected boolean keepDims = false; + private int[] dimensions; + + protected DataType outputType = DataType.INT64; + + public ArgAmax(SameDiff sameDiff, SDVariable i_v, boolean keepDims, int[] dimensions) { + super(sameDiff, i_v); + + this.keepDims = keepDims; + this.dimensions = dimensions; + + if (dimensions != null && dimensions.length > 0) + addIArgument(dimensions); + + addBArgument(keepDims); + + addDArgument(outputType); + } + + public ArgAmax() { + } + + public ArgAmax(INDArray x, INDArray z, boolean keepDims, int... dimensions) { + super(new INDArray[]{x}, z != null ? new INDArray[] {z} : new INDArray[0]); + + this.keepDims = keepDims; + this.dimensions = dimensions; + + if (dimensions != null && dimensions.length > 0) + addIArgument(dimensions); + + addBArgument(keepDims); + + addDArgument(outputType); + } + + public ArgAmax(INDArray x, INDArray z, int... dimensions) { + this(x, z, false, dimensions); + } + + public ArgAmax(INDArray x, int... dimensions) { + this(x, null, dimensions); + } + + public ArgAmax(INDArray x, boolean keepDims, int... dimensions) { + this(x, null, keepDims, dimensions); + } + + @Override + public String opName() { + return "argamax"; + } + + @Override + public String tensorflowName() { + throw new NoOpNameFoundException("No tensorflow op opName found for " + opName()); + } + + @Override + public void initFromTensorFlow(NodeDef nodeDef, SameDiff initWith, Map attributesForNode, GraphDef graph) { + if(attributesForNode.containsKey("output_type")) { + outputType = TFGraphMapper.convertType(attributesForNode.get("output_type").getType()); + } else { + outputType = DataType.LONG; + } + } + + @Override + public List calculateOutputDataTypes(List inputDataTypes){ + Preconditions.checkState(inputDataTypes != null && (inputDataTypes.size() == 1 || inputDataTypes.size() == 2), + "Expected 1 or 2 input datatype to argamax, got %s", inputDataTypes); //2nd input: axis + return Collections.singletonList(outputType == null ? DataType.LONG : outputType); + } +} diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/custom/ArgAmin.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/custom/ArgAmin.java new file mode 100644 index 000000000..530d7778e --- /dev/null +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/custom/ArgAmin.java @@ -0,0 +1,111 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. 
+ * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +package org.nd4j.linalg.api.ops.impl.indexaccum.custom; + +import lombok.Data; +import org.nd4j.autodiff.samediff.SDVariable; +import org.nd4j.autodiff.samediff.SameDiff; +import org.nd4j.common.base.Preconditions; +import org.nd4j.imports.NoOpNameFoundException; +import org.nd4j.imports.graphmapper.tf.TFGraphMapper; +import org.nd4j.linalg.api.buffer.DataType; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.api.ops.DynamicCustomOp; +import org.tensorflow.framework.AttrValue; +import org.tensorflow.framework.GraphDef; +import org.tensorflow.framework.NodeDef; +
+import java.util.Collections; +import java.util.List; +import java.util.Map; +
+@Data +public class ArgAmin extends DynamicCustomOp { + protected boolean keepDims = false; + private int[] dimensions; + + protected DataType outputType = DataType.INT64; + + public ArgAmin(SameDiff sameDiff, SDVariable i_v, boolean keepDims, int[] dimensions) { + super(sameDiff, i_v); + + this.keepDims = keepDims; + this.dimensions = dimensions; + + if (dimensions != null && dimensions.length > 0) + addIArgument(dimensions); + + addBArgument(keepDims); + + addDArgument(outputType); + } + + public ArgAmin() { + } + + public ArgAmin(INDArray x, INDArray z, boolean keepDims, int... dimensions) { + super(new INDArray[]{x}, z != null ? new INDArray[] {z} : new INDArray[0]); + + this.keepDims = keepDims; + this.dimensions = dimensions; + + if (dimensions != null && dimensions.length > 0) + addIArgument(dimensions); + + addBArgument(keepDims); + + addDArgument(outputType); + } + + public ArgAmin(INDArray x, INDArray z, int... dimensions) { + this(x, z, false, dimensions); + } + + public ArgAmin(INDArray x, int... dimensions) { + this(x, null, dimensions); + } + + public ArgAmin(INDArray x, boolean keepDims, int... dimensions) { + this(x, null, keepDims, dimensions); + } + + @Override + public String opName() { + return "argamin"; + } + + @Override + public String tensorflowName() { + throw new NoOpNameFoundException("No tensorflow op opName found for " + opName()); + } + + @Override + public void initFromTensorFlow(NodeDef nodeDef, SameDiff initWith, Map<String, AttrValue> attributesForNode, GraphDef graph) { + if(attributesForNode.containsKey("output_type")) { + outputType = TFGraphMapper.convertType(attributesForNode.get("output_type").getType()); + } else { + outputType = DataType.LONG; + } + } + + @Override + public List<DataType> calculateOutputDataTypes(List<DataType> inputDataTypes){ + Preconditions.checkState(inputDataTypes != null && (inputDataTypes.size() == 1 || inputDataTypes.size() == 2), + "Expected 1 or 2 input datatype to argamin, got %s", inputDataTypes); //2nd input: axis + return Collections.singletonList(outputType == null ?
DataType.LONG : outputType); + } +} diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/custom/ArgMax.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/custom/ArgMax.java index 1c19b82a5..799e6ec65 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/custom/ArgMax.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/custom/ArgMax.java @@ -17,10 +17,12 @@ package org.nd4j.linalg.api.ops.impl.indexaccum.custom; import lombok.Data; +import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.common.base.Preconditions; import org.nd4j.imports.graphmapper.tf.TFGraphMapper; import org.nd4j.linalg.api.buffer.DataType; +import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ops.DynamicCustomOp; import org.tensorflow.framework.AttrValue; import org.tensorflow.framework.GraphDef; @@ -32,8 +34,53 @@ import java.util.Map; @Data public class ArgMax extends DynamicCustomOp { + protected boolean keepDims = false; + private int[] dimensions; - protected DataType outputType; + protected DataType outputType = DataType.INT64; + + public ArgMax(SameDiff sameDiff, SDVariable i_v, boolean keepDims, int[] dimensions) { + super(sameDiff, i_v); + + this.keepDims = keepDims; + this.dimensions = dimensions; + + if (dimensions != null && dimensions.length > 0) + addIArgument(dimensions); + + addBArgument(keepDims); + + addDArgument(outputType); + } + + public ArgMax() { + } + + public ArgMax(INDArray x, INDArray z, boolean keepDims, int... dimensions) { + super(new INDArray[]{x}, z != null ? new INDArray[] {z} : new INDArray[0]); + + this.keepDims = keepDims; + this.dimensions = dimensions; + + if (dimensions != null && dimensions.length > 0) + addIArgument(dimensions); + + addBArgument(keepDims); + + addDArgument(outputType); + } + + public ArgMax(INDArray x, INDArray z, int... dimensions) { + this(x, z, false, dimensions); + } + + public ArgMax(INDArray x, int... dimensions) { + this(x, null, dimensions); + } + + public ArgMax(INDArray x, boolean keepDims, int... 
dimensions) { + this(x, null, keepDims, dimensions); + } @Override public String opName() { diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/custom/ArgMin.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/custom/ArgMin.java index c93bb1acf..cfd96de42 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/custom/ArgMin.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum/custom/ArgMin.java @@ -17,10 +17,12 @@ package org.nd4j.linalg.api.ops.impl.indexaccum.custom; import lombok.Data; +import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.common.base.Preconditions; import org.nd4j.imports.graphmapper.tf.TFGraphMapper; import org.nd4j.linalg.api.buffer.DataType; +import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ops.DynamicCustomOp; import org.tensorflow.framework.AttrValue; import org.tensorflow.framework.GraphDef; @@ -37,8 +39,53 @@ import java.util.Map; */ @Data public class ArgMin extends DynamicCustomOp { + protected boolean keepDims = false; + private int[] dimensions; - protected DataType outputType = DataType.LONG; + protected DataType outputType = DataType.INT64; + + public ArgMin(SameDiff sameDiff, SDVariable i_v, boolean keepDims, int[] dimensions) { + super(sameDiff, i_v); + + this.keepDims = keepDims; + this.dimensions = dimensions; + + if (dimensions != null && dimensions.length > 0) + addIArgument(dimensions); + + addBArgument(keepDims); + + addDArgument(outputType); + } + + public ArgMin() { + } + + public ArgMin(INDArray x, INDArray z, boolean keepDims, int... dimensions) { + super(new INDArray[]{x}, z != null ? new INDArray[] {z} : new INDArray[0]); + + this.keepDims = keepDims; + this.dimensions = dimensions; + + if (dimensions != null && dimensions.length > 0) + addIArgument(dimensions); + + addBArgument(keepDims); + + addDArgument(outputType); + } + + public ArgMin(INDArray x, INDArray z, int... dimensions) { + this(x, z, false, dimensions); + } + + public ArgMin(INDArray x, int... dimensions) { + this(x, null, dimensions); + } + + public ArgMin(INDArray x, boolean keepDims, int... 
dimensions) { + this(x, null, keepDims, dimensions); + } @Override public String opName() { diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/factory/Nd4j.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/factory/Nd4j.java index b01c28d16..88d0cbe44 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/factory/Nd4j.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/factory/Nd4j.java @@ -17,6 +17,8 @@ package org.nd4j.linalg.factory; import lombok.extern.slf4j.Slf4j; +import org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMax; +import org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMin; import org.nd4j.linalg.factory.ops.*; import org.nd4j.shade.guava.primitives.Ints; import org.nd4j.shade.guava.primitives.Longs; @@ -50,8 +52,6 @@ import org.nd4j.linalg.api.ops.Op; import org.nd4j.linalg.api.ops.OpContext; import org.nd4j.linalg.api.ops.executioner.DefaultOpExecutioner; import org.nd4j.linalg.api.ops.executioner.OpExecutioner; -import org.nd4j.linalg.api.ops.impl.indexaccum.IMax; -import org.nd4j.linalg.api.ops.impl.indexaccum.IMin; import org.nd4j.linalg.api.ops.impl.reduce.Mmul; import org.nd4j.linalg.api.ops.impl.scalar.ReplaceNans; import org.nd4j.linalg.api.ops.impl.scatter.ScatterUpdate; @@ -627,16 +627,16 @@ public class Nd4j { * @return array of maximum values. */ public static INDArray argMax(INDArray arr, @NonNull int... dimension) { - IMax imax = new IMax(arr, dimension); - return Nd4j.getExecutioner().exec(imax); + val imax = new ArgMax(arr, dimension); + return Nd4j.getExecutioner().exec(imax)[0]; } /** * See {@link #argMax(INDArray, int...)} but return minimum values. */ public static INDArray argMin(INDArray arr, @NonNull int... dimension) { - IMin imin = new IMin(arr, dimension); - return Nd4j.getExecutioner().exec(imin); + val imin = new ArgMin(arr, dimension); + return Nd4j.getExecutioner().exec(imin)[0]; } /** diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/factory/ops/NDBase.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/factory/ops/NDBase.java index 83352cbba..1b2718e2e 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/factory/ops/NDBase.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/factory/ops/NDBase.java @@ -75,7 +75,7 @@ public class NDBase { public INDArray argmax(INDArray in, boolean keepDims, int... dimensions) { NDValidation.validateNumerical("argmax", "in", in); Preconditions.checkArgument(dimensions.length >= 0, "dimensions has incorrect size/length. Expected: dimensions.length >= 0, got %s", dimensions.length); - return Nd4j.exec(new org.nd4j.linalg.api.ops.impl.indexaccum.IMax(in, keepDims, dimensions)); + return Nd4j.exec(new org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMax(in, keepDims, dimensions))[0]; } /** @@ -97,7 +97,7 @@ public class NDBase { public INDArray argmax(INDArray in, int... dimensions) { NDValidation.validateNumerical("argmax", "in", in); Preconditions.checkArgument(dimensions.length >= 0, "dimensions has incorrect size/length. 
Expected: dimensions.length >= 0, got %s", dimensions.length); - return Nd4j.exec(new org.nd4j.linalg.api.ops.impl.indexaccum.IMax(in, false, dimensions)); + return Nd4j.exec(new org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMax(in, false, dimensions))[0]; } /** @@ -123,7 +123,7 @@ public class NDBase { public INDArray argmin(INDArray in, boolean keepDims, int... dimensions) { NDValidation.validateNumerical("argmin", "in", in); Preconditions.checkArgument(dimensions.length >= 0, "dimensions has incorrect size/length. Expected: dimensions.length >= 0, got %s", dimensions.length); - return Nd4j.exec(new org.nd4j.linalg.api.ops.impl.indexaccum.IMin(in, keepDims, dimensions)); + return Nd4j.exec(new org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMin(in, keepDims, dimensions))[0]; } /** @@ -148,7 +148,7 @@ public class NDBase { public INDArray argmin(INDArray in, int... dimensions) { NDValidation.validateNumerical("argmin", "in", in); Preconditions.checkArgument(dimensions.length >= 0, "dimensions has incorrect size/length. Expected: dimensions.length >= 0, got %s", dimensions.length); - return Nd4j.exec(new org.nd4j.linalg.api.ops.impl.indexaccum.IMin(in, false, dimensions)); + return Nd4j.exec(new org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMin(in, false, dimensions))[0]; } /** diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/factory/ops/NDMath.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/factory/ops/NDMath.java index cb8ab10c0..cf03080f0 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/factory/ops/NDMath.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/factory/ops/NDMath.java @@ -896,7 +896,7 @@ public class NDMath { public INDArray iamax(INDArray in, int... dimensions) { NDValidation.validateNumerical("iamax", "in", in); Preconditions.checkArgument(dimensions.length >= 1, "dimensions has incorrect size/length. Expected: dimensions.length >= 1, got %s", dimensions.length); - return Nd4j.exec(new org.nd4j.linalg.api.ops.impl.indexaccum.IAMax(in, false, dimensions)); + return Nd4j.exec(new org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMax(in, false, dimensions))[0]; } /** @@ -911,7 +911,7 @@ public class NDMath { public INDArray iamax(INDArray in, boolean keepDims, int... dimensions) { NDValidation.validateNumerical("iamax", "in", in); Preconditions.checkArgument(dimensions.length >= 1, "dimensions has incorrect size/length. Expected: dimensions.length >= 1, got %s", dimensions.length); - return Nd4j.exec(new org.nd4j.linalg.api.ops.impl.indexaccum.IAMax(in, keepDims, dimensions)); + return Nd4j.exec(new org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMax(in, keepDims, dimensions))[0]; } /** @@ -925,7 +925,7 @@ public class NDMath { public INDArray iamin(INDArray in, int... dimensions) { NDValidation.validateNumerical("iamin", "in", in); Preconditions.checkArgument(dimensions.length >= 1, "dimensions has incorrect size/length. Expected: dimensions.length >= 1, got %s", dimensions.length); - return Nd4j.exec(new org.nd4j.linalg.api.ops.impl.indexaccum.IAMin(in, false, dimensions)); + return Nd4j.exec(new org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMin(in, false, dimensions))[0]; } /** @@ -940,7 +940,7 @@ public class NDMath { public INDArray iamin(INDArray in, boolean keepDims, int... 
dimensions) { NDValidation.validateNumerical("iamin", "in", in); Preconditions.checkArgument(dimensions.length >= 1, "dimensions has incorrect size/length. Expected: dimensions.length >= 1, got %s", dimensions.length); - return Nd4j.exec(new org.nd4j.linalg.api.ops.impl.indexaccum.IAMin(in, keepDims, dimensions)); + return Nd4j.exec(new org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMin(in, keepDims, dimensions))[0]; } /** diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java index b97274ba1..b4ef3cb05 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java @@ -17469,6 +17469,60 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD(); } // #endif + /** + * This operation returns index of absolute max element in a given NDArray (optionally: along given dimension(s)) + * Expected input: + * 0: N-dimensional array + * 1: optional axis vector + * + * Int args: + * 0: optional axis + */ +// #if NOT_EXCLUDED(OP_argamax) + @Namespace("sd::ops") public static class argamax extends DeclarableCustomOp { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public argamax(Pointer p) { super(p); } + /** Native array allocator. Access with {@link Pointer#position(long)}. */ + public argamax(long size) { super((Pointer)null); allocateArray(size); } + private native void allocateArray(long size); + @Override public argamax position(long position) { + return (argamax)super.position(position); + } + + public argamax() { super((Pointer)null); allocate(); } + private native void allocate(); + public native ShapeList calculateOutputShape(ShapeList inputShape, @ByRef Context block); + } +// #endif + + /** + * This operation returns index of absolute min element in a given NDArray (optionally: along given dimension(s)) + * Expected input: + * 0: N-dimensional array + * 1: optional axis vector + * + * Int args: + * 0: optional axis + */ +// #if NOT_EXCLUDED(OP_argamin) + @Namespace("sd::ops") public static class argamin extends DeclarableCustomOp { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public argamin(Pointer p) { super(p); } + /** Native array allocator. Access with {@link Pointer#position(long)}. 
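Aside (not part of the patch): the two native ops documented above are exposed through the ArgAmax/ArgAmin Java wrappers added earlier in this patch. A minimal, self-contained usage sketch, assuming only that nd4j is on the classpath; the class name AbsIndexReductionExample is hypothetical, and the sample values and expected indices are taken from the updated Nd4jTestsC tests further below:

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgAmax;
import org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgAmin;
import org.nd4j.linalg.factory.Nd4j;

// Hypothetical example class, for illustration only.
public class AbsIndexReductionExample {
    public static void main(String[] args) {
        INDArray in = Nd4j.create(new double[] {-0.24, -0.26, -0.07, -0.01});
        // Nd4j.exec(CustomOp) returns the op outputs as INDArray[]; output 0 holds the indices.
        int amax = Nd4j.exec(new ArgAmax(in.dup()))[0].getInt(0); // 1: |-0.26| has the largest magnitude
        int amin = Nd4j.exec(new ArgAmin(in.dup()))[0].getInt(0); // 3: |-0.01| has the smallest magnitude
        System.out.println(amax + " " + amin);
    }
}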
*/ + public argamin(long size) { super((Pointer)null); allocateArray(size); } + private native void allocateArray(long size); + @Override public argamin position(long position) { + return (argamin)super.position(position); + } + + public argamin() { super((Pointer)null); allocate(); } + private native void allocate(); + public native ShapeList calculateOutputShape(ShapeList inputShape, @ByRef Context block); + } +// #endif + /** * This operation provides various normalization modes: * 0: frobenius diff --git a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/opvalidation/ReductionOpValidation.java b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/opvalidation/ReductionOpValidation.java index b8b5e05f4..dcd161604 100644 --- a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/opvalidation/ReductionOpValidation.java +++ b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/opvalidation/ReductionOpValidation.java @@ -32,8 +32,8 @@ import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ops.CustomOp; import org.nd4j.linalg.api.ops.DynamicCustomOp; -import org.nd4j.linalg.api.ops.impl.indexaccum.IAMax; -import org.nd4j.linalg.api.ops.impl.indexaccum.IAMin; +import org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgAmax; +import org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgAmin; import org.nd4j.linalg.api.ops.impl.loss.SoftmaxCrossEntropyWithLogitsLoss; import org.nd4j.linalg.api.ops.impl.reduce.Moments; import org.nd4j.linalg.api.ops.impl.reduce.NormalizeMoments; @@ -863,12 +863,12 @@ public class ReductionOpValidation extends BaseOpValidation { break; case 2: reduce = sd.math().iamax(s, dim); - exp = Nd4j.getExecutioner().exec(new IAMax(in.dup(), dim)); + exp = Nd4j.getExecutioner().exec(new ArgAmax(in.dup(), dim))[0]; name = "iamax"; break; case 3: reduce = sd.math().iamin(s, dim); - exp = Nd4j.getExecutioner().exec(new IAMin(in.dup(), dim)); + exp = Nd4j.getExecutioner().exec(new ArgAmin(in.dup(), dim))[0]; name = "iamin"; break; case 4: diff --git a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/samediff/NameScopeTests.java b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/samediff/NameScopeTests.java index 0d1d6a600..ca733c1e8 100644 --- a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/samediff/NameScopeTests.java +++ b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/samediff/NameScopeTests.java @@ -144,7 +144,7 @@ public class NameScopeTests extends BaseNd4jTest { scope.close(); - assertTrue("Var with name test/imax exists", SD.variableMap().containsKey("test/imax")); + assertTrue("Var with name test/argmax exists", SD.variableMap().containsKey("test/argmax")); } @Test diff --git a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/Nd4jTestsC.java b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/Nd4jTestsC.java index a70ede362..c9f5cef6f 100644 --- a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/Nd4jTestsC.java +++ b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/Nd4jTestsC.java @@ -52,10 +52,10 @@ import org.nd4j.linalg.api.ops.impl.broadcast.bool.BroadcastEqualTo; import org.nd4j.linalg.api.ops.impl.broadcast.bool.BroadcastGreaterThan; import org.nd4j.linalg.api.ops.impl.broadcast.bool.BroadcastGreaterThanOrEqual; import org.nd4j.linalg.api.ops.impl.broadcast.bool.BroadcastLessThan; -import org.nd4j.linalg.api.ops.impl.indexaccum.IAMax; -import 
org.nd4j.linalg.api.ops.impl.indexaccum.IAMin; -import org.nd4j.linalg.api.ops.impl.indexaccum.IMax; -import org.nd4j.linalg.api.ops.impl.indexaccum.IMin; +import org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgAmax; +import org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgAmin; +import org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMax; +import org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMin; import org.nd4j.linalg.api.ops.impl.layers.convolution.Conv2D; import org.nd4j.linalg.api.ops.impl.layers.convolution.Im2col; import org.nd4j.linalg.api.ops.impl.layers.convolution.config.Conv2DConfig; @@ -3765,10 +3765,10 @@ public class Nd4jTestsC extends BaseNd4jTest { Nd4j.getExecutioner().setProfilingMode(OpExecutioner.ProfilingMode.ALL); INDArray arr = Nd4j.create(new double[] {-0.24, -0.26, -0.07, -0.01}); - IMax iMax = new IMax(arr); - IAMax iaMax = new IAMax(arr.dup()); - val imax = Nd4j.getExecutioner().execAndReturn(iMax).getFinalResult().intValue(); - val iamax = Nd4j.getExecutioner().execAndReturn(iaMax).getFinalResult().intValue(); + val iMax = new ArgMax(arr); + val iaMax = new ArgAmax(arr.dup()); + val imax = Nd4j.getExecutioner().exec(iMax)[0].getInt(0); + val iamax = Nd4j.getExecutioner().exec(iaMax)[0].getInt(0); // System.out.println("IMAX: " + imax); // System.out.println("IAMAX: " + iamax); assertEquals(1, iamax); @@ -3780,10 +3780,10 @@ public class Nd4jTestsC extends BaseNd4jTest { public void testIMinIAMin() { INDArray arr = Nd4j.create(new double[] {-0.24, -0.26, -0.07, -0.01}); INDArray abs = Transforms.abs(arr); - IAMin iaMin = new IAMin(abs); - IMin iMin = new IMin(arr.dup()); - double imin = Nd4j.getExecutioner().execAndReturn(iMin).getFinalResult().doubleValue(); - double iamin = Nd4j.getExecutioner().execAndReturn(iaMin).getFinalResult().doubleValue(); + val iaMin = new ArgAmin(abs); + val iMin = new ArgMin(arr.dup()); + double imin = Nd4j.getExecutioner().exec(iMin)[0].getDouble(0); + double iamin = Nd4j.getExecutioner().exec(iaMin)[0].getDouble(0); // System.out.println("IMin: " + imin); // System.out.println("IAMin: " + iamin); assertEquals(3, iamin, 1e-12); @@ -4077,7 +4077,7 @@ public class Nd4jTestsC extends BaseNd4jTest { arr.get(NDArrayIndex.point(i), NDArrayIndex.all(), NDArrayIndex.all()).assign(Nd4j.create(slices[i])); } - INDArray out = Nd4j.getExecutioner().exec(new IMax(arr, 1,2)); + INDArray out = Nd4j.exec(new ArgMax(arr, 1,2))[0]; assertEquals(DataType.LONG, out.dataType()); @@ -4119,8 +4119,8 @@ public class Nd4jTestsC extends BaseNd4jTest { } } - INDArray actC = Nd4j.getExecutioner().exec(new IMax(arr.dup('c'), 0,1)); - INDArray actF = Nd4j.getExecutioner().exec(new IMax(arr.dup('f'), 0,1)); + INDArray actC = Nd4j.getExecutioner().exec(new ArgMax(arr.dup('c'), 0,1))[0]; + INDArray actF = Nd4j.getExecutioner().exec(new ArgMax(arr.dup('f'), 0,1))[0]; // assertEquals(exp, actC); assertEquals(exp, actF); @@ -4153,8 +4153,8 @@ public class Nd4jTestsC extends BaseNd4jTest { } } - actC = Nd4j.getExecutioner().exec(new IMax(arr.dup('c'), 2, 3)); - actF = Nd4j.getExecutioner().exec(new IMax(arr.dup('f'), 2, 3)); + actC = Nd4j.getExecutioner().exec(new ArgMax(arr.dup('c'), 2, 3))[0]; + actF = Nd4j.getExecutioner().exec(new ArgMax(arr.dup('f'), 2, 3))[0]; assertEquals(exp, actC); assertEquals(exp, actF); diff --git a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/crash/CrashTest.java b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/crash/CrashTest.java index d0bcb3975..3277ddfc7 100644 --- 
a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/crash/CrashTest.java +++ b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/crash/CrashTest.java @@ -25,7 +25,7 @@ import org.junit.runners.Parameterized; import org.nd4j.linalg.BaseNd4jTest; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ops.CustomOp; -import org.nd4j.linalg.api.ops.impl.indexaccum.IMax; +import org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMax; import org.nd4j.linalg.api.ops.impl.reduce3.ManhattanDistance; import org.nd4j.linalg.api.ops.impl.transforms.custom.LogSoftMax; import org.nd4j.linalg.api.ops.impl.transforms.custom.SoftMax; @@ -122,7 +122,7 @@ public class CrashTest extends BaseNd4jTest { float sum = x.sumNumber().floatValue(); // index reduction - Nd4j.getExecutioner().exec(new IMax(x)); + Nd4j.getExecutioner().exec(new ArgMax(x)); // casual transform Nd4j.getExecutioner().exec(new Sqrt(x, x)); diff --git a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/ops/OpExecutionerTests.java b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/ops/OpExecutionerTests.java index 0fc085abe..330c1110a 100644 --- a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/ops/OpExecutionerTests.java +++ b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/ops/OpExecutionerTests.java @@ -26,9 +26,9 @@ import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ops.CustomOp; import org.nd4j.linalg.api.ops.executioner.OpExecutioner; -import org.nd4j.linalg.api.ops.impl.indexaccum.IAMax; -import org.nd4j.linalg.api.ops.impl.indexaccum.IMax; -import org.nd4j.linalg.api.ops.impl.indexaccum.IMin; +import org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgAmax; +import org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMax; +import org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMin; import org.nd4j.linalg.api.ops.impl.reduce.floating.Mean; import org.nd4j.linalg.api.ops.impl.reduce.floating.Norm2; import org.nd4j.linalg.api.ops.impl.reduce.floating.NormMax; @@ -282,9 +282,9 @@ public class OpExecutionerTests extends BaseNd4jTest { public void testIamax2() { INDArray linspace = Nd4j.linspace(1, 4, 4, DataType.DOUBLE); assertEquals(getFailureMessage(), 3, Nd4j.getBlasWrapper().iamax(linspace)); - val op = new IAMax(linspace); + val op = new ArgAmax(linspace); - int iamax = Nd4j.getExecutioner().execAndReturn(op).getFinalResult().intValue(); + int iamax = Nd4j.getExecutioner().exec(op)[0].getInt(0); assertEquals(3, iamax); } @@ -565,24 +565,24 @@ public class OpExecutionerTests extends BaseNd4jTest { @Test public void testIMax() { INDArray arr = Nd4j.linspace(1, 10, 10, DataType.DOUBLE); - IMax imax = new IMax(arr); - assertEquals(9, Nd4j.getExecutioner().execAndReturn(imax).getFinalResult().intValue()); + ArgMax imax = new ArgMax(arr); + assertEquals(9, Nd4j.getExecutioner().exec(imax)[0].getInt(0)); arr.muli(-1); - imax = new IMax(arr); - int maxIdx = Nd4j.getExecutioner().execAndReturn(imax).getFinalResult().intValue(); + imax = new ArgMax(arr); + int maxIdx = Nd4j.getExecutioner().exec(imax)[0].getInt(0); assertEquals(0, maxIdx); } @Test public void testIMin() { INDArray arr = Nd4j.linspace(1, 10, 10, DataType.DOUBLE); - IMin imin = new IMin(arr); - assertEquals(0, Nd4j.getExecutioner().execAndReturn(imin).getFinalResult().intValue()); + ArgMin imin = new ArgMin(arr); + assertEquals(0, Nd4j.getExecutioner().exec(imin)[0].getInt(0)); arr.muli(-1); - imin = new IMin(arr); - int minIdx = 
Nd4j.getExecutioner().execAndReturn(imin).getFinalResult().intValue(); + imin = new ArgMin(arr); + int minIdx = Nd4j.getExecutioner().exec(imin)[0].getInt(0); assertEquals(9, minIdx); } diff --git a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/ops/OpExecutionerTestsC.java b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/ops/OpExecutionerTestsC.java index 66305b42a..117f8745b 100644 --- a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/ops/OpExecutionerTestsC.java +++ b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/ops/OpExecutionerTestsC.java @@ -32,8 +32,8 @@ import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ops.CustomOp; import org.nd4j.linalg.api.ops.executioner.OpExecutioner; import org.nd4j.linalg.api.ops.impl.broadcast.BroadcastMulOp; -import org.nd4j.linalg.api.ops.impl.indexaccum.IMax; -import org.nd4j.linalg.api.ops.impl.indexaccum.IMin; +import org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMax; +import org.nd4j.linalg.api.ops.impl.indexaccum.custom.ArgMin; import org.nd4j.linalg.api.ops.impl.reduce.floating.Mean; import org.nd4j.linalg.api.ops.impl.reduce.floating.Norm2; import org.nd4j.linalg.api.ops.impl.reduce.floating.NormMax; @@ -478,24 +478,24 @@ public class OpExecutionerTestsC extends BaseNd4jTest { @Test public void testIMax() { INDArray arr = Nd4j.linspace(1, 10, 10, DataType.DOUBLE); - IMax imax = new IMax(arr); - assertEquals(9, Nd4j.getExecutioner().execAndReturn(imax).getFinalResult().intValue()); + ArgMax imax = new ArgMax(arr); + assertEquals(9, Nd4j.getExecutioner().exec(imax)[0].getInt(0)); arr.muli(-1); - imax = new IMax(arr); - int maxIdx = Nd4j.getExecutioner().execAndReturn(imax).getFinalResult().intValue(); + imax = new ArgMax(arr); + int maxIdx = Nd4j.getExecutioner().exec(imax)[0].getInt(0); assertEquals(0, maxIdx); } @Test public void testIMin() { INDArray arr = Nd4j.linspace(1, 10, 10, DataType.DOUBLE); - IMin imin = new IMin(arr); - assertEquals(0, Nd4j.getExecutioner().execAndReturn(imin).getFinalResult().intValue()); + ArgMin imin = new ArgMin(arr); + assertEquals(0, Nd4j.getExecutioner().exec(imin)[0].getInt(0)); arr.muli(-1); - imin = new IMin(arr); - int minIdx = Nd4j.getExecutioner().execAndReturn(imin).getFinalResult().intValue(); + imin = new ArgMin(arr); + int minIdx = Nd4j.getExecutioner().exec(imin)[0].getInt(0); assertEquals(9, minIdx); } diff --git a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/shape/EmptyTests.java b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/shape/EmptyTests.java index aa81097d1..c07fae701 100644 --- a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/shape/EmptyTests.java +++ b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/shape/EmptyTests.java @@ -26,6 +26,7 @@ import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ops.DynamicCustomOp; import org.nd4j.linalg.api.ops.impl.reduce.bool.All; +import org.nd4j.linalg.exception.ND4JIllegalStateException; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.factory.Nd4jBackend; @@ -234,7 +235,7 @@ public class EmptyTests extends BaseNd4jTest { assertEquals(e, reduced); } - @Test(expected = IllegalArgumentException.class) + @Test(expected = ND4JIllegalStateException.class) public void testEmptyReduction_4() { val x = Nd4j.create(DataType.FLOAT, 2, 0); val e = Nd4j.create(DataType.FLOAT, 0); From 22141759345fbb464e19be943c54f2000a3978b0 Mon Sep 17 00:00:00 2001 From: 
"raver119@gmail.com" Date: Thu, 14 May 2020 13:54:52 +0300 Subject: [PATCH 02/21] disable unwanted logging Signed-off-by: raver119@gmail.com --- libnd4j/blas/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libnd4j/blas/CMakeLists.txt b/libnd4j/blas/CMakeLists.txt index 9902649f8..8419cdd4c 100755 --- a/libnd4j/blas/CMakeLists.txt +++ b/libnd4j/blas/CMakeLists.txt @@ -300,7 +300,7 @@ elseif(SD_CPU) string(REGEX MATCH "^(.*)\\.cpp\.in$" dummy ${FL_ITEM}) set(FL_ITEM_WLE ${CMAKE_MATCH_1}) foreach(FL_TYPE_INDEX RANGE 0 9) - message( "${FL_ITEM_WLE}_${FL_TYPE_INDEX}.cpp") + #message( "${FL_ITEM_WLE}_${FL_TYPE_INDEX}.cpp") configure_file( "${FL_ITEM}" "${FL_ITEM_WLE}_${FL_TYPE_INDEX}.cpp" @ONLY) LIST(APPEND CUSTOMOPS_GENERIC_SOURCES ${FL_ITEM_WLE}_${FL_TYPE_INDEX}.cpp ) endforeach() From 753ce28a924053228d0d5a1df33c24e4ff22d459 Mon Sep 17 00:00:00 2001 From: Yurii Shyrma Date: Thu, 14 May 2020 18:06:13 +0300 Subject: [PATCH 03/21] Shyrma sqrtm (#429) * - start working on implementation of sqrtm op Signed-off-by: Yurii * - improving householder procedure Signed-off-by: Yurii * - further polishing householder stuff Signed-off-by: Yurii * - polishing hh pivoting qr procedure Signed-off-by: Yurii * - polishing BiDiagonalUp procedure Signed-off-by: Yurii * - polishing householder sequence class Signed-off-by: Yurii * - polishing jacobi svd class Signed-off-by: Yurii * - polishing svd stuff 1 Signed-off-by: Yurii * - polishing svd stuff 2 Signed-off-by: Yurii * - implementation and testing class which performs Hessenberg decomposition of square matrix Signed-off-by: Yurii * - add static method to JacobiSVD class which makes the continuous Givens rotation generation algorithm Signed-off-by: Yurii * - implementation and testing auxiliary methods of Schur decomp class Signed-off-by: Yurii * some references here and there Signed-off-by: raver119 * - trying figure out difference between eigen and our Schur alg Signed-off-by: Yurii * - testing fixing bugs in Schur decomposition op Signed-off-by: Yurii * - start to implement class which performs calculation of eigen values and vectors Signed-off-by: Yurii * - add to EigenValsAndVecs method which calculates complex eigen vectors Signed-off-by: Yurii * - testing and fixing bugs in EigenValsAndVecs class Signed-off-by: Yurii * - implementation and testing triangularSolver class Signed-off-by: Yurii * Added a 2D routine for triangular systems solve. Signed-off-by: shugeo * Refactored triangularSolve2D routine and tests. Signed-off-by: shugeo * Refactored another test for triangularSolve2D. Signed-off-by: shugeo * Refactored test for triangularSolve for vector-bar case. Signed-off-by: shugeo * Refactored triangularSolve2D routine and tests. Signed-off-by: shugeo * - implementation of FullPivLU class Signed-off-by: Yurii * - fix bugs in FullPivLU::solve method Signed-off-by: Yurii * - correct permutation vector in FullPivLU::solve Signed-off-by: Yurii * - correct include headers Signed-off-by: Yurii * - implementation of Sqrtm class Signed-off-by: Yurii * - testing and fixing bugs in Sqrtm class Signed-off-by: Yurii * - include sqrtm classes to cuda folder, investigate in what places synchronization doesn't work Signed-off-by: Yurii * Added implementation for cuda triangularSolve2D and also refactored triangularSolve2D for cpu. Signed-off-by: shugeo * Eliminated waste implementations. 
Signed-off-by: shugeo * - make offset calculation faster in t<> methods Signed-off-by: Yurii * - rename refference T& NDArray::t<> method Signed-off-by: Yurii * - further work on cuda sqrtm Signed-off-by: Yurii * - provide correct synchronization to device in Sqrtm class Signed-off-by: Yurii * - add tests for sqrtm op Signed-off-by: Yurii * - correct fails which appeared while testing on jenkins Signed-off-by: Yurii * - trying to find out mistake in svd::deflation method Signed-off-by: Yurii * Revert "- trying to find out mistake in svd::deflation method" This reverts commit 19d37baddbc509028e4bc67bc932fe7449becdb6. * Revert "- trying to find out mistake in svd::deflation method" This reverts commit 19d37baddbc509028e4bc67bc932fe7449becdb6. Signed-off-by: Yurii * - change call semantic of r<> and t<> methods Signed-off-by: Yurii * - ged rid of ambiguity in * operator overloads for windows buikd Signed-off-by: Yurii * - get rid of ambiguity in * operator overloads for windows build 2 Signed-off-by: Yurii * - get rid of ambiguity in * operator overloads for windows build 3 Signed-off-by: Yurii * - resolve conflicts with master Signed-off-by: Yurii * cmakelists updated Signed-off-by: raver119@gmail.com * - minor fix in merge cpu helper - make use of reference getter Signed-off-by: Yurii Co-authored-by: raver119 Co-authored-by: shugeo --- libnd4j/include/array/NDArray.h | 110 +-- libnd4j/include/array/NDArray.hXX | 45 +- libnd4j/include/array/cpu/NDArray.cpp | 31 +- libnd4j/include/array/cuda/NDArray.cu | 19 +- libnd4j/include/helpers/EigenValsAndVecs.h | 86 ++ libnd4j/include/helpers/FullPivLU.h | 52 + libnd4j/include/helpers/HessenbergAndSchur.h | 102 ++ libnd4j/include/helpers/Sqrtm.h | 45 + libnd4j/include/helpers/biDiagonalUp.h | 13 +- libnd4j/include/helpers/cpu/biDiagonalUp.cpp | 180 ---- libnd4j/include/helpers/cpu/hhColPivQR.cpp | 171 ---- libnd4j/include/helpers/cpu/householder.cpp | 221 ----- libnd4j/include/helpers/cpu/svd.cpp | 485 ++++------ libnd4j/include/helpers/hhSequence.h | 28 +- libnd4j/include/helpers/householder.h | 46 +- .../include/helpers/impl/EigenValsAndVecs.cpp | 293 ++++++ libnd4j/include/helpers/impl/FullPivLU.cpp | 170 ++++ .../helpers/impl/HessenbergAndSchur.cpp | 383 ++++++++ libnd4j/include/helpers/impl/MmulHelper.cpp | 2 +- libnd4j/include/helpers/impl/Sqrtm.cpp | 276 ++++++ libnd4j/include/helpers/impl/biDiagonalUp.cpp | 160 +++ libnd4j/include/helpers/impl/hhColPivQR.cpp | 147 +++ .../helpers/{cpu => impl}/hhSequence.cpp | 79 +- libnd4j/include/helpers/impl/householder.cpp | 218 +++++ .../helpers/{cpu => impl}/jacobiSVD.cpp | 235 ++--- libnd4j/include/helpers/jacobiSVD.h | 7 +- libnd4j/include/helpers/shape.h | 2 +- .../loops/cuda/specials/swapUnsafeKernel.cu | 28 +- .../ops/declarable/generic/linalg/sqrtm.cpp | 53 + .../generic/{blas => linalg}/svd.cpp | 0 .../generic/linalg/triangular_solve.cpp | 4 +- libnd4j/include/ops/declarable/headers/blas.h | 30 +- .../ops/declarable/helpers/cpu/betaInc.cpp | 2 +- .../helpers/cpu/extract_patches.cpp | 2 +- .../helpers/cpu/fake_quantization.cpp | 2 +- .../declarable/helpers/cpu/image_resize.cpp | 2 +- .../ops/declarable/helpers/cpu/lstsq.cpp | 4 +- .../ops/declarable/helpers/cpu/lup.cpp | 34 +- .../ops/declarable/helpers/cpu/merge.cpp | 4 +- .../ops/declarable/helpers/cpu/random.cpp | 14 +- .../declarable/helpers/cpu/randomShuffle.cpp | 8 +- .../ops/declarable/helpers/cpu/segment.cpp | 14 +- .../declarable/helpers/cpu/sequence_mask.cpp | 2 +- .../ops/declarable/helpers/cpu/solve.cpp | 6 +- 
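(Context for the sqrtm op added in this patch: given the Hessenberg/Schur and triangular-solve helpers it introduces, the construction is presumably the standard Schur method for the principal matrix square root,

\[ A = Q\,T\,Q^{*}, \qquad A^{1/2} = Q\,T^{1/2}\,Q^{*}, \]

where \(T\) is the (quasi-)triangular Schur form and \(T^{1/2} = U\) follows from the triangular recurrence \(U_{ii} = \sqrt{T_{ii}}\), \(U_{ij} = \big(T_{ij} - \sum_{i<k<j} U_{ik}U_{kj}\big)/(U_{ii}+U_{jj})\).)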
.../ops/declarable/helpers/cpu/svd.cpp | 912 +----------------- .../ops/declarable/helpers/cpu/top_k.cpp | 16 +- .../helpers/cpu/triangular_solve.cpp | 38 +- .../ops/declarable/helpers/cpu/triu.cpp | 2 +- .../helpers/cuda/triangular_solve.cu | 139 ++- .../helpers/impl/sparse_to_dense.cpp | 1 + .../ops/declarable/helpers/impl/sqrtm.cpp | 66 ++ .../include/ops/declarable/helpers/sqrtm.h | 39 + .../ops/declarable/helpers/triangular_solve.h | 4 +- .../layers_tests/DeclarableOpsTests11.cpp | 86 +- .../layers_tests/DeclarableOpsTests13.cpp | 494 ---------- .../layers_tests/DeclarableOpsTests15.cpp | 75 +- .../layers_tests/DeclarableOpsTests19.cpp | 2 + .../tests_cpu/layers_tests/HelpersTests1.cpp | 868 +++++++---------- .../tests_cpu/layers_tests/HelpersTests2.cpp | 426 ++++++++ .../tests_cpu/layers_tests/NDArrayTests.cpp | 3 + .../tests_cpu/layers_tests/NativeOpsTests.cpp | 2 +- .../layers_tests/PlaygroundTests.cpp | 523 ++++++++++ libnd4j/tests_cpu/layers_tests/RNGTests.cpp | 44 +- .../tests_cpu/libnd4j_tests/CMakeLists.txt | 13 +- 64 files changed, 4310 insertions(+), 3258 deletions(-) create mode 100644 libnd4j/include/helpers/EigenValsAndVecs.h create mode 100644 libnd4j/include/helpers/FullPivLU.h create mode 100644 libnd4j/include/helpers/HessenbergAndSchur.h create mode 100644 libnd4j/include/helpers/Sqrtm.h delete mode 100644 libnd4j/include/helpers/cpu/biDiagonalUp.cpp delete mode 100644 libnd4j/include/helpers/cpu/hhColPivQR.cpp delete mode 100644 libnd4j/include/helpers/cpu/householder.cpp create mode 100644 libnd4j/include/helpers/impl/EigenValsAndVecs.cpp create mode 100644 libnd4j/include/helpers/impl/FullPivLU.cpp create mode 100644 libnd4j/include/helpers/impl/HessenbergAndSchur.cpp create mode 100644 libnd4j/include/helpers/impl/Sqrtm.cpp create mode 100644 libnd4j/include/helpers/impl/biDiagonalUp.cpp create mode 100644 libnd4j/include/helpers/impl/hhColPivQR.cpp rename libnd4j/include/helpers/{cpu => impl}/hhSequence.cpp (59%) create mode 100644 libnd4j/include/helpers/impl/householder.cpp rename libnd4j/include/helpers/{cpu => impl}/jacobiSVD.cpp (58%) create mode 100644 libnd4j/include/ops/declarable/generic/linalg/sqrtm.cpp rename libnd4j/include/ops/declarable/generic/{blas => linalg}/svd.cpp (100%) create mode 100644 libnd4j/include/ops/declarable/helpers/impl/sqrtm.cpp create mode 100644 libnd4j/include/ops/declarable/helpers/sqrtm.h create mode 100644 libnd4j/tests_cpu/layers_tests/HelpersTests2.cpp diff --git a/libnd4j/include/array/NDArray.h b/libnd4j/include/array/NDArray.h index ae4df227d..04500a987 100644 --- a/libnd4j/include/array/NDArray.h +++ b/libnd4j/include/array/NDArray.h @@ -1163,7 +1163,7 @@ namespace sd { /** * fill target matrix with given value in one or two directions from main diagonal: - * - down from main diagonal starting at subdiagonal number "lower" if direction = 'd' (down) or 'b' (both) + * - down from main diagonal starting at subdiagonal number "lower" if direction = 'l' (down) or 'b' (both) * - up from main diagonal starting at superdiagonal number "upper"if direction = 'u' (up) or 'b' (both) * direction - in what direction to fill matrix. 
There are 3 possible directions: * 'u' - fill up, mathematically this corresponds to lower triangular matrix, subdiagonal "lower" unaffected @@ -1230,14 +1230,13 @@ namespace sd { * returns reference on array element with given index */ template - FORCEINLINE T& t(const Nd4jLong index); - + FORCEINLINE T& r(const Nd4jLong index); template - FORCEINLINE T& t(const Nd4jLong i, const Nd4jLong j); + FORCEINLINE T& r(const Nd4jLong i, const Nd4jLong j); template - FORCEINLINE T& t(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k); + FORCEINLINE T& r(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k); template - FORCEINLINE T& t(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLong w); + FORCEINLINE T& r(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLong w); /** @@ -1246,7 +1245,6 @@ namespace sd { */ template FORCEINLINE T t(const Nd4jLong i) const; - template FORCEINLINE T t(const Nd4jLong i, const Nd4jLong j) const; template @@ -1778,70 +1776,60 @@ DataType NDArray::dataType() const { //////////////////////////////////////////////////////////////////////// template -T& NDArray::t(const Nd4jLong i) { +T& NDArray::r(const Nd4jLong i) { // if (i >= _length) // throw std::invalid_argument("NDArray::t(i): input index is out of array length !"); if (DataTypeUtils::fromT() != _dataType) throw std::invalid_argument("NDArray::t(i): type of array is not equal to template type T!"); - if(!isActualOnHostSide()) - syncToHost(); - + syncToHost(); tickWriteHost(); + return *(reinterpret_cast(bufferWithOffset(getOffset(i)))); } //////////////////////////////////////////////////////////////////////// template -T& NDArray::t(const Nd4jLong i, const Nd4jLong j) { +T& NDArray::r(const Nd4jLong i, const Nd4jLong j) { if (rankOf() != 2 || i >= sizeAt(0) || j >= sizeAt(1)) throw std::invalid_argument("NDArray::t(i,j): one of input indexes is out of array length or rank!=2 !"); if (DataTypeUtils::fromT() != _dataType) throw std::invalid_argument("NDArray::t(i,j): type of array is not equal to template type T!"); - if(!isActualOnHostSide()) - syncToHost(); - - Nd4jLong coords[2] = {i, j}; - auto offset = shape::getOffset(shapeInfo(), coords); + syncToHost(); tickWriteHost(); - return *(reinterpret_cast(bufferWithOffset(offset))); + + return *(reinterpret_cast(bufferWithOffset(i * strideAt(0) + j * strideAt(1)))); } template -T& NDArray::t(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k) { +T& NDArray::r(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k) { if (rankOf() != 3 || i >= sizeAt(0) || j >= sizeAt(1) || k >= sizeAt(2)) throw std::invalid_argument("NDArray::t(i,j,k): one of input indexes is out of array length or rank!=3!"); if (DataTypeUtils::fromT() != _dataType) throw std::invalid_argument("NDArray::t(i,j,k): type of array is not equal to template type T!"); - if(!isActualOnHostSide()) - syncToHost(); - - Nd4jLong coords[3] = {i, j, k}; - auto offset = shape::getOffset(shapeInfo(), coords); + syncToHost(); tickWriteHost(); - return *(reinterpret_cast(bufferWithOffset(offset))); + + return *(reinterpret_cast(bufferWithOffset(i * strideAt(0) + j * strideAt(1) + k * strideAt(2)))); } template -T& NDArray::t(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLong w) { +T& NDArray::r(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLong w) { if (rankOf() != 4 || i >= sizeAt(0) || j >= sizeAt(1) || k >= sizeAt(2) || w >= sizeAt(3)) throw std::invalid_argument("NDArray::t(i,j,k,w): one of input indexes is out of array length or rank!=4 
!"); if (DataTypeUtils::fromT() != _dataType) throw std::invalid_argument("NDArray::t(i,j,k,w): type of array is not equal to template type T!"); - if(!isActualOnHostSide()) - syncToHost(); - - Nd4jLong coords[4] = {i, j, k, w}; - auto offset = shape::getOffset(shapeInfo(), coords); + syncToHost(); tickWriteHost(); - return *(reinterpret_cast(bufferWithOffset(offset))); + + return *(reinterpret_cast(bufferWithOffset(i * strideAt(0) + j * strideAt(1) + k * strideAt(2) + w * strideAt(3)))); } //////////////////////////////////////////////////////////////////////// @@ -1853,10 +1841,8 @@ T NDArray::t(const Nd4jLong i) const { if (DataTypeUtils::fromT() != _dataType) throw std::invalid_argument("NDArray::t(i): type of array is not equal to template type T!"); - if(!isActualOnHostSide()) - syncToHost(); + syncToHost(); - tickReadHost(); return *(reinterpret_cast(bufferWithOffset(getOffset(i)))); } @@ -1869,48 +1855,38 @@ T NDArray::t(const Nd4jLong i, const Nd4jLong j) const { if (DataTypeUtils::fromT() != _dataType) throw std::invalid_argument("NDArray::t(i,j): type of array is not equal to template type T!"); - if(!isActualOnHostSide()) - syncToHost(); + syncToHost(); - Nd4jLong coords[2] = {i, j}; - auto offset = shape::getOffset(shapeInfo(), coords); - tickReadHost(); - return *(reinterpret_cast(bufferWithOffset(offset))); + return *(reinterpret_cast(bufferWithOffset(i * strideAt(0) + j * strideAt(1)))); } - template - T NDArray::t(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k) const { +//////////////////////////////////////////////////////////////////////// +template +T NDArray::t(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k) const { - if (rankOf() != 3 || i >= sizeAt(0) || j >= sizeAt(1) || k >= sizeAt(2)) - throw std::invalid_argument("NDArray::t(i,j,k): one of input indexes is out of array length or rank!=3!"); - if (DataTypeUtils::fromT() != _dataType) - throw std::invalid_argument("NDArray::t(i,j,k): type of array is not equal to template type T!"); + if (rankOf() != 3 || i >= sizeAt(0) || j >= sizeAt(1) || k >= sizeAt(2)) + throw std::invalid_argument("NDArray::t(i,j,k): one of input indexes is out of array length or rank!=3!"); + if (DataTypeUtils::fromT() != _dataType) + throw std::invalid_argument("NDArray::t(i,j,k): type of array is not equal to template type T!"); - if(!isActualOnHostSide()) - syncToHost(); + syncToHost(); - Nd4jLong coords[3] = {i, j, k}; - auto offset = shape::getOffset(shapeInfo(), coords); - tickReadHost(); - return *(reinterpret_cast(bufferWithOffset(offset))); - } + return *(reinterpret_cast(bufferWithOffset(i * strideAt(0) + j * strideAt(1) + k * strideAt(2)))); +} - template - T NDArray::t(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLong w) const { +//////////////////////////////////////////////////////////////////////// +template +T NDArray::t(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLong w) const { - if (rankOf() != 4 || i >= sizeAt(0) || j >= sizeAt(1) || k >= sizeAt(2) || w >= sizeAt(3)) - throw std::invalid_argument("NDArray::t(i,j,k,w): one of input indexes is out of array length or rank!=4!"); - if (DataTypeUtils::fromT() != _dataType) - throw std::invalid_argument("NDArray::t(i,j,k,w): type of array is not equal to template type T!"); + if (rankOf() != 4 || i >= sizeAt(0) || j >= sizeAt(1) || k >= sizeAt(2) || w >= sizeAt(3)) + throw std::invalid_argument("NDArray::t(i,j,k,w): one of input indexes is out of array length or rank!=4!"); + if (DataTypeUtils::fromT() != _dataType) + throw 
std::invalid_argument("NDArray::t(i,j,k,w): type of array is not equal to template type T!"); - if(!isActualOnHostSide()) - syncToHost(); + syncToHost(); - Nd4jLong coords[4] = {i, j, k, w}; - auto offset = shape::getOffset(shapeInfo(), coords); - tickReadHost(); - return *(reinterpret_cast(bufferWithOffset(offset))); - } + return *(reinterpret_cast(bufferWithOffset(i * strideAt(0) + j * strideAt(1) + k * strideAt(2) + w * strideAt(3)))); +} #ifndef __JAVACPP_HACK__ //////////////////////////////////////////////////////////////////////// diff --git a/libnd4j/include/array/NDArray.hXX b/libnd4j/include/array/NDArray.hXX index 786333eec..773d845ab 100644 --- a/libnd4j/include/array/NDArray.hXX +++ b/libnd4j/include/array/NDArray.hXX @@ -2170,7 +2170,7 @@ const std::string* ND4J_EXPORT NDArray::bufferAsT() const { template const T* NDArray::bufferAsT() const { // FIXME: do we REALLY want sync here? - syncToHost(); + // syncToHost(); return reinterpret_cast(buffer()); } @@ -2597,11 +2597,9 @@ void NDArray::operator+=(const T value) { auto other = NDArrayFactory::create(this->dataType(), value, getContext()); - NDArray::prepareSpecialUse({this}, {&other}); - + NDArray::prepareSpecialUse({this}, {this, &other}); NativeOpExecutioner::execScalar(getContext(), sd::scalar::Add, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), nullptr); - - NDArray::registerSpecialUse({this}, {}); + NDArray::registerSpecialUse({this}, {this, &other}); } template ND4J_EXPORT void NDArray::operator+=(const double value); template ND4J_EXPORT void NDArray::operator+=(const float value); @@ -2619,11 +2617,9 @@ void NDArray::operator-=(const T value) { auto other = NDArrayFactory::create(dataType(), value, getContext()); - NDArray::prepareSpecialUse({this}, {&other}); - + NDArray::prepareSpecialUse({this}, {this, &other}); NativeOpExecutioner::execScalar(getContext(), sd::scalar::Subtract, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), nullptr); - - NDArray::registerSpecialUse({this}, {}); + NDArray::registerSpecialUse({this}, {this, &other}); } template ND4J_EXPORT void NDArray::operator-=(const double value); template ND4J_EXPORT void NDArray::operator-=(const float value); @@ -2640,10 +2636,9 @@ void NDArray::operator*=(const T scalar) { throw std::runtime_error("NDArray::operator*=: you can't use this method on String array!"); auto other = NDArrayFactory::create(this->dataType(), scalar, getContext()); - NDArray::prepareSpecialUse({this}, {&other}); + NDArray::prepareSpecialUse({this}, {this, &other}); NativeOpExecutioner::execScalar(getContext(), sd::scalar::Multiply, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), nullptr); - - NDArray::registerSpecialUse({this}, {}); + NDArray::registerSpecialUse({this}, {this, &other}); } template ND4J_EXPORT void NDArray::operator*=(const double scalar); template ND4J_EXPORT void NDArray::operator*=(const float scalar); @@ -2663,9 +2658,9 @@ void NDArray::operator/=(const T scalar) { throw std::runtime_error("NDArray::operator/=: you can't use this method on String array!"); auto other = 
NDArrayFactory::create(this->dataType(), scalar, getContext()); - NDArray::prepareSpecialUse({this}, {&other}); + NDArray::prepareSpecialUse({this}, {this, &other}); NativeOpExecutioner::execScalar(getContext(), sd::scalar::Divide, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), nullptr); - NDArray::registerSpecialUse({this}, {}); + NDArray::registerSpecialUse({this}, {this, &other}); } template ND4J_EXPORT void NDArray::operator/=(const double scalar); template ND4J_EXPORT void NDArray::operator/=(const float scalar); @@ -3758,8 +3753,7 @@ T NDArray::e(const Nd4jLong i, const Nd4jLong j) const { if (rankOf() != 2 || i >= shapeOf()[0] || j >= shapeOf()[1]) throw std::invalid_argument("NDArray::e(i,j): one of input indexes is out of array length or rank!=2 !"); - const Nd4jLong coords[2] = {i, j}; - const auto xOffset = shape::getOffset(shapeInfo(), coords); + const auto xOffset = i * strideAt(0) + j * strideAt(1); NDArray::preparePrimaryUse({}, {this}); NDArray::registerPrimaryUse({}, {this}); @@ -3778,8 +3772,7 @@ T NDArray::e(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k) const { if (rankOf() != 3 || i >= shapeOf()[0] || j >= shapeOf()[1] || k >= shapeOf()[2]) throw std::invalid_argument("NDArray::e(i,j,k): one of input indexes is out of array length or rank!=3 !"); - const Nd4jLong coords[3] = {i, j, k}; - const auto xOffset = shape::getOffset(shapeInfo(), coords); + const auto xOffset = i * strideAt(0) + j * strideAt(1) + k * strideAt(2); NDArray::preparePrimaryUse({}, {this}); NDArray::registerPrimaryUse({}, {this}); @@ -3798,8 +3791,7 @@ T NDArray::e(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLon if (rankOf() != 4 || i >= shapeOf()[0] || j >= shapeOf()[1] || k >= shapeOf()[2] || l >= shapeOf()[3]) throw std::invalid_argument("NDArray::e(i,j,k,l): one of input indexes is out of array length or rank!=4 !"); - const Nd4jLong coords[4] = {i, j, k, l}; - const auto xOffset = shape::getOffset(shapeInfo(), coords); + const auto xOffset = i * strideAt(0) + j * strideAt(1) + k * strideAt(2) + l * strideAt(3); NDArray::preparePrimaryUse({}, {this}); NDArray::registerPrimaryUse({}, {this}); @@ -4411,8 +4403,7 @@ void NDArray::p(const Nd4jLong i, const Nd4jLong j, const T value) { throw std::invalid_argument("NDArray:pe(i,j, value): one of input indexes is out of array length or rank!=2 !"); void *p = reinterpret_cast(const_cast(&value)); - Nd4jLong coords[2] = {i, j}; - auto xOffset = shape::getOffset(shapeInfo(), coords); + auto xOffset = i * strideAt(0) + j * strideAt(1); NDArray::preparePrimaryUse({this}, {}, true); BUILD_SINGLE_PARTIAL_SELECTOR(dataType(), templatedSet<, T>(this->buffer(), xOffset, p), LIBND4J_TYPES); @@ -4440,11 +4431,10 @@ void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const T va if (rankOf() != 3 || i >= shapeOf()[0] || j >= shapeOf()[1] || k >= shapeOf()[2]) throw std::invalid_argument("NDArray:pe(i,j,k, value): one of input indexes is out of array length or rank!=3 !"); - NDArray::preparePrimaryUse({this}, {}, true); - void *p = reinterpret_cast(const_cast(&value)); - Nd4jLong coords[3] = {i, j, k}; - auto xOffset = shape::getOffset(shapeInfo(), coords); + auto xOffset = i * strideAt(0) + j * strideAt(1) + k * strideAt(2); + + NDArray::preparePrimaryUse({this}, {}, true); BUILD_SINGLE_PARTIAL_SELECTOR(dataType(), templatedSet<, T>(this->buffer(), xOffset, p), 
LIBND4J_TYPES); NDArray::registerPrimaryUse({this}, {}); } @@ -4470,8 +4460,7 @@ void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4j throw std::invalid_argument("NDArray::p(i,j,k,l, value): one of input indexes is out of array length or rank!=4 !"); void *p = reinterpret_cast(const_cast(&value)); - Nd4jLong coords[4] = {i, j, k, l}; - auto xOffset = shape::getOffset(shapeInfo(), coords); + auto xOffset = i * strideAt(0) + j * strideAt(1) + k * strideAt(2) + l * strideAt(3); NDArray::preparePrimaryUse({this}, {}, true); BUILD_SINGLE_PARTIAL_SELECTOR(dataType(), templatedSet<, T>(this->buffer(), xOffset, p), LIBND4J_TYPES); diff --git a/libnd4j/include/array/cpu/NDArray.cpp b/libnd4j/include/array/cpu/NDArray.cpp index 87369f740..873b3fec9 100644 --- a/libnd4j/include/array/cpu/NDArray.cpp +++ b/libnd4j/include/array/cpu/NDArray.cpp @@ -153,21 +153,38 @@ void NDArray::setIdentity() { //////////////////////////////////////////////////////////////////////// template -static void templatedSwap(void *xBuffer, void *yBuffer, Nd4jLong length) { +static void templatedSwap(void *xBuffer, void *yBuffer, const Nd4jLong* xShapeInfo, const Nd4jLong* yShapeInfo, Nd4jLong length) { auto x = reinterpret_cast(xBuffer); auto y = reinterpret_cast(yBuffer); + const bool isSameOrders = shape::order(xShapeInfo) == shape::order(xShapeInfo); + + const auto xEws = shape::elementWiseStride(xShapeInfo); + const auto yEws = shape::elementWiseStride(yShapeInfo); + auto func = PRAGMA_THREADS_FOR { - for (auto i = start; i < stop; i++) { - auto temp = x[i]; - x[i] = y[i]; - y[i] = temp; + if(isSameOrders && xEws > 0 && yEws > 0) { + for(auto i = start; i < stop; i++) + sd::math::nd4j_swap(x[i*xEws], y[i*yEws]); + } + else if(shape::haveSameShapeAndStrides(xShapeInfo, yShapeInfo)) { + for(auto i = start; i < stop; i++) { + const auto ind = shape::getIndexOffset(i, xShapeInfo); + sd::math::nd4j_swap(x[ind], y[ind]); + } + } + else { + for(auto i = start; i < stop; i++) { + const auto xInd = shape::getIndexOffset(i, xShapeInfo); + const auto yInd = shape::getIndexOffset(i, yShapeInfo); + sd::math::nd4j_swap(x[xInd], y[yInd]); + } } }; samediff::Threads::parallel_for(func, 0, length); } -BUILD_SINGLE_TEMPLATE(template void templatedSwap, (void *xBuffer, void *yBuffer, Nd4jLong length), LIBND4J_TYPES); +BUILD_SINGLE_TEMPLATE(template void templatedSwap, (void *xBuffer, void *yBuffer, const Nd4jLong* xShapeInfo, const Nd4jLong* yShapeInfo, Nd4jLong length), LIBND4J_TYPES); //////////////////////////////////////////////////////////////////////// void NDArray::swapUnsafe(NDArray& other) { @@ -182,7 +199,7 @@ void NDArray::swapUnsafe(NDArray& other) { if(lengthOf() != other.lengthOf()) throw std::runtime_error("NDArray::swapUnsafe method: input arrays should have the same length!"); - BUILD_SINGLE_SELECTOR(xType, templatedSwap, (buffer(), other.buffer(), this->lengthOf()), LIBND4J_TYPES); + BUILD_SINGLE_SELECTOR(xType, templatedSwap, (buffer(), other.buffer(), shapeInfo(), other.shapeInfo(), this->lengthOf()), LIBND4J_TYPES); } //////////////////////////////////////////////////////////////////////// diff --git a/libnd4j/include/array/cuda/NDArray.cu b/libnd4j/include/array/cuda/NDArray.cu index e33e97c3b..8ed3eceeb 100644 --- a/libnd4j/include/array/cuda/NDArray.cu +++ b/libnd4j/include/array/cuda/NDArray.cu @@ -225,7 +225,13 @@ void NDArray::swapUnsafe(NDArray& other) { if(lengthOf() != other.lengthOf()) throw std::runtime_error("NDArray::swapUnsafe method: input arrays should have the same 
length!"); + PointersManager manager(getContext(), "NDArray::swapUnsafe"); + + prepareSpecialUse({&other, this}, {&other, this}); BUILD_SINGLE_SELECTOR(xType, templatedSwapUnsafe, (specialBuffer(), specialShapeInfo(), other.specialBuffer(), other.specialShapeInfo(), getContext()->getCudaStream()), LIBND4J_TYPES); + registerSpecialUse({&other, this}, {&other, this}); + + manager.synchronize(); } //////////////////////////////////////////////////////////////////////// @@ -546,21 +552,18 @@ void NDArray::printCurrentBuffer(const bool host, const char* msg, const int pre if(specialBuffer() == nullptr || _length == 0) { printf("NDArray::printSpecialBuffer: special buffer is nullptr !\n"); return; } - void* pHost = operator new(sizeof(T) * _length); + const auto sizeOfBuffer = sizeOfT() * (getOffset(_length - 1) + 1); - if (ews() != 1) { - for (uint i = 0; i < _length; i++) - cudaMemcpyAsync(reinterpret_cast(pHost) + i, specialBufferWithOffset(i), sizeof(T), cudaMemcpyDeviceToHost, *(getContext()->getCudaStream())); - } - else - cudaMemcpyAsync(pHost, specialBuffer(), sizeOfT() * _length, cudaMemcpyDeviceToHost, *getContext()->getCudaStream()); + void* pHost = operator new(sizeOfBuffer); + + cudaMemcpyAsync(pHost, specialBuffer(), sizeOfBuffer, cudaMemcpyDeviceToHost, *getContext()->getCudaStream()); cudaError_t cudaResult = cudaStreamSynchronize(*getContext()->getCudaStream()); if(cudaResult != 0) throw std::runtime_error("NDArray::printSpecialBuffer: cudaStreamSynchronize failed!"); for (uint i = 0; i < _length; i++) - printf("%.*f, ", precision, (double)reinterpret_cast(pHost)[i]); + printf("%.*f, ", precision, (double)reinterpret_cast(pHost)[getOffset(i)]); printf("\n"); operator delete(pHost); diff --git a/libnd4j/include/helpers/EigenValsAndVecs.h b/libnd4j/include/helpers/EigenValsAndVecs.h new file mode 100644 index 000000000..222b9c36e --- /dev/null +++ b/libnd4j/include/helpers/EigenValsAndVecs.h @@ -0,0 +1,86 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ ******************************************************************************/
+
+//
+// @author Yurii Shyrma (iuriish@yahoo.com)
+//
+
+#ifndef LIBND4J_EIGENVALSANDVECS_H
+#define LIBND4J_EIGENVALSANDVECS_H
+
+#include <array/NDArray.h>
+
+namespace sd {
+namespace ops {
+namespace helpers {
+
+// this class calculates eigenvalues and eigenvectors of given input matrix
+template <typename T>
+class EigenValsAndVecs {
+
+    public:
+        // suppose we got input square NxN matrix
+
+        NDArray _Vals;    // {N,2} matrix of eigenvalues, 2 means real and imaginary part
+        NDArray _Vecs;    // {N,N,2} matrix, whose columns are the eigenvectors (complex), 2 means real and imaginary part
+
+        explicit EigenValsAndVecs(const NDArray& matrix);
+
+
+        //////////////////////////////////////////////////////////////////////////
+        FORCEINLINE static void divideComplexNums(const T& a1, const T& b1, const T& a2, const T& b2, T& a3, T& b3) {
+
+            T norm2 = a2*a2 + b2*b2;
+
+            a3 = (a1*a2 + b1*b2) / norm2;
+            b3 = (a2*b1 - a1*b2) / norm2;
+        }
+
+        //////////////////////////////////////////////////////////////////////////
+        FORCEINLINE static void multiplyComplexNums(const T& a1, const T& b1, const T& a2, const T& b2, T& a3, T& b3) {
+
+            a3 = (a1*a2 - b1*b2);
+            b3 = (a1*b2 + b1*a2);
+        }
+
+        //////////////////////////////////////////////////////////////////////////
+        FORCEINLINE static void sqrtComplexNum(T& a, T& b) {
+
+            T norm = math::nd4j_sqrt<T,T>(a*a + b*b);
+
+            if(b < (T)0)
+                b = -math::nd4j_sqrt<T,T>((T)0.5 * (norm - a));
+            else
+                b = math::nd4j_sqrt<T,T>((T)0.5 * (norm - a));
+            a = math::nd4j_sqrt<T,T>((T)0.5 * (norm + a));
+        }
+
+
+    private:
+
+        void calcEigenVals(const NDArray& schurMatrixT);    // calculates _Vals
+        void calcPseudoEigenVecs(NDArray& schurMatrixT, NDArray& schurMatrixU);    // makes changes both in schurMatrixT(NxN) and schurMatrixU(NxN), also calculates and stores pseudo-eigenvectors (real) in schurMatrixU columns
+        void calcEigenVecs(const NDArray& schurMatrixU);    // calculates _Vecs
+
+};
+
+
+}
+}
+}
+
+
+#endif //LIBND4J_EIGENVALSANDVECS_H
diff --git a/libnd4j/include/helpers/FullPivLU.h b/libnd4j/include/helpers/FullPivLU.h
new file mode 100644
index 000000000..3e285b597
--- /dev/null
+++ b/libnd4j/include/helpers/FullPivLU.h
@@ -0,0 +1,52 @@
+/*******************************************************************************
+ * Copyright (c) 2020 Konduit K.K.
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Apache License, Version 2.0 which is available at
+ * https://www.apache.org/licenses/LICENSE-2.0.
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ ******************************************************************************/
+
+//
+// @author Yurii Shyrma (iuriish@yahoo.com)
+//
+
+#ifndef LIBND4J_FULLPIVLU_H
+#define LIBND4J_FULLPIVLU_H
+
+#include <array/NDArray.h>
+
+namespace sd {
+namespace ops {
+namespace helpers {
+
+// class solves equation A*x = b for x, by procedure of LU decomposition of input matrix A with complete pivoting
+// LU decomposition of a matrix is:
+// A = P^-1 * L * U * Q^-1
+// L is unit-lower-triangular,
+// U is upper-triangular,
+// and P and Q are permutation matrices for rows and columns respectively
+
+template <typename T>
+class FullPivLU {
+
+    public:
+
+        // A{M,K} * x{K,N} = b{M,N}
+        static void solve(const NDArray& A, const NDArray& b, NDArray& x);
+};
+
+
+}
+}
+}
+
+
+#endif //LIBND4J_FULLPIVLU_H
diff --git a/libnd4j/include/helpers/HessenbergAndSchur.h b/libnd4j/include/helpers/HessenbergAndSchur.h
new file mode 100644
index 000000000..9c209ea56
--- /dev/null
+++ b/libnd4j/include/helpers/HessenbergAndSchur.h
@@ -0,0 +1,102 @@
+/*******************************************************************************
+ * Copyright (c) 2020 Konduit K.K.
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Apache License, Version 2.0 which is available at
+ * https://www.apache.org/licenses/LICENSE-2.0.
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ ******************************************************************************/
+
+//
+// @author Yurii Shyrma (iuriish@yahoo.com)
+//
+
+#ifndef LIBND4J_HESSENBERGANDSCHUR_H
+#define LIBND4J_HESSENBERGANDSCHUR_H
+
+#include <array/NDArray.h>
+
+namespace sd {
+namespace ops {
+namespace helpers {
+
+// this class implements Hessenberg decomposition of square matrix using orthogonal similarity transformation
+// A = Q H Q^T
+// Q - orthogonal matrix
+// H - Hessenberg matrix
+template <typename T>
+class Hessenberg {
+    // suppose we got input square NxN matrix
+
+    public:
+
+        NDArray _Q;  // {N,N}
+        NDArray _H;  // {N,N}
+
+        explicit Hessenberg(const NDArray& matrix);
+
+    private:
+        void evalData();
+};
+
+
+// this class implements real Schur decomposition of square matrix using orthogonal similarity transformation
+// A = U T U^T
+// T - real quasi-upper-triangular matrix - block upper triangular matrix where the blocks on the diagonal are 1×1 or 2×2 with complex eigenvalues
+// U - real orthogonal matrix
+
+template <typename T>
+class Schur {
+    // suppose we got input square NxN matrix
+
+    public:
+
+        NDArray _T;  // {N,N}
+        NDArray _U;  // {N,N}
+
+        explicit Schur(const NDArray& matrix);
+
+        void splitTwoRows(const int ind, const T shift);
+
+        void calcShift(const int ind, const int iter, T& shift, NDArray& shiftInfo);
+
+        void initFrancisQR(const int ind1, const int ind2, const NDArray& shiftVec, int& ind3, NDArray& householderVec);
+
+        void doFrancisQR(const int ind1, const int ind2, const int ind3, const NDArray& householderVec);
+
+        void calcFromHessenberg();
+
+    private:
+
+        static const int _maxItersPerRow = 40;
+
+        void evalData(const NDArray& matrix);
+
+        //////////////////////////////////////////////////////////////////////////
+        FORCEINLINE int getSmallSubdiagEntry(const int inInd) {
+
+            int outInd =
inInd; + while (outInd > 0) { + T factor = math::nd4j_abs(_T.t(outInd-1, outInd-1)) + math::nd4j_abs(_T.t(outInd, outInd)); + if (math::nd4j_abs(_T.t(outInd, outInd-1)) <= DataTypeUtils::eps() * factor) + break; + outInd--; + } + return outInd; + } +}; + + +} +} +} + + +#endif //LIBND4J_HESSENBERGANDSCHUR_H diff --git a/libnd4j/include/helpers/Sqrtm.h b/libnd4j/include/helpers/Sqrtm.h new file mode 100644 index 000000000..1968bc7a5 --- /dev/null +++ b/libnd4j/include/helpers/Sqrtm.h @@ -0,0 +1,45 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author Yurii Shyrma (iuriish@yahoo.com) +// + +#ifndef LIBND4J_SQRTM_H +#define LIBND4J_SQRTM_H + +#include + +namespace sd { +namespace ops { +namespace helpers { + +template +class Sqrtm { + + + public: + + static void calc(const NDArray& in, NDArray& out); +}; + + +} +} +} + + +#endif //LIBND4J_SQRTM_H diff --git a/libnd4j/include/helpers/biDiagonalUp.h b/libnd4j/include/helpers/biDiagonalUp.h index aaf64d41d..dc44057a9 100644 --- a/libnd4j/include/helpers/biDiagonalUp.h +++ b/libnd4j/include/helpers/biDiagonalUp.h @@ -32,13 +32,14 @@ namespace helpers { class BiDiagonalUp { public: - + NDArray _HHmatrix; // 2D Householder matrix NDArray _HHbidiag; // vector which contains Householder coefficients + NDArray _hhCoeffs; // vector of Householder coefficients /** * constructor - * + * * matrix - input matrix expected to be bi-diagonalized, remains unaffected */ BiDiagonalUp(const NDArray& matrix); @@ -47,7 +48,7 @@ class BiDiagonalUp { * this method evaluates data (coeff, normX, tail) used in Householder transformation * formula for Householder matrix: P = identity_matrix - coeff * w * w^T * P * x = [normX, 0, 0 , 0, ...] - * coeff - scalar + * coeff - scalar * w = [1, w1, w2, w3, ...], "tail" is w except first unity element, that is "tail" = [w1, w2, w3, ...] * tail and coeff are stored in _HHmatrix * normX are stored in _HHbidiag @@ -59,13 +60,13 @@ class BiDiagonalUp { /** * this method evaluates product of Householder sequence matrices (transformations) acting on columns - * + * * type - type of sequence, type = 'u' (acting on columns) or type = 'v' (acting on rows) */ template - HHsequence makeHHsequence_(const char type) const; + HHsequence makeHHsequence_(const char type); - HHsequence makeHHsequence(const char type) const; + HHsequence makeHHsequence(const char type); }; diff --git a/libnd4j/include/helpers/cpu/biDiagonalUp.cpp b/libnd4j/include/helpers/cpu/biDiagonalUp.cpp deleted file mode 100644 index 4623a93ad..000000000 --- a/libnd4j/include/helpers/cpu/biDiagonalUp.cpp +++ /dev/null @@ -1,180 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. 
- * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// Created by Yurii Shyrma on 18.12.2017 -// - - -#include -#include -#include - - -namespace sd { -namespace ops { -namespace helpers { - - -////////////////////////////////////////////////////////////////////////// -BiDiagonalUp::BiDiagonalUp(const NDArray& matrix): _HHmatrix(sd::NDArrayFactory::create(matrix.ordering(), {matrix.sizeAt(0), matrix.sizeAt(1)}, matrix.dataType(), matrix.getContext())), - _HHbidiag(sd::NDArrayFactory::create(matrix.ordering(), {matrix.sizeAt(1), matrix.sizeAt(1)}, matrix.dataType(), matrix.getContext())) { - - // input validation - if(matrix.rankOf() != 2 || matrix.isScalar()) - throw std::runtime_error("ops::helpers::biDiagonalizeUp constructor: input array must be 2D matrix !"); - - _HHmatrix.assign(&matrix); - _HHbidiag.assign(0.); - - evalData(); - -} - - template - void BiDiagonalUp::_evalData() { - - const auto rows = _HHmatrix.sizeAt(0); - const auto cols = _HHmatrix.sizeAt(1); - - if(rows < cols) - throw std::runtime_error("ops::helpers::BiDiagonalizeUp::evalData method: this procedure is applicable only for input matrix with rows >= cols !"); - - NDArray* bottomRightCorner(nullptr), *column(nullptr), *row(nullptr); - T coeff, normX; - - T _x, _y; - - for(Nd4jLong i = 0; i < cols-1; ++i ) { - - // evaluate Householder matrix nullifying columns - column = new NDArray(_HHmatrix({i,rows, i,i+1}, true)); - - _x = _HHmatrix.e(i,i); - _y = _HHbidiag.e(i,i); - - Householder::evalHHmatrixDataI(*column, _x, _y); - - _HHmatrix.p(i, i, _x); - _HHbidiag.p(i, i, _y); - - // multiply corresponding matrix block on householder matrix from the left: P * bottomRightCorner - bottomRightCorner = new NDArray(_HHmatrix({i,rows, i+1,cols}, true)); // {i, cols} - Householder::mulLeft(*bottomRightCorner, _HHmatrix({i+1,rows, i,i+1}, true), _HHmatrix.e(i,i)); - - delete bottomRightCorner; - delete column; - - if(i == cols-2) - continue; // do not apply right multiplying at last iteration - - // evaluate Householder matrix nullifying rows - row = new NDArray(_HHmatrix({i,i+1, i+1,cols}, true)); - - _x = _HHmatrix.e(i,i+1); - _y = _HHbidiag.e(i,i+1); - - Householder::evalHHmatrixDataI(*row, _x, _y); - - _HHmatrix.p(i, i+1, _x); - _HHbidiag.p(i, i+1, _y); - - // multiply corresponding matrix block on householder matrix from the right: bottomRightCorner * P - bottomRightCorner = new NDArray(_HHmatrix({i+1,rows, i+1,cols}, true)); // {i, rows} - - Householder::mulRight(*bottomRightCorner, _HHmatrix({i,i+1, i+2,cols}, true), _HHmatrix.e(i,i+1)); - - delete bottomRightCorner; - delete row; - } - - row = new NDArray(_HHmatrix({cols-2,cols-1, cols-1,cols}, true)); - - _x = _HHmatrix.e(cols-2,cols-1); - _y = _HHbidiag.e(cols-2,cols-1); - - Householder::evalHHmatrixDataI(*row, _x, _y); - - _HHmatrix.p(cols-2,cols-1, _x); - _HHbidiag.p(cols-2,cols-1, _y); - - delete row; - - column = new NDArray(_HHmatrix({cols-1,rows, cols-1,cols}, true)); 
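(Aside: illustration only, not part of this patch.) The deleted file above, like its impl/ replacement, repeatedly applies the Householder step documented in biDiagonalUp.h: P = identity_matrix - coeff * w * w^T with w = [1, tail...], chosen so that P * x = [normX, 0, 0, ...]. A minimal standalone sketch of how (coeff, normX, tail) fall out of that formula, using plain std::vector instead of NDArray and an illustrative function name:

    #include <cmath>
    #include <vector>

    // Mirrors the general branch of Householder<T>::evalHHmatrixData: given x,
    // produce coeff, normX and tail such that (I - coeff*w*w^T)*x = [normX, 0, ...],
    // where w = [1, tail...]. The degenerate branch (x of length 1, or with an
    // already-zero tail) is omitted for brevity.
    static void evalReflector(const std::vector<double>& x,
                              double& coeff, double& normX, std::vector<double>& tail) {
        double norm2 = 0.0;
        for (double v : x) norm2 += v * v;      // ||x||^2
        normX = std::sqrt(norm2);
        if (x[0] >= 0.0) normX = -normX;        // opposite sign lessens roundoff error
        const double u0 = x[0] - normX;
        coeff = -u0 / normX;
        tail.assign(x.begin() + 1, x.end());
        for (double& t : tail) t /= u0;         // w = [1, tail...]
    }

For x = {3, 4} this gives normX = -5, coeff = 1.6, tail = {0.5}, and (I - coeff*w*w^T)*x is indeed {-5, 0}.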
- - _x = _HHmatrix.e(cols-1,cols-1); - _y = _HHbidiag.e(cols-1,cols-1); - - Householder::evalHHmatrixDataI(*column, _x, _y); - - _HHmatrix.p(cols-1, cols-1, _x); - _HHbidiag.p(cols-1, cols-1, _y); - - delete column; - } - -////////////////////////////////////////////////////////////////////////// -void BiDiagonalUp::evalData() { - auto xType = _HHmatrix.dataType(); - - BUILD_SINGLE_SELECTOR(xType, _evalData, ();, FLOAT_TYPES); -} - - -////////////////////////////////////////////////////////////////////////// -template -HHsequence BiDiagonalUp::makeHHsequence_(const char type) const { - - if(type == 'u') { - - const int diagSize = _HHbidiag.sizeAt(0); - auto colOfCoeffs = NDArrayFactory::create(_HHmatrix.ordering(), {diagSize, 1}, _HHmatrix.dataType(), _HHmatrix.getContext()); - - for(int i = 0; i < diagSize; ++i) - colOfCoeffs.p(i, _HHmatrix.e(i,i)); - - return HHsequence(_HHmatrix, colOfCoeffs, type); - } - else { - - const int diagUpSize = _HHbidiag.sizeAt(0) - 1; - NDArray colOfCoeffs = NDArrayFactory::create(_HHmatrix.ordering(), {diagUpSize, 1}, _HHmatrix.dataType(), _HHmatrix.getContext()); - - for(int i = 0; i < diagUpSize; ++i) - colOfCoeffs.p(i, _HHmatrix.e(i,i+1)); - - HHsequence result(_HHmatrix, colOfCoeffs, type); - result._diagSize = diagUpSize; - result._shift = 1; - - return result; - } -} - - HHsequence BiDiagonalUp::makeHHsequence(const char type) const { - auto xType = _HHmatrix.dataType(); - - BUILD_SINGLE_SELECTOR(xType, return makeHHsequence_, (type);, FLOAT_TYPES); - } - - - -BUILD_SINGLE_TEMPLATE(template void BiDiagonalUp::_evalData, (), FLOAT_TYPES); -BUILD_SINGLE_TEMPLATE(template HHsequence BiDiagonalUp::makeHHsequence_, (const char type) const, FLOAT_TYPES); - -} -} -} \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/hhColPivQR.cpp b/libnd4j/include/helpers/cpu/hhColPivQR.cpp deleted file mode 100644 index e118b0bf1..000000000 --- a/libnd4j/include/helpers/cpu/hhColPivQR.cpp +++ /dev/null @@ -1,171 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// Created by Yurii Shyrma on 11.01.2018 -// - -#include -#include -#include - -namespace sd { -namespace ops { -namespace helpers { - - -////////////////////////////////////////////////////////////////////////// -HHcolPivQR::HHcolPivQR(const NDArray& matrix) { - - _qr = matrix; - _diagSize = math::nd4j_min(matrix.sizeAt(0), matrix.sizeAt(1)); - _coeffs = NDArrayFactory::create(matrix.ordering(), {1, _diagSize}, matrix.dataType(), matrix.getContext()); - - _permut = NDArrayFactory::create(matrix.ordering(), {matrix.sizeAt(1), matrix.sizeAt(1)}, matrix.dataType(), matrix.getContext()); - - evalData(); -} - - void HHcolPivQR::evalData() { - BUILD_SINGLE_SELECTOR(_qr.dataType(), _evalData, (), FLOAT_TYPES); - } - -////////////////////////////////////////////////////////////////////////// -template -void HHcolPivQR::_evalData() { - - int rows = _qr.sizeAt(0); - int cols = _qr.sizeAt(1); - - auto transp = NDArrayFactory::create(_qr.ordering(), {1, cols}, _qr.dataType(), _qr.getContext()); - auto normsUpd = NDArrayFactory::create(_qr.ordering(), {1, cols}, _qr.dataType(), _qr.getContext()); - auto normsDir = NDArrayFactory::create(_qr.ordering(), {1, cols}, _qr.dataType(), _qr.getContext()); - - int transpNum = 0; - - for (int k = 0; k < cols; ++k) { - - T norm = _qr({0,0, k,k+1}).reduceNumber(reduce::Norm2).e(0); - normsDir.p(k, norm); - normsUpd.p(k, norm); - } - - T normScaled = (normsUpd.reduceNumber(reduce::Max)).e(0) * DataTypeUtils::eps(); - T threshold1 = normScaled * normScaled / (T)rows; - T threshold2 = math::nd4j_sqrt(DataTypeUtils::eps()); - - T nonZeroPivots = _diagSize; - T maxPivot = 0.; - - for(int k = 0; k < _diagSize; ++k) { - - int biggestColIndex = normsUpd({0,0, k,-1}).indexReduceNumber(indexreduce::IndexMax).e(0); - T biggestColNorm = normsUpd({0,0, k,-1}).reduceNumber(reduce::Max).e(0); - T biggestColSqNorm = biggestColNorm * biggestColNorm; - biggestColIndex += k; - - if(nonZeroPivots == (T)_diagSize && biggestColSqNorm < threshold1 * (T)(rows-k)) - nonZeroPivots = k; - - transp.p(k, (T)biggestColIndex); - - if(k != biggestColIndex) { - - auto temp1 = new NDArray(_qr({0,0, k,k+1}, true)); - auto temp2 = new NDArray(_qr({0,0, biggestColIndex,biggestColIndex+1}, true)); - auto temp3 = *temp1; - temp1->assign(temp2); - temp2->assign(temp3); - delete temp1; - delete temp2; - - T e0 = normsUpd.e(k); - T e1 = normsUpd.e(biggestColIndex); - normsUpd.p(k, e1); - normsUpd.p(biggestColIndex, e0); - //math::nd4j_swap(normsUpd(k), normsUpd(biggestColIndex)); - - e0 = normsDir.e(k); - e1 = normsDir.e(biggestColIndex); - normsDir.p(k, e1); - normsDir.p(biggestColIndex, e0); - //math::nd4j_swap(normsDir(k), normsDir(biggestColIndex)); - - ++transpNum; - } - - T normX; - NDArray* qrBlock = new NDArray(_qr({k,rows, k,k+1}, true)); - T c; - Householder::evalHHmatrixDataI(*qrBlock, c, normX); - _coeffs.p(k, c); - delete qrBlock; - - _qr.p(k,k, normX); - - T max = math::nd4j_abs(normX); - if(max > maxPivot) - maxPivot = max; - - if(k < rows && (k+1) < cols) { - qrBlock = new NDArray(_qr({k, rows, k+1,cols}, true)); - auto tail = new NDArray(_qr({k+1,rows, k, k+1}, true)); - Householder::mulLeft(*qrBlock, *tail, _coeffs.e(k)); - delete qrBlock; - delete tail; - } - - for (int j = k + 1; j < cols; ++j) { - - if (normsUpd.e(j) != (T)0.f) { - T temp = math::nd4j_abs(_qr.e(k, j)) / normsUpd.e(j); - temp = (1. + temp) * (1. - temp); - temp = temp < (T)0. ? (T)0. 
: temp; - T temp2 = temp * normsUpd.e(j) * normsUpd.e(j) / (normsDir.e(j)*normsDir.e(j)); - - if (temp2 <= threshold2) { - if(k+1 < rows && j < cols) - normsDir.p(j, _qr({k+1,rows, j,j+1}).reduceNumber(reduce::Norm2).e(0)); - - normsUpd.p(j, normsDir.e(j)); - } - else - normsUpd.p(j, normsUpd.e(j) * math::nd4j_sqrt(temp)); - } - } - } - - _permut.setIdentity(); - - for(int k = 0; k < _diagSize; ++k) { - - int idx = transp.e(k); - auto temp1 = new NDArray(_permut({0,0, k, k+1}, true)); - auto temp2 = new NDArray(_permut({0,0, idx,idx+1}, true)); - auto temp3 = *temp1; - temp1->assign(temp2); - temp2->assign(temp3); - delete temp1; - delete temp2; - } -} - - BUILD_SINGLE_TEMPLATE(template void HHcolPivQR::_evalData, (), FLOAT_TYPES); - -} -} -} - diff --git a/libnd4j/include/helpers/cpu/householder.cpp b/libnd4j/include/helpers/cpu/householder.cpp deleted file mode 100644 index 69d4ca3db..000000000 --- a/libnd4j/include/helpers/cpu/householder.cpp +++ /dev/null @@ -1,221 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// Created by Yurii Shyrma on 18.12.2017 -// - -#include -#include - -namespace sd { -namespace ops { -namespace helpers { - - -////////////////////////////////////////////////////////////////////////// -template -NDArray Householder::evalHHmatrix(const NDArray& x) { - - // input validation - if(!x.isVector() && !x.isScalar()) - throw std::runtime_error("ops::helpers::Householder::evalHHmatrix method: input array must be vector or scalar!"); - - auto w = NDArrayFactory::create(x.ordering(), {(int)x.lengthOf(), 1}, x.dataType(), x.getContext()); // column-vector - auto wT = NDArrayFactory::create(x.ordering(), {1, (int)x.lengthOf()}, x.dataType(), x.getContext()); // row-vector (transposed w) - - T coeff; - T normX = x.reduceNumber(reduce::Norm2).e(0); - - if(normX*normX - x.e(0) * x.e(0) <= DataTypeUtils::min() || x.lengthOf() == 1) { - - normX = x.e(0); - coeff = 0.f; - w = 0.f; - - } - else { - - if(x.e(0) >= (T)0.f) - normX = -normX; // choose opposite sign to lessen roundoff error - - T u0 = x.e(0) - normX; - coeff = -u0 / normX; - w.assign(x / u0); - } - - w.p(Nd4jLong(0), 1.f); - wT.assign(&w); - - NDArray identity = NDArrayFactory::create(x.ordering(), {(int)x.lengthOf(), (int)x.lengthOf()}, x.dataType(), x.getContext()); - identity.setIdentity(); // identity matrix - - return identity - mmul(w, wT) * coeff; -} - -////////////////////////////////////////////////////////////////////////// -template -void Householder::evalHHmatrixData(const NDArray& x, NDArray& tail, T& coeff, T& normX) { - - // input validation - if(!x.isVector() && !x.isScalar()) - throw std::runtime_error("ops::helpers::Householder::evalHHmatrixData method: input array must be vector or scalar!"); - - if(!x.isScalar() && x.lengthOf() != tail.lengthOf() + 1) - throw 
std::runtime_error("ops::helpers::Householder::evalHHmatrixData method: input tail vector must have length less than unity compared to input x vector!"); - - normX = x.reduceNumber(reduce::Norm2, nullptr).e(0); - - if(normX*normX - x.e(0) * x.e(0) <= DataTypeUtils::min() || x.lengthOf() == 1) { - - normX = x.e(0); - coeff = (T)0.f; - tail = (T)0.f; - } - else { - - if(x.e(0) >= (T)0.f) - normX = -normX; // choose opposite sign to lessen roundoff error - - T u0 = x.e(0) - normX; - coeff = -u0 / normX; - - if(x.isRowVector()) - tail.assign(static_cast(x({0,0, 1,-1})) / u0); - else - tail.assign(static_cast(x({1,-1, 0,0,})) / u0); - } -} - -////////////////////////////////////////////////////////////////////////// -template -void Householder::evalHHmatrixDataI(const NDArray& x, T& coeff, T& normX) { - - int rows = (int)x.lengthOf()-1; - int num = 1; - - if(rows == 0) { - rows = 1; - num = 0; - } - - auto tail = NDArrayFactory::create(x.ordering(), {rows, 1}, x.dataType(), x.getContext()); - evalHHmatrixData(x, tail, coeff, normX); - - if(x.isRowVector()) { - auto temp = x({0,0, num, x.sizeAt(1)}, true); - temp.assign(tail); - } - else { - auto temp = x({num,x.sizeAt(0), 0,0}, true); - temp.assign(tail); - } -} - -////////////////////////////////////////////////////////////////////////// -template -void Householder::mulLeft(NDArray& matrix, const NDArray& tail, const T coeff) { - - // if(matrix.rankOf() != 2) - // throw "ops::helpers::Householder::mulLeft method: input array must be 2D matrix !"; - - if(matrix.sizeAt(0) == 1) { - matrix *= (T) 1.f - coeff; - } - else if(coeff != (T)0.f) { - - auto bottomPart = new NDArray(matrix({1,matrix.sizeAt(0), 0,0}, true)); - auto bottomPartCopy = *bottomPart; - - if(tail.isColumnVector()) { - - auto column = tail; - auto row = tail.transpose(); - auto resultingRow = mmul(row, bottomPartCopy); - auto fistRow = matrix({0,1, 0,0}, true); - resultingRow += fistRow; - fistRow -= resultingRow * coeff; - *bottomPart -= mmul(column, resultingRow) * coeff; - } - else { - - auto row = tail; - auto column = tail.transpose(); - auto resultingRow = mmul(row, bottomPartCopy); - auto fistRow = matrix({0,1, 0,0}, true); - resultingRow += fistRow; - fistRow -= resultingRow * coeff; - *bottomPart -= mmul(column, resultingRow) * coeff; - } - delete bottomPart; - } -} - - -////////////////////////////////////////////////////////////////////////// -template -void Householder::mulRight(NDArray& matrix, const NDArray& tail, const T coeff) { - - // if(matrix.rankOf() != 2) - // throw "ops::helpers::Householder::mulRight method: input array must be 2D matrix !"; - - if(matrix.sizeAt(1) == 1) - matrix *= (T)1.f - coeff; - - else if(coeff != (T)0.f) { - - auto rightPart = new NDArray(matrix({0,0, 1,matrix.sizeAt(1)}, true)); - auto rightPartCopy = *rightPart; - auto fistCol = new NDArray(matrix({0,0, 0,1}, true)); - - if(tail.isColumnVector()) { - - auto column = tail; - auto row = tail.transpose(); - auto resultingCol = mmul(rightPartCopy, column); - resultingCol += *fistCol; - *fistCol -= resultingCol * coeff; - *rightPart -= mmul(resultingCol, row) * coeff; - } - else { - - auto row = tail; - auto column = tail.transpose(); - auto resultingCol = mmul(rightPartCopy, column); - resultingCol += *fistCol; - *fistCol -= resultingCol * coeff; - *rightPart -= mmul(resultingCol, row) * coeff; - } - delete rightPart; - delete fistCol; - } -} - - -template class ND4J_EXPORT Householder; -template class ND4J_EXPORT Householder; -template class ND4J_EXPORT Householder; -template class 
ND4J_EXPORT Householder; - - - - - - - -} -} -} diff --git a/libnd4j/include/helpers/cpu/svd.cpp b/libnd4j/include/helpers/cpu/svd.cpp index 4e257b267..8a320f6de 100644 --- a/libnd4j/include/helpers/cpu/svd.cpp +++ b/libnd4j/include/helpers/cpu/svd.cpp @@ -22,7 +22,6 @@ #include #include #include -#include namespace sd { @@ -59,19 +58,19 @@ SVD::SVD(const NDArray& matrix, const int switchSize, const bool calcU, const if (_transp) math::nd4j_swap(_calcU, _calcV); - _s = NDArrayFactory::create(matrix.ordering(), {_diagSize, 1}, matrix.getContext()); - _m = NDArrayFactory::create(matrix.ordering(), {_diagSize + 1, _diagSize}, matrix.getContext()); - _m.assign(0.); + _s = NDArray(matrix.ordering(), {_diagSize, 1}, matrix.dataType(), matrix.getContext()); + _m = NDArray(matrix.ordering(), {_diagSize + 1, _diagSize}, matrix.dataType(), matrix.getContext()); + // _m.assign(0.); if (_calcU) - _u = NDArrayFactory::create(matrix.ordering(), {_diagSize + 1, _diagSize + 1}, matrix.getContext()); + _u = NDArray(matrix.ordering(), {_diagSize + 1, _diagSize + 1}, matrix.dataType(), matrix.getContext()); else - _u = NDArrayFactory::create(matrix.ordering(), {2, _diagSize + 1}, matrix.getContext()); - _u.assign(0.); + _u = NDArray(matrix.ordering(), {2, _diagSize + 1}, matrix.dataType(), matrix.getContext()); + // _u.assign(0.); if (_calcV) { - _v = NDArrayFactory::create(matrix.ordering(), {_diagSize, _diagSize}, matrix.getContext()); - _v.assign(0.); + _v = NDArray(matrix.ordering(), {_diagSize, _diagSize}, matrix.dataType(), matrix.getContext()); + // _v.assign(0.); } evalData(matrix); @@ -106,19 +105,19 @@ SVD::SVD(const NDArray& matrix, const int switchSize, const bool calcU, const if (_transp) math::nd4j_swap(_calcU, _calcV); - _s = NDArrayFactory::create(matrix.ordering(), {_diagSize, 1}, matrix.getContext()); - _m = NDArrayFactory::create(matrix.ordering(), {_diagSize + 1, _diagSize}, matrix.getContext()); - _m.assign(0.f); + _s = NDArray(matrix.ordering(), {_diagSize, 1}, matrix.dataType(), matrix.getContext()); + _m = NDArray(matrix.ordering(), {_diagSize + 1, _diagSize}, matrix.dataType(), matrix.getContext()); + // _m.assign(0.f); if (_calcU) - _u = NDArrayFactory::create(matrix.ordering(), {_diagSize + 1, _diagSize + 1}, matrix.getContext()); + _u = NDArray(matrix.ordering(), {_diagSize + 1, _diagSize + 1}, matrix.dataType(), matrix.getContext()); else - _u = NDArrayFactory::create(matrix.ordering(), {2, _diagSize + 1}, matrix.getContext()); - _u.assign(0.); + _u = NDArray(matrix.ordering(), {2, _diagSize + 1}, matrix.dataType(), matrix.getContext()); + // _u.assign(0.); if (_calcV) { - _v = NDArrayFactory::create(matrix.ordering(), {_diagSize, _diagSize}, matrix.getContext()); - _v.assign(0.); + _v = NDArray(matrix.ordering(), {_diagSize, _diagSize}, matrix.dataType(), matrix.getContext()); + // _v.assign(0.); } } @@ -131,28 +130,27 @@ void SVD::deflation1(int col1, int shift, int ind, int size) { throw std::runtime_error("ops::helpers::SVD::deflation1 method: input int must satisfy condition ind > 0 !"); int first = col1 + shift; - T cos = _m.e(first, first); - T sin = _m.e(first+ind, first); + T cos = _m.t(first, first); + T sin = _m.t(first+ind, first); T denom = math::nd4j_sqrt(cos*cos + sin*sin); if (denom == (T)0.) 
{ - - _m.p(first+ind, first+ind, 0.f); + _m.r(first+ind, first+ind) = (T)0; return; } cos /= denom; sin /= denom; - _m.p(first,first, denom); - _m.p(first+ind, first, 0.f); - _m.p(first+ind, first+ind, 0.f); + _m.r(first,first) = denom; + _m.r(first+ind, first) = (T)0; + _m.r(first+ind, first+ind) = (T)0; - auto rotation = NDArrayFactory::create(_m.ordering(), {2, 2}, _m.getContext()); - rotation.p(0, 0, cos); - rotation.p(0, 1, -sin); - rotation.p(1, 0, sin); - rotation.p(1, 1, cos); + NDArray rotation(_m.ordering(), {2, 2}, _m.dataType(), _m.getContext()); + + rotation.r(0,0) = rotation.r(1,1) = cos; + rotation.r(0,1) = -sin; + rotation.r(1,0) = sin; if (_calcU) { auto temp = _u({col1,col1+size+1, 0,0}, true); @@ -172,28 +170,26 @@ void SVD::deflation2(int col1U , int col1M, int row1W, int col1W, int ind1, i if(size <= 0) throw std::runtime_error("ops::helpers::SVD::deflation2 method: input size must satisfy condition size > 0 !"); - T cos = _m.e(col1M+ind1, col1M); - T sin = _m.e(col1M+ind2, col1M); + T cos = _m.t(col1M+ind1, col1M); + T sin = _m.t(col1M+ind2, col1M); T denom = math::nd4j_sqrt(cos*cos + sin*sin); if (denom == (T)0.) { - - _m.p(col1M + ind1, col1M + ind1, _m.e(col1M + ind2, col1M + ind2)); + _m.r(col1M+ind1, col1M+ind1) = _m.t(col1M+ind2, col1M+ind2); return; } cos /= denom; sin /= denom; - _m.p(col1M + ind1, col1M, denom); - _m.p(col1M + ind2, col1M + ind2, _m.e(col1M + ind1, col1M + ind1)); - _m.p(col1M + ind2, col1M, 0.f); + _m.r(col1M+ind1, col1M) = denom; + _m.r(col1M+ind2, col1M+ind2) = _m.t(col1M+ind1, col1M+ind1); + _m.r(col1M+ind2, col1M) = (T)0; - auto rotation = NDArrayFactory::create(_m.ordering(), {2, 2}, _m.getContext()); - rotation.p(0,0, cos); - rotation.p(1,1, cos); + NDArray rotation(_m.ordering(), {2, 2}, _m.dataType(), _m.getContext()); - rotation.p(0,1, -sin); - rotation.p(1,0, sin); + rotation.r(0,0) = rotation.r(1,1) = cos; + rotation.r(0,1) = -sin; + rotation.r(1,0) = sin; if (_calcU) { auto temp = _u({col1U,col1U+size+1, 0,0}, true); @@ -216,40 +212,40 @@ void SVD::deflation(int col1, int col2, int ind, int row1W, int col1W, int sh const int len = col2 + 1 - col1; - auto colVec0 = new NDArray(_m({col1+shift,col1+shift+len, col1+shift,col1+shift+1}, true)); + NDArray colVec0 = _m({col1+shift,col1+shift+len, col1+shift,col1+shift+1}, true); - auto diagInterval = _m({col1+shift, col1+shift+len, col1+shift,col1+shift+len}, true).diagonal('c'); + NDArray diagInterval = _m({col1+shift,col1+shift+len, col1+shift,col1+shift+len}, true).diagonal('c'); const T almostZero = DataTypeUtils::min(); T maxElem; if(len == 1) - maxElem = math::nd4j_abs(diagInterval.template e(0)); + maxElem = math::nd4j_abs(diagInterval.template t(0)); else - maxElem = diagInterval({1,-1, 0,0}, true).reduceNumber(reduce::AMax).template e(0); - T maxElem0 = colVec0->reduceNumber(reduce::AMax).template e(0); + maxElem = diagInterval({1,-1, 0,0}, true).reduceNumber(reduce::AMax).template t(0); + T maxElem0 = colVec0.reduceNumber(reduce::AMax).template t(0); T eps = math::nd4j_max(almostZero, DataTypeUtils::eps() * maxElem); T epsBig = (T)8. 
* DataTypeUtils::eps() * math::nd4j_max(maxElem0, maxElem); - if(diagInterval.template e(0) < epsBig) - diagInterval.p(Nd4jLong(0), epsBig); + if(diagInterval.template t(0) < epsBig) + diagInterval.r(0) = epsBig; for(int i=1; i < len; ++i) - if(math::nd4j_abs(colVec0->template e(i)) < eps) - colVec0->p(i, 0.f); + if(math::nd4j_abs(colVec0.template t(i)) < eps) + colVec0.r(i) = (T)0; for(int i=1; i < len; i++) - if(diagInterval.template e(i) < epsBig) { + if(diagInterval.template t(i) < epsBig) { deflation1(col1, shift, i, len); for(int i = 0; i < len; ++i) - diagInterval.p(i, _m.e(col1+shift+i,col1+shift+i)); + diagInterval.r(i) = _m.t(col1+shift+i,col1+shift+i); } { bool totDefl = true; for(int i=1; i < len; i++) - if(colVec0->template e(i) >= almostZero) { + if(colVec0.template t(i) >= almostZero) { totDefl = false; break; } @@ -261,7 +257,7 @@ void SVD::deflation(int col1, int col2, int ind, int row1W, int col1W, int sh int p = 1; for(int i=1; i(diagInterval.template e(i)) < almostZero) + if(math::nd4j_abs(diagInterval.template t(i)) < almostZero) permut[p++] = i; int k = 1, m = ind+1; @@ -271,7 +267,7 @@ void SVD::deflation(int col1, int col2, int ind, int row1W, int col1W, int sh permut[p] = m++; else if(m >= len) permut[p] = k++; - else if(diagInterval.template e(k) < diagInterval.template e(m)) + else if(diagInterval.template t(k) < diagInterval.template t(m)) permut[p] = m++; else permut[p] = k++; @@ -281,7 +277,7 @@ void SVD::deflation(int col1, int col2, int ind, int row1W, int col1W, int sh if(totDefl) { for(int i=1; i(diagInterval.template e(ki)) < almostZero || diagInterval.template e(0) < diagInterval.template e(ki)) + if(math::nd4j_abs(diagInterval.template t(ki)) < almostZero || diagInterval.template t(0) < diagInterval.template t(ki)) permut[i-1] = permut[i]; else { permut[i-1] = 0; @@ -303,39 +299,26 @@ void SVD::deflation(int col1, int col2, int ind, int row1W, int col1W, int sh const int ki = permut[len - (totDefl ? 
i+1 : i)]; const int jac = tCol[ki]; - T _e0 = diagInterval.template e(jac); - //math::nd4j_swap(diagInterval)(i), (*diagInterval)(jac)); - diagInterval.p(jac, diagInterval.template e(i)); - diagInterval.p(i, _e0); + math::nd4j_swap(diagInterval.r(i), diagInterval.r(jac)); - if(i!=0 && jac!=0) { - _e0 = colVec0->template e(jac); - //math::nd4j_swap((*colVec0)(i), (*colVec0)(jac)); - colVec0->p(jac, colVec0->template e(i)); - colVec0->p(i, _e0); - } + if(i!=0 && jac!=0) + math::nd4j_swap(colVec0.r(i), colVec0.r(jac)); if (_calcU) { - auto temp1 = _u({col1,col1+len+1, col1+i, col1+i+1}, true); - auto temp2 = _u({col1,col1+len+1, col1+jac,col1+jac+1}, true); - auto temp3 = temp1; - temp1.assign(temp2); - temp2.assign(temp3); + auto temp1 = _u({col1,col1+len+1, col1+i, col1+i+1}); + auto temp2 = _u({col1,col1+len+1, col1+jac,col1+jac+1}); + temp1.swapUnsafe(temp2); } else { - auto temp1 = _u({0,2, col1+i, col1+i+1}, true); - auto temp2 = _u({0,2, col1+jac, col1+jac+1}, true); - auto temp3 = temp1; - temp1.assign(temp2); - temp2.assign(temp3); + auto temp1 = _u({0,2, col1+i, col1+i+1}); + auto temp2 = _u({0,2, col1+jac, col1+jac+1}); + temp1.swapUnsafe(temp2); } if(_calcV) { - auto temp1 = _v({row1W,row1W+len, col1W+i, col1W+i+1}, true); - auto temp2 = _v({row1W,row1W+len, col1W+jac, col1W+jac+1}, true); - auto temp3 = temp1; - temp1.assign(temp2); - temp2.assign(temp3); + auto temp1 = _v({row1W,row1W+len, col1W+i, col1W+i+1}); + auto temp2 = _v({row1W,row1W+len, col1W+jac, col1W+jac+1}); + temp1.swapUnsafe(temp2); } const int tI = tInd[i]; @@ -351,19 +334,17 @@ void SVD::deflation(int col1, int col2, int ind, int row1W, int col1W, int sh { int i = len-1; - while(i > 0 && (math::nd4j_abs(diagInterval.template e(i)) < almostZero || math::nd4j_abs(colVec0->template e(i)) < almostZero)) + while(i > 0 && (math::nd4j_abs(diagInterval.template t(i)) < almostZero || math::nd4j_abs(colVec0.template t(i)) < almostZero)) --i; for(; i > 1; --i) { - if( (diagInterval.template e(i) - diagInterval.template e(i-1)) < DataTypeUtils::eps()*maxElem ) { - if (math::nd4j_abs(diagInterval.template e(i) - diagInterval.template e(i-1)) >= epsBig) + if( (diagInterval.template t(i) - diagInterval.template t(i-1)) < DataTypeUtils::eps()*maxElem ) { + if (math::nd4j_abs(diagInterval.template t(i) - diagInterval.template t(i-1)) >= epsBig) throw std::runtime_error("ops::helpers::SVD::deflation: diagonal elements are not properly sorted !"); deflation2(col1, col1 + shift, row1W, col1W, i-1, i, len); } } } - - delete colVec0; } @@ -374,10 +355,10 @@ T SVD::secularEq(const T diff, const NDArray& col0, const NDArray& diag, cons auto len = permut.lengthOf(); T res = 1.; T item; - for(Nd4jLong i=0; i(i); - item = col0.e(j) / ((diagShifted.e(j) - diff) * (diag.e(j) + shift + diff)); - res += item * col0.e(j); + for(int i=0; i(i); + item = col0.t(j) / ((diagShifted.t(j) - diff) * (diag.t(j) + shift + diff)); + res += item * col0.t(j); } return res; @@ -390,34 +371,34 @@ void SVD::calcSingVals(const NDArray& col0, const NDArray& diag, const NDArra auto len = col0.lengthOf(); auto curLen = len; - while(curLen > 1 && col0.e(curLen-1) == (T)0.f) + while(curLen > 1 && col0.t(curLen-1) == (T)0.f) --curLen; for (Nd4jLong k = 0; k < len; ++k) { - if (col0.e(k) == (T)0.f || curLen==1) { + if (col0.t(k) == (T)0.f || curLen==1) { - singVals.p(k, k==0 ? col0.e(0) : diag.e(k)); - mus.p(k, 0.f); - shifts.p(k, k==0 ? col0.e(0) : diag.e(k)); + singVals.r(k) = k==0 ? col0.t(0) : diag.t(k); + mus.r(k) = (T)0; + shifts.r(k) = k==0 ? 
col0.t(0) : diag.t(k); continue; } - T left = diag.e(k); + T left = diag.t(k); T right; if(k==curLen-1) - right = diag.e(curLen-1) + col0.reduceNumber(reduce::Norm2).e(0); + right = diag.t(curLen-1) + col0.reduceNumber(reduce::Norm2).t(0); else { int l = k+1; - while(col0.e(l) == (T)0.f) { + while(col0.t(l) == (T)0.f) { ++l; if(l >= curLen) throw std::runtime_error("ops::helpers::SVD::calcSingVals method: l >= curLen !"); } - right = diag.e(l); + right = diag.t(l); } T mid = left + (right - left) / (T)2.; @@ -440,7 +421,7 @@ void SVD::calcSingVals(const NDArray& col0, const NDArray& diag, const NDArra } T fPrev = secularEq(muPrev, col0, diag, permut, diagShifted, shift); - T fCur = secularEq(muCur, col0, diag, permut, diagShifted, shift); + T fCur = secularEq(muCur, col0, diag, permut, diagShifted, shift); if (math::nd4j_abs(fPrev) < math::nd4j_abs(fCur)) { math::nd4j_swap(fPrev, fCur); @@ -464,13 +445,12 @@ void SVD::calcSingVals(const NDArray& col0, const NDArray& diag, const NDArra if (shift == left && (muCur < (T)0. || muCur > right - left)) useBisection = true; - if (shift == right && (muCur < -(right - left) || muCur > (T)0.)) + else if (shift == right && (muCur < -(right - left) || muCur > (T)0.)) useBisection = true; - if (math::nd4j_abs(fCur) > math::nd4j_abs(fPrev) && math::nd4j_abs(fCur - fPrev) > (T)16. * DataTypeUtils::eps()) + else if (math::nd4j_abs(fCur) > math::nd4j_abs(fPrev) && math::nd4j_abs(fCur - fPrev) > (T)16. * DataTypeUtils::eps()) useBisection = true; } - if (useBisection) { T leftShifted, rightShifted; @@ -479,7 +459,6 @@ void SVD::calcSingVals(const NDArray& col0, const NDArray& diag, const NDArra rightShifted = (k==curLen-1) ? right : ((right - left) * (T)0.6); } else { - leftShifted = -(right - left) * (T)0.6; rightShifted = -DataTypeUtils::min(); } @@ -502,14 +481,12 @@ void SVD::calcSingVals(const NDArray& col0, const NDArray& diag, const NDArra } muCur = (leftShifted + rightShifted) / (T)2.; } - singVals.p(k, shift + muCur); - shifts.p(k, shift); - mus.p(k, muCur); + singVals.r(k) = shift + muCur; + shifts.r(k) = shift; + mus.r(k) = muCur; } - } - ////////////////////////////////////////////////////////////////////////// template void SVD::perturb(const NDArray& col0, const NDArray& diag, const NDArray& permut, const NDArray& singVals, const NDArray& shifts, const NDArray& mus, NDArray& zhat) { @@ -517,29 +494,29 @@ void SVD::perturb(const NDArray& col0, const NDArray& diag, const NDArray& pe int n = col0.lengthOf(); int m = permut.lengthOf(); if(m==0) { - zhat.assign(0.); + zhat.nullify(); return; } - int last = permut.e(m-1); + int last = permut.t(m-1); for (int k = 0; k < n; ++k) { - if (col0.e(k) == (T)0.f) - zhat.p(k, (T)0.f); + if (col0.t(k) == (T)0.f) + zhat.r(k) = (T)0; else { - T dk = diag.e(k); - T prod = (singVals.e(last) + dk) * (mus.e(last) + (shifts.e(last) - dk)); + T dk = diag.t(k); + T prod = (singVals.t(last) + dk) * (mus.t(last) + (shifts.t(last) - dk)); for(int l = 0; l(l); + int i = (int)permut.t(l); if(i!=k) { - int j = i(l-1); - prod *= ((singVals.e(j)+dk) / ((diag.e(i)+dk))) * ((mus.e(j)+(shifts.e(j)-dk)) / ((diag.e(i)-dk))); + int j = i(l-1); + prod *= ((singVals.t(j)+dk) / ((diag.t(i)+dk))) * ((mus.t(j)+(shifts.t(j)-dk)) / ((diag.t(i)-dk))); } } T tmp = math::nd4j_sqrt(prod); - zhat.p(k, col0.e(k) > (T)0.f ? tmp : -tmp); + zhat.r(k) = col0.t(k) > (T)0 ? 
tmp : -tmp; } } } @@ -555,48 +532,46 @@ void SVD::calcSingVecs(const NDArray& zhat, const NDArray& diag, const NDArra for (int k = 0; k < n; ++k) { - auto colU = new NDArray(U({0,0, k,k+1}, true)); - *colU = 0.; - NDArray* colV = nullptr; + NDArray colU = U({0,0, k,k+1}); + colU.nullify(); + + NDArray colV; if (_calcV) { - colV = new NDArray(V({0,0, k,k+1}, true)); - *colV = 0.; + colV = V({0,0, k,k+1}); + colV.nullify(); } - if (zhat.e(k) == (T)0.f) { - colU->p(k, 1.f); + if (zhat.t(k) == (T)0.f) { + colU.r(k) = (T)1; if (_calcV) - colV->p(k, 1.f); + colV.r(k) = (T)1; } else { for(int l = 0; l < m; ++l) { - int i = perm.e(l); - U.p(i,k, zhat.e(i)/(((diag.e(i) - shifts.e(k)) - mus.e(k)) )/( (diag.e(i) + singVals.e(k)))); + int i = (int)perm.t(l); + U.r(i,k) = zhat.t(i)/(((diag.t(i) - shifts.t(k)) - mus.t(k)) )/( (diag.t(i) + singVals.t(k))); } - U.p(n,k, 0.f); - *colU /= colU->reduceNumber(reduce::Norm2); + U.r(n,k) = (T)0; + colU /= colU.reduceNumber(reduce::Norm2); if (_calcV) { for(int l = 1; l < m; ++l){ - int i = perm.e(l); - V.p(i,k, diag.e(i) * zhat.e(i) / (((diag.e(i) - shifts.e(k)) - mus.e(k)) )/( (diag.e(i) + singVals.e(k)))); + int i = perm.t(l); + V.r(i,k) = diag.t(i) * zhat.t(i) / (((diag.t(i) - shifts.t(k)) - mus.t(k)) )/( (diag.t(i) + singVals.t(k))); } - V.p(0,k, -1.f); - *colV /= colV->reduceNumber(reduce::Norm2); + V.r(0,k) = (T)-1; + colV /= colV.reduceNumber(reduce::Norm2); } } - delete colU; - if (_calcV) - delete colV; } - auto colU = U({0,0, n,n+1}, true); - colU = 0.; - colU.p(n, 1.); + NDArray colU = U({0,0, n,n+1}); + colU.nullify(); + colU.r(n) = (T)1; } @@ -608,26 +583,29 @@ void SVD::calcBlockSVD(int col1, int size, NDArray& U, NDArray& singVals, NDA auto col0 = _m({col1, col1+size, col1, col1+1}, true); auto diag = static_cast(_m({col1, col1+size, col1, col1+size}, true).diagonal('c')); - diag.p(Nd4jLong(0), T(0)); - singVals = NDArrayFactory::create(_m.ordering(), {size, 1}, _m.getContext()); - U = NDArrayFactory::create(_u.ordering(), {size+1, size+1}, _u.getContext()); + diag.r(0) = (T)0; + singVals = NDArray(_m.ordering(), {size, 1}, _m.dataType(), _m.getContext()); + U = NDArray(_u.ordering(), {size+1, size+1}, _u.dataType(), _u.getContext()); if (_calcV) - V = NDArrayFactory::create(_v.ordering(), {size, size}, _v.getContext()); + V = NDArray(_v.ordering(), {size, size}, _v.dataType(), _v.getContext()); int curSize = size; - while(curSize > 1 && diag.template e(curSize-1) == (T)0.f) + while(curSize > 1 && diag.template t(curSize-1) == (T)0.f) --curSize; int m = 0; - std::vector indices; + std::vector indices; for(int k = 0; k < curSize; ++k) - if(math::nd4j_abs(col0.template e(k)) > almostZero) - indices.push_back((T)k); + if(math::nd4j_abs(col0.template t(k)) > almostZero) + indices.push_back(k); - auto permut = NDArrayFactory::create(_m.ordering(), {1, (int)indices.size()}, indices, _m.getContext()); - auto shifts = NDArrayFactory::create(_m.ordering(), {size, 1}, _m.getContext()); - auto mus = NDArrayFactory::create(_m.ordering(), {size, 1}, _m.getContext()); - auto zhat = NDArrayFactory::create(_m.ordering(), {size, 1}, _m.getContext()); + NDArray permut(_m.ordering(), {(int)indices.size()}, _m.dataType(), _m.getContext()); + for(int k = 0; k < indices.size(); ++k) + permut.r(k) = (T)indices[k]; + + NDArray shifts(_m.ordering(), {size, 1}, _m.dataType(), _m.getContext()); + NDArray mus(_m.ordering(), {size, 1}, _m.dataType(), _m.getContext()); + NDArray zhat(_m.ordering(), {size, 1}, _m.dataType(), _m.getContext()); calcSingVals(col0, diag, permut, 
singVals, shifts, mus); perturb(col0, diag, permut, singVals, shifts, mus, zhat); @@ -635,53 +613,39 @@ void SVD::calcBlockSVD(int col1, int size, NDArray& U, NDArray& singVals, NDA for(int i=0; i(i) > singVals.e(i+1)) { - T _e0 = singVals.e(i); - T _e1 = singVals.e(i+1); - //math::nd4j_swap(singVals(i),singVals(i+1)); - singVals.p(i, _e1); - singVals.p(i+1, _e0); + if(singVals.t(i) > singVals.t(i+1)) { - auto temp1 = U({0,0, i,i+1}, true); - auto temp2 = U({0,0, i+1,i+2}, true); - auto temp3 = temp1; - temp1.assign(temp2); - temp2.assign(temp3); + math::nd4j_swap(singVals.r(i), singVals.r(i+1)); + + auto temp1 = U({0,0, i,i+1}); + auto temp2 = U({0,0, i+1,i+2}); + temp1.swapUnsafe(temp2); if(_calcV) { - auto temp1 = V({0,0, i,i+1}, true); - auto temp2 = V({0,0, i+1,i+2}, true); - auto temp3 = temp1; - temp1.assign(temp2); - temp2.assign(temp3); + auto temp1 = V({0,0, i,i+1}); + auto temp2 = V({0,0, i+1,i+2}); + temp1.swapUnsafe(temp2); } } } - auto temp1 = singVals({0,curSize, 0,0}, true); - for (int e = 0; e < curSize / 2; ++e) { - T tmp = temp1.e(e); - temp1.p(e, temp1.e(curSize-1-e)); - temp1.p(curSize-1-e, tmp); - } + auto temp1 = singVals({0,curSize, 0,0}); + for (int e = 0; e < curSize / 2; ++e) + math::nd4j_swap(temp1.r(e), temp1.r(curSize-1-e)); auto temp2 = U({0,0, 0,curSize}, true); for(int i = 0; i < curSize/2; ++i) { - auto temp3 = temp2({0,0, i,i+1}, true); - auto temp4 = temp2({0,0, curSize-1-i,curSize-i}, true); - auto temp5 = temp3; - temp3.assign(temp4); - temp4.assign(temp5); + auto temp3 = temp2({0,0, i,i+1}); + auto temp4 = temp2({0,0, curSize-1-i,curSize-i}); + temp3.swapUnsafe(temp4); } if (_calcV) { auto temp2 = V({0,0, 0,curSize}, true); for(int i = 0; i < curSize/2; ++i) { - auto temp3 = temp2({0,0, i,i+1}, true); - auto temp4 = temp2({0,0, curSize-1-i,curSize-i}, true); - auto temp5 = temp3; - temp3.assign(temp4); - temp4.assign(temp5); + auto temp3 = temp2({0,0, i,i+1}); + auto temp4 = temp2({0,0, curSize-1-i,curSize-i}); + temp3.swapUnsafe(temp4); } } } @@ -695,54 +659,45 @@ void SVD::DivideAndConquer(int col1, int col2, int row1W, int col1W, int shif const int n = col2 - col1 + 1; const int k = n/2; const T almostZero = DataTypeUtils::min(); - T alphaK; - T betaK; - T r0; - T lambda, phi, c0, s0; - auto l = NDArrayFactory::create(_u.ordering(), {1, k}, _u.getContext()); - auto f = NDArrayFactory::create(_u.ordering(), {1, n-k-1}, _u.getContext()); + T alphaK, betaK, r0, lambda, phi, c0, s0; + + NDArray l(_u.ordering(), {1, k}, _u.dataType(), _u.getContext()); + NDArray f(_u.ordering(), {1, n-k-1}, _u.dataType(), _u.getContext()); if(n < _switchSize) { JacobiSVD jac(_m({col1,col1+n+1, col1,col1+n}, true), _calcU, _calcV, _fullUV); - if (_calcU) { - auto temp = _u({col1,col1+n+1, col1,col1+n+1}, true); - temp.assign(jac._u); - } + if (_calcU) + _u({col1,col1+n+1, col1,col1+n+1}, true).assign(jac._u); else { - auto temp1 = _u({0,1, col1,col1+n+1}, true); - temp1.assign(jac._u({0,1, 0,0}, true)); - auto temp2 = _u({1,2, col1,col1+n+1}, true); - temp2.assign(jac._u({n,n+1, 0,0}, true)); + _u({0,1, col1,col1+n+1}, true).assign(jac._u({0,1, 0,0}, true)); + _u({1,2, col1,col1+n+1}, true).assign(jac._u({n,n+1, 0,0}, true)); } - if (_calcV) { - auto temp = _v({row1W,row1W+n, col1W,col1W+n}, true); - temp.assign(jac._v); - } + if (_calcV) + _v({row1W,row1W+n, col1W,col1W+n}, true).assign(jac._v); - auto temp = _m({col1+shift,col1+shift+n+1, col1+shift,col1+shift+n}, true); - temp.assign(0.); + _m({col1+shift,col1+shift+n+1, col1+shift,col1+shift+n}, true).nullify(); auto 
diag = _m.diagonal('c'); diag({col1+shift, col1+shift+n, 0,0}, true).assign(jac._s({0,n, 0,0}, true)); return; } - alphaK = _m.e(col1 + k, col1 + k); - betaK = _m.e(col1 + k + 1, col1 + k); + alphaK = _m.t(col1 + k, col1 + k); + betaK = _m.t(col1 + k + 1, col1 + k); DivideAndConquer(k + 1 + col1, col2, k + 1 + row1W, k + 1 + col1W, shift); DivideAndConquer(col1, k - 1 + col1, row1W, col1W + 1, shift + 1); if (_calcU) { - lambda = _u.e(col1 + k, col1 + k); - phi = _u.e(col1 + k + 1, col2 + 1); + lambda = _u.t(col1 + k, col1 + k); + phi = _u.t(col1 + k + 1, col2 + 1); } else { - lambda = _u.e(1, col1 + k); - phi = _u.e(0, col2 + 1); + lambda = _u.t(1, col1 + k); + phi = _u.t(0, col2 + 1); } r0 = math::nd4j_sqrt((math::nd4j_abs(alphaK * lambda) * math::nd4j_abs(alphaK * lambda)) + math::nd4j_abs(betaK * phi) * math::nd4j_abs(betaK * phi)); @@ -757,7 +712,7 @@ void SVD::DivideAndConquer(int col1, int col2, int row1W, int col1W, int shif } if (_calcV) - _v.p(row1W+k, col1W, 1.f); + _v.r(row1W+k, col1W) = (T)1; if (r0 < almostZero){ c0 = 1.; @@ -770,39 +725,37 @@ void SVD::DivideAndConquer(int col1, int col2, int row1W, int col1W, int shif if (_calcU) { - auto temp = _u({col1,col1+k+1, col1+k,col1+k+1}, true); - NDArray q1(temp); + NDArray q1 = _u({col1,col1+k+1, col1+k,col1+k+1}, true).dup(); - for (int i = col1 + k - 1; i >= col1; --i) { - auto temp = _u({col1,col1+k+1, i+1,i+2}, true); - temp.assign(_u({col1, col1+k+1, i, i+1}, true)); - } + for (int i = col1 + k - 1; i >= col1; --i) + _u({col1,col1+k+1, i+1,i+2}, true).assign(_u({col1,col1+k+1, i,i+1}, true)); + + NDArray temp1 = _u({col1+k+1,col1+n+1, col2+1,col2+2}, true); _u({col1,col1+k+1, col1,col1+1}, true).assign(q1 * c0); _u({col1,col1+k+1, col2+1,col2+2}, true).assign(q1 * (-s0)); - _u({col1+k+1,col1+n+1, col1, col1+1}, true).assign(static_cast(_u({col1+k+1, col1+n+1, col2+1, col2+2}, true)) * s0); - _u({col1+k+1,col1+n+1, col2+1,col2+2}, true) *= c0; + _u({col1+k+1,col1+n+1, col1,col1+1}, true).assign(temp1 * s0); + temp1 *= c0; } else { - T q1 = _u.e(0, col1 + k); + T q1 = _u.t(0, col1 + k); for (int i = col1 + k - 1; i >= col1; --i) - _u.p(0, i+1, _u.e(0, i)); + _u.r(0, i+1) = _u.r(0, i); - _u.p(0, col1, q1 * c0); - _u.p(0, col2+1, -q1*s0); - _u.p(1, col1, _u.e(1, col2+1) * s0); - _u.p(1, col2 + 1, _u.e(1, col2 + 1) * c0); - _u({1,2, col1+1, col1+k+1}, true) = 0.f; - _u({0,1, col1+k+1, col1+n}, true) = 0.f; + _u.r(0, col1) = q1 * c0; + _u.r(0, col2+1) = -q1*s0; + _u.r(1, col1) = _u.t(1, col2+1) * s0; + _u.r(1, col2+1) = _u.t(1, col2+1) * c0; + _u({1,2, col1+1, col1+k+1}).nullify(); + _u({0,1, col1+k+1, col1+n}).nullify(); } - _m.p(col1 + shift, col1 + shift, r0); - auto temp1 = _m({col1+shift+1,col1+shift+k+1, col1+shift,col1+shift+1}, true); - temp1.assign(l*alphaK); - auto temp2 = _m({col1+shift+k+1,col1+shift+n, col1+shift,col1+shift+1}, true); - temp2.assign(f*betaK); + _m.r(col1+shift, col1+shift) = r0; + + _m({col1+shift+1,col1+shift+k+1, col1+shift,col1+shift+1}, true).assign(l*alphaK); + _m({col1+shift+k+1,col1+shift+n, col1+shift,col1+shift+1}, true).assign(f*betaK); deflation(col1, col2, k, row1W, col1W, shift); @@ -810,26 +763,22 @@ void SVD::DivideAndConquer(int col1, int col2, int row1W, int col1W, int shif calcBlockSVD(col1 + shift, n, UofSVD, singVals, VofSVD); if(_calcU) { - auto pTemp = _u({col1, col1+n+1, col1,col1+n+1}, true); - auto temp = pTemp; - pTemp.assign(mmul(temp, UofSVD)); + auto temp = _u({col1, col1+n+1, col1,col1+n+1}, true); + temp.assign(mmul(temp, UofSVD)); } else { - auto pTemp = _u({0,0, 
col1,col1+n+1}, true); - auto temp = pTemp; - pTemp.assign(mmul(temp, UofSVD)); + auto temp = _u({0,0, col1,col1+n+1}, true); + temp.assign(mmul(temp, UofSVD)); } if (_calcV) { - auto pTemp = _v({row1W,row1W+n, row1W,row1W+n}, true); - auto temp = pTemp; - pTemp.assign(mmul(temp, VofSVD)); + auto temp = _v({row1W,row1W+n, row1W,row1W+n}, true); + temp.assign(mmul(temp, VofSVD)); } auto blockM = _m({col1+shift,col1+shift+n, col1+shift,col1+shift+n}, true); - blockM = 0.f; - auto diag = blockM.diagonal('c'); - diag.assign(singVals); + blockM.nullify(); + blockM.diagonal('c').assign(singVals); } ////////////////////////////////////////////////////////////////////////// @@ -839,24 +788,22 @@ void SVD::exchangeUV(const HHsequence& hhU, const HHsequence& hhV, const NDAr if (_calcU) { int colsU = _fullUV ? hhU.rows() : _diagSize; - auto temp1 = NDArrayFactory::create(_u.ordering(), {hhU.rows(), colsU}, _u.getContext()); + NDArray temp1(_u.ordering(), {hhU.rows(), colsU}, _u.dataType(), _u.getContext()); temp1.setIdentity(); _u = temp1; - auto temp2 = _u({0,_diagSize, 0,_diagSize}, true); - temp2.assign(V({0,_diagSize, 0,_diagSize}, true)); + _u({0,_diagSize, 0,_diagSize}, true).assign(V({0,_diagSize, 0,_diagSize}, true)); const_cast(hhU).mulLeft(_u); } if (_calcV) { int colsV = _fullUV ? hhV.rows() : _diagSize; - auto temp1 = NDArrayFactory::create(_v.ordering(), {hhV.rows(), colsV}, _v.getContext()); + NDArray temp1(_v.ordering(), {hhV.rows(), colsV}, _v.dataType(), _v.getContext()); temp1.setIdentity(); _v = temp1; - auto temp2 = _v({0,_diagSize, 0,_diagSize}, true); - temp2.assign(U({0,_diagSize, 0,_diagSize}, true)); + _v({0,_diagSize, 0,_diagSize}, true).assign(U({0,_diagSize, 0,_diagSize}, true)); const_cast(hhV).mulLeft(_v); } } @@ -881,48 +828,40 @@ void SVD::evalData(const NDArray& matrix) { return; } - T scale = matrix.reduceNumber(reduce::AMax).e(0); + T scale = matrix.reduceNumber(reduce::AMax).t(0); if(scale == (T)0.) scale = 1.; - NDArray copy; - if(_transp) - copy = matrix.transpose(); - else - copy = matrix / scale; + BiDiagonalUp biDiag(_transp ? 
matrix.transpose() : matrix / scale); - BiDiagonalUp biDiag(copy); + _u.nullify(); + _v.nullify(); - _u = 0.; - _v = 0.; + _m({0,_diagSize, 0,0}, true).assign(biDiag._HHbidiag.transpose()); - auto temp1 = biDiag._HHbidiag.transpose(); - auto temp2 = _m({0,_diagSize, 0,0}, true); - temp2.assign(temp1); - - - auto temp3 = _m({_m.sizeAt(0)-1,_m.sizeAt(0), 0,0}, true); - temp3.assign(0.); + _m({_m.sizeAt(0)-1,_m.sizeAt(0), 0,0}).nullify(); DivideAndConquer(0, _diagSize - 1, 0, 0, 0); for (int i = 0; i < _diagSize; ++i) { - T a = math::nd4j_abs(_m.e(i, i)); - _s.p(i, a * scale); + T a = math::nd4j_abs(_m.t(i, i)); + _s.r(i) = a * scale; if (a < almostZero) { - auto temp = _s({i+1,_diagSize, 0,0}, true); - temp.assign(0.); + _s({i+1,_diagSize, 0,0}).nullify(); break; } else if (i == _diagSize-1) break; } + HHsequence hhV = biDiag.makeHHsequence('v'); + HHsequence hhU = biDiag.makeHHsequence('u'); + if(_transp) - exchangeUV(biDiag.makeHHsequence('v'), biDiag.makeHHsequence('u'), _v, _u); + exchangeUV(hhV, hhU, _v, _u); else - exchangeUV(biDiag.makeHHsequence('u'), biDiag.makeHHsequence('v'), _u, _v); + exchangeUV(hhU, hhV, _u, _v); } diff --git a/libnd4j/include/helpers/hhSequence.h b/libnd4j/include/helpers/hhSequence.h index 31855a86c..1e1f8ecad 100644 --- a/libnd4j/include/helpers/hhSequence.h +++ b/libnd4j/include/helpers/hhSequence.h @@ -27,35 +27,35 @@ namespace sd { namespace ops { namespace helpers { - + class HHsequence { public: - + /* * matrix containing the Householder vectors */ - NDArray _vectors; + const NDArray& _vectors; /* * vector containing the Householder coefficients */ - NDArray _coeffs; - + const NDArray& _coeffs; + /* - * shift of the Householder sequence + * shift of the Householder sequence */ int _shift; /* * length of the Householder sequence */ - int _diagSize; + int _diagSize; - /* + /* * type of sequence, type = 'u' (acting on columns, left) or type = 'v' (acting on rows, right) */ - char _type; + char _type; /* * constructor @@ -64,18 +64,18 @@ class HHsequence { /** * this method mathematically multiplies input matrix on Householder sequence from the left H0*H1*...Hn * matrix - * + * * matrix - input matrix to be multiplied */ template - void _mulLeft(NDArray& matrix); + void mulLeft_(NDArray& matrix); void mulLeft(NDArray& matrix); NDArray getTail(const int idx) const; template - void _applyTo(NDArray& dest); + void applyTo_(NDArray& dest); void applyTo(NDArray& dest); @@ -87,8 +87,8 @@ class HHsequence { ////////////////////////////////////////////////////////////////////////// FORCEINLINE int HHsequence::rows() const { - return _type == 'u' ? _vectors.sizeAt(0) : _vectors.sizeAt(1); -} + return _type == 'u' ? _vectors.sizeAt(0) : _vectors.sizeAt(1); +} diff --git a/libnd4j/include/helpers/householder.h b/libnd4j/include/helpers/householder.h index e71769901..7811fafa0 100644 --- a/libnd4j/include/helpers/householder.h +++ b/libnd4j/include/helpers/householder.h @@ -32,74 +32,74 @@ template class Householder { public: - + /** * this method calculates Householder matrix P = identity_matrix - coeff * w * w^T * P * x = [normX, 0, 0 , 0, ...] - * coeff - scalar + * coeff - scalar * w = [1, w1, w2, w3, ...] * w = u / u0 * u = x - |x|*e0 - * u0 = x0 - |x| + * u0 = x0 - |x| * e0 = [1, 0, 0 , 0, ...] 
- * + * * x - input vector, remains unaffected - */ - static NDArray evalHHmatrix(const NDArray& x); + */ + // static NDArray evalHHmatrix(const NDArray& x); /** * this method evaluates data required for calculation of Householder matrix P = identity_matrix - coeff * w * w^T * P * x = [normX, 0, 0 , 0, ...] - * coeff - scalar + * coeff - scalar * w = [1, w1, w2, w3, ...] * w = u / u0 * u = x - |x|*e0 - * u0 = x0 - |x| + * u0 = x0 - |x| * e0 = [1, 0, 0 , 0, ...] - * + * * x - input vector, remains unaffected * tail - the essential part of the vector w: [w1, w2, w3, ...] * normX - this scalar is the first non-zero element in vector resulting from Householder transformation -> (P*x) - * coeff - scalar, scaling factor in Householder matrix formula + * coeff - scalar, scaling factor in Householder matrix formula */ static void evalHHmatrixData(const NDArray& x, NDArray& tail, T& coeff, T& normX); - static void evalHHmatrixDataI(const NDArray& x, T& coeff, T& normX); + static void evalHHmatrixDataI(NDArray& x, T& coeff, T& normX); // in-place, x to be affected /** * this method mathematically multiplies input matrix on Householder from the left P * matrix - * + * * matrix - input matrix * tail - the essential part of the Householder vector w: [w1, w2, w3, ...] - * coeff - scalar, scaling factor in Householder matrix formula + * coeff - scalar, scaling factor in Householder matrix formula */ static void mulLeft(NDArray& matrix, const NDArray& tail, const T coeff); /** * this method mathematically multiplies input matrix on Householder from the right matrix * P - * + * * matrix - input matrix * tail - the essential part of the Householder vector w: [w1, w2, w3, ...] - * coeff - scalar, scaling factor in Householder matrix formula - */ + * coeff - scalar, scaling factor in Householder matrix formula + */ static void mulRight(NDArray& matrix, const NDArray& tail, const T coeff); - + }; - + // /** // * this function reduce given matrix to upper bidiagonal form (in-place operation), matrix must satisfy following condition rows >= cols - // * - // * matrix - input 2D matrix to be reduced to upper bidiagonal from + // * + // * matrix - input 2D matrix to be reduced to upper bidiagonal from // */ // template // void biDiagonalizeUp(NDArray& matrix); - // /** + // /** // * given a matrix [m,n], this function computes its singular value decomposition matrix = u * s * v^T - // * + // * // * matrix - input 2D matrix to decompose, [m, n] // * u - unitary matrix containing left singular vectors of input matrix, [m, m] // * s - diagonal matrix with singular values of input matrix (non-negative) on the diagonal sorted in decreasing order, @@ -109,7 +109,7 @@ class Householder { // * fullUV - if false then only p (p is smaller among m and n) first columns of u and v will be calculated and their dimensions in this case are [m, p] and [n, p] // * // */ - // void svd(const NDArray& matrix, NDArray& u, NDArray& s, NDArray& v, const bool calcUV = false, const bool fullUV = false) + // void svd(const NDArray& matrix, NDArray& u, NDArray& s, NDArray& v, const bool calcUV = false, const bool fullUV = false) diff --git a/libnd4j/include/helpers/impl/EigenValsAndVecs.cpp b/libnd4j/include/helpers/impl/EigenValsAndVecs.cpp new file mode 100644 index 000000000..6eeb0c28b --- /dev/null +++ b/libnd4j/include/helpers/impl/EigenValsAndVecs.cpp @@ -0,0 +1,293 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. 
+ * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author Yurii Shyrma (iuriish@yahoo.com) +// + +#include +#include + + +namespace sd { +namespace ops { +namespace helpers { + +////////////////////////////////////////////////////////////////////////// +template +EigenValsAndVecs::EigenValsAndVecs(const NDArray& matrix) { + + if(matrix.rankOf() != 2) + throw std::runtime_error("ops::helpers::EigenValsAndVecs constructor: input matrix must be 2D !"); + + if(matrix.sizeAt(0) != matrix.sizeAt(1)) + throw std::runtime_error("ops::helpers::EigenValsAndVecs constructor: input array must be 2D square matrix !"); + + Schur schur(matrix); + + NDArray& schurMatrixU = schur._U; + NDArray& schurMatrixT = schur._T; + + _Vecs = NDArray(matrix.ordering(), {schurMatrixU.sizeAt(1), schurMatrixU.sizeAt(1), 2}, matrix.dataType(), matrix.getContext()); + _Vals = NDArray(matrix.ordering(), {matrix.sizeAt(1), 2}, matrix.dataType(), matrix.getContext()); + + // sequence of method calls matters + calcEigenVals(schurMatrixT); + calcPseudoEigenVecs(schurMatrixT, schurMatrixU); // pseudo-eigenvectors are real and will be stored in schurMatrixU + calcEigenVecs(schurMatrixU); +} + +////////////////////////////////////////////////////////////////////////// +template +void EigenValsAndVecs::calcEigenVals(const NDArray& schurMatrixT) { + + const int numOfCols = schurMatrixT.sizeAt(1); + + // calculate eigenvalues _Vals + int i = 0; + while (i < numOfCols) { + + if (i == numOfCols - 1 || schurMatrixT.t(i+1, i) == T(0.f)) { + + _Vals.r(i, 0) = schurMatrixT.t(i, i); // real part + _Vals.r(i, 1) = T(0); // imaginary part + + if(!math::nd4j_isfin(_Vals.t(i, 0))) { + throw std::runtime_error("ops::helpers::EigenValsAndVecs::calcEigenVals: got infinite eigen value !"); + return; + } + + ++i; + } + else { + + T p = T(0.5) * (schurMatrixT.t(i, i) - schurMatrixT.t(i+1, i+1)); + T z; + { + T t0 = schurMatrixT.t(i+1, i); + T t1 = schurMatrixT.t(i, i+1); + T maxval = math::nd4j_max(math::nd4j_abs(p), math::nd4j_max(math::nd4j_abs(t0), math::nd4j_abs(t1))); + t0 /= maxval; + t1 /= maxval; + T p0 = p / maxval; + z = maxval * math::nd4j_sqrt(math::nd4j_abs(p0 * p0 + t0 * t1)); + } + + _Vals.r(i, 0) = _Vals.r(i+1, 0) = schurMatrixT.t(i+1, i+1) + p; + _Vals.r(i, 1) = z; + _Vals.r(i+1,1) = -z; + + if(!(math::nd4j_isfin(_Vals.t(i,0)) && math::nd4j_isfin(_Vals.t(i+1,0)) && math::nd4j_isfin(_Vals.t(i,1)) && math::nd4j_isfin(_Vals.t(i+1,1)))) { + throw std::runtime_error("ops::helpers::EigenValsAndVecs::calcEigenVals: got infinite eigen value !"); + return; + } + + i += 2; + } + } +} + +////////////////////////////////////////////////////////////////////////// +template +void EigenValsAndVecs::calcPseudoEigenVecs(NDArray& schurMatrixT, NDArray& schurMatrixU) { + + const int numOfCols = schurMatrixU.sizeAt(1); + + T norm = 0; + for (int j = 0; j < numOfCols; ++j) + norm += schurMatrixT({j,j+1, math::nd4j_max(j-1,
0),numOfCols}).reduceNumber(reduce::ASum).template t(0); + + if (norm == T(0)) + return; + + for (int n = numOfCols-1; n >= 0; n--) { + + T p = _Vals.t(n, 0); // real part + T q = _Vals.t(n, 1); // imaginary part + + if(q == (T)0) { // not complex + + T lastr((T)0), lastw((T)0); + int l = n; + + schurMatrixT.r(n, n) = T(1); + + for (int i = n-1; i >= 0; i--) { + + T w = schurMatrixT.t(i,i) - p; + T r = mmul(schurMatrixT({i,i+1, l,n+1}, true), schurMatrixT({l,n+1, n,n+1}, true)).template t(0); // dot + + if (_Vals.t(i, 1) < T(0)) { + lastw = w; + lastr = r; + } + else { + + l = i; + if (_Vals.t(i, 1) == T(0)) { + + if (w != T(0)) + schurMatrixT.r(i, n) = -r / w; + else + schurMatrixT.r(i, n) = -r / (DataTypeUtils::eps() * norm); + } + else { + + T x = schurMatrixT.t(i, i+1); + T y = schurMatrixT.t(i+1, i); + T denom = (_Vals.t(i, 0) - p) * (_Vals.t(i, 0) - p) + _Vals.t(i, 1) * _Vals.t(i, 1); + T t = (x * lastr - lastw * r) / denom; + schurMatrixT.r(i, n) = t; + + if (math::nd4j_abs(x) > math::nd4j_abs(lastw)) + schurMatrixT.r(i+1, n) = (-r - w * t) / x; + else + schurMatrixT.r(i+1, n) = (-lastr - y * t) / lastw; + } + + + T t = math::nd4j_abs(schurMatrixT.t(i, n)); + if((DataTypeUtils::eps() * t) * t > T(1)) + schurMatrixT({schurMatrixT.sizeAt(0)-numOfCols+i,-1, n,n+1}) /= t; + } + } + } + else if(q < T(0) && n > 0) { // complex + + T lastra(0), lastsa(0), lastw(0); + int l = n - 1; + + if(math::nd4j_abs(schurMatrixT.t(n, n-1)) > math::nd4j_abs(schurMatrixT.t(n-1, n))) { + + schurMatrixT.r(n-1, n-1) = q / schurMatrixT.t(n, n-1); + schurMatrixT.r(n-1, n) = -(schurMatrixT.t(n, n) - p) / schurMatrixT.t(n, n-1); + } + else { + divideComplexNums(T(0),-schurMatrixT.t(n-1,n), schurMatrixT.t(n-1,n-1)-p,q, schurMatrixT.r(n-1,n-1),schurMatrixT.r(n-1,n)); + } + + schurMatrixT.r(n,n-1) = T(0); + schurMatrixT.r(n,n) = T(1); + + for (int i = n-2; i >= 0; i--) { + + T ra = mmul(schurMatrixT({i,i+1, l,n+1}, true), schurMatrixT({l,n+1, n-1,n}, true)).template t(0); // dot + T sa = mmul(schurMatrixT({i,i+1, l,n+1}, true), schurMatrixT({l,n+1, n,n+1}, true)).template t(0); // dot + + T w = schurMatrixT.t(i,i) - p; + + if (_Vals.t(i, 1) < T(0)) { + lastw = w; + lastra = ra; + lastsa = sa; + } + else { + + l = i; + + if (_Vals.t(i, 1) == T(0)) { + divideComplexNums(-ra,-sa, w,q, schurMatrixT.r(i,n-1),schurMatrixT.r(i,n)); + } + else { + + T x = schurMatrixT.t(i,i+1); + T y = schurMatrixT.t(i+1,i); + T vr = (_Vals.t(i, 0) - p) * (_Vals.t(i, 0) - p) + _Vals.t(i, 1) * _Vals.t(i, 1) - q * q; + T vi = (_Vals.t(i, 0) - p) * T(2) * q; + + if ((vr == T(0)) && (vi == T(0))) + vr = DataTypeUtils::eps() * norm * (math::nd4j_abs(w) + math::nd4j_abs(q) + math::nd4j_abs(x) + math::nd4j_abs(y) + math::nd4j_abs(lastw)); + + divideComplexNums(x*lastra-lastw*ra+q*sa,x*lastsa-lastw*sa-q*ra, vr,vi, schurMatrixT.r(i,n-1),schurMatrixT.r(i,n)); + + if(math::nd4j_abs(x) > (math::nd4j_abs(lastw) + math::nd4j_abs(q))) { + + schurMatrixT.r(i+1,n-1) = (-ra - w * schurMatrixT.t(i,n-1) + q * schurMatrixT.t(i,n)) / x; + schurMatrixT.r(i+1,n) = (-sa - w * schurMatrixT.t(i,n) - q * schurMatrixT.t(i,n-1)) / x; + } + else + divideComplexNums(-lastra-y*schurMatrixT.t(i,n-1),-lastsa-y*schurMatrixT.t(i,n), lastw,q, schurMatrixT.r(i+1,n-1),schurMatrixT.r(i+1,n)); + } + + T t = math::nd4j_max(math::nd4j_abs(schurMatrixT.t(i, n-1)), math::nd4j_abs(schurMatrixT.t(i,n))); + if ((DataTypeUtils::eps() * t) * t > T(1)) + schurMatrixT({i,numOfCols, n-1,n+1}) /= t; + } + } + n--; + } + else + throw 
std::runtime_error("ops::helpers::EigenValsAndVecs::calcEigenVecs: internal bug !"); + } + + for (int j = numOfCols-1; j >= 0; j--) + schurMatrixU({0,0, j,j+1}, true).assign( mmul(schurMatrixU({0,0, 0,j+1}, true), schurMatrixT({0,j+1, j,j+1}, true)) ); +} + + +////////////////////////////////////////////////////////////////////////// +template +void EigenValsAndVecs::calcEigenVecs(const NDArray& schurMatrixU) { + + const T precision = T(2) * DataTypeUtils::eps(); + + const int numOfCols = schurMatrixU.sizeAt(1); + + for (int j = 0; j < numOfCols; ++j) { + + if(math::nd4j_abs(_Vals.t(j, 1)) <= math::nd4j_abs(_Vals.t(j, 0)) * precision || j+1 == numOfCols) { // real + + _Vecs.syncToDevice(); + _Vecs({0,0, j,j+1, 0,1}).assign(schurMatrixU({0,0, j,j+1})); + _Vecs({0,0, j,j+1, 1,2}) = (T)0; + + // normalize + const T norm2 = _Vecs({0,0, j,j+1, 0,1}).reduceNumber(reduce::SquaredNorm).template t(0); + if(norm2 > (T)0) + _Vecs({0,0, j,j+1, 0,1}) /= math::nd4j_sqrt(norm2); + } + else { // complex + + for (int i = 0; i < numOfCols; ++i) { + _Vecs.r(i, j, 0) = _Vecs.r(i, j+1, 0) = schurMatrixU.t(i, j); + _Vecs.r(i, j, 1) = schurMatrixU.t(i, j+1); + _Vecs.r(i, j+1, 1) = -schurMatrixU.t(i, j+1); + } + + // normalize + T norm2 = _Vecs({0,0, j,j+1, 0,0}).reduceNumber(reduce::SquaredNorm).template t(0); + if(norm2 > (T)0) + _Vecs({0,0, j,j+1, 0,0}) /= math::nd4j_sqrt(norm2); + + // normalize + norm2 = _Vecs({0,0, j+1,j+2, 0,0}).reduceNumber(reduce::SquaredNorm).template t(0); + if(norm2 > (T)0) + _Vecs({0,0, j+1,j+2, 0,0}) /= math::nd4j_sqrt(norm2); + + ++j; + } + } +} + + +template class ND4J_EXPORT EigenValsAndVecs; +template class ND4J_EXPORT EigenValsAndVecs; +template class ND4J_EXPORT EigenValsAndVecs; +template class ND4J_EXPORT EigenValsAndVecs; + +} +} +} \ No newline at end of file diff --git a/libnd4j/include/helpers/impl/FullPivLU.cpp b/libnd4j/include/helpers/impl/FullPivLU.cpp new file mode 100644 index 000000000..efb7571ed --- /dev/null +++ b/libnd4j/include/helpers/impl/FullPivLU.cpp @@ -0,0 +1,170 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author Yurii Shyrma (iuriish@yahoo.com) +// + +#include +#include +#include + + +namespace sd { +namespace ops { +namespace helpers { + + +////////////////////////////////////////////////////////////////////////// +// A{M,K} * x{K,N} = b{M,N} +template +void FullPivLU::solve(const NDArray& A, const NDArray& b, NDArray& x) { + + if(A.rankOf() != 2) + throw std::runtime_error("FullPivLU::solve: input matrix A must be 2D !"); + + if(A.sizeAt(0) != b.sizeAt(0)) + throw std::runtime_error("FullPivLU::solve: A and b must have the same number of rows !"); + + if(A.sizeAt(1) != x.sizeAt(0)) + throw std::runtime_error("FullPivLU::solve: number of A columns must be equal to number of x rows !"); + + NDArray LU = A.dup(); + + const int rows = LU.sizeAt(0); + const int cols = LU.sizeAt(1); + const int diagLen = math::nd4j_min(rows, cols); + + std::vector rowsInds(rows), colsInds(cols); + + int numOfTranspos = 0; + int nonZeroPivots1 = diagLen; + + T maxPivot = T(0); + + for(int k = 0; k < diagLen; ++k) { + + NDArray bottomRightCorner = LU({k,rows, k,cols}, true); + const int indPivot = static_cast(bottomRightCorner.indexReduceNumber(indexreduce::IndexAbsoluteMax).t(0)); + + int colPivot = indPivot % (cols-k); + int rowPivot = indPivot / (cols-k); + + T currentMax = math::nd4j_abs(bottomRightCorner.t(rowPivot, colPivot)); + + // take into account that this was calculated in corner, not in whole LU + rowPivot += k; + colPivot += k; + + if(currentMax == T(0)) { + + nonZeroPivots1 = k; + + for(int i = k; i < diagLen; ++i) + rowsInds[i] = colsInds[i] = i; + + break; + } + + if(currentMax > maxPivot) + maxPivot = currentMax; + + rowsInds[k] = rowPivot; + colsInds[k] = colPivot; + + if(k != rowPivot) { + NDArray row1 = LU({k,k+1, 0,0}, true); + NDArray row2 = LU({rowPivot,rowPivot+1, 0,0}, true); + row1.swapUnsafe(row2); + ++numOfTranspos; + } + if(k != colPivot) { + NDArray col1 = LU({0,0, k,k+1}, true); + NDArray col2 = LU({0,0, colPivot,colPivot+1}, true); + col1.swapUnsafe(col2); + ++numOfTranspos; + } + + if(k < rows-1) + LU({k+1,rows, k,k+1}, true) /= LU.t(k, k); + + if(k < diagLen-1) + LU({k+1,rows, k+1,cols},true) -= mmul(LU({k+1,rows, k,k+1},true), LU({k,k+1, k+1,cols},true)); + } + + //***************************************************// + + const T threshold = maxPivot * DataTypeUtils::eps() * (T)diagLen; + + int nonZeroPivots2 = 0; + for(int i = 0; i < nonZeroPivots1; ++i) + nonZeroPivots2 += static_cast(math::nd4j_abs(LU.t(i,i)) > threshold); + + if(nonZeroPivots2 == 0) { + x.nullify(); + return; + } + + //***************************************************// + + std::vector rowsPermut1(rows), rowsPermut2(rows), colsPermut(cols); + std::iota(rowsPermut1.begin(), rowsPermut1.end(), 0); + std::iota(colsPermut.begin(), colsPermut.end(), 0); + + for(int k = diagLen-1; k >= 0; --k) + math::nd4j_swap(rowsPermut1[k], rowsPermut1[rowsInds[k]]); + + for(int k = 0; k < diagLen; ++k) + math::nd4j_swap(colsPermut[k], colsPermut[colsInds[k]]); + + for(int i = 0; i < rows; ++i) + for(int j = 0; j < rows; ++j) + if(i == rowsPermut1[j]) { rowsPermut2[i] = j; break; } + + //***************************************************// + + NDArray c = b.ulike(); + + for (int i = 0; i < rows; ++i) + c({i,i+1, 0,0}, true).assign(b({rowsPermut2[i],rowsPermut2[i]+1, 0,0}, true)); + + + NDArray cTopRows1 = c({0,diagLen, 0,0}, true); + // TriangularSolver::solve(LU({0,diagLen, 0,diagLen}, 
true), cTopRows1, true, true, cTopRows1); + ops::helpers::triangularSolve2D(nullptr, LU({0,diagLen, 0,diagLen}, true), cTopRows1,true,true, cTopRows1); + + if(rows > cols) + c({cols,-1, 0,0}, true) -= mmul(LU({cols,-1, 0,0},true), c({0,cols, 0,0}, true)); + + NDArray cTopRows2 = c({0,nonZeroPivots2, 0,0}, true); + // TriangularSolver::solve(LU({0,nonZeroPivots2, 0,nonZeroPivots2}, true), cTopRows2, false, false, cTopRows2); + ops::helpers::triangularSolve2D(nullptr, LU({0,nonZeroPivots2, 0,nonZeroPivots2}, true),cTopRows2,false,false, cTopRows2); + + for(int i = 0; i < nonZeroPivots2; ++i) + x({colsPermut[i],colsPermut[i]+1, 0,0}, true).assign(c({i,i+1, 0,0}, true)); + + for(int i = nonZeroPivots2; i < cols; ++i) + x({colsPermut[i],colsPermut[i]+1, 0,0}, true).nullify(); +} + +template class ND4J_EXPORT FullPivLU; +template class ND4J_EXPORT FullPivLU; +template class ND4J_EXPORT FullPivLU; +template class ND4J_EXPORT FullPivLU; + +} +} +} diff --git a/libnd4j/include/helpers/impl/HessenbergAndSchur.cpp b/libnd4j/include/helpers/impl/HessenbergAndSchur.cpp new file mode 100644 index 000000000..31495cab9 --- /dev/null +++ b/libnd4j/include/helpers/impl/HessenbergAndSchur.cpp @@ -0,0 +1,383 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author Yurii Shyrma (iuriish@yahoo.com) +// + +#include +#include +#include +#include + + +namespace sd { +namespace ops { +namespace helpers { + + +////////////////////////////////////////////////////////////////////////// +template +Hessenberg::Hessenberg(const NDArray& matrix) { + + if(matrix.rankOf() != 2) + throw std::runtime_error("ops::helpers::Hessenberg constructor: input matrix must be 2D !"); + + if(matrix.sizeAt(0) == 1) { + _Q = NDArray(matrix.ordering(), {1,1}, matrix.dataType(), matrix.getContext()); + _Q = 1; + _H = matrix.dup(); + return; + } + + if(matrix.sizeAt(0) != matrix.sizeAt(1)) + throw std::runtime_error("ops::helpers::Hessenberg constructor: input array must be 2D square matrix !"); + + _H = matrix.dup(); + _Q = matrix.ulike(); + + evalData(); +} + +////////////////////////////////////////////////////////////////////////// +template +void Hessenberg::evalData() { + + const int rows = _H.sizeAt(0); + + NDArray hhCoeffs(_H.ordering(), {rows - 1}, _H.dataType(), _H.getContext()); + + // calculate _H + for(uint i = 0; i < rows - 1; ++i) { + + T coeff, norm; + + NDArray tail1 = _H({i+1,-1, i,i+1}); + NDArray tail2 = _H({i+2,-1, i,i+1}, true); + + Householder::evalHHmatrixDataI(tail1, coeff, norm); + + _H({0,0, i,i+1}). template r(i+1) = norm; + hhCoeffs. 
template r(i) = coeff; + + NDArray bottomRightCorner = _H({i+1,-1, i+1,-1}, true); + Householder::mulLeft(bottomRightCorner, tail2, coeff); + + NDArray rightCols = _H({0,0, i+1,-1}, true); + Householder::mulRight(rightCols, tail2.transpose(), coeff); + } + + // calculate _Q + HHsequence hhSeq(_H, hhCoeffs, 'u'); + hhSeq._diagSize = rows - 1; + hhSeq._shift = 1; + hhSeq.applyTo_(_Q); + + // fill down with zeros starting at first subdiagonal + _H.fillAsTriangular(0, -1, 0, _H, 'l'); +} + +////////////////////////////////////////////////////////////////////////// +template +Schur::Schur(const NDArray& matrix) { + + if(matrix.rankOf() != 2) + throw std::runtime_error("ops::helpers::Schur constructor: input matrix must be 2D !"); + + if(matrix.sizeAt(0) != matrix.sizeAt(1)) + throw std::runtime_error("ops::helpers::Schur constructor: input array must be 2D square matrix !"); + + evalData(matrix); +} + +////////////////////////////////////////////////////////////////////////// +template +void Schur::evalData(const NDArray& matrix) { + + const T scale = matrix.reduceNumber(reduce::AMax).template t(0); + + const T almostZero = DataTypeUtils::min(); + + if(scale < DataTypeUtils::min()) { + + _T = matrix.ulike(); + _U = matrix.ulike(); + + _T.nullify(); + _U.setIdentity(); + + return; + } + + // perform Hessenberg decomposition + Hessenberg hess(matrix / scale); + + _T = std::move(hess._H); + _U = std::move(hess._Q); + + calcFromHessenberg(); + + _T *= scale; +} + +////////////////////////////////////////////////////////////////////////// +template +void Schur::splitTwoRows(const int ind, const T shift) { + + const int numCols = _T.sizeAt(1); + + T p = (T)0.5 * (_T.t(ind-1, ind-1) - _T.t(ind, ind)); + + T q = p*p + _T.t(ind, ind-1) * _T.t(ind-1, ind); + + _T.r(ind, ind) += shift; + _T.r(ind-1, ind-1) += shift; + + if (q >= (T)0) { + + T z = math::nd4j_sqrt(math::nd4j_abs(q)); + + NDArray rotation(_T.ordering(), {2, 2}, _T.dataType(), _T.getContext()); + + if (p >= (T)0) + JacobiSVD::createJacobiRotationGivens(p+z, _T.t(ind, ind-1), rotation); + else + JacobiSVD::createJacobiRotationGivens(p-z, _T.t(ind, ind-1), rotation); + + NDArray rightCols = _T({0,0, ind-1,-1}); + JacobiSVD::mulRotationOnLeft(ind-1, ind, rightCols, rotation.transpose()); + + NDArray topRows = _T({0,ind+1, 0,0}); + JacobiSVD::mulRotationOnRight(ind-1, ind, topRows, rotation); + + JacobiSVD::mulRotationOnRight(ind-1, ind, _U, rotation); + + _T.r(ind, ind-1) = (T)0; + } + + if (ind > 1) + _T.r(ind-1, ind-2) = (T)0; +} + + +////////////////////////////////////////////////////////////////////////// +template +void Schur::calcShift(const int ind, const int iter, T& shift, NDArray& shiftVec) { + + // shiftVec has length = 3 + + shiftVec.r(0) = _T.t(ind, ind); + shiftVec.r(1) = _T.t(ind-1, ind-1); + shiftVec.r(2) = _T.t(ind, ind-1) * _T.t(ind-1, ind); + + if (iter == 10) { + shift += shiftVec.t(0); + + for (int i = 0; i <= ind; ++i) + _T.r(i,i) -= shiftVec.t(0); + + T s = math::nd4j_abs(_T.t(ind, ind-1)) + math::nd4j_abs(_T.t(ind-1, ind-2)); + + shiftVec.r(0) = T(0.75) * s; + shiftVec.r(1) = T(0.75) * s; + shiftVec.r(2) = T(-0.4375) * s*s; + } + + if (iter == 30) { + + T s = (shiftVec.t(1) - shiftVec.t(0)) / T(2.0); + s = s*s + shiftVec.t(2); + + if (s > T(0)) { + + s = math::nd4j_sqrt(s); + + if (shiftVec.t(1) < shiftVec.t(0)) + s = -s; + + s = s + (shiftVec.t(1) - shiftVec.t(0)) / T(2.0); + s = shiftVec.t(0) - shiftVec.t(2) / s; + shift += s; + + for (int i = 0; i <= ind; ++i) + _T.r(i,i) -= s; + + shiftVec = T(0.964); + } + } +} + 
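For reference, calcShift above reproduces the classic ad-hoc shift strategy of the Francis QR iteration (the same scheme Eigen's RealSchur uses): the shift vector is formed from the trailing 2x2 block of _T, and "exceptional" shifts are injected at iterations 10 and 30 to break rare convergence stalls. The following is a minimal standalone sketch of the underlying Wilkinson shift, assuming double precision and a trailing block with real eigenvalues; wilkinsonShift is a hypothetical helper for illustration, not part of this patch:

#include <cmath>
#include <cstdio>

// Wilkinson shift for the trailing 2x2 block [[a, b], [c, d]] of a Hessenberg
// matrix: returns the eigenvalue of that block closer to d. Assumes
// p*p + b*c >= 0, i.e. the block has real eigenvalues.
static double wilkinsonShift(double a, double b, double c, double d) {
    const double p  = 0.5 * (a - d);
    const double bc = b * c;
    const double r  = std::sqrt(p * p + bc);
    // choose the larger-magnitude denominator to avoid cancellation
    const double denom = (p >= 0.0) ? (p + r) : (p - r);
    return (denom != 0.0) ? (d - bc / denom) : d;
}

int main() {
    // [[4, 1], [2, 3]] has eigenvalues {5, 2}; the shift picks 2, the root closer to d = 3
    std::printf("shift = %f\n", wilkinsonShift(4.0, 1.0, 2.0, 3.0));
    return 0;
}

Shifting by such a value before each implicit QR sweep is what drives the deflation loop in calcFromHessenberg further below.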
+////////////////////////////////////////////////////////////////////////// +template +void Schur::initFrancisQR(const int ind1, const int ind2, const NDArray& shiftVec, int& ind3, NDArray& householderVec) { + + // shiftVec has length = 3 + + for (ind3 = ind2-2; ind3 >= ind1; --ind3) { + + const T mm = _T.t(ind3, ind3); + const T r = shiftVec.t(0) - mm; + const T s = shiftVec.t(1) - mm; + + householderVec.r(0) = (r * s - shiftVec.t(2)) / _T.t(ind3+1, ind3) + _T.t(ind3, ind3+1); + householderVec.r(1) = _T.t(ind3+1, ind3+1) - mm - r - s; + householderVec.r(2) = _T.t(ind3+2, ind3+1); + + if (ind3 == ind1) + break; + + const T lhs = _T.t(ind3,ind3-1) * (math::nd4j_abs(householderVec.t(1)) + math::nd4j_abs(householderVec.t(2))); + const T rhs = householderVec.t(0) * (math::nd4j_abs(_T.t(ind3-1, ind3-1)) + math::nd4j_abs(mm) + math::nd4j_abs(_T.t(ind3+1, ind3+1))); + + if(math::nd4j_abs(lhs) < DataTypeUtils::eps() * rhs) + break; + } +} + +////////////////////////////////////////////////////////////////////////// +template +void Schur::doFrancisQR(const int ind1, const int ind2, const int ind3, const NDArray& householderVec) { + + if(!(ind2 >= ind1)) + throw std::runtime_error("ops::helpers::Schur::doFrancisQR: wrong input indexes, condition ind2 >= ind1 must be true !"); + if(!(ind2 <= ind3-2)) + throw std::runtime_error("ops::helpers::Schur::doFrancisQR: wrong input indexes, condition ind2 <= ind3-2 must be true !"); + + const int numCols = _T.sizeAt(1); + + for (int k = ind2; k <= ind3-2; ++k) { + + const bool firstIter = (k == ind2); + + T coeff, normX; + NDArray tail(_T.ordering(), {2, 1}, _T.dataType(), _T.getContext()); + Householder::evalHHmatrixData(firstIter ? householderVec : _T({k,k+3, k-1,k}), tail, coeff, normX); + + if (normX != T(0)) { + + if (firstIter && k > ind1) + _T.r(k, k-1) = -_T.t(k, k-1); + else if (!firstIter) + _T.r(k, k-1) = normX; + + NDArray block1 = _T({k,k+3, k,numCols}, true); + Householder::mulLeft(block1, tail, coeff); + + NDArray block2 = _T({0,math::nd4j_min(ind3,k+3)+1, k,k+3}, true); + Householder::mulRight(block2, tail, coeff); + + NDArray block3 = _U({0,numCols, k,k+3}, true); + Householder::mulRight(block3, tail, coeff); + } + } + + T coeff, normX; + NDArray tail(_T.ordering(), {1, 1}, _T.dataType(), _T.getContext()); + Householder::evalHHmatrixData(_T({ind3-1,ind3+1, ind3-2,ind3-1}), tail, coeff, normX); + + if (normX != T(0)) { + + _T.r(ind3-1, ind3-2) = normX; + + NDArray block1 = _T({ind3-1,ind3+1, ind3-1,numCols}, true); + Householder::mulLeft(block1, tail, coeff); + + NDArray block2 = _T({0,ind3+1, ind3-1,ind3+1}, true); + Householder::mulRight(block2, tail, coeff); + + NDArray block3 = _U({0,numCols, ind3-1,ind3+1}, true); + Householder::mulRight(block3, tail, coeff); + } + + for (int i = ind2+2; i <= ind3; ++i) { + _T.r(i, i-2) = T(0); + if (i > ind2+2) + _T.r(i, i-3) = T(0); + } +} + +////////////////////////////////////////////////////////////////////////// +template +void Schur::calcFromHessenberg() { + + const int maxIters = _maxItersPerRow * _T.sizeAt(0); + + const int numCols = _T.sizeAt(1); + int iu = numCols - 1; + int iter = 0; + int totalIter = 0; + + T shift = T(0); + + T norm = 0; + for (int j = 0; j < numCols; ++j) + norm += _T({0,math::nd4j_min(numCols,j+2), j,j+1}).reduceNumber(reduce::ASum).template t(0); + + if(norm != T(0)) { + + while (iu >= 0) { + + const int il = getSmallSubdiagEntry(iu); + + if (il == iu) { + + _T.r(iu,iu) = _T.t(iu,iu) + shift; + if (iu > 0) + _T.r(iu, iu-1) = T(0); + iu--; + iter = 0; + + } + else if (il ==
iu-1) { + + splitTwoRows(iu, shift); + iu -= 2; + iter = 0; + } + else { + + NDArray householderVec(_T.ordering(), {3}, _T.dataType(), _T.getContext()); + NDArray shiftVec (_T.ordering(), {3}, _T.dataType(), _T.getContext()); + + calcShift(iu, iter, shift, shiftVec); + + ++iter; + ++totalIter; + + if (totalIter > maxIters) + break; + + int im; + initFrancisQR(il, iu, shiftVec, im, householderVec); + doFrancisQR(il, im, iu, householderVec); + } + } + } +} + +template class ND4J_EXPORT Hessenberg; +template class ND4J_EXPORT Hessenberg; +template class ND4J_EXPORT Hessenberg; +template class ND4J_EXPORT Hessenberg; + +template class ND4J_EXPORT Schur; +template class ND4J_EXPORT Schur; +template class ND4J_EXPORT Schur; +template class ND4J_EXPORT Schur; + +} +} +} \ No newline at end of file diff --git a/libnd4j/include/helpers/impl/MmulHelper.cpp b/libnd4j/include/helpers/impl/MmulHelper.cpp index 8e37fd530..ba86bb1b5 100644 --- a/libnd4j/include/helpers/impl/MmulHelper.cpp +++ b/libnd4j/include/helpers/impl/MmulHelper.cpp @@ -207,7 +207,7 @@ sd::NDArray* MmulHelper::mmul(const sd::NDArray* A, const sd::NDArray* B, sd::ND const bool isBVector = shape::isCommonVector(B->shapeInfo(), lenDim); // dot product of 2 vectors - if(isAVector && isBVector && (aRank != 2 || aRank == 2 && (A->isSameShape(B) || bRank == 1 && A->sizeAt(1) == 1))) // (1x1x1 * 1x1) or (1x4 * 1*4) or (4x1 * 4x1) or (4x1 * 4) + if(A->lengthOf() == B->lengthOf() && isAVector && isBVector && (aRank != 2 || aRank == 2 && (A->isSameShape(B) || bRank == 1 && A->sizeAt(1) == 1))) // (1x1x1 * 1x1) or (1x4 * 1x4) or (4x1 * 4x1) or (4x1 * 4) return dot(A, B, C, alpha, beta); // matrix x matrix diff --git a/libnd4j/include/helpers/impl/Sqrtm.cpp b/libnd4j/include/helpers/impl/Sqrtm.cpp new file mode 100644 index 000000000..5fe45656f --- /dev/null +++ b/libnd4j/include/helpers/impl/Sqrtm.cpp @@ -0,0 +1,276 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License.
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author Yurii Shyrma (iuriish@yahoo.com) +// + +#include +#include +#include +#include +#include +#include + + +namespace sd { +namespace ops { +namespace helpers { + +////////////////////////////////////////////////////////////////////////// +template +static void sqrtmQuasiTrianDiag(const NDArray& matrixT, NDArray& sqrtT ) { + + const int rows = matrixT.sizeAt(0); + + for(int i = 0; i < rows; i++) { + + if (i == rows - 1 || matrixT.t(i+1, i) == (T)0) { + const auto elemT = matrixT.t(i, i); + if(elemT < (T)0) + throw std::runtime_error("ops::helpers::Sqrtm::sqrtmQuasiTrianDiag: can't take sqrt of negative diagonal element of T matrix !"); + sqrtT.r(i,i) = math::nd4j_sqrt(elemT); + } + else { + + EigenValsAndVecs es(matrixT({i,i+2, i,i+2}, true)); // es._Vecs {2,2,2}, es._Vals{2,2} + + const NDArray& vecs = es._Vecs; + const NDArray& vals = es._Vals; + + const T& vecsReal00 = vecs.t(0,0,0); + const T& vecsImag00 = vecs.t(0,0,1); + const T& vecsReal01 = vecs.t(0,1,0); + const T& vecsImag01 = vecs.t(0,1,1); + const T& vecsReal10 = vecs.t(1,0,0); + const T& vecsImag10 = vecs.t(1,0,1); + const T& vecsReal11 = vecs.t(1,1,0); + const T& vecsImag11 = vecs.t(1,1,1); + + // es.eigenvalues().cwiseSqrt().asDiagonal() + T eigenValsSqrt[2][2]; + eigenValsSqrt[0][0] = vals.t(0,0); + eigenValsSqrt[0][1] = vals.t(0,1); + eigenValsSqrt[1][0] = vals.t(1,0); + eigenValsSqrt[1][1] = vals.t(1,1); + EigenValsAndVecs::sqrtComplexNum(eigenValsSqrt[0][0], eigenValsSqrt[0][1]); + EigenValsAndVecs::sqrtComplexNum(eigenValsSqrt[1][0], eigenValsSqrt[1][1]); + + // es.eigenvectors() * es.eigenvalues().cwiseSqrt().asDiagonal() + T vecsElem[2][2][2]; + EigenValsAndVecs::multiplyComplexNums(vecsReal00,vecsImag00, eigenValsSqrt[0][0],eigenValsSqrt[0][1], vecsElem[0][0][0],vecsElem[0][0][1]); + EigenValsAndVecs::multiplyComplexNums(vecsReal01,vecsImag01, eigenValsSqrt[1][0],eigenValsSqrt[1][1], vecsElem[0][1][0],vecsElem[0][1][1]); + EigenValsAndVecs::multiplyComplexNums(vecsReal10,vecsImag10, eigenValsSqrt[0][0],eigenValsSqrt[0][1], vecsElem[1][0][0],vecsElem[1][0][1]); + EigenValsAndVecs::multiplyComplexNums(vecsReal11,vecsImag11, eigenValsSqrt[1][0],eigenValsSqrt[1][1], vecsElem[1][1][0],vecsElem[1][1][1]); + + // es.eigenvectors().inverse() + T vecsElemInv[2][2][2]; + + T tempReal, tempImag, divisorReal, divisorImag; + EigenValsAndVecs::multiplyComplexNums(vecsReal00,vecsImag00, vecsReal11,vecsImag11, divisorReal,divisorImag); + EigenValsAndVecs::multiplyComplexNums(vecsReal01,vecsImag01, vecsReal10,vecsImag10, tempReal,tempImag); + divisorReal -= tempReal; + divisorImag -= tempImag; + + EigenValsAndVecs::divideComplexNums(vecsReal11,vecsImag11, divisorReal,divisorImag, vecsElemInv[0][0][0],vecsElemInv[0][0][1]); + EigenValsAndVecs::divideComplexNums(-vecsReal01,-vecsImag01, divisorReal,divisorImag, vecsElemInv[0][1][0],vecsElemInv[0][1][1]); + EigenValsAndVecs::divideComplexNums(-vecsReal10,-vecsImag10, divisorReal,divisorImag, vecsElemInv[1][0][0],vecsElemInv[1][0][1]); + EigenValsAndVecs::divideComplexNums(vecsReal00,vecsImag00, divisorReal,divisorImag, vecsElemInv[1][1][0],vecsElemInv[1][1][1]); + + // result + T result[2][2][2]; + + EigenValsAndVecs::multiplyComplexNums(vecsElem[0][0][0],vecsElem[0][0][1], vecsElemInv[0][0][0],vecsElemInv[0][0][1], tempReal,tempImag); + EigenValsAndVecs::multiplyComplexNums(vecsElem[0][1][0],vecsElem[0][1][1], 
vecsElemInv[1][0][0],vecsElemInv[1][0][1], result[0][0][0],result[0][0][1]); + result[0][0][0] += tempReal; + + EigenValsAndVecs::multiplyComplexNums(vecsElem[0][0][0],vecsElem[0][0][1], vecsElemInv[0][1][0],vecsElemInv[0][1][1], tempReal,tempImag); + EigenValsAndVecs::multiplyComplexNums(vecsElem[0][1][0],vecsElem[0][1][1], vecsElemInv[1][1][0],vecsElemInv[1][1][1], result[0][1][0],result[0][1][1]); + result[0][1][0] += tempReal; + + EigenValsAndVecs::multiplyComplexNums(vecsElem[1][0][0],vecsElem[1][0][1], vecsElemInv[0][0][0],vecsElemInv[0][0][1], tempReal,tempImag); + EigenValsAndVecs::multiplyComplexNums(vecsElem[1][1][0],vecsElem[1][1][1], vecsElemInv[1][0][0],vecsElemInv[1][0][1], result[1][0][0],result[1][0][1]); + result[1][0][0] += tempReal; + + EigenValsAndVecs::multiplyComplexNums(vecsElem[1][0][0],vecsElem[1][0][1], vecsElemInv[0][1][0],vecsElemInv[0][1][1], tempReal,tempImag); + EigenValsAndVecs::multiplyComplexNums(vecsElem[1][1][0],vecsElem[1][1][1], vecsElemInv[1][1][0],vecsElemInv[1][1][1], result[1][1][0],result[1][1][1]); + result[1][1][0] += tempReal; + + sqrtT.r(i,i) = result[0][0][0]; + sqrtT.r(i,i+1) = result[0][1][0]; + sqrtT.r(i+1,i) = result[1][0][0]; + sqrtT.r(i+1,i+1) = result[1][1][0]; + + ++i; + } + } +} + +////////////////////////////////////////////////////////////////////////// +// all matrices are {2,2} here +template +static void sqrtmQuasiTrianAuxEq(const NDArray& A, const NDArray& B, const NDArray& C, NDArray& X) { + + NDArray tempMatrix(A.ordering(), {4,4}, A.dataType(), A.getContext()); + + tempMatrix.r(0,0) = A.t(0,0) + B.t(0,0); + tempMatrix.r(1,1) = A.t(0,0) + B.t(1,1); + tempMatrix.r(2,2) = A.t(1,1) + B.t(0,0); + tempMatrix.r(3,3) = A.t(1,1) + B.t(1,1); + tempMatrix.r(0,1) = B.t(1,0); + tempMatrix.r(0,2) = A.t(0,1); + tempMatrix.r(1,0) = B.t(0,1); + tempMatrix.r(1,3) = A.t(0,1); + tempMatrix.r(2,0) = A.t(1,0); + tempMatrix.r(2,3) = B.t(1,0); + tempMatrix.r(3,1) = A.t(1,0); + tempMatrix.r(3,2) = B.t(0,1); + tempMatrix.r(0,3) = (T)0; + tempMatrix.r(1,2) = (T)0; + tempMatrix.r(2,1) = (T)0; + tempMatrix.r(3,0) = (T)0; + + NDArray result(A.ordering(), {4,1}, A.dataType(), A.getContext()); + result.r(0,0) = C.t(0,0); + result.r(1,0) = C.t(0,1); + result.r(2,0) = C.t(1,0); + result.r(3,0) = C.t(1,1); + + FullPivLU::solve(tempMatrix, result, result); + + X.r(0,0) = result.t(0); + X.r(0,1) = result.t(1); + X.r(1,0) = result.t(2); + X.r(1,1) = result.t(3); +} + + +////////////////////////////////////////////////////////////////////////// +template +static void sqrtmQuasiTrianOffDiag(const NDArray& matrixT, NDArray& sqrtT ) { + + const int rows = matrixT.sizeAt(0); + + for (int j = 1; j < rows; j++) { + + if (matrixT.t(j, j-1) != (T)0) + continue; + + for (int i = j - 1; i >= 0; i--) { + + if (i > 0 && matrixT.t(i, i-1) != (T)0) + continue; + + const bool iBlockIs2x2 = (i < rows - 1) && (matrixT.t(i+1, i) != (T)0); + const bool jBlockIs2x2 = (j < rows - 1) && (matrixT.t(j+1, j) != (T)0); + + if (iBlockIs2x2 && jBlockIs2x2) { + + NDArray A = sqrtT({i,i+2, i,i+2}, true); + NDArray B = sqrtT({j,j+2, j,j+2}, true); + NDArray X = matrixT({i,i+2, j,j+2}, true);//.dup(); + + if (j - i > 2) + X -= mmul(sqrtT({i,i+2, i+2,j}, true), sqrtT({i+2,j, j,j+2}, true)); + + sqrtmQuasiTrianAuxEq(A, B, X, X); + + sqrtT.syncToDevice(); + sqrtT({i,i+2, j,j+2}, true).assign(X); + } + else if (iBlockIs2x2 && !jBlockIs2x2) { + + NDArray rhs = matrixT({i,i+2, j,j+1}, true);//.dup(); + + if (j - i > 2) + rhs -= mmul(sqrtT({i,i+2, i+2,j}, true), sqrtT({i+2,j, j,j+1}, true)); + + 
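+            // Illustrative note, not in the original patch: this branch pairs a 2x2
+            // diagonal block at i with a 1x1 block at j. Matching the (i,j) block of
+            // sqrtT * sqrtT = matrixT gives (S_ii + s_jj * I) * X = rhs, and A below
+            // assembles that 2x2 coefficient matrix for the full-pivot LU solve.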
NDArray A(matrixT.ordering(), {2,2}, matrixT.dataType(), matrixT.getContext()); + A.r(0,0) = A.r(1,1) = sqrtT.t(j,j); + A.r(0,1) = A.r(1,0) = T(0); + A += sqrtT({i,i+2, i,i+2}, true); + + FullPivLU::solve(A,rhs,rhs); + + // sqrtT.syncToDevice(); + sqrtT({i,i+2, j,j+1}, true).assign(rhs); + } + else if (!iBlockIs2x2 && jBlockIs2x2) { + + NDArray rhs = matrixT({i,i+1, j,j+2}, true);//.dup(); + + if (j - i > 1) + rhs -= mmul(sqrtT({i,i+1, i+1,j}, true), sqrtT({i+1,j, j,j+2}, true)); + + NDArray A(matrixT.ordering(), {2,2}, matrixT.dataType(), matrixT.getContext()); + A.r(0,0) = A.r(1,1) = sqrtT.t(i,i); + A.r(0,1) = A.r(1,0) = T(0); + A += sqrtT({j,j+2, j,j+2}, true).transpose(); + + NDArray rhsT = rhs.transpose(); + FullPivLU::solve(A,rhsT,rhsT); + + // sqrtT.syncToDevice(); + sqrtT({i,i+1, j,j+2}, true).assign(rhs); + } + else if (!iBlockIs2x2 && !jBlockIs2x2) { + + T temp = mmul(sqrtT({i,i+1, i+1,j}), sqrtT({i+1,j, j,j+1})).t(0); // dot + sqrtT.r(i,j) = (matrixT.t(i,j) - temp ) / (sqrtT.t(i,i) + sqrtT.t(j,j)); + } + } + } +} + +////////////////////////////////////////////////////////////////////////// +template +void Sqrtm::calc(const NDArray& in, NDArray& out) { + + if(in.rankOf() != 2 || in.sizeAt(0) != in.sizeAt(1)) + throw std::runtime_error("ops::helpers::Sqrtm::calc: input matrix must have rank 2 and be square !"); + if(!out.isSameShape(in)) + throw std::runtime_error("ops::helpers::Sqrtm::calc: output matrix must have the same shape as input one!"); + + if(in.lengthOf() == 1) { + out.r(0) = math::nd4j_sqrt(in.t(0)); + return; + } + + ops::helpers::Schur schur(in); + + const NDArray& t1 = schur._T; + const NDArray& t2 = schur._U; + + NDArray sqrtT = in.ulike(); + sqrtT.nullify(); + + sqrtmQuasiTrianDiag(schur._T, sqrtT); + sqrtmQuasiTrianOffDiag(schur._T, sqrtT); + + // out = U * sqrtT * U^T; + NDArray temp = mmul(sqrtT, schur._U.transpose()); + MmulHelper::mmul(&schur._U, &temp, &out); +} + +template class ND4J_EXPORT Sqrtm; +template class ND4J_EXPORT Sqrtm; +template class ND4J_EXPORT Sqrtm; +template class ND4J_EXPORT Sqrtm; + + +} +} +} \ No newline at end of file diff --git a/libnd4j/include/helpers/impl/biDiagonalUp.cpp b/libnd4j/include/helpers/impl/biDiagonalUp.cpp new file mode 100644 index 000000000..d5326c21a --- /dev/null +++ b/libnd4j/include/helpers/impl/biDiagonalUp.cpp @@ -0,0 +1,160 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// Created by Yurii Shyrma on 18.12.2017 +// + + +#include +#include + + +namespace sd { +namespace ops { +namespace helpers { + + +////////////////////////////////////////////////////////////////////////// +BiDiagonalUp::BiDiagonalUp(const NDArray& matrix): _HHmatrix(NDArray(matrix.ordering(), {matrix.sizeAt(0), matrix.sizeAt(1)}, matrix.dataType(), matrix.getContext())), + _HHbidiag(NDArray(matrix.ordering(), {matrix.sizeAt(1), matrix.sizeAt(1)}, matrix.dataType(), matrix.getContext())) { + + // input validation + if(matrix.rankOf() != 2 || matrix.isScalar()) + throw std::runtime_error("ops::helpers::biDiagonalizeUp constructor: input array must be 2D matrix !"); + + _HHmatrix.assign(&matrix); + _HHbidiag.assign(0.); + + evalData(); +} + +template +void BiDiagonalUp::_evalData() { + + const auto rows = _HHmatrix.sizeAt(0); + const auto cols = _HHmatrix.sizeAt(1); + + if(rows < cols) + throw std::runtime_error("ops::helpers::BiDiagonalizeUp::evalData method: this procedure is applicable only for input matrix with rows >= cols !"); + + T coeff, normX; + + T x, y; + + for(Nd4jLong i = 0; i < cols-1; ++i ) { + + // evaluate Householder matrix nullifying columns + NDArray column1 = _HHmatrix({i,rows, i,i+1}); + + x = _HHmatrix.t(i,i); + y = _HHbidiag.t(i,i); + + Householder::evalHHmatrixDataI(column1, x, y); + + _HHmatrix.r(i, i) = x; + _HHbidiag.r(i, i) = y; + + // multiply corresponding matrix block on householder matrix from the left: P * bottomRightCorner + NDArray bottomRightCorner1 = _HHmatrix({i,rows, i+1,cols}, true); // {i, cols} + Householder::mulLeft(bottomRightCorner1, _HHmatrix({i+1,rows, i,i+1}, true), _HHmatrix.t(i,i)); + + if(i == cols-2) + continue; // do not apply right multiplying at last iteration + + // evaluate Householder matrix nullifying rows + NDArray row1 = _HHmatrix({i,i+1, i+1,cols}); + + x = _HHmatrix.t(i,i+1); + y = _HHbidiag.t(i,i+1); + + Householder::evalHHmatrixDataI(row1, x, y); + + _HHmatrix.r(i, i+1) = x; + _HHbidiag.r(i, i+1) = y; + + // multiply corresponding matrix block on householder matrix from the right: bottomRightCorner * P + NDArray bottomRightCorner2 = _HHmatrix({i+1,rows, i+1,cols}, true); // {i, rows} + + Householder::mulRight(bottomRightCorner2, _HHmatrix({i,i+1, i+2,cols}, true), _HHmatrix.t(i,i+1)); + } + + NDArray row2 =_HHmatrix({cols-2,cols-1, cols-1,cols}); + + x = _HHmatrix.t(cols-2,cols-1); + y = _HHbidiag.t(cols-2,cols-1); + + Householder::evalHHmatrixDataI(row2, x, y); + + _HHmatrix.r(cols-2,cols-1) = x; + _HHbidiag.r(cols-2,cols-1) = y; + + NDArray column2 = _HHmatrix({cols-1,rows, cols-1,cols}); + + x = _HHmatrix.t(cols-1,cols-1); + y = _HHbidiag.t(cols-1,cols-1); + + Householder::evalHHmatrixDataI(column2, x, y); + + _HHmatrix.r(cols-1, cols-1) = x; + _HHbidiag.r(cols-1, cols-1) = y; +} + +////////////////////////////////////////////////////////////////////////// +void BiDiagonalUp::evalData() { + auto xType = _HHmatrix.dataType(); + BUILD_SINGLE_SELECTOR(xType, _evalData, ();, FLOAT_TYPES); +} + +////////////////////////////////////////////////////////////////////////// +template +HHsequence BiDiagonalUp::makeHHsequence_(const char type) { + + const int diagSize = type == 'u' ? 
_HHbidiag.sizeAt(0) : _HHbidiag.sizeAt(0) - 1; + + _hhCoeffs = NDArray(_HHmatrix.ordering(), {diagSize}, _HHmatrix.dataType(), _HHmatrix.getContext()); + + if(type == 'u') + for(int i = 0; i < diagSize; ++i) + _hhCoeffs.r(i) = _HHmatrix.t(i,i); + else + for(int i = 0; i < diagSize; ++i) + _hhCoeffs.r(i) = _HHmatrix.t(i,i+1); + + HHsequence result(_HHmatrix, _hhCoeffs, type); + + if(type != 'u') { + result._diagSize = diagSize; + result._shift = 1; + } + + return result; +} + +////////////////////////////////////////////////////////////////////////// +HHsequence BiDiagonalUp::makeHHsequence(const char type) { + auto xType = _HHmatrix.dataType(); + BUILD_SINGLE_SELECTOR(xType, return makeHHsequence_, (type);, FLOAT_TYPES); +} + + + +BUILD_SINGLE_TEMPLATE(template void BiDiagonalUp::_evalData, (), FLOAT_TYPES); +BUILD_SINGLE_TEMPLATE(template HHsequence BiDiagonalUp::makeHHsequence_, (const char type), FLOAT_TYPES); + +} +} +} \ No newline at end of file diff --git a/libnd4j/include/helpers/impl/hhColPivQR.cpp b/libnd4j/include/helpers/impl/hhColPivQR.cpp new file mode 100644 index 000000000..6f4bbebc9 --- /dev/null +++ b/libnd4j/include/helpers/impl/hhColPivQR.cpp @@ -0,0 +1,147 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
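In matrix terms, BiDiagonalUp above computes the Golub–Kahan upper bidiagonalization: for rows >= cols it builds Householder sequences Q_U and Q_V with

\[ Q_U^{\mathsf T} A\, Q_V = \begin{bmatrix} B \\ 0 \end{bmatrix}, \]

where B is upper bidiagonal (its two diagonals kept in _HHbidiag) and the reflector tails are packed below and to the right of the diagonal of _HHmatrix; makeHHsequence then exposes either factor as an HHsequence without ever forming it densely.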
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// Created by Yurii Shyrma on 11.01.2018 +// + +#include +#include + +namespace sd { +namespace ops { +namespace helpers { + + +////////////////////////////////////////////////////////////////////////// +HHcolPivQR::HHcolPivQR(const NDArray& matrix) { + + _qr = matrix.dup(); + _diagSize = math::nd4j_min(matrix.sizeAt(0), matrix.sizeAt(1)); + _coeffs = NDArray(matrix.ordering(), {1, _diagSize}, matrix.dataType(), matrix.getContext()); + + _permut = NDArray(matrix.ordering(), {matrix.sizeAt(1), matrix.sizeAt(1)}, matrix.dataType(), matrix.getContext()); + + evalData(); +} + + void HHcolPivQR::evalData() { + BUILD_SINGLE_SELECTOR(_qr.dataType(), _evalData, (), FLOAT_TYPES); + } + +////////////////////////////////////////////////////////////////////////// +template +void HHcolPivQR::_evalData() { + + const int rows = _qr.sizeAt(0); + const int cols = _qr.sizeAt(1); + + NDArray transp(_qr.ordering(), {cols}/*{1, cols}*/, _qr.dataType(), _qr.getContext()); + NDArray normsUpd(_qr.ordering(), {cols}/*{1, cols}*/, _qr.dataType(), _qr.getContext()); + NDArray normsDir(_qr.ordering(), {cols}/*{1, cols}*/, _qr.dataType(), _qr.getContext()); + + int transpNum = 0; + + for (int k = 0; k < cols; ++k) + normsDir.r(k) = normsUpd.r(k) = _qr({0,0, k,k+1}).reduceNumber(reduce::Norm2).t(0); + + T normScaled = (normsUpd.reduceNumber(reduce::Max)).t(0) * DataTypeUtils::eps(); + T threshold1 = normScaled * normScaled / (T)rows; + T threshold2 = math::nd4j_sqrt(DataTypeUtils::eps()); + + T nonZeroPivots = _diagSize; + T maxPivot = 0.; + + for(int k = 0; k < _diagSize; ++k) { + + int biggestColIndex = normsUpd({k,-1}).indexReduceNumber(indexreduce::IndexMax).e(0); + T biggestColNorm = normsUpd({k,-1}).reduceNumber(reduce::Max).t(0); + T biggestColSqNorm = biggestColNorm * biggestColNorm; + biggestColIndex += k; + + if(nonZeroPivots == (T)_diagSize && biggestColSqNorm < threshold1 * (T)(rows-k)) + nonZeroPivots = k; + + transp.r(k) = (T)biggestColIndex; + + if(k != biggestColIndex) { + + NDArray temp1(_qr({0,0, k,k+1})); + NDArray temp2(_qr({0,0, biggestColIndex,biggestColIndex+1})); + temp1.swapUnsafe(temp2); + + math::nd4j_swap(normsUpd.r(k), normsUpd.r(biggestColIndex)); + math::nd4j_swap(normsDir.r(k), normsDir.r(biggestColIndex)); + + ++transpNum; + } + + T normX, c; + NDArray qrBlock = _qr({k,rows, k,k+1}); + Householder::evalHHmatrixDataI(qrBlock, c, normX); + + _coeffs.r(k) = c; + + _qr.r(k,k) = normX; + + T max = math::nd4j_abs(normX); + if(max > maxPivot) + maxPivot = max; + + if(k < rows && (k+1) < cols) { + NDArray qrBlock = _qr({k,rows, k+1,cols}, true); + NDArray tail = _qr({k+1,rows, k, k+1}, true); + Householder::mulLeft(qrBlock, tail, _coeffs.t(k)); + } + + for (int j = k + 1; j < cols; ++j) { + + if (normsUpd.t(j) != (T)0.f) { + + T temp = math::nd4j_abs(_qr.t(k, j)) / normsUpd.t(j); + temp = ((T)1. + temp) * ((T)1. - temp); + temp = temp < (T)0. ? (T)0. 
: temp; + T temp2 = temp * normsUpd.t(j) * normsUpd.t(j) / (normsDir.t(j)*normsDir.t(j)); + + if (temp2 <= threshold2) { + if(k+1 < rows && j < cols) + normsDir.r(j) = _qr({k+1,rows, j,j+1}).reduceNumber(reduce::Norm2).t(0); + + normsUpd.r(j) = normsDir.t(j); + } + else + normsUpd.r(j) = normsUpd.t(j) * math::nd4j_sqrt(temp); + } + } + } + + _permut.setIdentity(); + + for(int k = 0; k < _diagSize; ++k) { + + int idx = transp.e(k); + NDArray temp1 = _permut({0,0, k, k+1}); + NDArray temp2 = _permut({0,0, idx,idx+1}); + temp1.swapUnsafe(temp2); + } +} + +BUILD_SINGLE_TEMPLATE(template void HHcolPivQR::_evalData, (), FLOAT_TYPES); + +} +} +} + diff --git a/libnd4j/include/helpers/cpu/hhSequence.cpp b/libnd4j/include/helpers/impl/hhSequence.cpp similarity index 59% rename from libnd4j/include/helpers/cpu/hhSequence.cpp rename to libnd4j/include/helpers/impl/hhSequence.cpp index 8a2a35329..dc038dfc8 100644 --- a/libnd4j/include/helpers/cpu/hhSequence.cpp +++ b/libnd4j/include/helpers/impl/hhSequence.cpp @@ -20,7 +20,6 @@ #include #include -#include namespace sd { namespace ops { @@ -29,40 +28,32 @@ namespace helpers { ////////////////////////////////////////////////////////////////////////// HHsequence::HHsequence(const NDArray& vectors, const NDArray& coeffs, const char type): _vectors(vectors), _coeffs(coeffs) { - + _diagSize = sd::math::nd4j_min(_vectors.sizeAt(0), _vectors.sizeAt(1)); - _shift = 0; + _shift = 0; _type = type; } ////////////////////////////////////////////////////////////////////////// template -void HHsequence::_mulLeft(NDArray& matrix) { +void HHsequence::mulLeft_(NDArray& matrix) { const int rows = _vectors.sizeAt(0); const int cols = _vectors.sizeAt(1); - const int inRows = matrix.sizeAt(0); + const int inRows = matrix.sizeAt(0); - NDArray* block(nullptr); + for(int i = _diagSize - 1; i >= 0; --i) { - for(int i = _diagSize - 1; i >= 0; --i) { - if(_type == 'u') { - - block = new NDArray(matrix({inRows-rows+_shift+ i,inRows, 0,0}, true)); - T _x = _coeffs.e(i); - Householder::mulLeft(*block, _vectors({i + 1 + _shift, rows, i, i+1}, true), _x); - _coeffs.p(i, _x); + + NDArray block = matrix({inRows-rows+_shift+ i,inRows, 0,0}, true); + Householder::mulLeft(block, _vectors({i + 1 + _shift, rows, i, i+1}, true), _coeffs.t(i)); } else { - block = new NDArray(matrix({inRows-cols+_shift+i,inRows, 0,0}, true)); - T _x = _coeffs.e(i); - Householder::mulLeft(*block, _vectors({i, i+1, i + 1 + _shift, cols}, true), _x); - _coeffs.p(i, _x); + NDArray block = matrix({inRows-cols+_shift+i,inRows, 0,0}, true); + Householder::mulLeft(block, _vectors({i, i+1, i + 1 + _shift, cols}, true), _coeffs.t(i)); } - - delete block; } } @@ -70,55 +61,51 @@ void HHsequence::_mulLeft(NDArray& matrix) { ////////////////////////////////////////////////////////////////////////// NDArray HHsequence::getTail(const int idx) const { - + int first = idx + 1 + _shift; - + if(_type == 'u') return _vectors({first, -1, idx, idx+1}, true); else - return _vectors({idx, idx+1, first, -1}, true); + return _vectors({idx, idx+1, first, -1}, true); } - ////////////////////////////////////////////////////////////////////////// template -void HHsequence::_applyTo(NDArray& dest) { - +void HHsequence::applyTo_(NDArray& dest) { + int size = _type == 'u' ? 
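The update loop near the end of HHcolPivQR::_evalData above is the classical pivoted-QR column-norm downdate: after step k eliminates row k, the remaining norm of column j satisfies

\[ \big\|a_j^{(k+1)}\big\|^2 = \big\|a_j^{(k)}\big\|^2 - r_{kj}^2 = \big\|a_j^{(k)}\big\|^2\left(1 - \left(\frac{|r_{kj}|}{\|a_j^{(k)}\|}\right)^{2}\right), \]

and the threshold2 = sqrt(eps) test is the usual LINPACK-style guard: once cancellation may have destroyed about half the digits of the running estimate (normsUpd), the norm is recomputed directly from the trailing part of the column into normsDir.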
_vectors.sizeAt(0) : _vectors.sizeAt(1); if(dest.rankOf() != 2 || (dest.sizeAt(0) != size && dest.sizeAt(1) != size)) - dest = NDArrayFactory::create(dest.ordering(), {size, size}, dest.dataType(), dest.getContext()); + dest = NDArray(dest.ordering(), {size, size}, dest.dataType(), dest.getContext()); dest.setIdentity(); - + for(int k = _diagSize - 1; k >= 0; --k) { - + int curNum = size - k - _shift; if(curNum < 1 || (k + 1 + _shift) >= size ) continue; auto block = dest({dest.sizeAt(0)-curNum,dest.sizeAt(0), dest.sizeAt(1)-curNum,dest.sizeAt(1)}, true); - T _x = _coeffs.e(k); - Householder::mulLeft(block, getTail(k), _x); - - _coeffs.p(k, _x); - } -} - - - void HHsequence::applyTo(NDArray& dest) { - auto xType = _coeffs.dataType(); - - BUILD_SINGLE_SELECTOR(xType, _applyTo, (dest), FLOAT_TYPES); + Householder::mulLeft(block, getTail(k), _coeffs.t(k)); } +} - void HHsequence::mulLeft(NDArray& matrix) { - auto xType = _coeffs.dataType(); +////////////////////////////////////////////////////////////////////////// +void HHsequence::applyTo(NDArray& dest) { + auto xType = _coeffs.dataType(); + BUILD_SINGLE_SELECTOR(xType, applyTo_, (dest), FLOAT_TYPES); +} - BUILD_SINGLE_SELECTOR(xType, _mulLeft, (matrix), FLOAT_TYPES); - } +////////////////////////////////////////////////////////////////////////// +void HHsequence::mulLeft(NDArray& matrix) { + auto xType = _coeffs.dataType(); + BUILD_SINGLE_SELECTOR(xType, mulLeft_, (matrix), FLOAT_TYPES); +} + +BUILD_SINGLE_TEMPLATE(template void HHsequence::applyTo_, (sd::NDArray &dest), FLOAT_TYPES); +BUILD_SINGLE_TEMPLATE(template void HHsequence::mulLeft_, (NDArray& matrix), FLOAT_TYPES); - BUILD_SINGLE_TEMPLATE(template void HHsequence::_applyTo, (sd::NDArray &dest), FLOAT_TYPES); - BUILD_SINGLE_TEMPLATE(template void HHsequence::_mulLeft, (NDArray& matrix), FLOAT_TYPES); } } } diff --git a/libnd4j/include/helpers/impl/householder.cpp b/libnd4j/include/helpers/impl/householder.cpp new file mode 100644 index 000000000..e9572f9f6 --- /dev/null +++ b/libnd4j/include/helpers/impl/householder.cpp @@ -0,0 +1,218 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// Created by Yurii Shyrma on 18.12.2017 +// + +#include + +namespace sd { +namespace ops { +namespace helpers { + + +////////////////////////////////////////////////////////////////////////// +// template +// NDArray Householder::evalHHmatrix(const NDArray& x) { + +// // input validation +// if(x.rankOf() != 1 && !x.isScalar()) +// throw std::runtime_error("ops::helpers::Householder::evalHHmatrix method: iinput array must have rank = 1 or to be scalar!"); + +// const auto xLen = x.lengthOf(); + +// NDArray w(x.ordering(), {xLen, 1}, x.dataType(), x.getContext()); // column-vector + +// NDArray xTail = xLen > 1 ? x({1,-1}) : NDArray(); +// T tailXnorm = xLen > 1 ? 
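The HHsequence refactored above represents an orthogonal factor implicitly as a product of reflectors,

\[ Q = H_0 H_1 \cdots H_{d-1}, \qquad H_i = I - c_i\, v_i v_i^{\mathsf T}, \]

with each v_i of the form (0, ..., 0, 1, tail_i), the coefficients c_i held in _coeffs, and the tails read out of _vectors (columns for type 'u', rows for type 'v'); mulLeft and applyTo apply the factors one at a time instead of materializing Q.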
xTail.reduceNumber(reduce::SquaredNorm).t(0) : (T)0; + +// const auto xFirstElem = x.t(0); + +// T coeff, normX; + +// if(tailXnorm <= DataTypeUtils::min()) { + +// normX = xFirstElem; +// coeff = 0.f; +// if(xLen > 1) +// w({1,-1, 0,0}) = 0.f; +// } +// else { + +// normX = math::nd4j_sqrt(xFirstElem*xFirstElem + tailXnorm); + +// if(xFirstElem >= (T)0.f) +// normX = -normX; // choose opposite sign to lessen roundoff error + +// coeff = (normX - xFirstElem) / normX; + +// if(xLen > 1) +// w({1,-1, 0,0}).assign(xTail / (xFirstElem - normX)); +// } + +// w.t(0) = (T)1; + +// NDArray identity(x.ordering(), {xLen, xLen}, x.dataType(), x.getContext()); +// identity.setIdentity(); // identity matrix + +// return identity - mmul(w, w.transpose()) * coeff; +// } + +////////////////////////////////////////////////////////////////////////// +template <typename T> +void Householder<T>::evalHHmatrixData(const NDArray& x, NDArray& tail, T& coeff, T& normX) { + + // input validation + if(x.rankOf() != 1 && !x.isScalar()) + throw std::runtime_error("ops::helpers::Householder::evalHHmatrixData method: input array must have rank = 1 or to be scalar!"); + + if(!x.isScalar() && x.lengthOf() != tail.lengthOf() + 1) + throw std::runtime_error("ops::helpers::Householder::evalHHmatrixData method: input tail vector must be one element shorter than input x vector!"); + + const auto xLen = x.lengthOf(); + + const NDArray xTail = xLen > 1 ? x({1,-1}) : NDArray(); + + T tailXnorm = xLen > 1 ? xTail.reduceNumber(reduce::SquaredNorm).t(0) : (T)0; + + const auto xFirstElem = x.t(0); + + if(tailXnorm <= DataTypeUtils::min()) { + + normX = xFirstElem; + coeff = (T)0.f; + tail = (T)0.f; + } + else { + + normX = math::nd4j_sqrt(xFirstElem*xFirstElem + tailXnorm); + + if(xFirstElem >= (T)0.f) + normX = -normX; // choose opposite sign to lessen roundoff error + + coeff = (normX - xFirstElem) / normX; + + tail.assign(xTail / (xFirstElem - normX)); + } +} + +////////////////////////////////////////////////////////////////////////// +template <typename T> +void Householder<T>::evalHHmatrixDataI(NDArray& x, T& coeff, T& normX) { + + // input validation + if(x.rankOf() != 1 && !x.isScalar()) + throw std::runtime_error("ops::helpers::Householder::evalHHmatrixDataI method: input array must have rank = 1 or to be scalar!"); + + int rows = (int)x.lengthOf()-1; + int num = 1; + + if(rows == 0) { + rows = 1; + num = 0; + } + + NDArray tail = x({num, -1}); + + evalHHmatrixData(x, tail, coeff, normX); +} + +////////////////////////////////////////////////////////////////////////// +template <typename T> +void Householder<T>::mulLeft(NDArray& matrix, const NDArray& tail, const T coeff) { + + // if(matrix.rankOf() != 2) + // throw "ops::helpers::Householder::mulLeft method: input array must be 2D matrix !"; + + if(matrix.sizeAt(0) == 1 && coeff != (T)0) { + + matrix *= (T) 1.f - coeff; + } + else if(coeff != (T)0.f) { + + NDArray bottomPart = matrix({1,matrix.sizeAt(0), 0,0}, true); + NDArray firstRow = matrix({0,1, 0,0}, true); + + if(tail.isColumnVector()) { + + auto resultingRow = mmul(tail.transpose(), bottomPart); + resultingRow += firstRow; + resultingRow *= coeff; + firstRow -= resultingRow; + bottomPart -= mmul(tail, resultingRow); + } + else { + + auto resultingRow = mmul(tail, bottomPart); + resultingRow += firstRow; + resultingRow *= coeff; + firstRow -= resultingRow; + bottomPart -= mmul(tail.transpose(), resultingRow); + } + } +} + + +////////////////////////////////////////////////////////////////////////// +template <typename T> +void Householder<T>::mulRight(NDArray& matrix,
const NDArray& tail, const T coeff) { + + // if(matrix.rankOf() != 2) + // throw "ops::helpers::Householder::mulRight method: input array must be 2D matrix !"; + + if(matrix.sizeAt(1) == 1 && coeff != (T)0) { + matrix *= (T)1.f - coeff; + } + else if(coeff != (T)0.f) { + + NDArray rightPart = matrix({0,0, 1,matrix.sizeAt(1)}, true); + NDArray firstCol = matrix({0,0, 0,1}, true); + + if(tail.isColumnVector()) { + + auto resultingCol = mmul(rightPart, tail); + resultingCol += firstCol; + resultingCol *= coeff; + firstCol -= resultingCol; + rightPart -= mmul(resultingCol, tail.transpose()); + } + else { + + auto resultingCol = mmul(rightPart, tail.transpose()); + resultingCol += firstCol; + resultingCol *= coeff; + firstCol -= resultingCol; + rightPart -= mmul(resultingCol, tail); + } + } +} + + +template class ND4J_EXPORT Householder<float>; +template class ND4J_EXPORT Householder<float16>; +template class ND4J_EXPORT Householder<bfloat16>; +template class ND4J_EXPORT Householder<double>; + + + + + + + +} +} +} diff --git a/libnd4j/include/helpers/cpu/jacobiSVD.cpp b/libnd4j/include/helpers/impl/jacobiSVD.cpp similarity index 58% rename from libnd4j/include/helpers/cpu/jacobiSVD.cpp rename to libnd4j/include/helpers/impl/jacobiSVD.cpp index 372a2a409..7fbf183b2 100644 --- a/libnd4j/include/helpers/cpu/jacobiSVD.cpp +++ b/libnd4j/include/helpers/impl/jacobiSVD.cpp @@ -20,8 +20,7 @@ #include #include -#include - +#include namespace sd { namespace ops { namespace helpers { @@ -43,27 +42,27 @@ JacobiSVD<T>::JacobiSVD(const NDArray& matrix, const bool calcU, const bool calc _calcV = calcV; _fullUV = fullUV; - _s = NDArrayFactory::create(matrix.ordering(), {_diagSize, 1}, matrix.dataType(), matrix.getContext()); + _s = NDArray(matrix.ordering(), {_diagSize, 1}, matrix.dataType(), matrix.getContext()); if(_calcU) { if(_fullUV) - _u = NDArrayFactory::create(matrix.ordering(), {_rows, _rows}, matrix.dataType(), matrix.getContext()); + _u = NDArray(matrix.ordering(), {_rows, _rows}, matrix.dataType(), matrix.getContext()); else - _u = NDArrayFactory::create(matrix.ordering(), {_rows, _diagSize}, matrix.dataType(), matrix.getContext()); + _u = NDArray(matrix.ordering(), {_rows, _diagSize}, matrix.dataType(), matrix.getContext()); } else - _u = NDArrayFactory::create(matrix.ordering(), {_rows, 1}, matrix.dataType(), matrix.getContext()); + _u = NDArray(matrix.ordering(), {_rows, 1}, matrix.dataType(), matrix.getContext()); if(_calcV) { if(_fullUV) - _v = NDArrayFactory::create(matrix.ordering(), {_cols, _cols}, matrix.dataType(), matrix.getContext()); + _v = NDArray(matrix.ordering(), {_cols, _cols}, matrix.dataType(), matrix.getContext()); else - _v = NDArrayFactory::create(matrix.ordering(), {_cols, _diagSize}, matrix.dataType(), matrix.getContext()); + _v = NDArray(matrix.ordering(), {_cols, _diagSize}, matrix.dataType(), matrix.getContext()); } else - _v = NDArrayFactory::create(matrix.ordering(), {_cols, 1}, matrix.dataType(), matrix.getContext()); + _v = NDArray(matrix.ordering(), {_cols, 1}, matrix.dataType(), matrix.getContext()); - _m = NDArrayFactory::create(matrix.ordering(), {_diagSize, _diagSize}, matrix.dataType(), matrix.getContext()); + _m = NDArray(matrix.ordering(), {_diagSize, _diagSize}, matrix.dataType(), matrix.getContext()); evalData(matrix); } @@ -77,16 +76,19 @@ void JacobiSVD<T>::mulRotationOnLeft(const int i, const int j, NDArray& block, c if(j+1 > block.sizeAt(0)) throw std::runtime_error("ops::helpers::JacobiSVD mulRotationOnLeft: second argument is out of array row range !"); - auto pTemp = block({i,j+1,j-i, 0,0,0}, true, true); - auto
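As a sanity check of the reflector formulas in evalHHmatrixData above, here is a minimal standalone sketch in plain C++ (std::vector instead of NDArray; the helper name and the hard-coded tolerance are illustrative stand-ins, not library API):

    #include <cmath>
    #include <cstdio>
    #include <vector>

    // Fills 'tail', 'coeff', 'normX' so that with v = [1, tail...] the
    // reflector H = I - coeff * v * v^T maps x onto [normX, 0, ..., 0]^T.
    static void householder(const std::vector<double>& x, std::vector<double>& tail,
                            double& coeff, double& normX) {
        const double x0 = x[0];
        double tailSq = 0.0;
        for (std::size_t i = 1; i < x.size(); ++i) tailSq += x[i] * x[i];
        tail.assign(x.size() - 1, 0.0);
        if (tailSq <= 1e-300) {          // tail numerically zero: nothing to reflect
            normX = x0;
            coeff = 0.0;
            return;
        }
        normX = std::sqrt(x0 * x0 + tailSq);
        if (x0 >= 0.0) normX = -normX;   // opposite sign lessens roundoff error
        coeff = (normX - x0) / normX;
        for (std::size_t i = 1; i < x.size(); ++i) tail[i - 1] = x[i] / (x0 - normX);
    }

    int main() {
        std::vector<double> x{3.0, 4.0, 0.0}, tail;
        double coeff, normX;
        householder(x, tail, coeff, normX);
        // Apply H = I - coeff * v * v^T with v = [1, tail...] and print H x.
        std::vector<double> v{1.0, tail[0], tail[1]};
        double vTx = 0.0;
        for (std::size_t i = 0; i < x.size(); ++i) vTx += v[i] * x[i];
        for (std::size_t i = 0; i < x.size(); ++i) x[i] -= coeff * v[i] * vTx;
        std::printf("normX=%g  Hx=[%g %g %g]\n", normX, x[0], x[1], x[2]);
        return 0;
    }

For x = (3, 4, 0) this prints normX = -5 and Hx = [-5, 0, 0]: the reflector annihilates the tail as intended.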
temp = pTemp; - pTemp.assign(mmul(rotation, temp)); + auto temp = block({i,j+1,j-i, 0,0,0}, true, true); + temp.assign(mmul(rotation, temp)); + + // auto pTemp = block({i,j+1,j-i, 0,0,0}, true, true); + // auto temp = pTemp.dup(); + // pTemp.assign(mmul(rotation, temp)); } else { if(j+1 > block.sizeAt(0) || i+1 > block.sizeAt(0)) throw std::runtime_error("ops::helpers::JacobiSVD mulRotationOnLeft: some or both integer arguments are out of array row range !"); - auto temp = NDArrayFactory::create(block.ordering(), {2, block.sizeAt(1)}, block.dataType(), block.getContext()); + NDArray temp(block.ordering(), {2, block.sizeAt(1)}, block.dataType(), block.getContext()); auto row1 = block({i,i+1, 0,0}, true); auto row2 = block({j,j+1, 0,0}, true); auto rowTemp1 = temp({0,1, 0,0}, true); @@ -108,16 +110,19 @@ void JacobiSVD::mulRotationOnRight(const int i, const int j, NDArray& block, if(j+1 > block.sizeAt(1)) throw std::runtime_error("ops::helpers::JacobiSVD mulRotationOnRight: second argument is out of array column range !"); - auto pTemp = block({0,0,0, i,j+1,j-i}, true, true); - auto temp = pTemp; - pTemp.assign(mmul(temp, rotation)); + auto temp = block({0,0,0, i,j+1,j-i}, true, true); + temp.assign(mmul(temp, rotation)); + + // auto pTemp = block({0,0,0, i,j+1,j-i}, true, true); + // auto temp = pTemp.dup(); + // pTemp.assign(mmul(temp, rotation)); } else { if(j+1 > block.sizeAt(1) || i+1 > block.sizeAt(1)) throw std::runtime_error("ops::helpers::JacobiSVD mulRotationOnRight: some or both integer arguments are out of array column range !"); - auto temp = NDArrayFactory::create(block.ordering(), {block.sizeAt(0), 2}, block.dataType(), block.getContext()); + NDArray temp(block.ordering(), {block.sizeAt(0), 2}, block.dataType(), block.getContext()); auto col1 = block({0,0, i,i+1}, true); auto col2 = block({0,0, j,j+1}, true); auto colTemp1 = temp({0,0, 0,1}, true); @@ -134,123 +139,148 @@ void JacobiSVD::mulRotationOnRight(const int i, const int j, NDArray& block, template bool JacobiSVD::isBlock2x2NotDiag(NDArray& block, int p, int q, T& maxElem) { - auto rotation = NDArrayFactory::create(_m.ordering(), {2, 2}, _m.dataType(), _m.getContext()); - T n = math::nd4j_sqrt(block.e(p,p) * block.e(p,p) + block.e(q,p) * block.e(q,p)); + NDArray rotation(_m.ordering(), {2, 2}, _m.dataType(), _m.getContext()); + + T n = math::nd4j_sqrt(block.t(p, p) * block.t(p, p) + block.t(q, p)*block.t(q, p)); const T almostZero = DataTypeUtils::min(); const T precision = DataTypeUtils::eps(); if(n == (T)0.f) { - block.p(p, p, 0.f); - block.p(q, p, 0.f); + block.r(p, p) = (T)0; + block.r(q, p) = (T)0; } else { - T v = block.e(p, p) / n; + T v = block.t(p, p) / n; - rotation.p(0, 0, v); - rotation.p(1,1, v); + rotation.r(0,0) = rotation.r(1,1) = v; - v = block.e(q,p) / n; - rotation.p(0, 1, v); + v = block.t(q, p) / n; + rotation.r(0,1) = v; - rotation.p(1,0, -rotation.template e(0, 1)); + rotation.r(1,0) = -rotation.template t(0,1); mulRotationOnLeft(p, q, block, rotation); - if(_calcU) { - auto temp2 = rotation.transpose(); - mulRotationOnRight(p, q, _u, temp2); - } + if(_calcU) + mulRotationOnRight(p, q, _u, rotation.transpose()); } - maxElem = math::nd4j_max(maxElem, math::nd4j_max(math::nd4j_abs(block.e(p,p)), math::nd4j_abs(block.e(q,q)))); + maxElem = math::nd4j_max(maxElem, math::nd4j_max(math::nd4j_abs(block.t(p, p)), math::nd4j_abs(block.t(q, q)))); T threshold = math::nd4j_max(almostZero, precision * maxElem); - const bool condition1 = math::nd4j_abs(block.e(p,q)) > threshold; - const bool condition2 = 
math::nd4j_abs(block.e(q,p)) > threshold; - return condition1 || condition2; + return math::nd4j_abs(block.t(p, q)) > threshold || math::nd4j_abs(block.t(q, p)) > threshold; } ////////////////////////////////////////////////////////////////////////// template bool JacobiSVD::createJacobiRotation(const T& x, const T& y, const T& z, NDArray& rotation) { - T denom = 2.* math::nd4j_abs(y); + T denom = (T)(2.f)* math::nd4j_abs(y); if(denom < DataTypeUtils::min()) { - rotation.p(0,0, 1.f); - rotation.p(1,1, 1.f); - rotation.p(0,1, 0.f); - rotation.p(1,0, 0.f); + rotation.r(0,0) = rotation.r(1,1) = (T)1.f; + rotation.r(0,1) = rotation.r(1,0) = (T)0.f; + return false; } else { T tau = (x-z)/denom; - T w = math::nd4j_sqrt(tau*tau + 1.); + T w = math::nd4j_sqrt(tau*tau + (T)1.f); T t; if(tau > (T)0.) - t = 1. / (tau + w); + t = (T)1.f / (tau + w); else - t = 1. / (tau - w); + t = (T)1.f / (tau - w); - T sign = t > (T)0. ? 1. : -1.; - T n = 1. / math::nd4j_sqrt(t*t + 1.f); - rotation.p(0,0, n); - rotation.p(1,1, n); + T sign = t > (T)0. ? (T)1.f : (T)-1.f; - rotation.p(0,1, -sign * (y / math::nd4j_abs(y)) * math::nd4j_abs(t) * n); - rotation.p(1,0, -rotation.e(0,1)); + T cos = (T)1.f / math::nd4j_sqrt(t*t + (T)1.f); + T sin = -sign * (y / math::nd4j_abs(y)) * math::nd4j_abs(t) * cos; + + rotation.r(0,1) = sin; + rotation.r(1,0) = -sin; + rotation.r(0,0) = rotation.r(1,1) = cos; return true; } } + +////////////////////////////////////////////////////////////////////////// +template +void JacobiSVD::createJacobiRotationGivens(const T& p, const T& q, NDArray& rotation) { + + T cos, sin; + + if(q == (T)0) { + + cos = p < (T)0 ? (T)-1 : (T)1; + sin = (T)0; + } + else if(p == (T)0) { + + cos = (T)0; + sin = q < (T)0 ? (T)1 : (T)-1; + } + else if(math::nd4j_abs(p) > math::nd4j_abs(q)) { + + T t = q / p; + T u = math::nd4j_sqrt((T)1 + t*t); + if(p < (T)0) + u = -u; + cos = (T)1 / u; + sin = -t * cos; + } + else { + T t = p / q; + T u = math::nd4j_sqrt((T)1 + t*t); + if(q < (T)0) + u = -u; + sin = -(T)1 / u; + cos = -t * sin; + } + + rotation.r(0,1) = sin; + rotation.r(1,0) = -sin; + rotation.r(0,0) = rotation.r(1,1) = cos; +} + + ////////////////////////////////////////////////////////////////////////// template void JacobiSVD::svd2x2(const NDArray& block, int p, int q, NDArray& left, NDArray& right) { - auto m = NDArrayFactory::create(block.ordering(), {2, 2}, block.dataType(), block.getContext()); - m.p(0,0, block.e(p,p)); - m.p(0,1, block.e(p,q)); - m.p(1,0, block.e(q,p)); - m.p(1,1, block.e(q,q)); + NDArray m(block.ordering(), {2, 2}, block.dataType(), block.getContext()); + m.r(0,0) = block.t(p,p); + m.r(0,1) = block.t(p,q); + m.r(1,0) = block.t(q,p); + m.r(1,1) = block.t(q,q); - auto rotation = NDArrayFactory::create(block.ordering(), {2, 2}, block.dataType(), block.getContext()); - T t = m.e(0,0) + m.e(1,1); - T d = m.e(1,0) - m.e(0,1); + NDArray rotation(block.ordering(), {2, 2}, block.dataType(), block.getContext()); + T t = m.t(0,0) + m.t(1,1); + T d = m.t(1,0) - m.t(0,1); if(math::nd4j_abs(d) < DataTypeUtils::min()) { - rotation.p(0,0, 1.f); - rotation.p(1,1, 1.f); - rotation.p(0,1, 0.f); - rotation.p(1,0, 0.f); + rotation.r(0,0) = rotation.r(1,1) = (T)1; + rotation.r(0,1) = rotation.r(1,0) = (T)0; } else { T u = t / d; - T tmp = math::nd4j_sqrt(1. 
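createJacobiRotation above is the textbook symmetric 2x2 Jacobi (Schur) rotation. For the block [[x, y], [y, z]] it selects, in the code's own notation,

\[ \tau = \frac{x - z}{2|y|}, \qquad t = \frac{\operatorname{sign}(\tau)}{|\tau| + \sqrt{1 + \tau^{2}}}, \qquad c = \frac{1}{\sqrt{1 + t^{2}}}, \qquad s = -\frac{y}{|y|}\, t\, c, \]

taking the smaller-magnitude root t of t² + 2τt − 1 = 0, which keeps the rotation angle below π/4 and is what makes the sweep numerically stable. The new createJacobiRotationGivens is the companion plain Givens rotation, zeroing the second component of (p, q)ᵀ.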
+ u*u); - rotation.p(0,0, u / tmp); - rotation.p(1,1, u / tmp); - rotation.p(0,1, 1.f / tmp); - rotation.p(1,0, -rotation.e(0,1)); + T tmp = math::nd4j_sqrt((T)1.f + u*u); + rotation.r(0,0) = rotation.r(1,1) = u / tmp; + rotation.r(0,1) = (T)1.f / tmp; + rotation.r(1,0) = -rotation.t(0,1); } m.assign(mmul(rotation, m)); - auto _x = m.e(0,0); - auto _y = m.e(0,1); - auto _z = m.e(1,1); + createJacobiRotation(m.t(0,0), m.t(0,1), m.t(1,1), right); - createJacobiRotation(_x, _y, _z, right); - - m.p(0, 0, _x); - m.p(0, 1, _y); - m.p(1, 1, _z); - - auto temp = right.transpose(); - left.assign(mmul(rotation, temp)); + left.assign(mmul(rotation, right.transpose())); } @@ -261,7 +291,7 @@ void JacobiSVD::evalData(const NDArray& matrix) { const T precision = (T)2.f * DataTypeUtils::eps(); const T almostZero = DataTypeUtils::min(); - T scale = matrix.reduceNumber(reduce::AMax).e(0); + T scale = matrix.reduceNumber(reduce::AMax).template t(0); if(scale== (T)0.f) scale = (T)1.f; @@ -285,13 +315,12 @@ void JacobiSVD::evalData(const NDArray& matrix) { } else if(_rows < _cols) { - auto matrixT = matrix.transpose(); - HHcolPivQR qr(matrixT / scale); + HHcolPivQR qr(matrix.transpose() / scale); _m.assign(qr._qr({0,_rows, 0,_rows})); _m.fillAsTriangular(0., 0, 0, _m, 'l'); _m.transposei(); - HHsequence hhSeg(qr._qr, qr._coeffs, 'u'); // type = 'u' is not mistake here ! + HHsequence hhSeg(qr._qr, qr._coeffs, 'u'); // type = 'u' is not mistake here ! if(_fullUV) hhSeg.applyTo(_v); @@ -305,7 +334,7 @@ void JacobiSVD::evalData(const NDArray& matrix) { } else { - _m.assign(static_cast(matrix({0,_diagSize, 0,_diagSize})) / scale); + _m.assign(matrix({0,_diagSize, 0,_diagSize}) / scale); if(_calcU) _u.setIdentity(); @@ -316,7 +345,7 @@ void JacobiSVD::evalData(const NDArray& matrix) { T maxDiagElem = 0.; for(int i = 0; i < _diagSize; ++i) { - T current = math::nd4j_abs(_m.e(i,i)); + T current = math::nd4j_abs(_m.t(i,i)); if(maxDiagElem < current ) maxDiagElem = current; } @@ -333,29 +362,27 @@ void JacobiSVD::evalData(const NDArray& matrix) { T threshold = math::nd4j_max(almostZero, precision * maxDiagElem); - if(math::nd4j_abs(_m.e(p,q)) > threshold || math::nd4j_abs(_m.e(q,p)) > threshold){ + if(math::nd4j_abs(_m.t(p,q)) > threshold || math::nd4j_abs(_m.t(q,p)) > threshold){ stop = false; // if(isBlock2x2NotDiag(_m, p, q, maxDiagElem)) { - auto rotLeft = NDArrayFactory::create(_m.ordering(), {2, 2}, _m.dataType(), _m.getContext()); - auto rotRight = NDArrayFactory::create(_m.ordering(), {2, 2}, _m.dataType(), _m.getContext()); + NDArray rotLeft(_m.ordering(), {2, 2}, _m.dataType(), _m.getContext()); + NDArray rotRight(_m.ordering(), {2, 2}, _m.dataType(), _m.getContext()); svd2x2(_m, p, q, rotLeft, rotRight); mulRotationOnLeft(p, q, _m, rotLeft); - if(_calcU) { - auto temp = rotLeft.transpose(); - mulRotationOnRight(p, q, _u, temp); - } + if(_calcU) + mulRotationOnRight(p, q, _u, rotLeft.transpose()); mulRotationOnRight(p, q, _m, rotRight); if(_calcV) mulRotationOnRight(p, q, _v, rotRight); - maxDiagElem = math::nd4j_max(maxDiagElem, math::nd4j_max(math::nd4j_abs(_m.e(p,p)), math::nd4j_abs(_m.e(q,q)))); + maxDiagElem = math::nd4j_max(maxDiagElem, math::nd4j_max(math::nd4j_abs(_m.t(p,p)), math::nd4j_abs(_m.t(q,q)))); } } } @@ -363,8 +390,10 @@ void JacobiSVD::evalData(const NDArray& matrix) { } for(int i = 0; i < _diagSize; ++i) { - _s.p(i, math::nd4j_abs(_m.e(i,i))); - if(_calcU && _m.e(i,i) < (T)0.) { + + _s.r(i) = math::nd4j_abs(_m.t(i,i)); + + if(_calcU && _m.t(i,i) < (T)0.) 
{ auto temp = _u({0,0, i,i+1}, true); temp.applyTransform(transform::Neg, temp, nullptr); } @@ -375,7 +404,7 @@ void JacobiSVD::evalData(const NDArray& matrix) { for(int i = 0; i < _diagSize; i++) { int pos = (_s({i,-1, 0,0}).indexReduceNumber(indexreduce::IndexMax, nullptr)).template e(0); - T maxSingVal = _s({i,-1, 0,0}).reduceNumber(reduce::Max).template e(0); + T maxSingVal = _s({i,-1, 0,0}).reduceNumber(reduce::Max).template t(0); if(maxSingVal == (T)0.) break; @@ -384,34 +413,24 @@ void JacobiSVD::evalData(const NDArray& matrix) { pos += i; - T _e0 = _s.e(i); - T _e1 = _s.e(pos); - _s.p(pos, _e0); - _s.p(i, _e1); - //math::nd4j_swap(_s(i), _s(pos)); + math::nd4j_swap(_s.r(i), _s.r(pos)); if(_calcU) { auto temp1 = _u({0,0, pos,pos+1}, true); auto temp2 = _u({0,0, i,i+1}, true); - auto temp3 = temp1; - temp1.assign(temp2); - temp2.assign(temp3); + temp1.swapUnsafe(temp2); } if(_calcV) { auto temp1 = _v({0,0, pos, pos+1}, true); auto temp2 = _v({0,0, i, i+1}, true); - auto temp3 = temp1; - temp1.assign(temp2); - temp2.assign(temp3); + temp1.swapUnsafe(temp2); } } } } - - template class ND4J_EXPORT JacobiSVD; template class ND4J_EXPORT JacobiSVD; template class ND4J_EXPORT JacobiSVD; diff --git a/libnd4j/include/helpers/jacobiSVD.h b/libnd4j/include/helpers/jacobiSVD.h index f6f161bbb..615811e9a 100644 --- a/libnd4j/include/helpers/jacobiSVD.h +++ b/libnd4j/include/helpers/jacobiSVD.h @@ -31,13 +31,13 @@ namespace helpers { template class JacobiSVD { - public: + public: NDArray _m; NDArray _s; // vector with singular values NDArray _u; NDArray _v; - + int _diagSize; int _rows; int _cols; @@ -52,7 +52,8 @@ class JacobiSVD { bool isBlock2x2NotDiag(NDArray& block, int p, int q, T& maxElem); static bool createJacobiRotation(const T& x, const T& y, const T& z, NDArray& rotation); - + static void createJacobiRotationGivens(const T& p, const T& q, NDArray& rotation); + static void svd2x2(const NDArray& block, int p, int q, NDArray& left, NDArray& right); static void mulRotationOnLeft(const int i, const int j, NDArray& block, const NDArray& rotation); diff --git a/libnd4j/include/helpers/shape.h b/libnd4j/include/helpers/shape.h index 8cde62ea1..65cf29b66 100644 --- a/libnd4j/include/helpers/shape.h +++ b/libnd4j/include/helpers/shape.h @@ -528,7 +528,7 @@ namespace shape { * Returns the element wise stride for this information * buffer */ - ND4J_EXPORT _CUDA_HD Nd4jLong elementWiseStride(const Nd4jLong *buffer); + ND4J_EXPORT _CUDA_HD Nd4jLong elementWiseStride(const Nd4jLong *shapeInfo); /** diff --git a/libnd4j/include/loops/cuda/specials/swapUnsafeKernel.cu b/libnd4j/include/loops/cuda/specials/swapUnsafeKernel.cu index 334584fab..6d2bcadf5 100644 --- a/libnd4j/include/loops/cuda/specials/swapUnsafeKernel.cu +++ b/libnd4j/include/loops/cuda/specials/swapUnsafeKernel.cu @@ -31,23 +31,37 @@ namespace sd { auto tid = blockIdx.x * blockDim.x + threadIdx.x; int totalThreads = gridDim.x * blockDim.x; - __shared__ Nd4jLong resultLength; + __shared__ Nd4jLong resultLength, xEws, yEws; + __shared__ bool sameOffsets, sameOrders; __shared__ T* input; __shared__ T* output; + if (0 == threadIdx.x) { resultLength = shape::length(theFirstShape); input = reinterpret_cast(theSecondBuffer); output = reinterpret_cast(theFirstBuffer); + + sameOffsets = shape::haveSameShapeAndStrides(theFirstShape, theSecondShape); + sameOrders = shape::order(theFirstShape) == shape::order(theSecondShape); + + xEws = shape::elementWiseStride(theFirstShape); + yEws = shape::elementWiseStride(theSecondShape); } __syncthreads(); 
for (int i = tid; i < resultLength; i += totalThreads) { - auto xEws = shape::order(theFirstShape) == 'c'? shape::elementWiseStride(theFirstShape) :1; - auto yEws = shape::order(theSecondShape) == 'c'? shape::elementWiseStride(theSecondShape):1; - - auto xOffset = shape::getIndexOffset(i * xEws, theFirstShape); - auto yOffset = shape::getIndexOffset(i * yEws, theSecondShape); - sd::math::nd4j_swap(output[xOffset], input[yOffset]); + if(sameOrders && xEws > 0 && yEws > 0) { + sd::math::nd4j_swap(output[i*xEws], input[i*yEws]); + } + else if(sameOffsets) { + const auto offset = shape::getIndexOffset(i, theFirstShape); + sd::math::nd4j_swap(output[offset], input[offset]); + } + else{ + const auto xOffset = shape::getIndexOffset(i, theFirstShape); + const auto yOffset = shape::getIndexOffset(i, theSecondShape); + sd::math::nd4j_swap(output[xOffset], input[yOffset]); + } } } diff --git a/libnd4j/include/ops/declarable/generic/linalg/sqrtm.cpp b/libnd4j/include/ops/declarable/generic/linalg/sqrtm.cpp new file mode 100644 index 000000000..37472008d --- /dev/null +++ b/libnd4j/include/ops/declarable/generic/linalg/sqrtm.cpp @@ -0,0 +1,53 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
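The rewritten swapUnsafeKernel above computes the shape predicates and element-wise strides once per block (thread 0, shared memory) instead of per element, and it also stops feeding i*ews into getIndexOffset, which conflated linear indices with buffer offsets. A CPU-side sketch of the same three-path dispatch (illustrative names, not library API):

    #include <algorithm>
    #include <cstdint>

    // Three-path element swap mirroring the kernel's dispatch:
    // 1) both buffers expressible via a constant element-wise stride (ews),
    // 2) identical shape + strides, so one offset computation serves both,
    // 3) fully general: one offset computation per buffer.
    template <typename T, typename OffsetFn>
    void swapBuffers(T* x, int64_t xEws, T* y, int64_t yEws,
                     bool sameOrders, bool sameOffsets, int64_t len,
                     OffsetFn xOffset, OffsetFn yOffset) {
        for (int64_t i = 0; i < len; ++i) {
            if (sameOrders && xEws > 0 && yEws > 0)
                std::swap(x[i * xEws], y[i * yEws]);      // fast path
            else if (sameOffsets)
                std::swap(x[xOffset(i)], y[xOffset(i)]);  // one offset for both
            else
                std::swap(x[xOffset(i)], y[yOffset(i)]);  // general path
        }
    }

    int main() {
        double a[4] = {1, 2, 3, 4}, b[4] = {5, 6, 7, 8};
        auto identity = [](int64_t i) { return i; };      // contiguous buffers
        swapBuffers(a, 1, b, 1, true, true, 4, identity, identity);
        return 0;
    }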
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author Yurii Shyrma (iuriish@yahoo.com) +// + +#include +#if NOT_EXCLUDED(OP_sqrtm) +#include +#include + + +namespace sd { +namespace ops { + +CONFIGURABLE_OP_IMPL(sqrtm, 1, 1, false, 0, 0) { + + auto input = INPUT_VARIABLE(0); + auto output = OUTPUT_VARIABLE(0); + + REQUIRE_TRUE(input->rankOf() > 1, 0, "CONFIGURABLE_OP sqrtm: input array rank is required to be > 1, but got %i instead !", input->rankOf()); + REQUIRE_TRUE(input->sizeAt(-2) == input->sizeAt(-1), 0, "CONFIGURABLE_OP sqrtm: last two dimensions of input array should be square matrices, but got such wrong shape instead: %s!", ShapeUtils::shapeAsString(input).c_str()); + + helpers::sqrtm(block.launchContext(), input, output); + + return Status::OK(); +} + +////////////////////////////////////////////////////////////////////////// +DECLARE_TYPES(sqrtm) { + getOpDescriptor()->setAllowedInputTypes(sd::DataType::ANY)->setAllowedOutputTypes({ALL_FLOATS}); +} + + + +} +} + +#endif \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/generic/blas/svd.cpp b/libnd4j/include/ops/declarable/generic/linalg/svd.cpp similarity index 100% rename from libnd4j/include/ops/declarable/generic/blas/svd.cpp rename to libnd4j/include/ops/declarable/generic/linalg/svd.cpp diff --git a/libnd4j/include/ops/declarable/generic/linalg/triangular_solve.cpp b/libnd4j/include/ops/declarable/generic/linalg/triangular_solve.cpp index c9d23753c..49ec1e135 100644 --- a/libnd4j/include/ops/declarable/generic/linalg/triangular_solve.cpp +++ b/libnd4j/include/ops/declarable/generic/linalg/triangular_solve.cpp @@ -55,13 +55,13 @@ namespace sd { isLower = !isLower; }; - auto res = helpers::triangularSolveFunctor(block.launchContext(), input, b, isLower, useAdjoint, z); + auto res = helpers::triangularSolveFunctor(block.launchContext(), input, b, isLower, false, z); if (input != a) delete input; return Status::OK(); } - + DECLARE_SHAPE_FN(triangular_solve) { auto in0 = inputShape->at(1); auto in1 = inputShape->at(1); diff --git a/libnd4j/include/ops/declarable/headers/blas.h b/libnd4j/include/ops/declarable/headers/blas.h index 09215e113..6fd5a3894 100644 --- a/libnd4j/include/ops/declarable/headers/blas.h +++ b/libnd4j/include/ops/declarable/headers/blas.h @@ -24,7 +24,7 @@ namespace sd { namespace ops { - + /** * This op is a general matmul implementation. Depending on inputs dimensionality output result might be different. * matrix x matrix = BLAS gemm * @@ -75,11 +75,11 @@ * alpha: vector of T * beta: vector of T * ...: A, B matrices sequentially. i.e: AAAAABBBBB - * + * * Integer arguments: * transA, transB, M, N, K, ldA, ldB, ldC - usual BLAS gemm arguments * batchCount - number of operations in this batch - * + * * PLEASE NOTE: M, N, K, ldA, ldB, ldC should be equal for all matrices within batch.
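A minimal smoke test for the new sqrtm op, sketched in the idiom of the libnd4j test suite (the evaluate/ResultSet conventions are assumed from that harness, and the diagonal input is chosen so the expected root is obvious):

    #include <ops/declarable/CustomOperations.h>

    // Sketch only: [[4,0],[0,9]] has the principal square root [[2,0],[0,3]],
    // so the op's output z should satisfy z * z == x up to rounding.
    void sqrtmSmokeTest() {
        auto x   = sd::NDArrayFactory::create<double>('c', {2, 2}, {4., 0., 0., 9.});
        auto exp = sd::NDArrayFactory::create<double>('c', {2, 2}, {2., 0., 0., 3.});

        sd::ops::sqrtm op;
        auto results = op.evaluate({&x}, {}, {});
        // results.at(0) is expected to equal 'exp' here.
    }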
*/ #if NOT_EXCLUDED(OP_batched_gemm) @@ -88,25 +88,39 @@ /** * performs singular value decomposition (SVD) of one or more matrices, evaluates the SVD of each inner-most 2D matrix in input array: - * x[..., :, :] = u[..., :, :] * s[...,:] * transpose(v[..., :, :]) + * x[..., :, :] = u[..., :, :] * s[...,:] * transpose(v[..., :, :]) * * Input array: * x[..., Rows, Cols], the necessary condition is: rank of x >= 2 - * + * * Output arrays: * s[..., diagSize] - array with singular values which are stored in decreasing order, diagSize is the smaller of Rows and Cols * u[..., Rows, Rows] if IArgs[1] is true, else u[..., Rows, diagSize] - array with left singular vectors * v[..., Cols, Cols] if IArgs[1] is true, else v[..., Cols, diagSize] - array with right singular vectors - * + * * Integer arguments: * IArgs[0] - bool, whether to calculate u and v, s is calculated in any case * IArgs[1] - bool, whether to calculate full-sized u and v * IArgs[2] - the number of cols or rows which determines what algorithm to use. More precisely: * if diagSize < IArgs[2] then Jacobi algorithm is used, in opposite case the Divide-And-Conquer is applied - * Recommended value is 16. + * Recommended value is 16. */ #if NOT_EXCLUDED(OP_svd) - DECLARE_CUSTOM_OP(svd, 1, 1, false, 0, 3); + DECLARE_CUSTOM_OP(svd, 1, 1, false, 0, 3); + #endif + + /** + * calculates the square root of a matrix such that + * x[..., M, M] = z[..., M, M] * z[..., M, M] + * + * Input array: + * x[..., M, M], the necessary condition is: rank of x >= 2 and equality of last two dimensions + * + * Output arrays: + * z - same shape as x + */ + #if NOT_EXCLUDED(OP_sqrtm) + DECLARE_CONFIGURABLE_OP(sqrtm, 1, 1, false, 0, 0); #endif } } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/betaInc.cpp b/libnd4j/include/ops/declarable/helpers/cpu/betaInc.cpp index ec06610b8..0056fec6d 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/betaInc.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/betaInc.cpp @@ -118,7 +118,7 @@ static void betaIncForArray(sd::LaunchContext * context, const NDArray& a, const auto func = PRAGMA_THREADS_FOR { for (auto i = start; i < stop; i++) - output.t(i) = betaIncCore(a.t(i), b.t(i), x.t(i)); + output.r(i) = betaIncCore(a.t(i), b.t(i), x.t(i)); }; samediff::Threads::parallel_for(func, 0, xLen); diff --git a/libnd4j/include/ops/declarable/helpers/cpu/extract_patches.cpp b/libnd4j/include/ops/declarable/helpers/cpu/extract_patches.cpp index 15ea569e8..ba04fd9aa 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/extract_patches.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/extract_patches.cpp @@ -73,7 +73,7 @@ namespace helpers { bool setUp = (theSame && row >= 0 && col >= 0 && row < rowDim && col < colDim) || (!theSame); if (setUp) { - outMatrix->t(i, j, pos) = patch->e(row, col, pixel); + outMatrix->r(i, j, pos) = patch->e(row, col, pixel); } pos++; } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/fake_quantization.cpp b/libnd4j/include/ops/declarable/helpers/cpu/fake_quantization.cpp index d2c918da9..7317f8a73 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/fake_quantization.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/fake_quantization.cpp @@ -73,7 +73,7 @@ namespace helpers { else if (val >= nudged_max) val = nudged_max; // quantization itself - output->t(e + i) = math::nd4j_floor((val - nudged_min)/scale + T(0.5)) * scale + nudged_min; + output->r(e + i) = math::nd4j_floor((val - nudged_min)/scale + T(0.5)) * scale + nudged_min; } } } diff --git
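Given the IArgs contract documented above, a typical call looks like this sketch (IArgs = {calcUV, fullUV, switchSize}; test-harness conventions assumed, as in the sqrtm sketch earlier):

    #include <ops/declarable/CustomOperations.h>

    // Sketch: compute s, u, v with economy-size u/v and the recommended
    // Jacobi / Divide-And-Conquer switch point of 16.
    void svdUsageSketch() {
        auto x = sd::NDArrayFactory::create<float>('c', {4, 3});
        x.linspace(1.f);
        sd::ops::svd op;
        auto results = op.evaluate({&x}, {}, {1, 0, 16});  // IArgs: calcUV, fullUV, switch
        // results.at(0) -> s [3], results.at(1) -> u [4,3], results.at(2) -> v [3,3]
    }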
a/libnd4j/include/ops/declarable/helpers/cpu/image_resize.cpp b/libnd4j/include/ops/declarable/helpers/cpu/image_resize.cpp index 2f0f00779..68b2130ac 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/image_resize.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/image_resize.cpp @@ -318,7 +318,7 @@ namespace helpers { } // copy pixel over all channels for (Nd4jLong e = 0; e < channels; e++) - output->t(b, y, x, e) = images->t(b, inY, inX, e); + output->r(b, y, x, e) = images->t(b, inY, inX, e); } } } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/lstsq.cpp b/libnd4j/include/ops/declarable/helpers/cpu/lstsq.cpp index 675fb2794..204b05530 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/lstsq.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/lstsq.cpp @@ -40,7 +40,7 @@ namespace helpers { for (auto x = 0; x < lastDims.size(); x++) { for (auto r = 0; r < rows; r++) { - lastDims[x]->t(r,r) = (T)value; + lastDims[x]->r(r,r) = (T)value; } } @@ -71,7 +71,7 @@ namespace helpers { if (err) return err; // alternate moment: inverse lower triangular matrix to solve equation A'x = b' => L^Tx = L^-1 * b' // solve one upper triangular system (to avoid float problems) - + // 5. Solve two triangular systems: auto rightB = rightOutput.ulike(); helpers::triangularSolveFunctor(context, &leftOutput, &rightOutput, true, false, &rightB); diff --git a/libnd4j/include/ops/declarable/helpers/cpu/lup.cpp b/libnd4j/include/ops/declarable/helpers/cpu/lup.cpp index 0f435cfdb..482709455 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/lup.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/lup.cpp @@ -34,7 +34,7 @@ namespace helpers { if (theFirst != theSecond) for (int i = 0; i < matrix->columns(); i++) { - math::nd4j_swap(matrix->t(theFirst, i), matrix->t(theSecond, i)); + math::nd4j_swap(matrix->r(theFirst, i), matrix->r(theSecond, i)); } } BUILD_SINGLE_TEMPLATE(template void swapRows_, (NDArray* matrix, int theFirst, int theSecond), FLOAT_TYPES); @@ -71,12 +71,12 @@ namespace helpers { auto invertDiagonals = PRAGMA_THREADS_FOR { for (int i = start; i < stop; i += increment) - invertedMatrix->t(i, i) /= inputMatrix->t(i, i); + invertedMatrix->r(i, i) /= inputMatrix->t(i, i); }; auto invertSubDiagonals = PRAGMA_THREADS_FOR { for (int i = start; i < stop; i += increment) - invertedMatrix->t(i, i - 1) -= (inputMatrix->t(i, i - 1) * invertedMatrix->t(i - 1, i - 1) / inputMatrix->t(i, i)); + invertedMatrix->r(i, i - 1) -= (inputMatrix->t(i, i - 1) * invertedMatrix->t(i - 1, i - 1) / inputMatrix->t(i, i)); }; samediff::Threads::parallel_for(invertDiagonals, 0, n, 1); @@ -86,7 +86,7 @@ namespace helpers { for (int i = 1; i < n; i++) { for (int j = 0; j < i - 1 ; j++) for (int k = 0; k < i; k++) - invertedMatrix->t(i, j) -= ((invertedMatrix->t(k, j) * inputMatrix->t(i, k) / inputMatrix->t(i, i))); + invertedMatrix->r(i, j) -= ((invertedMatrix->t(k, j) * inputMatrix->t(i, k) / inputMatrix->t(i, i))); } } @@ -108,13 +108,13 @@ namespace helpers { auto invertDiagonals = PRAGMA_THREADS_FOR { for (auto i = start; i < stop; i += increment) - invertedMatrix->t(i, i) /= inputMatrix->t(i, i); + invertedMatrix->r(i, i) /= inputMatrix->t(i, i); }; //PRAGMA_OMP_PARALLEL_FOR_IF(n > Environment::getInstance()->elementwiseThreshold()) auto invertUpDiagonals = PRAGMA_THREADS_FOR { for (auto i = start; i < stop; i += increment) - invertedMatrix->t(i, i + 1) -= (inputMatrix->t(i, i + 1) * invertedMatrix->t(i + 1, i + 1) / + invertedMatrix->r(i, i + 1) -= (inputMatrix->t(i, i + 1) * invertedMatrix->t(i + 1, 
i + 1) / inputMatrix->t(i, i)); }; @@ -125,7 +125,7 @@ namespace helpers { for (auto i = n - 2; i >= 0; i--) { for (auto j = i + 2; j < n; j++) for (auto k = i; k < n; k++) - invertedMatrix->t(i, j) -= ((invertedMatrix->t(k, j) * inputMatrix->t(i, k) / inputMatrix->t(i, i))); + invertedMatrix->r(i, j) -= ((invertedMatrix->t(k, j) * inputMatrix->t(i, k) / inputMatrix->t(i, i))); } } @@ -169,10 +169,10 @@ namespace helpers { swapCount++; for( int j = i + 1; j < rowNum; j++ ) { - compoundMatrix.t(j, i) /= compoundMatrix.t(i, i); + compoundMatrix.r(j, i) /= compoundMatrix.t(i, i); //PRAGMA_OMP_PARALLEL_FOR for( int k = i + 1; k < rowNum; k++ ) { - compoundMatrix.t(j, k) -= compoundMatrix.t(j, i) * compoundMatrix.t(i, k); + compoundMatrix.r(j, k) -= compoundMatrix.t(j, i) * compoundMatrix.t(i, k); } } } @@ -190,7 +190,7 @@ namespace helpers { for (auto i = 0; i < rowNum; i++) { for (auto j = 0; j < columnNum; j++) { if (permutationMatrix.t(i, j) != 0) { - permutaionVector.template t(i) = j; + permutaionVector.template r(i) = j; } } } @@ -268,7 +268,7 @@ namespace helpers { sum += compound->t(i,j) * compound->t(j,k); // Evaluating U(i, k) - compound->t(i, k) = input.t(i, k) - sum; + compound->r(i, k) = input.t(i, k) - sum; } // Lower Triangular @@ -279,7 +279,7 @@ namespace helpers { sum += compound->t(k,j) * compound->t(j, i); // Evaluating L(k, i) - compound->t(k, i) = (input.t(k, i) - sum) / compound->t(i,i); + compound->r(k, i) = (input.t(k, i) - sum) / compound->t(i,i); } } } @@ -412,12 +412,12 @@ template lowerMatrix.setIdentity(); // set up U to identity matrix for (int k = 1; k < n; k++) { // and then put all values under main diagonal on to it for (int j = 0; j < k; j++) - lowerMatrix.template t(k, j) = compound.template t(k, j); + lowerMatrix.template r(k, j) = compound.template t(k, j); } upperMatrix.setIdentity(); // set up U to identity matrix for (int k = 0; k < n; k++) { // and then put all values under main diagonal on to it for (int j = k; j < n; j++) - upperMatrix.template t(k, j) = compound.template e(k, j); + upperMatrix.template r(k, j) = compound.template t(k, j); } invertUpperMatrix(&upperMatrix, &matrix); @@ -426,7 +426,7 @@ template sd::MmulHelper::mmul(&matrix, &upperMatrix, &compound, 1.0, 0.0); sd::MmulHelper::mmul(&compound, &permutation, &matrix, 1.0, 0.0); for (int k = e * n2, row = 0; k < (e + 1) * n2; k++) { - output->t(k) = matrix.template t(row++); + output->r(k) = matrix.template t(row++); } } @@ -470,7 +470,7 @@ template invertLowerMatrix(&matrix, &lowerMatrix); for (int k = e * n2, row = 0; k < (e + 1) * n2; k++) { - output->t(k) = lowerMatrix.template t(row++); + output->r(k) = lowerMatrix.template t(row++); } } @@ -597,7 +597,7 @@ template for (Nd4jLong e = 0; e < totalCount; e++) { for (size_t i = 0; i < n; ++i) - output->t(e) += sd::math::nd4j_log(sd::math::nd4j_pow(matricies.at(e)->t(i, i), T(2))); + output->r(e) += sd::math::nd4j_log(sd::math::nd4j_pow(matricies.at(e)->t(i, i), T(2))); } return ND4J_STATUS_OK; } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/merge.cpp b/libnd4j/include/ops/declarable/helpers/cpu/merge.cpp index d748aa6b0..2a0c5af95 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/merge.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/merge.cpp @@ -47,8 +47,8 @@ static void mergeMaxIndex_(const std::vector& inArrs, NDArray& o idx = static_cast(i); } } - // FIXME, use .r(e) - output.t(e) = static_cast(idx); + + output.r(e) = static_cast(idx); } }; diff --git a/libnd4j/include/ops/declarable/helpers/cpu/random.cpp 
b/libnd4j/include/ops/declarable/helpers/cpu/random.cpp index 1e96211b3..b0e1553e4 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/random.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/random.cpp @@ -68,7 +68,7 @@ namespace helpers { beta != nullptr ? copyBeta->t(e) * u : u); } else { - output->t(pos + e) = math::nd4j_igamma(copyAlpha->t(e), + output->r(pos + e) = math::nd4j_igamma(copyAlpha->t(e), beta != nullptr ? copyBeta->t(e) * u : u); } } @@ -121,7 +121,7 @@ namespace helpers { if (directOut) outputBuf[pos + e] = x; else - output->t(pos + e) = x; + output->r(pos + e) = x; } } } @@ -146,7 +146,7 @@ namespace helpers { else { PRAGMA_OMP_PARALLEL_FOR for (Nd4jLong i = 0; i < output->lengthOf(); i++) { - output->t(i) = rng.relativeT(i, minVal, maxVal); + output->r(i) = rng.relativeT(i, minVal, maxVal); } } } @@ -159,12 +159,12 @@ namespace helpers { // methods: gumbel trick + softmax + argmax template void fillRandomMultiNomial_(LaunchContext* context, graph::RandomGenerator& rng, NDArray& input, NDArray& output, const Nd4jLong numOfSamples, const int dimC) { - + const Tx* x = input.bufferAsT(); Tz* z = output.bufferAsT(); - + Tx minVal = DataTypeUtils::min(); - Tx maxVal = 1.0; + Tx maxVal = 1.0; auto dimA = (0 == dimC) ? 1 : 0; const Nd4jLong batchValue = output.sizeAt(dimC); @@ -178,7 +178,7 @@ namespace helpers { auto func = PRAGMA_THREADS_FOR_2D{ for (auto nBatchIndex = start_x; nBatchIndex < stop_x; nBatchIndex += inc_x) { for (auto nSampleIndexInBatch = start_y; nSampleIndexInBatch < stop_y; nSampleIndexInBatch += inc_y) { - + const Tx* xTad = x + (nBatchIndex * xDimCstride); Tz* zTad = z + (nBatchIndex * zDimCstride); Tz& arg = zTad[nSampleIndexInBatch * zDimAstride]; diff --git a/libnd4j/include/ops/declarable/helpers/cpu/randomShuffle.cpp b/libnd4j/include/ops/declarable/helpers/cpu/randomShuffle.cpp index 2e336da23..a7f40899a 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/randomShuffle.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/randomShuffle.cpp @@ -54,8 +54,8 @@ void randomShuffle_(NDArray& input, NDArray& output, sd::graph::RandomGenerator& T t0 = input.t(i); T t1 = input.t(r); //math::nd4j_swap(input(i), input(r)); - input.t(i) = t1; - input.t(r) = t0; + input.r(i) = t1; + input.r(r) = t0; } } else { @@ -66,11 +66,11 @@ void randomShuffle_(NDArray& input, NDArray& output, sd::graph::RandomGenerator& // FIXME: parallelism!! 
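The recurring t(...) -> r(...) substitutions in these helper hunks all follow one rule: t is the read-only typed accessor and r returns a writable reference, so writes no longer go through a read API. A toy model of the split (illustrative only, not the real NDArray):

    #include <cstdio>
    #include <vector>

    // Toy model of the accessor split behind the t() -> r() renames:
    // t(i) is the read-only typed accessor (usable on const arrays),
    // r(i) hands out a writable reference.
    template <typename T>
    class ToyArray {
        std::vector<T> buf;
    public:
        explicit ToyArray(std::size_t n) : buf(n, T(0)) {}
        T t(std::size_t i) const { return buf[i]; }  // read path
        T& r(std::size_t i) { return buf[i]; }       // write path
    };

    int main() {
        ToyArray<float> a(3);
        a.r(1) = 42.f;                  // writes go through r()
        std::printf("%f\n", a.t(1));    // reads stay on t()
        return 0;
    }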
for(int i = firstDim-1; i > 0; --i) { int r = rng.relativeInt(i) % i; - output.t(i) = input.t(indices[r]); + output.r(i) = input.t(indices[r]); if(i == r) continue; - output.t(r) = input.t(indices[i]); + output.r(r) = input.t(indices[i]); math::nd4j_swap(indices[i], indices[r]); } rng.rewindH(firstDim-1); diff --git a/libnd4j/include/ops/declarable/helpers/cpu/segment.cpp b/libnd4j/include/ops/declarable/helpers/cpu/segment.cpp index e57264e66..50ff79679 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/segment.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/segment.cpp @@ -46,7 +46,7 @@ namespace helpers { idx = indices->e(e); val = input->t(e); } - output->t(idx) = val; + output->r(idx) = val; } } else { @@ -65,7 +65,7 @@ namespace helpers { if (indices->e(i) == idx) { for (Nd4jLong e = 0; e < maxT->lengthOf(); e++) { - maxT->t(e) = sd::math::nd4j_max(maxT->t(e), listOfTensors.at(i)->t(e)); + maxT->r(e) = sd::math::nd4j_max(maxT->t(e), listOfTensors.at(i)->t(e)); } } else { @@ -96,7 +96,7 @@ namespace helpers { idx = indices->e(e); val = input->t(e); } - output->t(idx) = val; + output->r(idx) = val; } } else { @@ -417,7 +417,7 @@ namespace helpers { for (size_t idx = 1; idx < fi->second.size(); ++idx) { val = sd::math::nd4j_min(val, input->t(fi->second.at(idx))); } - output->t(fi->first) = val; + output->r(fi->first) = val; } } else { @@ -436,7 +436,7 @@ namespace helpers { auto minT = listOfTensors.at(fi->second.at(idx)); for (Nd4jLong e = 0; e < outputT->lengthOf(); ++e) { - outputT->t(e) = sd::math::nd4j_min(minT->t(e), outputT->t(e)); + outputT->r(e) = sd::math::nd4j_min(minT->t(e), outputT->t(e)); } } //outputT->assign(maxT); @@ -890,7 +890,7 @@ namespace helpers { for (auto e = start; e < stop; e++) { auto classNum = indices->e(e); if (sd::math::nd4j_abs(tempRes.t(classNum) - input->t(e)) < 1.e-6) - output->t(e) = gradOut->t(classNum); + output->r(e) = gradOut->t(classNum); } }; @@ -913,7 +913,7 @@ namespace helpers { for (Nd4jLong e = 0; e < current->lengthOf(); e++) { if (sd::math::nd4j_abs(listOfBPTensors.at(classNum)->t(e) - current->t(e)) < 1.e-6) - currentOut->t(e) = currentGradOut->t(e); + currentOut->r(e) = currentGradOut->t(e); } } //}; diff --git a/libnd4j/include/ops/declarable/helpers/cpu/sequence_mask.cpp b/libnd4j/include/ops/declarable/helpers/cpu/sequence_mask.cpp index 8e25c4690..3c8ce573e 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/sequence_mask.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/sequence_mask.cpp @@ -31,7 +31,7 @@ namespace helpers { for (auto i = start_x; i < stop_x; i += inc_x) for (auto k = start_y; k < stop_y; k += inc_y) if (i < input->t(k)) - output->t(k * maxIndex + i) = B(true); //, T(1.0f)); + output->r(k * maxIndex + i) = B(true); //, T(1.0f)); }; samediff::Threads::parallel_for(func, 0, maxIndex, 1, 0, input->lengthOf(), 1); diff --git a/libnd4j/include/ops/declarable/helpers/cpu/solve.cpp b/libnd4j/include/ops/declarable/helpers/cpu/solve.cpp index 9a06975aa..a0034bb5d 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/solve.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/solve.cpp @@ -43,7 +43,7 @@ namespace helpers { for (auto batch = start; batch < stop; batch++) { for (Nd4jLong r = 0; r < rows; r++) { for (Nd4jLong c = 0; c < r; c++) { - math::nd4j_swap(outputPart[batch]->t(r, c) , outputPart[batch]->t(c, r)); + math::nd4j_swap(outputPart[batch]->r(r, c) , outputPart[batch]->r(c, r)); } } } @@ -67,7 +67,7 @@ namespace helpers { for (auto batch = 0; batch < permutationsPart.size(); ++batch) { for 
(Nd4jLong row = 0; row < PPart[batch]->rows(); ++row) { - PPart[batch]->t(row, permutationsPart[batch]->t(row)) = T(1.f); + PPart[batch]->r(row, permutationsPart[batch]->t(row)) = T(1.f); } } @@ -78,7 +78,7 @@ namespace helpers { ResultSet leftLowerPart = leftLower.allTensorsAlongDimension({-2, -1}); for (auto i = 0; i < leftLowerPart.size(); i++) { for (Nd4jLong r = 0; r < leftLowerPart[i]->rows(); r++) - leftLowerPart[i]->t(r,r) = (T)1.f; + leftLowerPart[i]->r(r,r) = (T)1.f; } // stage 2: triangularSolveFunctor for Lower with given b helpers::triangularSolveFunctor(context, &leftLower, &rightPermuted, true, false, &rightOutput); diff --git a/libnd4j/include/ops/declarable/helpers/cpu/svd.cpp b/libnd4j/include/ops/declarable/helpers/cpu/svd.cpp index c4f99af3f..6910960ef 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/svd.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/svd.cpp @@ -27,911 +27,6 @@ namespace sd { namespace ops { namespace helpers { - -////////////////////////////////////////////////////////////////////////// -template -SVD::SVD(const NDArray& matrix, const int switchSize, const bool calcU, const bool calcV, const bool fullUV ) { - - if(matrix.rankOf() != 2 || matrix.isScalar()) - throw std::runtime_error("ops::helpers::SVD constructor: input array must be 2D matrix !"); - - const int rows = matrix.sizeAt(0); - const int cols = matrix.sizeAt(1); - - if(cols > rows) { - - _transp = true; - _diagSize = rows; - } - else { - - _transp = false; - _diagSize = cols; - } - - _switchSize = switchSize; - _calcU = calcU; - _calcV = calcV; - _fullUV = fullUV; - - if (_transp) - math::nd4j_swap(_calcU, _calcV); - - _s = NDArrayFactory::create(matrix.ordering(), {_diagSize, 1}, matrix.getContext()); - _m = NDArrayFactory::create(matrix.ordering(), {_diagSize + 1, _diagSize}, matrix.getContext()); - _m.assign(0.); - - if (_calcU) - _u = NDArrayFactory::create(matrix.ordering(), {_diagSize + 1, _diagSize + 1}, matrix.getContext()); - else - _u = NDArrayFactory::create(matrix.ordering(), {2, _diagSize + 1}, matrix.getContext()); - _u.assign(0.); - - if (_calcV) { - _v = NDArrayFactory::create(matrix.ordering(), {_diagSize, _diagSize}, matrix.getContext()); - _v.assign(0.); - } - - evalData(matrix); -} - -////////////////////////////////////////////////////////////////////////// -template -SVD::SVD(const NDArray& matrix, const int switchSize, const bool calcU, const bool calcV, const bool fullUV, const char t) { - - if(matrix.rankOf() != 2 || matrix.isScalar()) - throw std::runtime_error("ops::helpers::SVD constructor: input array must be 2D matrix !"); - - const int rows = matrix.sizeAt(0); - const int cols = matrix.sizeAt(1); - - if(cols > rows) { - - _transp = true; - _diagSize = rows; - } - else { - - _transp = false; - _diagSize = cols; - } - - _switchSize = switchSize; - _calcU = calcU; - _calcV = calcV; - _fullUV = fullUV; - - if (_transp) - math::nd4j_swap(_calcU, _calcV); - - _s = NDArrayFactory::create(matrix.ordering(), {_diagSize, 1}, matrix.getContext()); - _m = NDArrayFactory::create(matrix.ordering(), {_diagSize + 1, _diagSize}, matrix.getContext()); - _m.assign(0.f); - - if (_calcU) - _u = NDArrayFactory::create(matrix.ordering(), {_diagSize + 1, _diagSize + 1}, matrix.getContext()); - else - _u = NDArrayFactory::create(matrix.ordering(), {2, _diagSize + 1}, matrix.getContext()); - _u.assign(0.); - - if (_calcV) { - _v = NDArrayFactory::create(matrix.ordering(), {_diagSize, _diagSize}, matrix.getContext()); - _v.assign(0.); - } -} - - 
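Editor's note on the solve.cpp hunk earlier in this patch: the helper rebuilds the permutation matrix P from the LU pivot vector, forces a unit diagonal onto the lower factor, and then chains two triangular solves. For reference, the standard solve of A*x = b from a packed LU factorization (P*A = L*U) looks like the sketch below; it is illustrative plain C++ under assumed conventions (row-major storage, LAPACK-style packed LU), not the libnd4j API. The unit diagonal of L is why the forward pass needs no division, which is exactly what the new unitsOnDiag flag expresses.

    #include <cstddef>
    #include <vector>

    // Solve A*x = b given packed LU (L strictly below the diagonal with an implicit
    // unit diagonal, U on and above it) and pivot permutation perm, i.e. P*A = L*U.
    std::vector<double> luSolve(const std::vector<double>& LU, const std::vector<int>& perm,
                                const std::vector<double>& b, std::size_t n) {
        std::vector<double> y(n), x(n);
        for (std::size_t r = 0; r < n; ++r) {            // forward substitution: L*y = P*b
            double sum = b[perm[r]];
            for (std::size_t c = 0; c < r; ++c) sum -= LU[r * n + c] * y[c];
            y[r] = sum;                                  // unit diagonal, no division
        }
        for (std::size_t r = n; r > 0; --r) {            // back substitution: U*x = y
            double sum = y[r - 1];
            for (std::size_t c = r; c < n; ++c) sum -= LU[(r - 1) * n + c] * x[c];
            x[r - 1] = sum / LU[(r - 1) * n + (r - 1)];
        }
        return x;
    }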
-////////////////////////////////////////////////////////////////////////// -template -void SVD::deflation1(int col1, int shift, int ind, int size) { - - if(ind <= 0) - throw std::runtime_error("ops::helpers::SVD::deflation1 method: input int must satisfy condition ind > 0 !"); - - int first = col1 + shift; - T cos = _m.e(first, first); - T sin = _m.e(first+ind, first); - T denom = math::nd4j_sqrt(cos*cos + sin*sin); - - if (denom == (T)0.) { - - _m.p(first+ind, first+ind, 0.f); - return; - } - - cos /= denom; - sin /= denom; - - _m.p(first,first, denom); - _m.p(first+ind, first, 0.f); - _m.p(first+ind, first+ind, 0.f); - - auto rotation = NDArrayFactory::create(_m.ordering(), {2, 2}, _m.getContext()); - rotation.p(0, 0, cos); - rotation.p(0, 1, -sin); - rotation.p(1, 0, sin); - rotation.p(1, 1, cos); - - if (_calcU) { - auto temp = _u({col1,col1+size+1, 0,0}, true); - JacobiSVD::mulRotationOnRight(col1, col1+ind, temp, rotation); - } - else - JacobiSVD::mulRotationOnRight(col1, col1+ind, _u, rotation); -} - -////////////////////////////////////////////////////////////////////////// -template -void SVD::deflation2(int col1U , int col1M, int row1W, int col1W, int ind1, int ind2, int size) { - - if(ind1 >= ind2) - throw std::runtime_error("ops::helpers::SVD::deflation2 method: input intes must satisfy condition ind1 < ind2 !"); - - if(size <= 0) - throw std::runtime_error("ops::helpers::SVD::deflation2 method: input size must satisfy condition size > 0 !"); - - T cos = _m.e(col1M+ind1, col1M); - T sin = _m.e(col1M+ind2, col1M); - T denom = math::nd4j_sqrt(cos*cos + sin*sin); - - if (denom == (T)0.) { - - _m.p(col1M + ind1, col1M + ind1, _m.e(col1M + ind2, col1M + ind2)); - return; - } - - cos /= denom; - sin /= denom; - _m.p(col1M + ind1, col1M, denom); - _m.p(col1M + ind2, col1M + ind2, _m.e(col1M + ind1, col1M + ind1)); - _m.p(col1M + ind2, col1M, 0.f); - - auto rotation = NDArrayFactory::create(_m.ordering(), {2, 2}, _m.getContext()); - rotation.p(0,0, cos); - rotation.p(1,1, cos); - - rotation.p(0,1, -sin); - rotation.p(1,0, sin); - - if (_calcU) { - auto temp = _u({col1U,col1U+size+1, 0,0}, true); - JacobiSVD::mulRotationOnRight(col1U+ind1, col1U+ind2, temp, rotation); - } - else - JacobiSVD::mulRotationOnRight(col1U+ind1, col1U+ind2, _u, rotation); - - if (_calcV) { - auto temp = _v({row1W,row1W+size, 0,0}, true); - JacobiSVD::mulRotationOnRight(col1W+ind1, col1W+ind2, temp, rotation); - } -} - -////////////////////////////////////////////////////////////////////////// -// has effect on block from (col1+shift, col1+shift) to (col2+shift, col2+shift) inclusively -template -void SVD::deflation(int col1, int col2, int ind, int row1W, int col1W, int shift) -{ - - const int len = col2 + 1 - col1; - - auto colVec0 = new NDArray(_m({col1+shift,col1+shift+len, col1+shift,col1+shift+1}, true)); - - auto diagInterval = _m({col1+shift, col1+shift+len, col1+shift,col1+shift+len}, true).diagonal('c'); - - const T almostZero = DataTypeUtils::min(); - T maxElem; - if(len == 1) - maxElem = math::nd4j_abs(diagInterval.template e(0)); - else - maxElem = diagInterval({1,-1, 0,0}, true).reduceNumber(reduce::AMax).template e(0); - T maxElem0 = colVec0->reduceNumber(reduce::AMax).template e(0); - - T eps = math::nd4j_max(almostZero, DataTypeUtils::eps() * maxElem); - T epsBig = (T)8. 
* DataTypeUtils::eps() * math::nd4j_max(maxElem0, maxElem); - - if(diagInterval.template e(0) < epsBig) - diagInterval.p(Nd4jLong(0), epsBig); - - for(int i=1; i < len; ++i) - if(math::nd4j_abs(colVec0->template e(i)) < eps) - colVec0->p(i, 0.f); - - for(int i=1; i < len; i++) - if(diagInterval.template e(i) < epsBig) { - deflation1(col1, shift, i, len); - for(int i = 0; i < len; ++i) - diagInterval.p(i, _m.e(col1+shift+i,col1+shift+i)); - } - - { - - bool totDefl = true; - for(int i=1; i < len; i++) - if(colVec0->template e(i) >= almostZero) { - totDefl = false; - break; - } - - int* permut = nullptr; - ALLOCATE(permut, _m.getContext()->getWorkspace(), 3*_diagSize, int); - { - permut[0] = 0; - int p = 1; - - for(int i=1; i(diagInterval.template e(i)) < almostZero) - permut[p++] = i; - - int k = 1, m = ind+1; - - for( ; p < len; ++p) { - if(k > ind) - permut[p] = m++; - else if(m >= len) - permut[p] = k++; - else if(diagInterval.template e(k) < diagInterval.template e(m)) - permut[p] = m++; - else - permut[p] = k++; - } - } - - if(totDefl) { - for(int i=1; i(diagInterval.template e(ki)) < almostZero || diagInterval.template e(0) < diagInterval.template e(ki)) - permut[i-1] = permut[i]; - else { - permut[i-1] = 0; - break; - } - } - } - - int *tInd = permut + len; - int *tCol = permut + 2*len; - - for(int m = 0; m < len; m++) { - tCol[m] = m; - tInd[m] = m; - } - - for(int i = totDefl ? 0 : 1; i < len; i++) { - - const int ki = permut[len - (totDefl ? i+1 : i)]; - const int jac = tCol[ki]; - - T _e0 = diagInterval.template e(jac); - //math::nd4j_swap(diagInterval)(i), (*diagInterval)(jac)); - diagInterval.p(jac, diagInterval.template e(i)); - diagInterval.p(i, _e0); - - if(i!=0 && jac!=0) { - _e0 = colVec0->template e(jac); - //math::nd4j_swap((*colVec0)(i), (*colVec0)(jac)); - colVec0->p(jac, colVec0->template e(i)); - colVec0->p(i, _e0); - } - - if (_calcU) { - auto temp1 = _u({col1,col1+len+1, col1+i, col1+i+1}, true); - auto temp2 = _u({col1,col1+len+1, col1+jac,col1+jac+1}, true); - auto temp3 = temp1; - temp1.assign(temp2); - temp2.assign(temp3); - } - else { - auto temp1 = _u({0,2, col1+i, col1+i+1}, true); - auto temp2 = _u({0,2, col1+jac, col1+jac+1}, true); - auto temp3 = temp1; - temp1.assign(temp2); - temp2.assign(temp3); - } - - if(_calcV) { - auto temp1 = _v({row1W,row1W+len, col1W+i, col1W+i+1}, true); - auto temp2 = _v({row1W,row1W+len, col1W+jac, col1W+jac+1}, true); - auto temp3 = temp1; - temp1.assign(temp2); - temp2.assign(temp3); - } - - const int tI = tInd[i]; - tCol[tI] = jac; - tCol[ki] = i; - tInd[jac] = tI; - tInd[i] = ki; - } - - RELEASE(permut, _m.getContext()); - } - - { - int i = len-1; - - while(i > 0 && (math::nd4j_abs(diagInterval.template e(i)) < almostZero || math::nd4j_abs(colVec0->template e(i)) < almostZero)) - --i; - - for(; i > 1; --i) { - if( (diagInterval.template e(i) - diagInterval.template e(i-1)) < DataTypeUtils::eps()*maxElem ) { - if (math::nd4j_abs(diagInterval.template e(i) - diagInterval.template e(i-1)) >= epsBig) - throw std::runtime_error("ops::helpers::SVD::deflation: diagonal elements are not properly sorted !"); - deflation2(col1, col1 + shift, row1W, col1W, i-1, i, len); - } - } - } - - delete colVec0; -} - - -////////////////////////////////////////////////////////////////////////// -template -T SVD::secularEq(const T diff, const NDArray& col0, const NDArray& diag, const NDArray& permut, const NDArray& diagShifted, const T shift) { - - auto len = permut.lengthOf(); - T res = 1.; - T item; - for(int i=0; i(i); - item = col0.e(j) / 
((diagShifted.e(j) - diff) * (diag.e(j) + shift + diff)); - res += item * col0.e(j); - } - - return res; -} - - -////////////////////////////////////////////////////////////////////////// -template -void SVD::calcSingVals(const NDArray& col0, const NDArray& diag, const NDArray& permut, NDArray& singVals, NDArray& shifts, NDArray& mus) { - - auto len = col0.lengthOf(); - auto curLen = len; - - while(curLen > 1 && col0.e(curLen-1) == (T)0.f) - --curLen; - - for (int k = 0; k < len; ++k) { - - if (col0.e(k) == (T)0.f || curLen==1) { - - singVals.p(k, k==0 ? col0.e(0) : diag.e(k)); - mus.p(k, 0.f); - shifts.p(k, k==0 ? col0.e(0) : diag.e(k)); - continue; - } - - T left = diag.e(k); - T right; - - if(k==curLen-1) - right = diag.e(curLen-1) + col0.reduceNumber(reduce::Norm2).e(0); - else { - - int l = k+1; - while(col0.e(l) == (T)0.f) { - ++l; - if(l >= curLen) - throw std::runtime_error("ops::helpers::SVD::calcSingVals method: l >= curLen !"); - } - - right = diag.e(l); - } - - T mid = left + (right - left) / (T)2.; - T fMid = secularEq(mid, col0, diag, permut, diag, 0.); - T shift = (k == curLen-1 || fMid > (T)0.) ? left : right; - - auto diagShifted = diag - shift; - - T muPrev, muCur; - if (shift == left) { - muPrev = (right - left) * 0.1; - if (k == curLen-1) - muCur = right - left; - else - muCur = (right - left) * 0.5; - } - else { - muPrev = -(right - left) * 0.1; - muCur = -(right - left) * 0.5; - } - - T fPrev = secularEq(muPrev, col0, diag, permut, diagShifted, shift); - T fCur = secularEq(muCur, col0, diag, permut, diagShifted, shift); - - if (math::nd4j_abs(fPrev) < math::nd4j_abs(fCur)) { - math::nd4j_swap(fPrev, fCur); - math::nd4j_swap(muPrev, muCur); - } - - bool useBisection = fPrev * fCur > (T)0.; - while (fCur != (T).0 && - math::nd4j_abs(muCur - muPrev) > (T)8. * DataTypeUtils::eps() * math::nd4j_max(math::nd4j_abs(muCur), math::nd4j_abs(muPrev)) - && math::nd4j_abs(fCur - fPrev) > DataTypeUtils::eps() && !useBisection) { - - T a = (fCur - fPrev) / ((T)1./muCur - (T)1./muPrev); - T jac = fCur - a / muCur; - T muZero = -a/jac; - T fZero = secularEq(muZero, col0, diag, permut, diagShifted, shift); - - muPrev = muCur; - fPrev = fCur; - muCur = muZero; - fCur = fZero; - - if (shift == left && (muCur < (T)0. || muCur > right - left)) - useBisection = true; - if (shift == right && (muCur < -(right - left) || muCur > (T)0.)) - useBisection = true; - if (math::nd4j_abs(fCur) > math::nd4j_abs(fPrev) && math::nd4j_abs(fCur - fPrev) > (T)16. * DataTypeUtils::eps()) - useBisection = true; - } - - - if (useBisection) { - - T leftShifted, rightShifted; - if (shift == left) { - leftShifted = DataTypeUtils::min(); - rightShifted = (k==curLen-1) ? right : ((right - left) * (T)0.6); - } - else { - - leftShifted = -(right - left) * (T)0.6; - rightShifted = -DataTypeUtils::min(); - } - - T fLeft = secularEq(leftShifted, col0, diag, permut, diagShifted, shift); - T fRight = secularEq(rightShifted, col0, diag, permut, diagShifted, shift); - // if(fLeft * fRight >= (T)0.) - // throw "ops::helpers::SVD::calcSingVals method: fLeft * fRight >= (T)0. !"; - - while (rightShifted - leftShifted > (T)2.f * DataTypeUtils::eps() * math::nd4j_max(math::nd4j_abs(leftShifted), math::nd4j_abs(rightShifted))) { - - T midShifted = (leftShifted + rightShifted) / (T)2.; - fMid = secularEq(midShifted, col0, diag, permut, diagShifted, shift); - if (fLeft * fMid < (T)0.) 
- rightShifted = midShifted; - else { - leftShifted = midShifted; - fLeft = fMid; - } - } - muCur = (leftShifted + rightShifted) / (T)2.; - } - singVals.p(k, shift + muCur); - shifts.p(k, shift); - mus.p(k, muCur); - } - -} - - -////////////////////////////////////////////////////////////////////////// -template -void SVD::perturb(const NDArray& col0, const NDArray& diag, const NDArray& permut, const NDArray& singVals, const NDArray& shifts, const NDArray& mus, NDArray& zhat) { - - int n = col0.lengthOf(); - int m = permut.lengthOf(); - if(m==0) { - zhat.assign(0.); - return; - } - - int last = permut.e(m-1); - - for (int k = 0; k < n; ++k) { - - if (col0.e(k) == (T)0.f) - zhat.p(k, (T)0.f); - else { - T dk = diag.e(k); - T prod = (singVals.e(last) + dk) * (mus.e(last) + (shifts.e(last) - dk)); - - for(int l = 0; l(l); - if(i!=k) { - int j = i(l-1); - prod *= ((singVals.e(j)+dk) / ((diag.e(i)+dk))) * ((mus.e(j)+(shifts.e(j)-dk)) / ((diag.e(i)-dk))); - } - } - T tmp = math::nd4j_sqrt(prod); - zhat.p(k, col0.e(k) > (T)0.f ? tmp : -tmp); - } - } -} - - -////////////////////////////////////////////////////////////////////////// -template -void SVD::calcSingVecs(const NDArray& zhat, const NDArray& diag, const NDArray& perm, const NDArray& singVals, - const NDArray& shifts, const NDArray& mus, NDArray& U, NDArray& V) { - - int n = zhat.lengthOf(); - int m = perm.lengthOf(); - - for (int k = 0; k < n; ++k) { - - auto colU = new NDArray(U({0,0, k,k+1}, true)); - *colU = 0.; - NDArray* colV = nullptr; - - if (_calcV) { - colV = new NDArray(V({0,0, k,k+1}, true)); - *colV = 0.; - } - - if (zhat.e(k) == (T)0.f) { - colU->p(k, 1.f); - - if (_calcV) - colV->p(k, 1.f); - } - else { - - for(int l = 0; l < m; ++l) { - int i = perm.e(l); - U.p(i,k, zhat.e(i)/(((diag.e(i) - shifts.e(k)) - mus.e(k)) )/( (diag.e(i) + singVals.e(k)))); - } - U.p(n,k, 0.f); - *colU /= colU->reduceNumber(reduce::Norm2); - - if (_calcV) { - - for(int l = 1; l < m; ++l){ - int i = perm.e(l); - V.p(i,k, diag.e(i) * zhat.e(i) / (((diag.e(i) - shifts.e(k)) - mus.e(k)) )/( (diag.e(i) + singVals.e(k)))); - } - V.p(0,k, -1.f); - *colV /= colV->reduceNumber(reduce::Norm2); - } - } - delete colU; - if (_calcV) - delete colV; - } - - auto colU = U({0,0, n,n+1}, true); - colU = 0.; - colU.p(n, 1.); -} - - -////////////////////////////////////////////////////////////////////////// -template -void SVD::calcBlockSVD(int col1, int size, NDArray& U, NDArray& singVals, NDArray& V) { - - const T almostZero = DataTypeUtils::min(); - auto col0 = _m({col1, col1+size, col1, col1+1}, true); - auto diag = static_cast(_m({col1, col1+size, col1, col1+size}, true).diagonal('c')); - - diag.p(Nd4jLong(0), T(0)); - singVals = NDArrayFactory::create(_m.ordering(), {size, 1}, _m.getContext()); - U = NDArrayFactory::create(_u.ordering(), {size+1, size+1}, _u.getContext()); - if (_calcV) - V = NDArrayFactory::create(_v.ordering(), {size, size}, _v.getContext()); - - int curSize = size; - while(curSize > 1 && diag.template e(curSize-1) == (T)0.f) - --curSize; - - int m = 0; - std::vector indices; - for(int k = 0; k < curSize; ++k) - if(math::nd4j_abs(col0.template e(k)) > almostZero) - indices.push_back((T)k); - - auto permut = NDArrayFactory::create(_m.ordering(), {1, (int)indices.size()}, indices, _m.getContext()); - auto shifts = NDArrayFactory::create(_m.ordering(), {size, 1}, _m.getContext()); - auto mus = NDArrayFactory::create(_m.ordering(), {size, 1}, _m.getContext()); - auto zhat = NDArrayFactory::create(_m.ordering(), {size, 1}, _m.getContext()); - - 
calcSingVals(col0, diag, permut, singVals, shifts, mus); - perturb(col0, diag, permut, singVals, shifts, mus, zhat); - calcSingVecs(zhat, diag, permut, singVals, shifts, mus, U, V); - - for(int i=0; i(i) > singVals.e(i+1)) { - T _e0 = singVals.e(i); - T _e1 = singVals.e(i+1); - //math::nd4j_swap(singVals(i),singVals(i+1)); - singVals.p(i, _e1); - singVals.p(i+1, _e0); - - auto temp1 = U({0,0, i,i+1}, true); - auto temp2 = U({0,0, i+1,i+2}, true); - auto temp3 = temp1; - temp1.assign(temp2); - temp2.assign(temp3); - - if(_calcV) { - auto temp1 = V({0,0, i,i+1}, true); - auto temp2 = V({0,0, i+1,i+2}, true); - auto temp3 = temp1; - temp1.assign(temp2); - temp2.assign(temp3); - } - } - } - - auto temp1 = singVals({0,curSize, 0,0}, true); - for (int e = 0; e < curSize / 2; ++e) { - T tmp = temp1.e(e); - temp1.p(e, temp1.e(curSize-1-e)); - temp1.p(curSize-1-e, tmp); - } - - auto temp2 = U({0,0, 0,curSize}, true); - for(int i = 0; i < curSize/2; ++i) { - auto temp3 = temp2({0,0, i,i+1}, true); - auto temp4 = temp2({0,0, curSize-1-i,curSize-i}, true); - auto temp5 = temp3; - temp3.assign(temp4); - temp4.assign(temp5); - } - - if (_calcV) { - auto temp2 = V({0,0, 0,curSize}, true); - for(int i = 0; i < curSize/2; ++i) { - auto temp3 = temp2({0,0, i,i+1}, true); - auto temp4 = temp2({0,0, curSize-1-i,curSize-i}, true); - auto temp5 = temp3; - temp3.assign(temp4); - temp4.assign(temp5); - } - } -} - - -////////////////////////////////////////////////////////////////////////// -template -void SVD::DivideAndConquer(int col1, int col2, int row1W, int col1W, int shift) { - - // requires rows = cols + 1; - const int n = col2 - col1 + 1; - const int k = n/2; - const T almostZero = DataTypeUtils::min(); - T alphaK; - T betaK; - T r0; - T lambda, phi, c0, s0; - auto l = NDArrayFactory::create(_u.ordering(), {1, k}, _u.getContext()); - auto f = NDArrayFactory::create(_u.ordering(), {1, n-k-1}, _u.getContext()); - - if(n < _switchSize) { - - JacobiSVD jac(_m({col1,col1+n+1, col1,col1+n}, true), _calcU, _calcV, _fullUV); - - if (_calcU) { - auto temp = _u({col1,col1+n+1, col1,col1+n+1}, true); - temp.assign(jac._u); - } - else { - auto temp1 = _u({0,1, col1,col1+n+1}, true); - temp1.assign(jac._u({0,1, 0,0}, true)); - auto temp2 = _u({1,2, col1,col1+n+1}, true); - temp2.assign(jac._u({n,n+1, 0,0}, true)); - } - - if (_calcV) { - auto temp = _v({row1W,row1W+n, col1W,col1W+n}, true); - temp.assign(jac._v); - } - - auto temp = _m({col1+shift,col1+shift+n+1, col1+shift,col1+shift+n}, true); - temp.assign(0.); - auto diag = _m.diagonal('c'); - diag({col1+shift, col1+shift+n, 0,0}, true).assign(jac._s({0,n, 0,0}, true)); - - return; - } - - alphaK = _m.e(col1 + k, col1 + k); - betaK = _m.e(col1 + k + 1, col1 + k); - - DivideAndConquer(k + 1 + col1, col2, k + 1 + row1W, k + 1 + col1W, shift); - DivideAndConquer(col1, k - 1 + col1, row1W, col1W + 1, shift + 1); - - if (_calcU) { - lambda = _u.e(col1 + k, col1 + k); - phi = _u.e(col1 + k + 1, col2 + 1); - } - else { - lambda = _u.e(1, col1 + k); - phi = _u.e(0, col2 + 1); - } - - r0 = math::nd4j_sqrt((math::nd4j_abs(alphaK * lambda) * math::nd4j_abs(alphaK * lambda)) + math::nd4j_abs(betaK * phi) * math::nd4j_abs(betaK * phi)); - - if(_calcU) { - l.assign(_u({col1+k, col1+k+1, col1,col1+k}, true)); - f.assign(_u({col1+k+1,col1+k+2, col1+k+1,col1+n}, true)); - } - else { - l.assign(_u({1,2, col1, col1+k}, true)); - f.assign(_u({0,1, col1+k+1, col1+n}, true)); - } - - // UofSVD.printIndexedBuffer(); - // VofSVD.printIndexedBuffer(); - // singVals.printIndexedBuffer(); - 
// printf("!! \n"); - - if (_calcV) - _v.p(row1W+k, col1W, 1.f); - - if (r0 < almostZero){ - c0 = 1.; - s0 = 0.; - } - else { - c0 = alphaK * lambda / r0; - s0 = betaK * phi / r0; - } - - if (_calcU) { - - auto temp = _u({col1,col1+k+1, col1+k,col1+k+1}, true); - NDArray q1(temp); - - for (int i = col1 + k - 1; i >= col1; --i) { - auto temp = _u({col1,col1+k+1, i+1,i+2}, true); - temp.assign(_u({col1, col1+k+1, i, i+1}, true)); - } - - _u({col1,col1+k+1, col1,col1+1}, true).assign(q1 * c0); - _u({col1,col1+k+1, col2+1,col2+2}, true).assign(q1 * (-s0)); - _u({col1+k+1,col1+n+1, col1, col1+1}, true).assign(static_cast(_u({col1+k+1, col1+n+1, col2+1, col2+2}, true)) * s0); - _u({col1+k+1,col1+n+1, col2+1,col2+2}, true) *= c0; - } - else { - - T q1 = _u.e(0, col1 + k); - - for (int i = col1 + k - 1; i >= col1; --i) - _u.p(0, i+1, _u.e(0, i)); - - _u.p(0, col1, q1 * c0); - _u.p(0, col2+1, -q1*s0); - _u.p(1, col1, _u.e(1, col2+1) * s0); - _u.p(1, col2 + 1, _u.e(1, col2 + 1) * c0); - _u({1,2, col1+1, col1+k+1}, true) = 0.f; - _u({0,1, col1+k+1, col1+n}, true) = 0.f; - } - - _m.p(col1 + shift, col1 + shift, r0); - auto temp1 = _m({col1+shift+1,col1+shift+k+1, col1+shift,col1+shift+1}, true); - temp1.assign(l*alphaK); - auto temp2 = _m({col1+shift+k+1,col1+shift+n, col1+shift,col1+shift+1}, true); - temp2.assign(f*betaK); - - deflation(col1, col2, k, row1W, col1W, shift); - - NDArray UofSVD, VofSVD, singVals; - calcBlockSVD(col1 + shift, n, UofSVD, singVals, VofSVD); - - if(_calcU) { - auto pTemp = _u({col1, col1+n+1, col1,col1+n+1}, true); - auto temp = pTemp; - pTemp.assign(mmul(temp, UofSVD)); - } - else { - auto pTemp = _u({0,0, col1,col1+n+1}, true); - auto temp = pTemp; - pTemp.assign(mmul(temp, UofSVD)); - } - - if (_calcV) { - auto pTemp = _v({row1W,row1W+n, row1W,row1W+n}, true); - auto temp = pTemp; - pTemp.assign(mmul(temp, VofSVD)); - } - - auto blockM = _m({col1+shift,col1+shift+n, col1+shift,col1+shift+n}, true); - blockM = 0.f; - auto diag = blockM.diagonal('c'); - diag.assign(singVals); -} - -////////////////////////////////////////////////////////////////////////// -template -void SVD::exchangeUV(const HHsequence& hhU, const HHsequence& hhV, const NDArray& U, const NDArray& V) { - - if (_calcU) { - - int colsU = _fullUV ? hhU.rows() : _diagSize; - auto temp1 = NDArrayFactory::create(_u.ordering(), {hhU.rows(), colsU}, _u.getContext()); - temp1.setIdentity(); - _u = temp1; - - auto temp2 = _u({0,_diagSize, 0,_diagSize}, true); - temp2.assign(V({0,_diagSize, 0,_diagSize}, true)); - const_cast(hhU).mulLeft(_u); - } - - if (_calcV) { - - int colsV = _fullUV ? hhV.rows() : _diagSize; - auto temp1 = NDArrayFactory::create(_v.ordering(), {hhV.rows(), colsV}, _v.getContext()); - temp1.setIdentity(); - _v = temp1; - - auto temp2 = _v({0,_diagSize, 0,_diagSize}, true); - temp2.assign(U({0,_diagSize, 0,_diagSize}, true)); - const_cast(hhV).mulLeft(_v); - } -} - -////////////////////////////////////////////////////////////////////////// -template -void SVD::evalData(const NDArray& matrix) { - - const T almostZero = DataTypeUtils::min(); - - if(matrix.sizeAt(1) < _switchSize) { - - JacobiSVD jac(matrix, _calcU, _calcV, _fullUV); - - if(_calcU) - _u = jac._u; - if(_calcV) - _v = jac._v; - - _s.assign(jac._s); - - return; - } - - T scale = matrix.reduceNumber(reduce::AMax).e(0); - - if(scale == (T)0.) 
- scale = 1.; - - NDArray copy; - if(_transp) - copy = matrix.transpose(); - else - copy = matrix / scale; - - BiDiagonalUp biDiag(copy); - - _u = 0.; - _v = 0.; - - auto temp1 = biDiag._HHbidiag.transpose(); - auto temp2 = _m({0,_diagSize, 0,0}, true); - temp2.assign(temp1); - - auto temp3 = _m({_m.sizeAt(0)-1,_m.sizeAt(0), 0,0}, true); - temp3.assign(0.); - - DivideAndConquer(0, _diagSize - 1, 0, 0, 0); - - for (int i = 0; i < _diagSize; ++i) { - T a = math::nd4j_abs(_m.e(i, i)); - _s.p(i, a * scale); - if (a < almostZero) { - auto temp = _s({i+1,_diagSize, 0,0}, true); - temp.assign(0.); - break; - } - else if (i == _diagSize-1) - break; - } - - if(_transp) - exchangeUV(biDiag.makeHHsequence('v'), biDiag.makeHHsequence('u'), _v, _u); - else - exchangeUV(biDiag.makeHHsequence('u'), biDiag.makeHHsequence('v'), _u, _v); -} - - -BUILD_SINGLE_TEMPLATE(template class ND4J_EXPORT SVD,,FLOAT_TYPES); - - ////////////////////////////////////////////////////////////////////////// // svd operation, this function is not method of SVD class, it is standalone function template @@ -972,9 +67,10 @@ static void svd_(const NDArray* x, const std::vector& outArrs, const b } } - void svd(sd::LaunchContext * context, const NDArray* x, const std::vector& outArrs, const bool fullUV, const bool calcUV, const int switchNum) { - BUILD_SINGLE_SELECTOR(x->dataType(), svd_, (x, outArrs, fullUV, calcUV, switchNum), FLOAT_TYPES); - } +////////////////////////////////////////////////////////////////////////// +void svd(sd::LaunchContext * context, const NDArray* x, const std::vector& outArrs, const bool fullUV, const bool calcUV, const int switchNum) { + BUILD_SINGLE_SELECTOR(x->dataType(), svd_, (x, outArrs, fullUV, calcUV, switchNum), FLOAT_TYPES); +} } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/top_k.cpp b/libnd4j/include/ops/declarable/helpers/cpu/top_k.cpp index fdab43261..65edeb71b 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/top_k.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/top_k.cpp @@ -73,8 +73,8 @@ namespace helpers { NDArray sortedVals = NDArrayFactory::create('c', {k}, input->getContext()); NDArray topIndices = NDArrayFactory::create('c', {k}, input->getContext()); for (uint pos = 0; pos < k; ++pos) { - topIndices.t(pos) = pos; - topValues.t(pos) = trial.t(pos); + topIndices.r(pos) = pos; + topValues.r(pos) = trial.t(pos); } //std::vector sortedVals(topValues); sortedVals.assign(topValues);// = NDArrayFactory::create('c', {k}); @@ -93,9 +93,9 @@ namespace helpers { T* topBegin = reinterpret_cast(topValues.buffer()); T* topEnd = topBegin + k; auto exchangePos = std::distance(topBegin, std::find(topBegin, topEnd, sortedVals.t(0))); - topValues.t(exchangePos) = val; //*exchangeIt = val; - topIndices.t(exchangePos) = i; - sortedVals.t(0) = val; // suppress in sorted + topValues.r(exchangePos) = val; //*exchangeIt = val; + topIndices.r(exchangePos) = i; + sortedVals.r(0) = val; // suppress in sorted //std::sort(sortedVals.begin(), sortedVals.end()); // sorted in ascending order SpecialMethods::sortGeneric(sortedVals.buffer(), sortedVals.shapeInfo(), false); } @@ -107,7 +107,7 @@ namespace helpers { for (Nd4jLong j = 0; j < width; j++) for (uint pos = 0; pos < k; ++pos) if (topValues.t(pos) == trial.t(j)) - topIndices.t(pos) = j; + topIndices.r(pos) = j; } else { // else sort by indices std::map sortValsMap; @@ -121,8 +121,8 @@ namespace helpers { //}); Nd4jLong e = 0; for (auto it = sortValsMap.begin(); it != sortValsMap.end(); ++it, e++) { - topIndices.t(e) = it->first; - 
topValues.t(e) = it->second; + topIndices.r(e) = it->first; + topValues.r(e) = it->second; } } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/triangular_solve.cpp b/libnd4j/include/ops/declarable/helpers/cpu/triangular_solve.cpp index bcf406392..86847da16 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/triangular_solve.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/triangular_solve.cpp @@ -39,17 +39,17 @@ namespace helpers { * * */ template - static void lowerTriangularSolve(sd::LaunchContext * context, NDArray* leftInput, NDArray* rightInput, bool adjoint, NDArray* output) { + static void lowerTriangularSolve(sd::LaunchContext * context, NDArray const * leftInput, NDArray const* rightInput, bool const unitsOnDiag, NDArray* output) { auto rows = leftInput->rows(); auto cols = rightInput->columns(); - //output->t(0,0) = rightInput->t(0,0) / leftInput->t(0,0); + //output->r(0,0) = rightInput->t(0,0) / leftInput->t(0,0); for (Nd4jLong r = 0; r < rows; r++) { for (Nd4jLong j = 0; j < cols; j++) { auto sum = rightInput->t(r, j); for (Nd4jLong c = 0; c < r; c++) { sum -= leftInput->t(r, c) * output->t(c, j); } - output->t(r, j) = sum / leftInput->t(r, r); + output->r(r, j) = unitsOnDiag?sum: sum / leftInput->t(r, r); } } } @@ -69,7 +69,7 @@ namespace helpers { * */ template - static void upperTriangularSolve(sd::LaunchContext * context, NDArray* leftInput, NDArray* rightInput, bool adjoint, NDArray* output) { + static void upperTriangularSolve(sd::LaunchContext* context, NDArray const* leftInput, NDArray const* rightInput, bool const unitsOnDiag, NDArray* output) { auto rows = leftInput->rows(); auto cols = rightInput->columns(); for (Nd4jLong r = rows; r > 0; r--) { @@ -78,11 +78,31 @@ namespace helpers { for (Nd4jLong c = r; c < rows; c++) { sum -= leftInput->t(r - 1, c) * output->t(c, j); } - output->t(r - 1, j) = sum / leftInput->t(r - 1, r - 1); + output->r(r - 1, j) = unitsOnDiag? 
sum : sum / leftInput->t(r - 1, r - 1); } } } + /// triangularSolve2D - 2D implementation of triangularSolveFunctor + /// \tparam T - type of NDArray output + /// \param context - launch context pointer + /// \param leftInput - T matrix of equation Tx = b + /// \param rightInput - b vector of equation Tx = b + /// \param lower - lower or upper triangular matrix + /// \param unitsOnDiag - solve for case when only units (1.0) on diagonal is assumed + /// \param output - output vector (x on equation Tx = b) + /// + template + void triangularSolve2D(sd::LaunchContext* context, NDArray const& leftInput, NDArray const& rightInput, bool const lower, bool const unitsOnDiag, NDArray& output) { + if (lower) { + lowerTriangularSolve(context, &leftInput, &rightInput, unitsOnDiag, &output); + } + else { + upperTriangularSolve(context, &leftInput, &rightInput, unitsOnDiag, &output); + } + } + BUILD_SINGLE_TEMPLATE(template void triangularSolve2D, (sd::LaunchContext* context, NDArray const& leftInput, NDArray const& rightInput, bool const lower, bool const unitsOnDiag, NDArray& output), FLOAT_TYPES); + template static int triangularSolveFunctor_(sd::LaunchContext * context, NDArray* leftInput, NDArray* rightInput, bool lower, bool adjoint, NDArray* output) { auto leftPart = leftInput->allTensorsAlongDimension({-2, -1}); @@ -92,9 +112,9 @@ namespace helpers { auto batchLoop = PRAGMA_THREADS_FOR { for (auto i = start; i < stop; i++) { if (lower) { - lowerTriangularSolve(context, leftPart[i], rightPart[i], adjoint, outputPart[i]); + lowerTriangularSolve(context, leftPart[i], rightPart[i], false, outputPart[i]); } else { - upperTriangularSolve(context, leftPart[i], rightPart[i], adjoint, outputPart[i]); + upperTriangularSolve(context, leftPart[i], rightPart[i], false, outputPart[i]); } } }; @@ -116,13 +136,13 @@ namespace helpers { if (!lower) { for (Nd4jLong r = 0; r < rows; r++) { for (Nd4jLong c = 0; c <= r; c++) { - outputPart[batch]->t(r, c) = inputPart[batch]->t(c, r); + outputPart[batch]->r(r, c) = inputPart[batch]->t(c, r); } } } else { for (Nd4jLong r = 0; r < rows; r++) { for (Nd4jLong c = r; c < cols; c++) { - outputPart[batch]->t(r, c) = inputPart[batch]->t(c, r); + outputPart[batch]->r(r, c) = inputPart[batch]->t(c, r); } } } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/triu.cpp b/libnd4j/include/ops/declarable/helpers/cpu/triu.cpp index 4194e976c..eb2074865 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/triu.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/triu.cpp @@ -38,7 +38,7 @@ static void triuBP_(sd::LaunchContext * context, const NDArray& input, const NDA auto func = PRAGMA_THREADS_FOR { for (auto i = start; i < stop; i++) { if (dOdI.t(i) != static_cast(0.f)) - dOdI.t(i) = static_cast(1.f); + dOdI.r(i) = static_cast(1.f); } }; samediff::Threads::parallel_for(func, 0, dLen); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/triangular_solve.cu b/libnd4j/include/ops/declarable/helpers/cuda/triangular_solve.cu index c8f26de6f..6302262be 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/triangular_solve.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/triangular_solve.cu @@ -41,9 +41,9 @@ namespace sd { * * */ template - static __device__ void lowerTriangularSolve(T const* leftInput, Nd4jLong const* leftInputShape, + static _CUDA_HD void lowerTriangularSolve(T const* leftInput, Nd4jLong const* leftInputShape, T const* rightInput, Nd4jLong const* rightInputShape, - bool const adjoint, T* output, Nd4jLong const* outputShape, + bool const unitOnDiag, T* 
output, const Nd4jLong* outputShape, Nd4jLong rows, Nd4jLong cols) { for (auto r = 0; r < rows; r++) { @@ -62,7 +62,7 @@ namespace sd { auto zcIndex = shape::getOffset(outputShape, posZ, 0); sum -= leftInput[xcIndex] * output[zcIndex]; } - output[zIndex] = sum / leftInput[xIndex]; + output[zIndex] = unitOnDiag?sum:sum / leftInput[xIndex]; } } } @@ -82,9 +82,9 @@ namespace sd { * */ template - static __device__ void upperTriangularSolve(T const* leftInput, Nd4jLong const* leftInputShape, - T const* rightInput, Nd4jLong const* rightInputShape, bool const adjoint, T* output, - Nd4jLong const* outputShape, Nd4jLong rows, Nd4jLong cols) { + static _CUDA_HD void upperTriangularSolve(T const* leftInput, Nd4jLong const* leftInputShape, + T const* rightInput, Nd4jLong const* rightInputShape, bool const unitOnDiag, T* output, + const Nd4jLong* outputShape, Nd4jLong rows, Nd4jLong cols) { for (auto r = rows; r > 0; r--) { for (auto j = 0; j < cols; j++) { @@ -101,16 +101,16 @@ namespace sd { auto xcIndex = shape::getOffset(leftInputShape, pos, 0); sum -= leftInput[xcIndex] * output[zcIndex]; } - output[zIndex] = sum / leftInput[xIndex]; + output[zIndex] = unitOnDiag?sum:sum / leftInput[xIndex]; } } } template static __global__ void triangularSolveKernel(T const* leftInput, Nd4jLong const* leftPartShape, - T const* rightInput, Nd4jLong const* rightPartShape, bool const lower, bool const adjoint, T* output, - Nd4jLong const* outputShape, Nd4jLong const* tadLeftShape, Nd4jLong const* tadLeftOffset, Nd4jLong const* tadRightShape, - Nd4jLong const* tadRightOffset, Nd4jLong const* tadOutputShape, Nd4jLong const* tadOutputOffset, Nd4jLong batchNum) { + T const* rightInput, Nd4jLong const* rightPartShape, bool const lower, bool const unitsOnDiag, T* output, + const Nd4jLong* outputShape, const Nd4jLong* tadLeftShape, const Nd4jLong* tadLeftOffset, const Nd4jLong* tadRightShape, + const Nd4jLong* tadRightOffset, const Nd4jLong* tadOutputShape, const Nd4jLong* tadOutputOffset, Nd4jLong batchNum) { __shared__ Nd4jLong rows; __shared__ Nd4jLong cols; @@ -130,16 +130,16 @@ namespace sd { auto pRightPart = rightInput + tadRightOffset[i]; auto pOutputPart = output + tadOutputOffset[i]; if (lower) { - lowerTriangularSolve(pLeftPart, tadLeftShape, pRightPart, tadRightShape, adjoint, pOutputPart, tadOutputShape, rows, cols); + lowerTriangularSolve(pLeftPart, tadLeftShape, pRightPart, tadRightShape, unitsOnDiag, pOutputPart, tadOutputShape, rows, cols); } else { - upperTriangularSolve(pLeftPart, tadLeftShape, pRightPart, tadRightShape, adjoint, pOutputPart, tadOutputShape, rows, cols); + upperTriangularSolve(pLeftPart, tadLeftShape, pRightPart, tadRightShape, unitsOnDiag, pOutputPart, tadOutputShape, rows, cols); } } } template static int triangularSolveFunctor_(sd::LaunchContext * context, NDArray* leftInput, NDArray* rightInput, - bool lower, bool adjoint, NDArray* output) { + bool lower, bool unitsOnDiag, NDArray* output) { NDArray::prepareSpecialUse({output}, {leftInput, rightInput}); auto leftTads = ConstantTadHelper::getInstance()->tadForDimensions(leftInput->shapeInfo(), {-2, -1}); auto rightTads = ConstantTadHelper::getInstance()->tadForDimensions(rightInput->shapeInfo(), {-2, -1}); @@ -150,7 +150,7 @@ namespace sd { T const* rightBuf = reinterpret_cast(rightInput->specialBuffer()); T* outputBuf = reinterpret_cast(output->specialBuffer()); triangularSolveKernel<<<128, 128, 256, *stream>>>(leftBuf, leftInput->specialShapeInfo(), - rightBuf, rightInput->specialShapeInfo(), lower, adjoint, outputBuf, 
output->specialShapeInfo(), + rightBuf, rightInput->specialShapeInfo(), lower, unitsOnDiag, outputBuf, output->specialShapeInfo(), leftTads.specialShapeInfo(), leftTads.specialOffsets(), rightTads.specialShapeInfo(), rightTads.specialOffsets(), outputTads.specialShapeInfo(), outputTads.specialOffsets(), leftTads.numberOfTads()); @@ -161,8 +161,41 @@ namespace sd { } - int triangularSolveFunctor(sd::LaunchContext * context, NDArray* leftInput, NDArray* rightInput, bool lower, bool adjoint, NDArray* output) { - BUILD_SINGLE_SELECTOR(leftInput->dataType(), return triangularSolveFunctor_, (context, leftInput, rightInput, lower, adjoint, output), FLOAT_NATIVE); + /// triangularSolve2D - 2D implementation of triangularSolveFunctor + /// \tparam T - type of NDArray output + /// \param context - launch context pointer + /// \param leftInput - T matrix of equation Tx = b + /// \param rightInput - b vector of equation Tx = b + /// \param lower - lower or upper triangular matrix + /// \param unitsOnDiag - solve for case when only units (1.0) on diagonal is assumed + /// \param output - output vector (x on equation Tx = b) + /// + template + void triangularSolve2D(sd::LaunchContext* context, const NDArray& leftInput, const NDArray& rightInput, bool const lower, bool const unitsOnDiag, NDArray& output) { + + triangularSolveFunctor_(context, const_cast(&leftInput), const_cast(&rightInput), lower, unitsOnDiag, &output); + + // leftInput.syncToHost(); rightInput.syncToHost(); output.syncToHost(); + // T const* pLeftPart = (T const*)leftInput.getBuffer(); + // T const* pRightPart = (T const*)rightInput.getBuffer(); + // T* pOutputPart = (T*)output.buffer(); + // auto rows = leftInput.rows(); + // auto cols = leftInput.columns(); + // if (lower) { + // lowerTriangularSolve(pLeftPart, leftInput.shapeInfo(), pRightPart, rightInput.shapeInfo(), unitsOnDiag, pOutputPart, output.shapeInfo(), rows, cols); + // } else { + // upperTriangularSolve(pLeftPart, leftInput.shapeInfo(), pRightPart, rightInput.shapeInfo(), unitsOnDiag, pOutputPart, output.shapeInfo(), rows, cols); + // } + // output.syncToDevice(); + } + BUILD_SINGLE_TEMPLATE(template void triangularSolve2D, (sd::LaunchContext* context, NDArray const& leftInput, NDArray const& rightInput, bool const lower, bool const unitsOnDiag, NDArray& output), FLOAT_TYPES); +// template void triangularSolve2D(sd::LaunchContext* context, NDArray const& leftInput, NDArray const& rightInput, bool const lower, bool const unitsOnDiag, NDArray& output); +// template void triangularSolve2D(sd::LaunchContext* context, NDArray const& leftInput, NDArray const& rightInput, bool const lower, bool const unitsOnDiag, NDArray& output); +// template void triangularSolve2D(sd::LaunchContext* context, NDArray const& leftInput, NDArray const& rightInput, bool const lower, bool const unitsOnDiag, NDArray& output); +// template void triangularSolve2D(sd::LaunchContext* context, NDArray const& leftInput, NDArray const& rightInput, bool const lower, bool const unitsOnDiag, NDArray& output); + + int triangularSolveFunctor(sd::LaunchContext * context, NDArray* leftInput, NDArray* rightInput, bool lower, bool unitsOnDiag, NDArray* output) { + BUILD_SINGLE_SELECTOR(leftInput->dataType(), return triangularSolveFunctor_, (context, leftInput, rightInput, lower, unitsOnDiag, output), FLOAT_NATIVE); } template @@ -229,6 +262,76 @@ namespace sd { BUILD_SINGLE_SELECTOR(input->dataType(), adjointTriangularMatrix_, (context, input, lower, output), FLOAT_NATIVE); } - } - } +/* + 
////////////////////////////////////////////////////////////////////////// + template + void triangularSolve2D(sd::LaunchContext* context, NDArray const& A, NDArray const& b, bool const lower, bool const unitsOnDiag, NDArray& x) { + + if(A.rankOf() != 2) + throw std::runtime_error("triangularSolve2D: input matrix A must be 2D !"); + + int temp; + + const bool isBvector = b.isCommonVector(temp); + const bool isXvector = x.isCommonVector(temp); + + if(A.sizeAt(0) != (isBvector ? b.lengthOf() : b.sizeAt(0))) + throw std::runtime_error("triangularSolve2D: A and b must have the same number of rows !"); + + if(A.sizeAt(1) != (isXvector ? x.lengthOf() : x.sizeAt(0))) + throw std::runtime_error("triangularSolve2D: columns number of array A must be equal to rows number of array x !"); + + if(isBvector) { + + if(lower) { + + for (int i = 0; i < A.sizeAt(0); ++i) { + T sum = b.t(i); + for (int j = 0; j < i; ++j) + sum -= A.t(i,j) * x.t(j); + x.r(i) = unitsOnDiag ? sum : sum / A.t(i,i); + } + } + else { + + for (int i = A.sizeAt(0) - 1; i >= 0; --i) { + T sum = b.t(i); + for (int j = i + 1; j < A.sizeAt(1); ++j) + sum -= A.t(i,j) * x.t(j); + x.r(i) = unitsOnDiag ? sum : sum / A.t(i,i); + } + } + } + else { + + if(lower) { + + for (int bCol = 0; bCol < b.sizeAt(1); ++bCol) { + for (int i = 0; i < A.sizeAt(0); ++i) { + T sum = b.t(i, bCol); + for (int j = 0; j < i; ++j) + sum -= A.t(i,j) * x.t(j, bCol); + x.r(i, bCol) = unitsOnDiag ? sum : sum / A.t(i,i); + } + } + } + else { + + for (int bCol = 0; bCol < b.sizeAt(1); ++bCol) { + for (int i = A.sizeAt(0) - 1; i >= 0; --i) { + T sum = b.t(i, bCol); + for (int j = i + 1; j < A.sizeAt(1); ++j) + sum -= A.t(i,j) * x.t(j, bCol); + x.r(i, bCol) = unitsOnDiag ? sum : sum / A.t(i,i); + } + } + } + } + } + BUILD_SINGLE_TEMPLATE(template void triangularSolve2D, (sd::LaunchContext* context, NDArray const& leftInput, NDArray const& rightInput, bool const lower, bool const unitsOnDiag, NDArray& output), FLOAT_TYPES); +*/ + + +} +} } diff --git a/libnd4j/include/ops/declarable/helpers/impl/sparse_to_dense.cpp b/libnd4j/include/ops/declarable/helpers/impl/sparse_to_dense.cpp index bbcb1eca3..4baa36d65 100644 --- a/libnd4j/include/ops/declarable/helpers/impl/sparse_to_dense.cpp +++ b/libnd4j/include/ops/declarable/helpers/impl/sparse_to_dense.cpp @@ -50,6 +50,7 @@ namespace sd { // make sure host buffer is updated values.syncToHost(); indices.syncToHost(); + output.syncToHost(); auto rank = output.rankOf(); diff --git a/libnd4j/include/ops/declarable/helpers/impl/sqrtm.cpp b/libnd4j/include/ops/declarable/helpers/impl/sqrtm.cpp new file mode 100644 index 000000000..b8cc6d8ac --- /dev/null +++ b/libnd4j/include/ops/declarable/helpers/impl/sqrtm.cpp @@ -0,0 +1,66 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License.
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author Yurii Shyrma (iuriish@yahoo.com) +// + + +#include +#include + +namespace sd { +namespace ops { +namespace helpers { + +////////////////////////////////////////////////////////////////////////// +template +static void sqrtm_(const NDArray* x, NDArray* z) { + + + if(x->rankOf() == 2) { + + ops::helpers::Sqrtm::calc(*x, *z); + } + else { + + auto listX = x->allTensorsAlongDimension({-2, -1}); + auto listZ = z->allTensorsAlongDimension({-2, -1}); + + auto func = PRAGMA_THREADS_FOR { + + for (auto i = start; i < stop; i++) + ops::helpers::Sqrtm::calc(*listX.at(i), *listZ.at(i)); + }; + + samediff::Threads::parallel_tad(func, 0, listX.size()); + } +} + + +////////////////////////////////////////////////////////////////////////// +void sqrtm(sd::LaunchContext* context, const NDArray* x, NDArray* z) { + + x->syncToHost(); + BUILD_SINGLE_SELECTOR(z->dataType(), sqrtm_, (x, z), FLOAT_TYPES); + z->syncToDevice(); +} + + + +} +} +} diff --git a/libnd4j/include/ops/declarable/helpers/sqrtm.h b/libnd4j/include/ops/declarable/helpers/sqrtm.h new file mode 100644 index 000000000..2a123d420 --- /dev/null +++ b/libnd4j/include/ops/declarable/helpers/sqrtm.h @@ -0,0 +1,39 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author Yurii Shyrma (iuriish@yahoo.com) +// + +#ifndef LIBND4J_SQRTM_HELPER_H +#define LIBND4J_SQRTM_HELPER_H + +#include +#include "array/NDArray.h" + +namespace sd { +namespace ops { +namespace helpers { + +////////////////////////////////////////////////////////////////////////// +void sqrtm(sd::LaunchContext* context, const NDArray* x, NDArray* z); + + +} +} +} + +#endif //LIBND4J_SQRTM_HELPER_H diff --git a/libnd4j/include/ops/declarable/helpers/triangular_solve.h b/libnd4j/include/ops/declarable/helpers/triangular_solve.h index 73965f8c5..94e0198af 100644 --- a/libnd4j/include/ops/declarable/helpers/triangular_solve.h +++ b/libnd4j/include/ops/declarable/helpers/triangular_solve.h @@ -26,7 +26,9 @@ namespace sd { namespace ops { namespace helpers { - int triangularSolveFunctor(sd::LaunchContext* context, NDArray* leftInput, NDArray* rightInput, bool lower, bool adjoint, NDArray* output); + int triangularSolveFunctor(sd::LaunchContext* context, NDArray* leftInput, NDArray* rightInput, bool lower, bool unitsOnDiag, NDArray* output); + template + void triangularSolve2D(sd::LaunchContext* context, const NDArray& leftInput, const NDArray& rightInput, const bool lower, const bool unitsOnDiag, NDArray& output); void adjointMatrix(sd::LaunchContext* context, NDArray const* input, bool const lower, NDArray* output); } } diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests11.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests11.cpp index 4139e9785..e4391c688 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests11.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests11.cpp @@ -47,7 +47,7 @@ TEST_F(DeclarableOpsTests11, test_listdiff_1) { auto result = op.evaluate({&x, &y}, {}, {}); ASSERT_EQ(Status::OK(), result.status()); - + } /////////////////////////////////////////////////////////////////// @@ -392,10 +392,10 @@ TEST_F(DeclarableOpsTests11, log_loss_grad_test12) { predictions.linspace(0.04, 0.04); labels.linspace(1); weights.assign(0.5); - weights.t(0) = 0.; - weights.t(1) = 0.; - weights.t(2) = 0.; - weights.t(3) = 0.; + weights.r(0) = 0.; + weights.r(1) = 0.; + weights.r(2) = 0.; + weights.r(3) = 0.; sd::ops::log_loss_grad op; @@ -431,9 +431,9 @@ TEST_F(DeclarableOpsTests11, log_loss_grad_test13) { predictions.linspace(0.04, 0.04); labels.linspace(1); weights.assign(0.5); - weights.t(0) = 0.; - weights.t(1) = 0.; - weights.t(2) = 0.; + weights.r(0) = 0.; + weights.r(1) = 0.; + weights.r(2) = 0.; sd::ops::log_loss_grad op; auto results = op.evaluate({&predictions, &weights, &labels}, {1e-7}, {3}); @@ -1608,7 +1608,7 @@ TEST_F(DeclarableOpsTests11, Solve_Test_2) { // z->printIndexedBuffer("Solve 4x4"); ASSERT_TRUE(exp.equalsTo(z)); - + } //////////////////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests11, Solve_Test_3) { @@ -1645,7 +1645,7 @@ TEST_F(DeclarableOpsTests11, Solve_Test_3) { // z->printIndexedBuffer("Solve 4x4"); ASSERT_TRUE(exp.equalsTo(z)); - + } //////////////////////////////////////////////////////////////////////////////// @@ -1678,7 +1678,7 @@ TEST_F(DeclarableOpsTests11, Solve_Test_4) { // exp.printBuffer("4 Expec 4x4"); ASSERT_TRUE(exp.equalsTo(z)); - + } //////////////////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests11, Solve_Test_4_1) { @@ -1707,7 +1707,7 @@ TEST_F(DeclarableOpsTests11, Solve_Test_4_1) { // 
exp.printBuffer("4 Expec 4x4"); ASSERT_TRUE(exp.equalsTo(z)); - + } //////////////////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests11, Solve_Test_4_2) { @@ -1740,7 +1740,7 @@ TEST_F(DeclarableOpsTests11, Solve_Test_4_2) { // exp.printBuffer("4_2 Triangular_Expec 3x3"); ASSERT_TRUE(exp.equalsTo(z)); - + } //////////////////////////////////////////////////////////////////////////////// @@ -1774,7 +1774,7 @@ TEST_F(DeclarableOpsTests11, Solve_Test_4_3) { // exp.printBuffer("4_3 Triangular_Expec 3x3"); ASSERT_TRUE(exp.equalsTo(z)); - + } //////////////////////////////////////////////////////////////////////////////// @@ -1808,7 +1808,7 @@ TEST_F(DeclarableOpsTests11, Solve_Test_4_4) { // exp.printBuffer("4_4 Expec 3x3"); ASSERT_TRUE(exp.equalsTo(z)); - + } //////////////////////////////////////////////////////////////////////////////// @@ -1842,7 +1842,7 @@ TEST_F(DeclarableOpsTests11, Solve_Test_4_5) { // exp.printBuffer("4_5 Expec 3x3"); ASSERT_TRUE(exp.equalsTo(z)); - + } //////////////////////////////////////////////////////////////////////////////// @@ -1876,7 +1876,7 @@ TEST_F(DeclarableOpsTests11, Solve_Test_4_6) { // exp.printBuffer("4_6 Expec 3x3"); ASSERT_TRUE(exp.equalsTo(z)); - + } //////////////////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests11, Solve_Test_4_7) { @@ -1913,7 +1913,7 @@ TEST_F(DeclarableOpsTests11, Solve_Test_4_7) { // exp.printBuffer("4_7 Expec 3x3"); ASSERT_TRUE(exp.equalsTo(z)); - + } //////////////////////////////////////////////////////////////////////////////// @@ -1947,7 +1947,7 @@ TEST_F(DeclarableOpsTests11, Solve_Test_5) { // exp.printBuffer("4 Expec 4x4"); ASSERT_TRUE(exp.equalsTo(z)); - + } //////////////////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests11, SolveLS_Test_1) { @@ -2399,10 +2399,10 @@ TEST_F(DeclarableOpsTests11, mean_sqerr_loss_grad_test12) { predictions.linspace(0.04, 0.04); labels.linspace(1); weights.assign(0.5); - weights.t(0) = 0.; - weights.t(1) = 0.; - weights.t(2) = 0.; - weights.t(3) = 0.; + weights.r(0) = 0.; + weights.r(1) = 0.; + weights.r(2) = 0.; + weights.r(3) = 0.; sd::ops::mean_sqerr_loss_grad op; auto results = op.evaluate({&predictions, &weights, &labels}, {}, {3}); @@ -2436,9 +2436,9 @@ TEST_F(DeclarableOpsTests11, mean_sqerr_loss_grad_test13) { predictions.linspace(0.04, 0.04); labels.linspace(1); weights.assign(0.5); - weights.t(0) = 0.; - weights.t(1) = 0.; - weights.t(2) = 0.; + weights.r(0) = 0.; + weights.r(1) = 0.; + weights.r(2) = 0.; sd::ops::mean_sqerr_loss_grad op; auto results = op.evaluate({&predictions, &weights, &labels}, {}, {3}); @@ -2467,7 +2467,7 @@ TEST_F(DeclarableOpsTests11, SquaredSubtractTest_Test1) { ASSERT_EQ(Status::OK(), result.status()); ASSERT_TRUE(exp.equalsTo(result.at(0))); - + } TEST_F(DeclarableOpsTests11, SquaredSubtractTest_Test2) { @@ -2478,7 +2478,7 @@ TEST_F(DeclarableOpsTests11, SquaredSubtractTest_Test2) { auto result = op.evaluate({&x, &y}, {}, {}); ASSERT_EQ(Status::OK(), result.status()); ASSERT_TRUE(exp.equalsTo(result.at(0))); - + } TEST_F(DeclarableOpsTests11, SquaredSubtractTest_Test3) { @@ -2490,7 +2490,7 @@ TEST_F(DeclarableOpsTests11, SquaredSubtractTest_Test3) { auto result = op.evaluate({&x, &y, &eps}, {}, {}); ASSERT_EQ(Status::OK(), result.status()); ASSERT_TRUE(exp.equalsTo(result.at(0))); - + } /////////////////////////////////////////////////////////////////// @@ -2830,10 +2830,10 @@ TEST_F(DeclarableOpsTests11, 
absolute_difference_loss_grad_test12) { predictions.linspace(0.04, 0.04); labels.linspace(1); weights.assign(0.5); - weights.t(0) = 0.; - weights.t(1) = 0.; - weights.t(2) = 0.; - weights.t(3) = 0.; + weights.r(0) = 0.; + weights.r(1) = 0.; + weights.r(2) = 0.; + weights.r(3) = 0.; sd::ops::absolute_difference_loss_grad op; auto results = op.evaluate({&predictions, &weights, &labels}, {}, {3}); @@ -2867,9 +2867,9 @@ TEST_F(DeclarableOpsTests11, absolute_difference_loss_grad_test13) { predictions.linspace(0.04, 0.04); labels.linspace(1); weights.assign(0.5); - weights.t(0) = 0.; - weights.t(1) = 0.; - weights.t(2) = 0.; + weights.r(0) = 0.; + weights.r(1) = 0.; + weights.r(2) = 0.; sd::ops::absolute_difference_loss_grad op; auto results = op.evaluate({&predictions, &weights, &labels}, {}, {3}); @@ -3305,10 +3305,10 @@ TEST_F(DeclarableOpsTests11, sigm_cross_entropy_loss_grad_test12) { logits.linspace(-0.08, 0.04); labels.linspace(1); weights.assign(0.5); - weights.t(0) = 0.; - weights.t(1) = 0.; - weights.t(2) = 0.; - weights.t(3) = 0.; + weights.r(0) = 0.; + weights.r(1) = 0.; + weights.r(2) = 0.; + weights.r(3) = 0.; sd::ops::sigm_cross_entropy_loss_grad op; @@ -3344,9 +3344,9 @@ TEST_F(DeclarableOpsTests11, sigm_cross_entropy_loss_grad_test13) { logits.linspace(-0.08, 0.04); labels.linspace(1); weights.assign(0.5); - weights.t(0) = 0.; - weights.t(1) = 0.; - weights.t(2) = 0.; + weights.r(0) = 0.; + weights.r(1) = 0.; + weights.r(2) = 0.; sd::ops::sigm_cross_entropy_loss_grad op; auto results = op.evaluate({&logits, &weights, &labels}, {0.3}, {3}); diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests13.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests13.cpp index c37f3fe4a..c7222e6f7 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests13.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests13.cpp @@ -2065,500 +2065,6 @@ TEST_F(DeclarableOpsTests13, lstmLayer_12) { #endif } -/////////////////////////////////////////////////////////////////// -TEST_F(DeclarableOpsTests13, lstmLayer_bp_1) { - - const int sL = 3; - const int bS = 2; - const int nIn = 2; - const int nOut = 3; - - const int dataFormat = 0; // [sL,bS,nIn] - const int directionMode = 0; // forward - const int gateAct = 2; // sigmoid activation for input (i), forget (f) and output (o) gates - const int cellAct = 0; // tanh activation for cell state - const int outAct = 0; // tanh activation for output - - const bool hasBiases = true; // biases array is provided - const bool hasSeqLen = false; // seqLen array is not provided - const auto hasInitH = true; // initial output is provided - const auto hasInitC = true; // initial cell state is provided - const auto hasPH = true; // peephole connections are absent - const auto retFullSeq = true; // dLdh per each time step - const auto retLastH = true; // output at last time step - const auto retLastC = true; // cells state at last time step - - const double cellClip = 0.5; // clipping - - NDArray x('c', {sL, bS, nIn}, sd::DataType::DOUBLE); - NDArray Wx('c', {nIn, 4*nOut}, sd::DataType::DOUBLE); - NDArray Wr('c', {nOut, 4*nOut}, sd::DataType::DOUBLE); - NDArray b('c', {4*nOut}, sd::DataType::DOUBLE); - NDArray hI('c', {bS, nOut}, sd::DataType::DOUBLE); - NDArray cI('c', {bS, nOut}, sd::DataType::DOUBLE); - NDArray Wp('c', {3*nOut}, sd::DataType::DOUBLE); - NDArray dLdh('c', {sL, bS, nOut}, sd::DataType::DOUBLE); - NDArray dLdhL('c', {bS, nOut}, sd::DataType::DOUBLE); - NDArray dLdcL('c', {bS, nOut}, sd::DataType::DOUBLE); - - x.linspace(-2,0.1); - 
hI.linspace(-1.5,0.1); - cI.linspace(0.7,-0.1); - Wx.linspace(1,-0.1); - Wr.linspace(-1,0.1); - Wp.linspace(0.2,0.2); - b.linspace(1,-0.15); - - std::vector tArgs = {cellClip}; - std::vector iArgs = {dataFormat, directionMode, gateAct, cellAct, outAct}; - std::vector bArgs = {hasBiases, hasSeqLen, hasInitH, hasInitC, hasPH, retFullSeq, retLastH, retLastC}; - - const OpArgsHolder argsHolderFF({&x, &Wx, &Wr, &b, &hI, &cI, &Wp}, tArgs, iArgs, bArgs); - const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &hI, &cI, &Wp, &dLdh, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); - - sd::ops::lstmLayer opFF; - sd::ops::lstmLayer_bp opBP; - - const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP); - - ASSERT_TRUE(isGradCorrect); -} - -/////////////////////////////////////////////////////////////////// -TEST_F(DeclarableOpsTests13, lstmLayer_bp_2) { - - const int sL = 3; - const int bS = 2; - const int nIn = 2; - const int nOut = 3; - - const int dataFormat = 1; // [bS,sL,nIn] - const int directionMode = 0; // forward - const int gateAct = 2; // sigmoid activation for input (i), forget (f) and output (o) gates - const int cellAct = 0; // tanh activation for cell state - const int outAct = 0; // tanh activation for output - - const bool hasBiases = true; // biases array is provided - const bool hasSeqLen = false; // seqLen array is not provided - const auto hasInitH = true; // initial output is provided - const auto hasInitC = true; // initial cell state is provided - const auto hasPH = true; // peephole connections are absent - const auto retFullSeq = true; // return whole h {h_0, h_1, ... , h_sL-1}, [sL,bS,nOut] - const auto retLastH = false; // output at last time step - const auto retLastC = true; // cells state at last time step - - const double cellClip = 0.5; // clipping - - NDArray x('c', {bS, sL, nIn}, sd::DataType::DOUBLE); - NDArray Wx('c', {nIn, 4*nOut}, sd::DataType::DOUBLE); - NDArray Wr('c', {nOut, 4*nOut}, sd::DataType::DOUBLE); - NDArray b('c', {4*nOut}, sd::DataType::DOUBLE); - NDArray hI('c', {bS, nOut}, sd::DataType::DOUBLE); - NDArray cI('c', {bS, nOut}, sd::DataType::DOUBLE); - NDArray Wp('c', {3*nOut}, sd::DataType::DOUBLE); - NDArray dLdh('c', {bS, sL, nOut}, sd::DataType::DOUBLE); - NDArray dLdcL('c', {bS, nOut}, sd::DataType::DOUBLE); - - x.linspace(-2,0.1); - hI.linspace(-1.5,0.1); - cI.linspace(0.7,-0.1); - Wx.linspace(1,-0.1); - Wr.linspace(-1,0.1); - Wp.linspace(0.2,0.2); - b.linspace(1,-0.15); - - std::vector tArgs = {cellClip}; - std::vector iArgs = {dataFormat, directionMode, gateAct, cellAct, outAct}; - std::vector bArgs = {hasBiases, hasSeqLen, hasInitH, hasInitC, hasPH, retFullSeq, retLastH, retLastC}; - - const OpArgsHolder argsHolderFF({&x, &Wx, &Wr, &b, &hI, &cI, &Wp}, tArgs, iArgs, bArgs); - const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &hI, &cI, &Wp, &dLdh, &dLdcL}, tArgs, iArgs, bArgs); - - sd::ops::lstmLayer opFF; - sd::ops::lstmLayer_bp opBP; - - const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP, std::vector(), {0., 1.}, GradCheck::LossFunc::MEAN); - - ASSERT_TRUE(isGradCorrect); -} - -/////////////////////////////////////////////////////////////////// -TEST_F(DeclarableOpsTests13, lstmLayer_bp_3) { - - const int sL = 4; - const int bS = 3; - const int nIn = 3; - const int nOut = 2; - - const int dataFormat = 2; // [bS, nIn, sL] - const int directionMode = 0; // forward - const int gateAct = 2; // sigmoid activation for input (i), forget (f) and output (o) gates - const int cellAct = 0; // tanh 
activation for cell state - const int outAct = 0; // tanh activation for output - - const bool hasBiases = true; // biases array is provided - const bool hasSeqLen = true; // seqLen array is not provided - const auto hasInitH = true; // initial output is provided - const auto hasInitC = true; // initial cell state is provided - const auto hasPH = true; // peephole connections are absent - const auto retFullSeq = true; // dLdh per each time step - const auto retLastH = true; // output at last time step - const auto retLastC = true; // cells state at last time step - - const double cellClip = 0.5; // clipping - - NDArray x('c', {bS, nIn, sL}, sd::DataType::DOUBLE); - NDArray Wx('c', {nIn, 4*nOut}, sd::DataType::DOUBLE); - NDArray Wr('c', {nOut, 4*nOut}, sd::DataType::DOUBLE); - NDArray b('c', {4*nOut}, sd::DataType::DOUBLE); - NDArray seqLen('c', {bS}, {2,0,4}, sd::DataType::DOUBLE); - NDArray hI('c', {bS, nOut}, sd::DataType::DOUBLE); - NDArray cI('c', {bS, nOut}, sd::DataType::DOUBLE); - NDArray Wp('c', {3*nOut}, sd::DataType::DOUBLE); - NDArray dLdh('c', {bS, nOut, sL}, sd::DataType::DOUBLE); - NDArray dLdhL('c', {bS, nOut}, sd::DataType::DOUBLE); - NDArray dLdcL('c', {bS, nOut}, sd::DataType::DOUBLE); - - x.linspace(-2,0.1); - hI.linspace(-1.5,0.1); - cI.linspace(0.7,-0.1); - Wx.linspace(1,-0.1); - Wr.linspace(-1,0.1); - Wp.linspace(0.2,0.2); - b.linspace(1,-0.15); - - std::vector tArgs = {cellClip}; - std::vector iArgs = {dataFormat, directionMode, gateAct, cellAct, outAct}; - std::vector bArgs = {hasBiases, hasSeqLen, hasInitH, hasInitC, hasPH, retFullSeq, retLastH, retLastC}; - - const OpArgsHolder argsHolderFF({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp}, tArgs, iArgs, bArgs); - const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); - - sd::ops::lstmLayer opFF; - sd::ops::lstmLayer_bp opBP; - - const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP, {true, true, true, true, false, true, true, true}); - - ASSERT_TRUE(isGradCorrect); -} - -/////////////////////////////////////////////////////////////////// -TEST_F(DeclarableOpsTests13, lstmLayer_bp_4) { - - const int sL = 3; - const int bS = 2; - const int nIn = 2; - const int nOut = 3; - - const int dataFormat = 1; // [bS,sL,nIn] - const int directionMode = 1; // backward - const int gateAct = 2; // sigmoid activation for input (i), forget (f) and output (o) gates - const int cellAct = 0; // tanh activation for cell state - const int outAct = 0; // tanh activation for output - - const bool hasBiases = true; // biases array is provided - const bool hasSeqLen = false; // seqLen array is not provided - const auto hasInitH = true; // initial output is provided - const auto hasInitC = true; // initial cell state is provided - const auto hasPH = true; // peephole connections are absent - const auto retFullSeq = true; // dLdh per each time step - const auto retLastH = true; // output at last time step - const auto retLastC = true; // cells state at last time step - - const double cellClip = 0.5; // clipping - - NDArray x('c', {bS, sL, nIn}, sd::DataType::DOUBLE); - NDArray Wx('c', {nIn, 4*nOut}, sd::DataType::DOUBLE); - NDArray Wr('c', {nOut, 4*nOut}, sd::DataType::DOUBLE); - NDArray b('c', {4*nOut}, sd::DataType::DOUBLE); - NDArray hI('c', {bS, nOut}, sd::DataType::DOUBLE); - NDArray cI('c', {bS, nOut}, sd::DataType::DOUBLE); - NDArray Wp('c', {3*nOut}, sd::DataType::DOUBLE); - NDArray dLdh('c', {bS, sL, nOut}, sd::DataType::DOUBLE); - NDArray 
dLdhL('c', {bS, nOut}, sd::DataType::DOUBLE); - NDArray dLdcL('c', {bS, nOut}, sd::DataType::DOUBLE); - - x.linspace(-2,0.1); - hI.linspace(-1.5,0.1); - cI.linspace(0.7,-0.1); - Wx.linspace(1,-0.1); - Wr.linspace(-1,0.1); - Wp.linspace(0.2,0.2); - b.linspace(1,-0.15); - - std::vector tArgs = {cellClip}; - std::vector iArgs = {dataFormat, directionMode, gateAct, cellAct, outAct}; - std::vector bArgs = {hasBiases, hasSeqLen, hasInitH, hasInitC, hasPH, retFullSeq, retLastH, retLastC}; - - const OpArgsHolder argsHolderFF({&x, &Wx, &Wr, &b, &hI, &cI, &Wp}, tArgs, iArgs, bArgs); - const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &hI, &cI, &Wp, &dLdh, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); - - sd::ops::lstmLayer opFF; - sd::ops::lstmLayer_bp opBP; - - const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP); - - ASSERT_TRUE(isGradCorrect); -} - -/////////////////////////////////////////////////////////////////// -TEST_F(DeclarableOpsTests13, lstmLayer_bp_5) { - - const int sL = 3; - const int bS = 2; - const int nIn = 2; - const int nOut = 2; - - const int dataFormat = 2; // [bS, nIn, sL] - const int directionMode = 1; // backward - const int gateAct = 2; // sigmoid activation for input (i), forget (f) and output (o) gates - const int cellAct = 0; // tanh activation for cell state - const int outAct = 0; // tanh activation for output - - const bool hasBiases = true; // biases array is provided - const bool hasSeqLen = true; // seqLen array is not provided - const auto hasInitH = true; // initial output is provided - const auto hasInitC = true; // initial cell state is provided - const auto hasPH = true; // peephole connections are absent - const auto retFullSeq = true; // dLdh per each time step - const auto retLastH = true; // output at last time step - const auto retLastC = true; // cells state at last time step - - const double cellClip = 0.5; // clipping - - NDArray x('c', {bS, nIn, sL}, sd::DataType::DOUBLE); - NDArray Wx('c', {nIn, 4*nOut}, sd::DataType::DOUBLE); - NDArray Wr('c', {nOut, 4*nOut}, sd::DataType::DOUBLE); - NDArray b('c', {4*nOut}, sd::DataType::DOUBLE); - NDArray seqLen('c', {bS}, {0,2}, sd::DataType::DOUBLE); - NDArray hI('c', {bS, nOut}, sd::DataType::DOUBLE); - NDArray cI('c', {bS, nOut}, sd::DataType::DOUBLE); - NDArray Wp('c', {3*nOut}, sd::DataType::DOUBLE); - NDArray dLdh('c', {bS, nOut, sL}, sd::DataType::DOUBLE); - NDArray dLdhL('c', {bS, nOut}, sd::DataType::DOUBLE); - NDArray dLdcL('c', {bS, nOut}, sd::DataType::DOUBLE); - - x.linspace(-2,0.1); - hI.linspace(-1.5,0.1); - cI.linspace(0.7,-0.1); - Wx.linspace(1,-0.1); - Wr.linspace(-1,0.1); - Wp.linspace(0.2,0.2); - b.linspace(1,-0.15); - - std::vector tArgs = {cellClip}; - std::vector iArgs = {dataFormat, directionMode, gateAct, cellAct, outAct}; - std::vector bArgs = {hasBiases, hasSeqLen, hasInitH, hasInitC, hasPH, retFullSeq, retLastH, retLastC}; - - const OpArgsHolder argsHolderFF({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp}, tArgs, iArgs, bArgs); - const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); - - sd::ops::lstmLayer opFF; - sd::ops::lstmLayer_bp opBP; - - const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP, {true, true, true, true, false, true, true, true}); - - ASSERT_TRUE(isGradCorrect); -} - -/////////////////////////////////////////////////////////////////// -TEST_F(DeclarableOpsTests13, lstmLayer_bp_6) { - - const int sL = 3; - const int bS = 2; - const int nIn = 2; - 
const int nOut = 2; - - const int dataFormat = 2; // [bS, nIn, sL] - const int directionMode = 2; // bidirectional sum - const int gateAct = 2; // sigmoid activation for input (i), forget (f) and output (o) gates - const int cellAct = 0; // tanh activation for cell state - const int outAct = 0; // tanh activation for output - - const bool hasBiases = true; // biases array is provided - const bool hasSeqLen = true; // seqLen array is not provided - const auto hasInitH = true; // initial output is provided - const auto hasInitC = true; // initial cell state is provided - const auto hasPH = true; // peephole connections are absent - const auto retFullSeq = true; // dLdh per each time step - const auto retLastH = true; // output at last time step - const auto retLastC = true; // cells state at last time step - - const double cellClip = 0.5; // clipping - - NDArray x('c', {bS, nIn, sL}, sd::DataType::DOUBLE); - NDArray Wx('c', {2, nIn, 4*nOut}, sd::DataType::DOUBLE); - NDArray Wr('c', {2, nOut, 4*nOut}, sd::DataType::DOUBLE); - NDArray b('c', {2, 4*nOut}, sd::DataType::DOUBLE); - NDArray seqLen('c', {bS}, {0,2}, sd::DataType::DOUBLE); - NDArray hI('c', {2, bS, nOut}, sd::DataType::DOUBLE); - NDArray cI('c', {2, bS, nOut}, sd::DataType::DOUBLE); - NDArray Wp('c', {2, 3*nOut}, sd::DataType::DOUBLE); - NDArray dLdh('c', {bS, nOut, sL}, sd::DataType::DOUBLE); - NDArray dLdhL('c', {2, bS, nOut}, sd::DataType::DOUBLE); - NDArray dLdcL('c', {2, bS, nOut}, sd::DataType::DOUBLE); - - x.linspace(-2,0.1); - hI.linspace(-1.5,0.1); - cI.linspace(0.7,-0.1); - Wx.linspace(1,-0.1); - Wr.linspace(-1,0.1); - Wp.linspace(0.2,0.2); - b.linspace(1,-0.15); - - std::vector tArgs = {cellClip}; - std::vector iArgs = {dataFormat, directionMode, gateAct, cellAct, outAct}; - std::vector bArgs = {hasBiases, hasSeqLen, hasInitH, hasInitC, hasPH, retFullSeq, retLastH, retLastC}; - - const OpArgsHolder argsHolderFF({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp}, tArgs, iArgs, bArgs); - const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); - // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdhL}, tArgs, iArgs, bArgs); - // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdcL}, tArgs, iArgs, bArgs); - // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); - // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh}, tArgs, iArgs, bArgs); - // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdhL}, tArgs, iArgs, bArgs); - // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdcL}, tArgs, iArgs, bArgs); - - sd::ops::lstmLayer opFF; - sd::ops::lstmLayer_bp opBP; - - const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP, {true, true, true, true, false, true, true, true}); - - ASSERT_TRUE(isGradCorrect); -} - -/////////////////////////////////////////////////////////////////// -TEST_F(DeclarableOpsTests13, lstmLayer_bp_7) { - - const int sL = 3; - const int bS = 2; - const int nIn = 2; - const int nOut = 2; - - const int dataFormat = 1; // [bS,sL,nIn] - const int directionMode = 3; // bidirectional concat - const int gateAct = 2; // sigmoid activation for input (i), forget (f) and output (o) gates - const int cellAct = 0; // tanh activation for cell state - const int outAct = 0; // tanh activation for output - - const bool 
hasBiases = true; // biases array is provided - const bool hasSeqLen = true; // seqLen array is not provided - const auto hasInitH = true; // initial output is provided - const auto hasInitC = true; // initial cell state is provided - const auto hasPH = true; // peephole connections are absent - const auto retFullSeq = true; // dLdh per each time step - const auto retLastH = true; // output at last time step - const auto retLastC = true; // cells state at last time step - - const double cellClip = 0.5; // clipping - - NDArray x('c', {bS,sL,nIn}, sd::DataType::DOUBLE); - NDArray Wx('c', {2, nIn, 4*nOut}, sd::DataType::DOUBLE); - NDArray Wr('c', {2, nOut, 4*nOut}, sd::DataType::DOUBLE); - NDArray b('c', {2, 4*nOut}, sd::DataType::DOUBLE); - NDArray seqLen('c', {bS}, {0,2}, sd::DataType::DOUBLE); - NDArray hI('c', {2, bS, nOut}, sd::DataType::DOUBLE); - NDArray cI('c', {2, bS, nOut}, sd::DataType::DOUBLE); - NDArray Wp('c', {2, 3*nOut}, sd::DataType::DOUBLE); - NDArray dLdh('c', {bS,sL,2*nOut}, sd::DataType::DOUBLE); - NDArray dLdhL('c', {2, bS, nOut}, sd::DataType::DOUBLE); - NDArray dLdcL('c', {2, bS, nOut}, sd::DataType::DOUBLE); - - x.linspace(-2,0.1); - hI.linspace(-1.5,0.1); - cI.linspace(0.7,-0.1); - Wx.linspace(1,-0.1); - Wr.linspace(-1,0.1); - Wp.linspace(0.2,0.2); - b.linspace(1,-0.15); - - std::vector tArgs = {cellClip}; - std::vector iArgs = {dataFormat, directionMode, gateAct, cellAct, outAct}; - std::vector bArgs = {hasBiases, hasSeqLen, hasInitH, hasInitC, hasPH, retFullSeq, retLastH, retLastC}; - - const OpArgsHolder argsHolderFF({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp}, tArgs, iArgs, bArgs); - const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); - // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdhL}, tArgs, iArgs, bArgs); - // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdcL}, tArgs, iArgs, bArgs); - // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); - // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh}, tArgs, iArgs, bArgs); - // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdhL}, tArgs, iArgs, bArgs); - // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdcL}, tArgs, iArgs, bArgs); - - sd::ops::lstmLayer opFF; - sd::ops::lstmLayer_bp opBP; - - const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP, {true, true, true, true, false, true, true, true}); - - ASSERT_TRUE(isGradCorrect); -} - -/////////////////////////////////////////////////////////////////// -TEST_F(DeclarableOpsTests13, lstmLayer_bp_8) { - - const int sL = 3; - const int bS = 2; - const int nIn = 2; - const int nOut = 2; - - const int dataFormat = 3; // [sL, bS, nIn] - const int directionMode = 4; // bidirectional extra output dim - const int gateAct = 2; // sigmoid activation for input (i), forget (f) and output (o) gates - const int cellAct = 0; // tanh activation for cell state - const int outAct = 0; // tanh activation for output - - const bool hasBiases = true; // biases array is provided - const bool hasSeqLen = true; // seqLen array is not provided - const auto hasInitH = true; // initial output is provided - const auto hasInitC = true; // initial cell state is provided - const auto hasPH = true; // peephole connections are absent - const auto retFullSeq = true; // 
dLdh per each time step - const auto retLastH = true; // output at last time step - const auto retLastC = true; // cells state at last time step - - const double cellClip = 0.5; // clipping - - NDArray x('c', {sL, bS, nIn}, sd::DataType::DOUBLE); - NDArray Wx('c', {2, nIn, 4*nOut}, sd::DataType::DOUBLE); - NDArray Wr('c', {2, nOut, 4*nOut}, sd::DataType::DOUBLE); - NDArray b('c', {2, 4*nOut}, sd::DataType::DOUBLE); - NDArray seqLen('c', {bS}, {0,2}, sd::DataType::DOUBLE); - NDArray hI('c', {2, bS, nOut}, sd::DataType::DOUBLE); - NDArray cI('c', {2, bS, nOut}, sd::DataType::DOUBLE); - NDArray Wp('c', {2, 3*nOut}, sd::DataType::DOUBLE); - NDArray dLdh('c', {sL, 2, bS, nOut}, sd::DataType::DOUBLE); - NDArray dLdhL('c', {2, bS, nOut}, sd::DataType::DOUBLE); - NDArray dLdcL('c', {2, bS, nOut}, sd::DataType::DOUBLE); - - x.linspace(-2,0.1); - hI.linspace(-1.5,0.1); - cI.linspace(0.7,-0.1); - Wx.linspace(1,-0.1); - Wr.linspace(-1,0.1); - Wp.linspace(0.2,0.2); - b.linspace(1,-0.15); - - std::vector tArgs = {cellClip}; - std::vector iArgs = {dataFormat, directionMode, gateAct, cellAct, outAct}; - std::vector bArgs = {hasBiases, hasSeqLen, hasInitH, hasInitC, hasPH, retFullSeq, retLastH, retLastC}; - - const OpArgsHolder argsHolderFF({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp}, tArgs, iArgs, bArgs); - const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); - // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdhL}, tArgs, iArgs, bArgs); - // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdcL}, tArgs, iArgs, bArgs); - // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); - // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh}, tArgs, iArgs, bArgs); - // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdhL}, tArgs, iArgs, bArgs); - // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdcL}, tArgs, iArgs, bArgs); - - sd::ops::lstmLayer opFF; - sd::ops::lstmLayer_bp opBP; - - const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP, {true, true, true, true, false, true, true, true}); - - ASSERT_TRUE(isGradCorrect); -} - //////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests13, batchnorm_test1) { diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests15.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests15.cpp index 3d86cd92b..e01900e87 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests15.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests15.cpp @@ -1923,7 +1923,6 @@ TEST_F(DeclarableOpsTests15, TestTensorMmul_BP17) { ASSERT_TRUE(isGradCorrect); } - ////////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests15, gru_1) { @@ -1960,31 +1959,67 @@ TEST_F(DeclarableOpsTests15, gru_1) { } ////////////////////////////////////////////////////////////////////// -TEST_F(DeclarableOpsTests15, gru_bp_1) { +TEST_F(DeclarableOpsTests15, sqrtm_1) { - const int sL = 3; - const int bS = 2; - const int nIn = 5; - const int nOut = 4; + NDArray x1('c', {1,1}, {4.}, sd::DataType::DOUBLE); + NDArray x2('c', {2,2}, {1.3,2,0.3,.5}, sd::DataType::DOUBLE); + NDArray x3('c', {3,3}, {0.5 ,-0.4 ,1.2 ,-2.8 ,-0.2 ,-2.1 ,-2.4 ,-2.0 ,1.1}, sd::DataType::DOUBLE); + NDArray x4('c', {4,4}, {0.33 ,-7.25 ,1.71 ,6.20 ,1.34 
,5.38 ,-2.76 ,-8.51 ,7.59 ,3.44 ,2.24 ,-6.82 ,-1.15 ,4.80 ,-4.67 ,2.14}, sd::DataType::DOUBLE); + NDArray x5('c', {5,5}, {2.4 ,0.3 ,0.0 ,1.1 ,1.8 ,0.1 ,1.7 ,2.7 ,1.5 ,2.6 ,0.6 ,2.1 ,2.2 ,1.0 ,0.2 ,1.2 ,2.8 ,1.9 ,0.8 ,2.0 ,0.5 ,1.6 ,0.9 ,1.4 ,2.5}, sd::DataType::DOUBLE); + NDArray exp1('c', {1,1}, {2.}, sd::DataType::DOUBLE); + NDArray exp2('c', {2,2}, {1.0163674, 1.3341597,0.200124, 0.4827035}, sd::DataType::DOUBLE); + NDArray exp3('c', {3,3}, {6.5692188, 2.6273616,-0.1387864,-16.8404762,-7.0296495, 0.9204148,-11.4664296,-5.834273 , 2.2087478}, sd::DataType::DOUBLE); + NDArray exp4('c', {4,4}, {1.161387 ,-1.9343154, 0.230372 , 0.8660897,0.80588 , 3.4045446,-1.0152824,-2.0369467,2.2589629, 1.9674252, 1.5109997,-1.4283141,0.0226356, 1.3032279,-1.00396 , 1.8278487}, sd::DataType::DOUBLE); + NDArray exp5('c', {5,5}, {1.4175046,-0.4425298, 0.1846149, 0.3166522, 0.9140631,-0.1929139, 0.2889113, 1.4045273, 0.2600026, 1.552021 , 0.1372758, 0.5703854, 1.3336126, 0.3869317,-0.082492 , + 0.8607272, 3.1792474,-0.9499947, 0.8541668,-1.4243879, 0.0081136,-0.0622248, 0.4534325, 0.4641865, 1.8132138}, sd::DataType::DOUBLE); - NDArray x('c', {sL, bS, nIn}, {0.5, 1. , 1.5, 2. , 2.5, 3. , 3.5, 4. , 4.5, 5. , 5.5, 6. , 6.5, 7. , 7.5, 8. , 8.5, 9. , 9.5, 10. , 10.5, 11. , 11.5, 12. , 12.5, 13. , 13.5, 14. , 14.5, 15.}, sd::DataType::DOUBLE); - NDArray hI('c', {bS, nOut}, {-3,-2,-1,0,1,2,3,4}, sd::DataType::DOUBLE); - NDArray Wx('c', {nIn, 3*nOut}, sd::DataType::DOUBLE); - NDArray Wh('c', {nOut, 3*nOut}, sd::DataType::DOUBLE); - NDArray b('c', {3*nOut}, sd::DataType::DOUBLE); + sd::ops::sqrtm op; - NDArray dLdh('c', {sL, bS, nOut}, sd::DataType::DOUBLE); + auto results = op.evaluate({&x1}, {}, {}); + ASSERT_EQ(ND4J_STATUS_OK, results.status()); + ASSERT_TRUE(exp1.isSameShape(results.at(0))); + ASSERT_TRUE(exp1.equalsTo(results.at(0))); - Wx.linspace(1,-0.1); - Wh.linspace(0.2,0.2); - b.linspace(1,-0.15); + results = op.evaluate({&x2}, {}, {}); + ASSERT_EQ(ND4J_STATUS_OK, results.status()); + ASSERT_TRUE(exp2.isSameShape(results.at(0))); + ASSERT_TRUE(exp2.equalsTo(results.at(0))); - const OpArgsHolder argsHolderFF({&x, &hI, &Wx, &Wh, &b}, {}, {}); - const OpArgsHolder argsHolderBP({&x, &hI, &Wx, &Wh, &b, &dLdh}, {}, {}); + results = op.evaluate({&x3}, {}, {}); + ASSERT_EQ(ND4J_STATUS_OK, results.status()); + ASSERT_TRUE(exp3.isSameShape(results.at(0))); + ASSERT_TRUE(exp3.equalsTo(results.at(0))); - sd::ops::gru opFF; - sd::ops::gru_bp opBP; + results = op.evaluate({&x4}, {}, {}); + ASSERT_EQ(ND4J_STATUS_OK, results.status()); + ASSERT_TRUE(exp4.isSameShape(results.at(0))); + ASSERT_TRUE(exp4.equalsTo(results.at(0))); - const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP); + results = op.evaluate({&x5}, {}, {}); + ASSERT_EQ(ND4J_STATUS_OK, results.status()); + ASSERT_TRUE(exp5.isSameShape(results.at(0))); + ASSERT_TRUE(exp5.equalsTo(results.at(0))); +} + +////////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests15, sqrtm_2) { + + NDArray x('c', {10,10}, {-0.3 ,2.7 ,4.9 ,7.0 ,7.3 ,-1.3 ,0.5 ,9.9 ,-9.4 ,8.4 ,2.2 ,5.2 ,7.6 ,1.2 ,2.0 ,-3.8 ,2.1 ,6.1 ,1.6 ,6.9 ,5.1 ,5.3 ,6.4 ,8.7 ,0.1 ,8.5 , + 3.3 ,1.0 ,6.8 ,0.4 ,0.7 ,3.2 ,7.4 ,6.7 ,1.1 ,7.2 ,6.0 ,7.5 ,9.7 ,5.4 ,9.0 ,6.3 ,0.0 ,4.5 ,8.3 ,7.9 ,3.0 ,6.5 ,0.6 ,8.0 ,9.5 ,3.6 ,1.9 ,6.2 ,0.9 ,4.0 ,4.1 , + 8.1 ,3.9 ,4.3 ,4.7 ,3.7 ,3.4 ,5.8 ,10.0 ,8.6 ,9.3 ,9.1 ,4.6 ,1.4 ,7.8 ,1.5 ,7.7 ,4.2 ,9.6 ,8.2 ,-7.1 ,5.7 ,5.5 ,2.6 ,8.8 ,2.9 ,0.2 ,5.6 ,-2.5 ,8.9 ,2.8 ,0.8 ,1.5 ,3.1 ,3.5 ,4.4 ,2.4 ,9.2 ,-4.8 ,1.7 
,6.6 ,9.8 ,1.8 ,5.9}, sd::DataType::DOUBLE);
+
+ NDArray expZ('c', {10,10}, {1.2779038, 0.0333321, 0.8215617, 0.5736392, 1.3973911, -1.1757741,0.1990005, 1.5893778, -3.0159568, 2.5829108,0.5692253, 2.219431 , 1.022612 , -0.3131795, -0.1957848, -1.7805065,
+ 0.6668489, 1.1968921, 0.9781974, 1.2007764,0.7028634, 0.7496937, 2.2511438, 2.1945378, 0.2559353, 2.8948612,-0.4306994, -0.9922216, 0.3884369, -1.4174481,
+ -1.6060233, 0.1571057, 1.432471 , 0.4508346, 0.0618069, -2.4511742,2.0641709, 2.4751085, 1.84787 , 3.4146313,0.7774219, 0.768369 , -0.1417226, -0.3970577, 2.9512879, 0.5474537,
+ 0.4991412, 0.7604095, 0.4523091, 1.7813704,2.5998339, 0.9402402, -0.82775 , 2.3637147, -0.6394584, 4.6181937,-0.1762181, -0.2820475, 0.9280713, -2.1876918,
+ 0.1576249, 0.336376 , 0.2017592, 0.851786 , 1.3542577, 1.2752901,2.9718476, 1.1102557, 0.0067319, -0.2652283,0.8839235, -0.2637131, 1.5687876, 0.5156139, 1.9015886, 0.9087172,
+ -1.5607482, 2.4216275, 1.0399745, -0.4930439,1.3044354, 0.1690006, 0.2106909, -0.2683631, -0.4193939, 1.0233265,0.4571777, -0.2024148, 2.3564855, 1.0442339,
+ 1.1073322, 1.0728525, -0.5917566, 2.2267418, -1.6096582, 2.0685315,0.6800798, 0.4451858, -0.4048465, 1.2347676}, sd::DataType::DOUBLE);
+ sd::ops::sqrtm op;
+
+ auto results = op.evaluate({&x}, {}, {});
+ ASSERT_EQ(ND4J_STATUS_OK, results.status());
+ ASSERT_TRUE(expZ.isSameShape(results.at(0)));
+ ASSERT_TRUE(expZ.equalsTo(results.at(0)));
 }
diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests19.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests19.cpp
index f111a888a..5f1aefe36 100644
--- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests19.cpp
+++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests19.cpp
@@ -241,6 +241,7 @@ TEST_F(DeclarableOpsTests19, test_threshold_encode_decode) {
 ASSERT_EQ(exp, initial);
 }
+#ifdef _RELEASE
 TEST_F(DeclarableOpsTests19, test_threshold_encode_decode_2) {
 // [2,1,135079944,1,1,8192,1,99]
 auto initial = NDArrayFactory::create<float>('c', {1, 135079944});
@@ -287,6 +288,7 @@ TEST_F(DeclarableOpsTests19, test_threshold_encode_decode_2) {
 ASSERT_EQ(exp, initial);
 }
+#endif
diff --git a/libnd4j/tests_cpu/layers_tests/HelpersTests1.cpp b/libnd4j/tests_cpu/layers_tests/HelpersTests1.cpp
index e25bd0144..fae8c4918 100644
--- a/libnd4j/tests_cpu/layers_tests/HelpersTests1.cpp
+++ b/libnd4j/tests_cpu/layers_tests/HelpersTests1.cpp
@@ -45,61 +45,41 @@ public:
 };
-#ifndef __CUDABLAS__
-
-TEST_F(HelpersTests1, test_binary_search_1) {
- std::array<int, 10> array = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
-
- auto idx = sd::ops::helpers::binarySearch(array.data(), 2, 10);
- ASSERT_EQ(2, idx);
-}
-
-TEST_F(HelpersTests1, test_binary_search_2) {
- std::array<int, 10> array = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
-
- auto idx = sd::ops::helpers::binarySearch(array.data(), 18, 10);
- ASSERT_EQ(-1, idx);
-}
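The constants asserted in evalHHmatrixData_test1 below are consistent with the textbook Householder construction. For x = {14, 17, 3, 1}: normX = -||x|| = -22.2486 (sign chosen opposite to x[0]), tail = x[1:] / (x[0] - normX), and coeff = (x[0] - normX) / (-normX) = 1.62925; likewise, the first column of the expected matrix in the commented-out evalHHmatrix_test1 equals x / normX. A minimal sketch reproducing those numbers under these assumed formulas (the helper's own implementation is not part of this patch):

#include <cmath>
#include <cstdio>

int main() {
    const double x[4] = {14, 17, 3, 1};          // input of evalHHmatrixData_test1

    double norm2 = 0;
    for (int i = 0; i < 4; ++i) norm2 += x[i] * x[i];
    const double normX = -std::sqrt(norm2);      // -22.2486, matches normXExpected
    const double u0    = x[0] - normX;           //  36.2486
    const double coeff = u0 / -normX;            //  1.62925, matches coeffExpected

    std::printf("normX = %.4f coeff = %.5f tail =", normX, coeff);
    for (int i = 1; i < 4; ++i)
        std::printf(" %.7g", x[i] / u0);         // 0.468984 0.0827618 0.0275873
    std::printf("\n");
    return 0;
}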
-///////////////////////////////////////////////////////////////////
-TEST_F(HelpersTests1, evalHHmatrix_test1) {
+// ///////////////////////////////////////////////////////////////////
+// TEST_F(HelpersTests1, evalHHmatrix_test1) {
- auto x = NDArrayFactory::create<double>('c', {1,4}, {14,17,3,1});
- auto exp = NDArrayFactory::create<double>('c', {4,4}, {-0.629253, -0.764093, -0.13484, -0.0449467, -0.764093, 0.641653, -0.0632377, -0.0210792, -0.13484,-0.0632377, 0.98884,-0.00371987, -0.0449467,-0.0210792,-0.00371987, 0.99876});
+// auto x = NDArrayFactory::create<double>('c', {4}, {14,17,3,1});
+// auto exp = NDArrayFactory::create<double>('c', {4,4}, {-0.629253, -0.764093, -0.13484, -0.0449467, -0.764093, 0.641653, -0.0632377, -0.0210792, -0.13484,-0.0632377, 0.98884,-0.00371987, -0.0449467,-0.0210792,-0.00371987, 0.99876});
- auto result = ops::helpers::Householder<double>::evalHHmatrix(x);
- ASSERT_TRUE(result.isSameShape(&exp));
- ASSERT_TRUE(result.equalsTo(&exp));
+// auto result = ops::helpers::Householder<double>::evalHHmatrix(x);
+// ASSERT_TRUE(result.isSameShape(&exp));
+// ASSERT_TRUE(result.equalsTo(&exp));
-}
+// }
-///////////////////////////////////////////////////////////////////
-TEST_F(HelpersTests1, evalHHmatrix_test2) {
+// ///////////////////////////////////////////////////////////////////
+// TEST_F(HelpersTests1, evalHHmatrix_test2) {
- #ifdef __CUDABLAS__
- return;
- #endif
- auto x = NDArrayFactory::create<double>('c', {1,3}, {14,-4,3});
- auto exp = NDArrayFactory::create<double>('c', {3,3}, {-0.941742, 0.269069,-0.201802, 0.269069, 0.962715,0.0279639, -0.201802,0.0279639, 0.979027});
+// #ifdef __CUDABLAS__
+// return;
+// #endif
+// auto x = NDArrayFactory::create<double>('c', {3}, {14,-4,3});
+// auto exp = NDArrayFactory::create<double>('c', {3,3}, {-0.941742, 0.269069,-0.201802, 0.269069, 0.962715,0.0279639, -0.201802,0.0279639, 0.979027});
- auto result = ops::helpers::Householder<double>::evalHHmatrix(x);
+// auto result = ops::helpers::Householder<double>::evalHHmatrix(x);
- ASSERT_TRUE(result.isSameShape(&exp));
- ASSERT_TRUE(result.equalsTo(&exp));
-
-}
+// ASSERT_TRUE(result.isSameShape(&exp));
+// ASSERT_TRUE(result.equalsTo(&exp));
+// }
 /////////////////////////////////////////////////////////////////
 TEST_F(HelpersTests1, evalHHmatrixData_test1) {
- #ifdef __CUDABLAS__
- return;
- #endif
- auto x = NDArrayFactory::create<double>('c', {1,4}, {14,17,3,1});
- auto tail = NDArrayFactory::create<double>('c', {1,3});
- auto expTail = NDArrayFactory::create<double>('c', {1,3}, {0.468984, 0.0827618, 0.0275873});
+ auto x = NDArrayFactory::create<double>('c', {4}, {14,17,3,1});
+ auto tail = NDArrayFactory::create<double>('c', {3});
+ auto expTail = NDArrayFactory::create<double>('c', {3}, {0.468984, 0.0827618, 0.0275873});
 const double normXExpected = -22.2486;
 const double coeffExpected = 1.62925;
@@ -110,34 +90,24 @@ TEST_F(HelpersTests1, evalHHmatrixData_test1) {
 ASSERT_NEAR(coeff, coeffExpected, 1e-5);
 ASSERT_TRUE(tail.isSameShapeStrict(expTail));
 ASSERT_TRUE(tail.equalsTo(&expTail));
-
 }
-
 /////////////////////////////////////////////////////////////////
 TEST_F(HelpersTests1, Householder_mulLeft_test1) {
- #ifdef __CUDABLAS__
- return;
- #endif
 auto x = NDArrayFactory::create<double>('c', {4,4}, {12 ,19 ,14 ,3 ,10 ,4 ,17 ,19 ,19 ,18 ,5 ,3 ,6 ,4 ,2 ,16});
 auto tail = NDArrayFactory::create<double>('c', {1,3}, {0.5,0.5,0.5});
 auto exp = NDArrayFactory::create<double>('c', {4,4}, {9.05,15.8,11.4, 0.8, 8.525, 2.4,15.7,17.9, 17.525,16.4, 3.7, 1.9, 4.525, 2.4, 0.7,14.9});
 ops::helpers::Householder<double>::mulLeft(x, tail, 0.1);
- // expTail.printShapeInfo();
 ASSERT_TRUE(x.isSameShapeStrict(exp));
 ASSERT_TRUE(x.equalsTo(&exp));
-
 }
 /////////////////////////////////////////////////////////////////
 TEST_F(HelpersTests1, Householder_mulLeft_test2) {
- #ifdef __CUDABLAS__
- return;
- #endif
 auto x = NDArrayFactory::create<double>('c', {4,4}, {12 ,19 ,14 ,3 ,10 ,4 ,17 ,19 ,19 ,18 ,5 ,3 ,6 ,4 ,2 ,16});
 auto tail = NDArrayFactory::create<double>('c', {3,1}, {0.5,0.5,0.5});
 auto exp = NDArrayFactory::create<double>('c', {4,4}, {9.05,15.8,11.4, 0.8, 8.525, 2.4,15.7,17.9, 17.525,16.4, 3.7, 1.9, 4.525, 2.4, 0.7,14.9});
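The expected outputs of the two mulLeft tests above are consistent with applying the reflector H = I - beta * w * w^T from the left without ever materializing H: with the implicit Householder vector w = [1, tail], the update is x <- x - beta * w * (w^T * x), applied to each column. A minimal sketch over the first column of x in mulLeft_test1, assuming that formula:

#include <cstdio>

int main() {
    const double x0[4] = {12, 10, 19, 6};    // first column of x in mulLeft_test1
    const double w[4]  = {1, 0.5, 0.5, 0.5}; // implicit vector [1, tail]
    const double beta  = 0.1;                // the coefficient passed to mulLeft

    double dot = 0;                          // w^T * x0 = 12 + 5 + 9.5 + 3 = 29.5
    for (int i = 0; i < 4; ++i) dot += w[i] * x0[i];

    for (int i = 0; i < 4; ++i)              // x0 - beta * dot * w
        std::printf("%g ", x0[i] - beta * dot * w[i]);
    std::printf("\n");                       // 9.05 8.525 17.525 4.525 = column 0 of exp
    return 0;
}

The remaining columns of exp come out the same way, and the expected values of mulRight_test1 below match the mirror-image update applied from the right, x <- x - beta * (x * w) * w^T.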
@@ -152,9 +122,6 @@ TEST_F(HelpersTests1, Householder_mulLeft_test2) {
 /////////////////////////////////////////////////////////////////
 TEST_F(HelpersTests1, Householder_mulRight_test1) {
- #ifdef __CUDABLAS__
- return;
- #endif
 auto x = NDArrayFactory::create<double>('c', {4,4}, {12 ,19 ,14 ,3 ,10 ,4 ,17 ,19 ,19 ,18 ,5 ,3 ,6 ,4 ,2 ,16});
 auto tail = NDArrayFactory::create<double>('c', {1,3}, {0.5,0.5,0.5});
 auto exp = NDArrayFactory::create<double>('c', {4,4}, {9,17.5,12.5, 1.5, 7, 2.5,15.5, 17.5, 15.8,16.4, 3.4, 1.4, 4.3,3.15,1.15,15.15});
@@ -163,16 +130,11 @@ TEST_F(HelpersTests1, Householder_mulRight_test1) {
 ASSERT_TRUE(x.isSameShapeStrict(exp));
 ASSERT_TRUE(x.equalsTo(&exp));
-
 }
-
 /////////////////////////////////////////////////////////////////
 TEST_F(HelpersTests1, BiDiagonalizeUp_test1) {
- #ifdef __CUDABLAS__
- return;
- #endif
 auto matrix = NDArrayFactory::create<double>('c', {4,4}, {9,13,3,6,13,11,7,6,3,7,4,7,6,6,7,10});
 auto hhMatrixExp = NDArrayFactory::create<double>('c', {4,4}, {1.524000, 1.75682,0.233741,0.289458, 0.496646, 1.5655, 1.02929,0.971124, 0.114611,-0.451039, 1.06367,0, 0.229221,-0.272237,0.938237,0});
 auto hhBidiagExp = NDArrayFactory::create<double>('c', {4,4}, {-17.1756, 24.3869, 0, 0, 0,-8.61985,-3.89823, 0, 0, 0, 4.03047,4.13018, 0, 0, 0,1.21666});
@@ -189,15 +151,11 @@ TEST_F(HelpersTests1, BiDiagonalizeUp_test1) {
 ///////////////////////////////////////////////////////////////////
 TEST_F(HelpersTests1, BiDiagonalizeUp_test2) {
- #ifdef __CUDABLAS__
- return;
- #endif
 auto matrix = NDArrayFactory::create<double>('c', {5,4}, {9,-13,3,6, 13,11,7,-6, 3,7,4,7, -6,6,7,10, 2,17,9,12});
 auto hhMatrixExp = NDArrayFactory::create<double>('c', {5,4}, {1.52048, 1.37012, 0.636326, -0.23412, 0.494454, 1.66025, 1.66979,-0.444696, 0.114105,0.130601, 1.58392, 0, -0.22821, 0.215638,0.0524781, 1.99303, 0.0760699,0.375605, 0.509835,0.0591568});
 auto hhBidiagExp = NDArrayFactory::create<double>('c', {4,4}, {-17.2916,7.03123, 0, 0, 0, 16.145,-22.9275, 0, 0, 0, -9.9264,-11.5516, 0, 0, 0,-12.8554});
 ops::helpers::BiDiagonalUp object(matrix);
- // object._HHmatrix.printBuffer();
 ASSERT_TRUE(hhMatrixExp.isSameShapeStrict(object._HHmatrix));
 ASSERT_TRUE(hhMatrixExp.equalsTo(&object._HHmatrix));
@@ -208,9 +166,6 @@ TEST_F(HelpersTests1, BiDiagonalizeUp_test3) {
- #ifdef __CUDABLAS__
- return;
- #endif
 auto matrix = NDArrayFactory::create<double>('c', {6,4}, {9,-13,3,6, 13,11,7,-6, 3,7,4,7, -6,6,7,10, 2,17,9,12, 0,-15,10,2});
 auto hhMatrixExp = NDArrayFactory::create<double>('c', {6,4}, {1.52048, 1.37012, 0.636326, -0.23412, 0.494454, 1.65232, 1.59666,-0.502606, 0.114105, 0.129651, 1.35075, 0, -0.22821, 0.214071, 0.103749, 1.61136, 0.0760699, 0.372875, 0.389936, 0.2398, 0,0.0935171,-0.563777, 0.428587});
 auto hhBidiagExp = NDArrayFactory::create<double>('c', {4,4}, {-17.2916,7.03123, 0, 0, 0,16.3413,-20.7828, 0, 0, 0,-18.4892,4.13261, 0, 0, 0,-21.323});
@@ -227,9 +182,6 @@
 ///////////////////////////////////////////////////////////////////
 TEST_F(HelpersTests1, HHsequence_test1) {
- #ifdef __CUDABLAS__
- return;
- #endif
 auto matrix = NDArrayFactory::create<double>('c', {5,4}, {9,-13,3,6, 13,11,7,-6, 3,7,4,7, -6,6,7,10, 2,17,9,12});
 auto vectorsUseqExp = NDArrayFactory::create<double>('c', {5,4}, {1.52048, 1.37012, 0.636326, -0.23412, 0.494454, 1.66025, 1.66979,-0.444696, 0.114105,0.130601, 1.58392, 0, -0.22821,0.215638,0.0524781, 1.99303, 0.0760699,0.375605, 0.509835,0.0591568});
 auto vectorsVseqExp = NDArrayFactory::create<double>('c', {5,4}, {1.52048, 1.37012, 0.636326, -0.23412, 0.494454, 1.66025, 1.66979,-0.444696, 0.114105,0.130601, 1.58392, 0, -0.22821,0.215638,0.0524781, 1.99303, 0.0760699,0.375605, 0.509835,0.0591568});
@@ -254,9 +206,6 @@ TEST_F(HelpersTests1, 
HHsequence_test1) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, HHsequence_test2) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {6,4}, {9,-13,3,6, 13,11,7,-6, 3,7,4,7, -6,6,7,10, 2,17,9,12 ,0,-15,10,2}); auto vectorsUseqExp = NDArrayFactory::create('c', {6,4}, {1.52048, 1.37012, 0.636326, -0.23412, 0.494454, 1.65232, 1.59666,-0.502606, 0.114105, 0.129651, 1.35075, 0, -0.22821, 0.214071, 0.103749, 1.61136, 0.0760699, 0.372875, 0.389936, 0.2398, 0,0.0935171,-0.563777, 0.428587}); auto vectorsVseqExp = NDArrayFactory::create('c', {6,4}, {1.52048, 1.37012, 0.636326, -0.23412, 0.494454, 1.65232, 1.59666,-0.502606, 0.114105, 0.129651, 1.35075, 0, -0.22821, 0.214071, 0.103749, 1.61136, 0.0760699, 0.372875, 0.389936, 0.2398, 0,0.0935171,-0.563777, 0.428587}); @@ -281,9 +230,6 @@ TEST_F(HelpersTests1, HHsequence_test2) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, HHsequence_test3) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {4,4}, {9,13,3,6, 13,11,7,6, 3,7,4,7, 6,6,7,10}); auto vectorsUseqExp = NDArrayFactory::create('c', {4,4}, {1.524, 1.75682,0.233741,0.289458, 0.496646, 1.5655, 1.02929,0.971124, 0.114611,-0.451039, 1.06367, 0, 0.229221,-0.272237,0.938237, 0}); auto vectorsVseqExp = NDArrayFactory::create('c', {4,4}, {1.524, 1.75682,0.233741,0.289458, 0.496646, 1.5655, 1.02929,0.971124, 0.114611,-0.451039, 1.06367, 0, 0.229221,-0.272237,0.938237, 0}); @@ -308,9 +254,6 @@ TEST_F(HelpersTests1, HHsequence_test3) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, HHsequence_test4) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {4,4}, {9,13,3,6, 13,11,7,6, 3,7,4,7, 6,6,7,10}); auto exp = NDArrayFactory::create('c', {4,4}, {2.49369, 2.62176, 5.88386, 7.69905, -16.0588,-18.7319,-9.15007,-12.6164, 4.7247, 3.46252, 1.02038, -1.4533, 2.9279,-2.29178, 1.90139,-0.66187}); @@ -325,9 +268,6 @@ TEST_F(HelpersTests1, HHsequence_test4) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, HHsequence_test5) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,4}, {9,-13,3,6, 13,11,7,-6, 3,7,4,7, -6,6,7,10, 2,17,9,12}); auto exp = NDArrayFactory::create('c', {5,4}, {4.52891, 8.09473,-2.73704,-13.0302, -11.0752, 7.41549,-3.75125,0.815252, -7.76818,-15.9102,-9.90869,-11.8677, 1.63942,-17.0312,-9.05102,-4.49088, -9.63311,0.540226,-1.52764, 5.79111}); @@ -342,9 +282,6 @@ TEST_F(HelpersTests1, HHsequence_test5) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, HHsequence_test6) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,4}, {9,-13,3,6, 13,11,7,-6, 3,7,4,7, -6,6,7,10, 2,17,9,12}); auto matrix2 = NDArrayFactory::create('c',{6,4}, {9,-1,3,9, 10,11,-7,-5, 3,2,4,7, -1,6,7,19, 2,17,9,15, 2,17,-9,15}); auto exp = NDArrayFactory::create('c', {6,4}, {9,-1,3,9, -4.43019,-15.1713, -3.2854,-7.65743, -9.39162,-7.03599, 8.03827, 9.48453, -2.97785, -16.424, 5.35265,-20.1171, -0.0436177, -13.118,-8.37287,-17.3012, -1.14074, 4.18282,-10.0914,-5.69014}); @@ -360,9 +297,6 @@ TEST_F(HelpersTests1, HHsequence_test6) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, HHsequence_test7) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {4,4}, {9,13,3,6, 13,11,7,6, 
3,7,4,7, 6,6,7,10}); auto exp = NDArrayFactory::create('c', {4,4}, {9,13,3,6,-5.90424,-2.30926,-0.447417, 3.05712, -10.504,-9.31339, -8.85493,-10.8886, -8.29494,-10.6737, -5.94895,-7.55591}); @@ -376,9 +310,6 @@ TEST_F(HelpersTests1, HHsequence_test7) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, HHsequence_test8) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,4}, {9,-13,3,6, 13,11,7,-6, 3,7,4,7, -6,6,7,10, 2,17,9,12}); auto exp = NDArrayFactory::create('c', {5,4}, {9, -13, 3, 6, 13, 11, 7, -6, -6.90831,-5.01113, 0.381677,0.440128, -0.80107,0.961605,-0.308019,-1.96153, -0.795985, 18.6538, 12.0731, 16.9988}); @@ -392,9 +323,6 @@ TEST_F(HelpersTests1, HHsequence_test8) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, HHsequence_test9) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {6,4}, {9,-13,3,6, 13,11,7,-6, 3,7,4,7, -6,6,7,10, 2,17,9,12 ,0,-15,10,2}); auto exp = NDArrayFactory::create('c', {6,4}, {9, -13, 3, 6, 13, 11, 7, -6, 3, 7, 4, 7, 3.77597, 18.6226,-0.674868, 4.61365, 5.02738,-14.1486, -2.22877,-8.98245, -0.683766, 1.73722, 14.9859, 12.0843}); @@ -408,9 +336,6 @@ TEST_F(HelpersTests1, HHsequence_test9) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, HHsequence_test10) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {4,4}, {9,13,3,6, 13,11,7,6, 3,7,4,7, 6,6,7,10}); auto matrix2 = NDArrayFactory::create('c',{6,4}, {9,-1,3,9, 10,11,-7,-5, 3,2,4,7, -1,6,7,19, 2,17,9,15, 2,17,-9,15}); auto exp = NDArrayFactory::create('c', {6,4}, {9, -1, 3, 9, 10, 11, -7, -5, 3, 2, 4, 7, 2.58863, 11.0295,-4.17483,-0.641012, -1.21892,-16.3151, 6.12049, -20.0239, -0.901799,-15.0389,-12.4944, -20.2394}); @@ -425,9 +350,6 @@ TEST_F(HelpersTests1, HHsequence_test10) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, HHsequence_test11) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,4}, {9,-13,3,6, 13,11,7,-6, 3,7,4,7, -6,6,7,10, 2,17,9,12}); auto matrix2 = NDArrayFactory::create('c',{6,4}, {9,-1,3,9, 10,11,-7,-5, 3,2,4,7, -1,6,7,19, 2,17,9,15, 2,17,-9,15}); auto exp = NDArrayFactory::create('c', {6,4}, {9, -1, 3, 9, 10, 11, -7, -5, 3, 2, 4, 7, 1.14934, 4.40257, 8.70127,-1.18824, 1.5132,0.220419,-11.6285,-11.7549, 2.32148, 24.3838,0.256531, 25.9116}); @@ -442,9 +364,6 @@ TEST_F(HelpersTests1, HHsequence_test11) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, HHsequence_test12) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,3}, {9,-13,3, 13,11,7, 3,7,4, -6,6,7, 2,17,9}); auto matrix2 = NDArrayFactory::create('c',{6,4}, {9,-1,3,9, 10,11,-7,-5, 3,2,4,7, -1,6,7,19, 2,17,9,15, 2,17,-9,15}); auto exp = NDArrayFactory::create('c', {6,4}, {9, -1, 3, 9, 10, 11, -7, -5, 3, 2, 4, 7, -1, 6, 7, 19, -2.62252,-22.2914, 4.76743,-19.6689, -1.05943,-9.00514,-11.8013,-7.94571}); @@ -459,9 +378,6 @@ TEST_F(HelpersTests1, HHsequence_test12) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, HHsequence_test13) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,3}, {9,-13,3, 13,11,7, 3,7,4, -6,6,7, 2,17,9}); auto matrix2 = NDArrayFactory::create('c',{6,4}, {9,-1,3,9, 10,11,-7,-5, 3,2,4,7, -1,6,7,19, 2,17,9,15, 2,17,-9,15}); auto exp = 
NDArrayFactory::create('c', {6,4}, {9 , -1 , 3 , 9, -4.65167, 3.44652, 7.83593, 22.6899, -9.48514, -21.902, 5.66559,-13.0533, -0.343184, 15.2895, 7.2888, 14.0489, 0.289638,-1.87752, 3.944,-1.49707, -2.48845, 3.18285,-10.6685,0.406502}); @@ -476,9 +392,6 @@ TEST_F(HelpersTests1, HHsequence_test13) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, HHsequence_test14) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,3}, {9,-13,3, 13,11,7, 3,7,4, -6,6,7, 2,17,9}); auto matrix2 = NDArrayFactory::create('c',{5,5}, {9,-1,3,9,10, 11,-7,-5,3, 2, 4,7,-1,6,7, 19,2,17,9,15, 2,17,-9,15,2}); auto exp = NDArrayFactory::create('c', {5,5}, {1.78958, 8.06962,-6.13687, 4.36267, 1.06472, -14.9578, -8.1522, 1.30442,-18.3343,-13.2578, 13.5536, 5.50764, 15.7859, 7.60831, 11.7871, -1.3626,-0.634986, 7.60934, -2.1841, 5.62694, -13.0577, 15.1554, -7.6511, 3.76365,-5.87368}); @@ -494,9 +407,6 @@ TEST_F(HelpersTests1, HHsequence_test14) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, HHsequence_test15) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,3}, {9,-13,3, 13,11,7, 3,7,4, -6,6,7, 2,17,9}); auto matrix2 = NDArrayFactory::create('c',{5,5}, {9,-1,3,9,10, 11,-7,-5,3, 2, 4,7,-1,6,7, 19,2,17,9,15, 2,17,-9,15,2}); auto exp = NDArrayFactory::create('c', {5,5}, {9, -1, 3, 9, 10, 11, -7, -5, 3, 2, 4, 7, -1, 6, 7, -9.26566,-16.4298, 1.64125,-17.3243,-7.70257, -16.7077, 4.80216,-19.1652,-2.42279,-13.0258}); @@ -511,9 +421,6 @@ TEST_F(HelpersTests1, HHsequence_test15) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, HHsequence_test16) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,5}, {9,-1,3,9,10, 11,-7,-5,3, 2, 4,7,-1,6,7, 19,2,17,9,15, 2,17,-9,15,2}); auto matrix2 = NDArrayFactory::create('c', {10,10}); matrix2 = 100.; @@ -529,9 +436,6 @@ TEST_F(HelpersTests1, HHsequence_test16) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, HHsequence_test17) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,5}, {9,-1,3,9,10, 11,-7,-5,3, 2, 4,7,-1,6,7, 19,2,17,9,15, 2,17,-9,15,2}); auto matrix2 = NDArrayFactory::create('c', {10,10}); matrix2 = 100.; @@ -547,9 +451,6 @@ TEST_F(HelpersTests1, HHsequence_test17) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, HHsequence_test18) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c',{6,4}, {9,-1,3,9, 10,11,-7,-5, 3,2,4,7, -1,6,7,19, 2,17,9,15, 2,17,-9,15}); auto matrix2 = NDArrayFactory::create('c', {10,10}); matrix2 = 100.; @@ -565,9 +466,6 @@ TEST_F(HelpersTests1, HHsequence_test18) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, HHsequence_test19) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c',{6,4}, {9,-1,3,9, 10,11,-7,-5, 3,2,4,7, -1,6,7,19, 2,17,9,15, 2,17,-9,15}); auto matrix2 = NDArrayFactory::create('c', {10,10}); matrix2 = 100.; @@ -581,305 +479,48 @@ TEST_F(HelpersTests1, HHsequence_test19) { } /////////////////////////////////////////////////////////////////// -TEST_F(HelpersTests1, SVD_test1) { +TEST_F(HelpersTests1, HHcolPivQR_1) { - #ifdef __CUDABLAS__ - return; - #endif - auto matrix = NDArrayFactory::create('c', {5,5}, {-17 ,14 ,9 ,-12 ,-12 ,5 ,-4 ,-19 ,-7 ,-12 ,15 ,16 ,17 ,-6 ,8 ,-10 ,14 ,-15 ,6 
,-10 ,-14 ,12 ,-1 ,-16 ,3}); - auto matrix2 = NDArrayFactory::create('c', {5,5}, {18 ,3 ,2 ,7 ,-11 ,7 ,7 ,10 ,-13 ,-8 ,13 ,20 ,-4 ,-16 ,-9 ,-17 ,-5 ,-7 ,-19 ,-8 ,-9 ,9 ,6 ,14 ,-11}); - auto expM = NDArrayFactory::create('c', {5,5}, {-17,14,9,-12,-12, 5,-4, -19, -7,-12, 15,16,17.0294, -6, 8, -10,14, -15, 6,-10, -14,12, 0,-16, 0}); - auto expU = NDArrayFactory::create('c', {5,5}, {18,3, 2,7,-11, 7, 7.75131,10,-12.5665, -8, 13, 20.905,-4,-14.7979, -9, -17,-3.87565,-7,-19.2608, -8, -9, 9, 6, 14,-11}); + auto matrix1 = NDArrayFactory::create('c', {5,6}, {12 ,20 ,19 ,-18 ,-6 ,3 ,6 ,2 ,-7 ,-7 ,14 ,8 ,18 ,-17 ,18 ,-14 ,-15 ,1 ,2 ,2 ,-3 ,-18 ,8 ,-17 ,-19 ,12 ,18 ,6 ,-2 ,-17}); - ops::helpers::SVD svd(matrix, 4, true, true, true, 't'); - svd._m = matrix; - svd._u = matrix2; - svd.deflation1(1,1,2,2); + auto expQR = NDArrayFactory::create('c', {5,6}, {-32.6649659, -4.9594419, -8.2657365, 7.2248659, 16.5927006, 11.7251002, -0.1354883, -29.0586293, 10.9775804, -14.6886248, 4.1884104, 20.7115773, 0.3483986, 0.3236753, 25.5376258, 1.6432380, 9.6395914, -9.0237996, -0.0580664, 0.0798999, -0.0799029, 19.5280665, -4.9773587, 16.0968604, 0.3483986, -0.6667832, 0.0252425, 0.0159188, 10.6978354, -4.6919842}); + auto expCoeffs = NDArrayFactory::create('c', {1,5}, {1.58166, 1.28555, 1.98605, 1.99949, 0}); + auto expPermut = NDArrayFactory::create('c', {6,6}, {0,1,0,0,0,0, 0,0,1,0,0,0, 1,0,0,0,0,0, 0,0,0,0,0,1, 0,0,0,0,1,0, 0,0,0,1,0,0}); - ASSERT_TRUE(expM.equalsTo(&svd._m)); - ASSERT_TRUE(expU.equalsTo(&svd._u)); + ops::helpers::HHcolPivQR qr(matrix1); + + ASSERT_TRUE(expQR.equalsTo(&qr._qr)); + ASSERT_TRUE(expCoeffs.equalsTo(&qr._coeffs)); + ASSERT_TRUE(expPermut.equalsTo(&qr._permut)); + + ASSERT_TRUE(expQR.isSameShapeStrict(qr._qr)); + ASSERT_TRUE(expCoeffs.isSameShapeStrict(qr._coeffs)); + ASSERT_TRUE(expPermut.isSameShapeStrict(qr._permut)); } /////////////////////////////////////////////////////////////////// -TEST_F(HelpersTests1, SVD_test2) { +TEST_F(HelpersTests1, HHcolPivQR_2) { - #ifdef __CUDABLAS__ - return; - #endif - auto matrix= NDArrayFactory::create('c', {5,5}, {-17 ,14 ,9 ,-12 ,-12 ,5 ,-4 ,-19 ,-7 ,-12 ,15 ,16 ,17 ,-6 ,8 ,-10 ,14 ,-15 ,6 ,-10 ,-14 ,12 ,-1 ,-16 ,3}); - auto matrix2 = NDArrayFactory::create('c', {5,5}, {18 ,3 ,2 ,7 ,-11 ,7 ,7 ,10 ,-13 ,-8 ,13 ,20 ,-4 ,-16 ,-9 ,-17 ,-5 ,-7 ,-19 ,-8 ,-9 ,9 ,6 ,14 ,-11}); - auto expM = NDArrayFactory::create('c', {5,5}, {22.6716,14, 9,-12,-12, 5,-4,-19, -7,-12, 0,16, 0, -6, 8, -10,14,-15, 6,-10, -14,12, -1,-16, 3}); - auto expU = NDArrayFactory::create('c', {5,5}, {-12.1738, 3, -13.4089, 7,-11, 1.36735, 7, -12.1297,-13, -8, -12.3944,20, -5.60173,-16, -9, -17,-5,-7,-19, -8, -9, 9, 6, 14,-11}); + auto matrix1 = NDArrayFactory::create('c', {6,6}, {-10 ,-16 ,-20 ,13 ,20 ,-10 ,-9 ,-1 ,-7 ,-20 ,-4 ,20 ,-11 ,19 ,-5 ,-18 ,12 ,-19 ,18 ,-18 ,17 ,-10 ,-19 ,14 ,-2 ,-7 ,-17 ,-14 ,-4 ,-16 ,18 ,-6 ,-18 ,1 ,-15 ,-12}); - ops::helpers::SVD svd(matrix, 4, true, true, true); - svd._m = matrix; - svd._u = matrix2; - svd.deflation1(0,0,2,2); + auto expQR = NDArrayFactory::create('c', {6,6}, {38.1707, -3.03898, 5.16103, 23.0805, -7.57126, -13.885, -0.41519, 34.3623, 3.77403, 2.62327, -8.17784, 9.10312, 0.394431, 0.509952,-30.2179, -6.78341, 12.8421, 28.5491, -0.290633, 0.111912,0.450367, 28.1139, 15.5195, 2.60562, 0.332152, 0.405161,0.308163,0.0468127, 22.294,-2.94931, 0.249114,0.0627956,0.657873, 0.76767,-0.752594,-7.46986}); + auto expCoeffs = NDArrayFactory::create('c', {1,6}, {1.26198, 1.38824, 1.15567, 1.25667, 1.27682, 0}); + auto expPermut = 
NDArrayFactory::create('c', {6,6}, {0,0,1,0,0,0, 0,0,0,0,1,0, 0,0,0,1,0,0, 0,1,0,0,0,0, 0,0,0,0,0,1, 1,0,0,0,0,0}); - ASSERT_TRUE(expM.equalsTo(&svd._m)); - ASSERT_TRUE(expU.equalsTo(&svd._u)); + ops::helpers::HHcolPivQR qr(matrix1); + + ASSERT_TRUE(expQR.equalsTo(&qr._qr)); + ASSERT_TRUE(expCoeffs.equalsTo(&qr._coeffs)); + ASSERT_TRUE(expPermut.equalsTo(&qr._permut)); + + ASSERT_TRUE(expQR.isSameShapeStrict(qr._qr)); + ASSERT_TRUE(expCoeffs.isSameShapeStrict(qr._coeffs)); + ASSERT_TRUE(expPermut.isSameShapeStrict(qr._permut)); } /////////////////////////////////////////////////////////////////// -TEST_F(HelpersTests1, SVD_test3) { +TEST_F(HelpersTests1, HHcolPivQR_3) { - #ifdef __CUDABLAS__ - return; - #endif - auto matrix= NDArrayFactory::create('c', {5,5}, {-17 ,14 ,9 ,-12 ,-12 ,5 ,-4 ,-19 ,-7 ,-12 ,15 ,16 ,17 ,-6 ,8 ,-10 ,14 ,-15 ,6 ,-10 ,-14 ,12 ,-1 ,-16 ,3}); - auto matrix2 = NDArrayFactory::create('c', {2,6}, {18 ,3 ,2 ,7 ,-11 ,7 ,7 ,10 ,-13 ,-8 ,13 ,20}); - auto expM = NDArrayFactory::create('c', {5,5}, {-17,14,9,-12,-12, 5,-4, -19, -7,-12, 15,16,17.0294, -6, 8, -10,14, -15, 6,-10, -14,12, 0,-16, 0}); - auto expU = NDArrayFactory::create('c', {2,6}, {18, 2.58377, 2, 7.16409,-11, 7, 7 ,10.4525 ,-13, -7.39897 ,13 ,20}); - - ops::helpers::SVD svd(matrix, 4, false, true, true, 't'); - svd._m = matrix; - svd._u = matrix2; - svd.deflation1(1,1,2,2); - - ASSERT_TRUE(expM.equalsTo(&svd._m)); - ASSERT_TRUE(expU.equalsTo(&svd._u)); -} - -/////////////////////////////////////////////////////////////////// -TEST_F(HelpersTests1, SVD_test4) { - - #ifdef __CUDABLAS__ - return; - #endif - auto matrix1 = NDArrayFactory::create('c', {6,5}, {12 ,20 ,19 ,-18 ,-6 ,3 ,6 ,2 ,-7 ,-7 ,14 ,8 ,18 ,-17 ,18 ,-14 ,-15 ,1 ,2 ,2 ,-3 ,-18 ,8 ,-17 ,-19 ,12 ,18 ,6 ,-2 ,-17}); - auto matrix2 = NDArrayFactory::create('c', {6,6}, {-10 ,-16 ,-20 ,13 ,20 ,-10 ,-9 ,-1 ,-7 ,-20 ,-4 ,20 ,-11 ,19 ,-5 ,-18 ,12 ,-19 ,18 ,-18 ,17 ,-10 ,-19 ,14 ,-2 ,-7 ,-17 ,-14 ,-4 ,-16 ,18 ,-6 ,-18 ,1 ,-15 ,-12}); - auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); - auto expM = NDArrayFactory::create('c', {6,5}, {12, 20, 19,-18, -6, 3, 6, 2, -7, -7, 14, 8, 18,-17, 18, -14,-15,8.06226, 2, 2, -3,-18, 0,-17, 2, 12, 18, 6, -2,-17}); - auto expU = NDArrayFactory::create('c', {6,6}, {-10,-16, -20, 13, 20,-10, -9, -1,-20.7138,4.46525, -4, 20, -11, 19,-18.4812,2.72876, 12,-19, 18,-18, 17, -10,-19, 14, -2, -7, -17, -14, -4,-16, 18, -6, -18, 1,-15,-12}); - auto expV = NDArrayFactory::create('c', {5,5}, {-18, 1, 19, -7, 1, 2,-18, -13, 14, 2, -2,-11,2.97683,-7.69015,-6, -3, -8, 8, -2, 7, 16, 15, -3, 7, 0}); - - ops::helpers::SVD svd(matrix3, 4, true, true, true, 't'); - svd._m = matrix1; - svd._u = matrix2; - svd._v = matrix3; - svd.deflation2(1, 2, 2, 1, 1, 2, 1); - - ASSERT_TRUE(expM.equalsTo(&svd._m)); - ASSERT_TRUE(expU.equalsTo(&svd._u)); - ASSERT_TRUE(expV.equalsTo(&svd._v)); -} - -/////////////////////////////////////////////////////////////////// -TEST_F(HelpersTests1, SVD_test5) { - - #ifdef __CUDABLAS__ - return; - #endif - auto matrix1 = NDArrayFactory::create('c', {6,5}, {12 ,20 ,19 ,-18 ,-6 ,3 ,6 ,2 ,-7 ,-7 ,14 ,8 ,18 ,-17 ,18 ,-14 ,-15 ,1 ,2 ,2 ,-3 ,-18 ,8 ,-17 ,-19 ,12 ,18 ,6 ,-2 ,-17}); - auto matrix2 = NDArrayFactory::create('c', {6,6}, {-10 ,-16 ,-20 ,13 ,20 ,-10 ,-9 ,-1 ,-7 ,-20 ,-4 ,20 ,-11 ,19 ,-5 ,-18 ,12 ,-19 ,18 ,-18 ,17 ,-10 ,-19 ,14 ,-2 ,-7 ,-17 ,-14 ,-4 ,-16 ,18 ,-6 ,-18 ,1 ,-15 ,-12}); - auto matrix3 = NDArrayFactory::create('c', 
{5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); - auto expM = NDArrayFactory::create('c', {6,5}, {18.4391, 20, 19,-18, -6, 3, 6, 2, -7, -7, 0, 8,18.4391,-17, 18, -14,-15, 1, 2, 2, -3,-18, 8,-17,-19, 12, 18, 6, -2,-17}); - auto expU = NDArrayFactory::create('c', {6,6}, {-10,-16,-20,13, 20,-10, -9,-15.8359, -7,-12.2566, -4, 20, -11,-1.30158, -5,-26.1401, 12,-19, 18,-19.3068, 17, 7.15871,-19, 14, -2, -7,-17, -14, -4,-16, 18, -6,-18, 1,-15,-12}); - auto expV = NDArrayFactory::create('c', {5,5}, {-18, 1, 19, -7, 1, 2,-1.08465,-13,22.7777, 2, -2,-5.64019, 8,9.65341,-6, -3, -8, 8, -2, 7, 16, 15, -3, 7, 0}); - - ops::helpers::SVD svd(matrix3, 4, true, true, true, 't'); - svd._m = matrix1; - svd._u = matrix2; - svd._v = matrix3; - svd.deflation2(1, 0, 1, 1, 0, 2, 2); - - ASSERT_TRUE(expM.equalsTo(&svd._m)); - ASSERT_TRUE(expU.equalsTo(&svd._u)); - ASSERT_TRUE(expV.equalsTo(&svd._v)); -} - -/////////////////////////////////////////////////////////////////// -TEST_F(HelpersTests1, SVD_test6) { - - #ifdef __CUDABLAS__ - return; - #endif - auto matrix1 = NDArrayFactory::create('c', {6,5}, {12 ,20 ,19 ,-18 ,-6 ,3 ,6 ,2 ,-7 ,-7 ,14 ,8 ,18 ,-17 ,18 ,-14 ,-15 ,1 ,2 ,2 ,-3 ,-18 ,8 ,-17 ,-19 ,12 ,18 ,6 ,-2 ,-17}); - auto matrix2 = NDArrayFactory::create('c', {2,6}, {-10 ,-16 ,-20 ,13 ,20 ,-10 ,-9 ,-1 ,-7 ,-20 ,-4 ,20}); - auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); - auto expM = NDArrayFactory::create('c', {6,5}, {18.4391, 20, 19,-18, -6, 3, 6, 2, -7, -7, 0, 8,18.4391,-17, 18, -14,-15, 1, 2, 2, -3,-18, 8,-17,-19, 12, 18, 6, -2,-17}); - auto expU = NDArrayFactory::create('c', {2,6}, {-10, -0.542326,-20, 20.6084,20,-10, -9, -15.8359, -7,-12.2566,-4, 20}); - auto expV = NDArrayFactory::create('c', {5,5}, {-18, 1, 19, -7, 1, 2,-1.08465,-13,22.7777, 2, -2,-5.64019, 8,9.65341,-6, -3, -8, 8, -2, 7, 16, 15, -3, 7, 0}); - - ops::helpers::SVD svd(matrix3, 4, false, true, true, 't'); - svd._m = matrix1; - svd._u = matrix2; - svd._v = matrix3; - svd.deflation2(1, 0, 1, 1, 0, 2, 2); - - ASSERT_TRUE(expM.equalsTo(&svd._m)); - ASSERT_TRUE(expU.equalsTo(&svd._u)); - ASSERT_TRUE(expV.equalsTo(&svd._v)); -} - -/////////////////////////////////////////////////////////////////// -TEST_F(HelpersTests1, SVD_test7) { - - #ifdef __CUDABLAS__ - return; - #endif - auto matrix1 = NDArrayFactory::create('c', {6,5}, {12 ,20 ,19 ,-18 ,-6 ,3 ,6 ,2 ,-7 ,-7 ,14 ,8 ,18 ,-17 ,18 ,-14 ,-15 ,1 ,2 ,2 ,-3 ,-18 ,8 ,-17 ,-19 ,12 ,18 ,6 ,-2 ,-17}); - auto matrix2 = NDArrayFactory::create('c', {6,6}, {-10 ,-16 ,-20 ,13 ,20 ,-10 ,-9 ,-1 ,-7 ,-20 ,-4 ,20 ,-11 ,19 ,-5 ,-18 ,12 ,-19 ,18 ,-18 ,17 ,-10 ,-19 ,14 ,-2 ,-7 ,-17 ,-14 ,-4 ,-16 ,18 ,-6 ,-18 ,1 ,-15 ,-12}); - auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); - - auto expM = NDArrayFactory::create('c', {6,5}, {12, 20, 19,-18, -6, 3, 6, 2, -7, -7, 14, 8,19.6977,-17, 18, -14,-15, 1, 2, 2, -3,-18, 0,-17, 0, 12, 18, 6, -2,-17}); - auto expU = NDArrayFactory::create('c', {6,6}, {-10, -16,-20, 13, 20,-10, -9,-9.03658, -7,-17.8701, -4, 20, -11, 10.0519, -5,-24.1652, 12,-19, 18, -20.51, 17,-1.82762,-19, 14, -2,-12.0826,-17,-9.95039, -4,-16, 18, -6,-18, 1,-15,-12}); - auto expV = NDArrayFactory::create('c', {5,5}, {-18, 1, 19,-7, 1, 2,-18,-13,14, 2, -2,-11, 8, 2,-6, -3, -8, 8,-2, 7, 16, 15, -3, 7, 0}); - - ops::helpers::SVD svd(matrix3, 4, true, true, 
true, 't'); - svd._m = matrix1; - svd._u = matrix2; - svd._v = matrix3; - svd.deflation(1, 3, 1, 1, 2, 1); - - ASSERT_TRUE(expM.equalsTo(&svd._m)); - ASSERT_TRUE(expU.equalsTo(&svd._u)); - ASSERT_TRUE(expV.equalsTo(&svd._v)); -} - -/////////////////////////////////////////////////////////////////// -TEST_F(HelpersTests1, SVD_test8) { - - #ifdef __CUDABLAS__ - return; - #endif - auto matrix1 = NDArrayFactory::create('c', {6,5}, {12 ,20 ,19 ,-18 ,-6 ,3 ,6 ,2 ,-7 ,-7 ,14 ,8 ,18 ,-17 ,18 ,-14 ,-15 ,1 ,2 ,2 ,-3 ,-18 ,8 ,-17 ,-19 ,12 ,18 ,6 ,-2 ,-17}); - auto matrix2 = NDArrayFactory::create('c', {6,6}, {-10 ,-16 ,-20 ,13 ,20 ,-10 ,-9 ,-1 ,-7 ,-20 ,-4 ,20 ,-11 ,19 ,-5 ,-18 ,12 ,-19 ,18 ,-18 ,17 ,-10 ,-19 ,14 ,-2 ,-7 ,-17 ,-14 ,-4 ,-16 ,18 ,-6 ,-18 ,1 ,-15 ,-12}); - auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); - - auto expM = NDArrayFactory::create('c', {6,5}, {12, 20,19,-18, -6, 3, 6, 2, -7, -7, 14,-15, 2,-17, 18, -14, 8, 1, 18, 2, -3,-18, 8,-17,-19, 12, 18, 6, -2,-17}); - auto expU = NDArrayFactory::create('c', {6,6}, {-10,-20,-16, 13, 20,-10, -9, -7, -1,-20, -4, 20, -11, -5, 19,-18, 12,-19, 18, 17,-18,-10,-19, 14, -2, -7,-17,-14, -4,-16, 18, -6,-18, 1,-15,-12}); - auto expV = NDArrayFactory::create('c', {5,5}, {-18, 1, 19,-7, 1, 2,-18,-13, 2,14, -2,-11, 8,-6, 2, -3, -8, 8, 7,-2, 16, 15, -3, 7, 0}); - - ops::helpers::SVD svd(matrix3, 4, true, true, true, 't'); - svd._m = matrix1; - svd._u = matrix2; - svd._v = matrix3; - svd.deflation(0, 2, 2, 1, 2, 1); - - ASSERT_TRUE(expM.equalsTo(&svd._m)); - ASSERT_TRUE(expU.equalsTo(&svd._u)); - ASSERT_TRUE(expV.equalsTo(&svd._v)); -} - -/////////////////////////////////////////////////////////////////// -TEST_F(HelpersTests1, SVD_test9) { - - #ifdef __CUDABLAS__ - return; - #endif - auto col0 = NDArrayFactory::create('c', {10,1}, {12 ,20 ,19 ,-18 ,-6 ,3 ,6 ,2 ,-7 ,14}); - auto diag = NDArrayFactory::create('c', {10,1}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2}); - auto permut = NDArrayFactory::create('c', {1,10}, {8 ,1 ,4 ,0, 5 ,2 ,9 ,3 ,7 ,6}); - auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); - - auto expSingVals = NDArrayFactory::create('c', {10,1}, {-2, 15.304323, 11.2, -1, 1.73489, -12, -15.3043, -12.862, 5.6, 41.4039}); - auto expShifts = NDArrayFactory::create('c', {10,1}, {1, 19, 19, 1, 2, -18, -18, -13, 2, 2}); - auto expMus = NDArrayFactory::create('c', {10,1}, {-3, -3.695677, -7.8, -2, -0.265108, 6, 2.69568, 0.138048, 3.6, 39.4039}); - - auto singVals = NDArrayFactory::create('c', {10,1}); - auto shifts = NDArrayFactory::create('c', {10,1}); - auto mus = NDArrayFactory::create('c', {10,1}); - - ops::helpers::SVD svd(matrix3, 4, true, true, true, 't'); - svd.calcSingVals(col0, diag, permut, singVals, shifts, mus); - - ASSERT_TRUE(expSingVals.equalsTo(&singVals)); - ASSERT_TRUE(expShifts.equalsTo(&shifts)); - ASSERT_TRUE(expMus.equalsTo(&mus)); -} - -/////////////////////////////////////////////////////////////////// -TEST_F(HelpersTests1, SVD_test10) { - - #ifdef __CUDABLAS__ - return; - #endif - auto singVals = NDArrayFactory::create('c', {4,1}, {1 ,1 ,1 ,1}); - auto col0 = NDArrayFactory::create('c', {4,1}, {1 ,1 ,1 ,1}); - auto diag = NDArrayFactory::create('c', {4,1}, {5 ,7 ,-13 ,14}); - auto permut = NDArrayFactory::create('c', {1,4}, {0 ,2 ,3 ,1 }); - auto mus = NDArrayFactory::create('c', {4,1}, {4,1,4,6}); - auto shifts = 
NDArrayFactory::create('c', {4,1}, {4,2,5,6}); - auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); - - auto expZhat = NDArrayFactory::create('c', {4,1}, {0, 0.278208, 72.501953, 0}); - - auto zhat = NDArrayFactory::create('c', {4,1}); - - ops::helpers::SVD svd(matrix3, 4, true, true, true, 't'); - svd.perturb(col0, diag, permut, singVals, shifts, mus, zhat); - - ASSERT_NEAR(expZhat.e(1), zhat.e(1), EPS); - ASSERT_NEAR(expZhat.e(2), zhat.e(2), EPS); -} - - -/////////////////////////////////////////////////////////////////// -TEST_F(HelpersTests1, SVD_test11) { - - #ifdef __CUDABLAS__ - return; - #endif - auto singVals = NDArrayFactory::create('c', {4,1}, {1 ,1 ,1 ,1}); - auto zhat = NDArrayFactory::create('c', {4,1}, {2 ,1 ,2 ,1}); - auto diag = NDArrayFactory::create('c', {4,1}, {5 ,7 ,-13 ,14}); - auto permut = NDArrayFactory::create('c', {1,4}, {0 ,2 ,3 ,1 }); - auto mus = NDArrayFactory::create('c', {4,1}, {4,1,4,6}); - auto shifts = NDArrayFactory::create('c', {4,1}, {4,2,5,6}); - auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); - - auto expU = NDArrayFactory::create('c', {5,5}, {-0.662161, 0.980399,-0.791469,-0.748434, 0, -0.744931, 0.183825,-0.593602,-0.392928, 0, 0.0472972, 0.061275,0.0719517, 0.104781, 0, 0.0662161,0.0356509, 0.126635, 0.523904, 0, 0, 0, 0, 0, 1}); - auto expV = NDArrayFactory::create('c', {4,4}, {-0.745259,-0.965209, -0.899497, -0.892319, -0.652102, 0.21114, -0.39353, -0.156156, -0.0768918,-0.130705,-0.0885868,-0.0773343, 0.115929,0.0818966, 0.167906, 0.416415}); - auto U = NDArrayFactory::create('c', {5,5}); - auto V = NDArrayFactory::create('c', {4,4}); - - - ops::helpers::SVD svd(matrix3, 4, true, true, true, 't'); - svd.calcSingVecs(zhat, diag,permut, singVals, shifts, mus, U, V); - - ASSERT_TRUE(expU.equalsTo(&U)); - ASSERT_TRUE(expV.equalsTo(&V)); - -} - -/////////////////////////////////////////////////////////////////// -TEST_F(HelpersTests1, SVD_test12) { - - #ifdef __CUDABLAS__ - return; - #endif - auto matrix1 = NDArrayFactory::create('c', {6,5}, {-2 ,-3 ,2 ,1 ,0 ,0 ,-4 ,5 ,-2 ,-3 ,-4 ,0 ,5 ,-1 ,-5 ,-3 ,-5 ,3 ,3 ,3 ,-5 ,5 ,-5 ,0 ,2 ,-2 ,-3 ,-4 ,-5 ,-3}); - auto matrix2 = NDArrayFactory::create('c', {6,6}, {-10 ,-16 ,-20 ,13 ,20 ,-10 ,-9 ,-1 ,-7 ,-20 ,-4 ,20 ,-11 ,19 ,-5 ,-18 ,12 ,-19 ,18 ,-18 ,17 ,-10 ,-19 ,14 ,-2 ,-7 ,-17 ,-14 ,-4 ,-16 ,18 ,-6 ,-18 ,1 ,-15 ,-12}); - auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); - auto matrix4 = NDArrayFactory::create('c', {5,5}, {3 ,-8 ,5 ,7 ,-8 ,4 ,-19 ,-12 ,-4 ,-5 ,-11 ,19 ,-2 ,-7 ,1 ,16 ,-5 ,10 ,19 ,-19 ,0 ,-20 ,0 ,-8 ,-13}); - - auto expSingVals = NDArrayFactory::create('c', {4,1}, {8.43282, 5, 2.3, 1.10167}); - auto expU = NDArrayFactory::create('c', {5,5}, {0.401972,0, 0.206791, 0.891995,0, 0,1, 0, 0,0, 0.816018,0,-0.522818,-0.246529,0, -0.415371,0,-0.826982, 0.378904,0, 0,0, 0, 0,1}); - auto expV = NDArrayFactory::create('c', {4,4}, {-0.951851,0,-0.133555,-0.275939, 0,1, 0, 0, 0.290301,0,-0.681937,-0.671333, -0.098513,0,-0.719114, 0.687873}); - - ops::helpers::SVD svd(matrix4, 4, true, true, true, 't'); - svd._m = matrix1; - svd._u = matrix2; - svd._v = matrix3; - NDArray U, singVals, V; - svd.calcBlockSVD(1, 4, U, singVals, V); - - ASSERT_TRUE(expSingVals.equalsTo(&singVals)); - ASSERT_TRUE(expU.equalsTo(&U)); - 
ASSERT_TRUE(expV.equalsTo(&V)); - - ASSERT_TRUE(expSingVals.isSameShapeStrict(singVals)); - ASSERT_TRUE(expU.isSameShapeStrict(U)); - ASSERT_TRUE(expV.isSameShapeStrict(V)); -} - -/////////////////////////////////////////////////////////////////// -TEST_F(HelpersTests1, SVD_test13) { - - #ifdef __CUDABLAS__ - return; - #endif NDArray matrix1('c', {6,5}, {12 ,20 ,19 ,-18 ,-6 ,3 ,6 ,2 ,-7 ,-7 ,14 ,8 ,18 ,-17 ,18 ,-14 ,-15 ,1 ,2 ,2 ,-3 ,-18 ,8 ,-17 ,-19 ,12 ,18 ,6 ,-2 ,-17}); auto expQR = NDArrayFactory::create('c', {6,5}, {-37.054 , 0.323852 , 8.04231 , -22.9395 ,-13.089, 0.105164, 32.6021, 6.42277, -0.262898,-1.58766, 0.140218, -0.485058, 29.2073, -9.92301,-23.7111, -0.262909,-0.00866538, 0.103467, 8.55831,-1.86455, -0.315491, 0.539207, 0.40754,-0.0374124,-7.10401, 0.315491, 0.385363,-0.216459, -0.340008,0.628595}); @@ -898,60 +539,10 @@ TEST_F(HelpersTests1, SVD_test13) { } -/////////////////////////////////////////////////////////////////// -TEST_F(HelpersTests1, SVD_test14) { - - #ifdef __CUDABLAS__ - return; - #endif - auto matrix1 = NDArrayFactory::create('c', {5,6}, {12 ,20 ,19 ,-18 ,-6 ,3 ,6 ,2 ,-7 ,-7 ,14 ,8 ,18 ,-17 ,18 ,-14 ,-15 ,1 ,2 ,2 ,-3 ,-18 ,8 ,-17 ,-19 ,12 ,18 ,6 ,-2 ,-17}); - - auto expQR = NDArrayFactory::create('c', {5,6}, {-32.665, -4.95944, -8.26574, 7.22487, 16.5927, 11.7251, -0.135488, -29.0586, 10.9776, -14.6886, 4.18841, 20.7116, 0.348399, 0.323675, 25.5376, 1.64324, 9.63959, -9.0238, -0.0580664,0.0798999,-0.0799029, 19.5281,-4.97736, 16.0969, 0.348399,-0.666783, 0.0252425,0.0159188, 10.6978,-4.69198}); - auto expCoeffs = NDArrayFactory::create('c', {1,5}, {1.58166, 1.28555, 1.98605, 1.99949, 0}); - auto expPermut = NDArrayFactory::create('c', {6,6}, {0,1,0,0,0,0, 0,0,1,0,0,0, 1,0,0,0,0,0, 0,0,0,0,0,1, 0,0,0,0,1,0, 0,0,0,1,0,0}); - - ops::helpers::HHcolPivQR qr(matrix1); - - ASSERT_TRUE(expQR.equalsTo(&qr._qr)); - ASSERT_TRUE(expCoeffs.equalsTo(&qr._coeffs)); - ASSERT_TRUE(expPermut.equalsTo(&qr._permut)); - - ASSERT_TRUE(expQR.isSameShapeStrict(qr._qr)); - ASSERT_TRUE(expCoeffs.isSameShapeStrict(qr._coeffs)); - ASSERT_TRUE(expPermut.isSameShapeStrict(qr._permut)); -} - - -/////////////////////////////////////////////////////////////////// -TEST_F(HelpersTests1, SVD_test15) { - - #ifdef __CUDABLAS__ - return; - #endif - auto matrix1 = NDArrayFactory::create('c', {6,6}, {-10 ,-16 ,-20 ,13 ,20 ,-10 ,-9 ,-1 ,-7 ,-20 ,-4 ,20 ,-11 ,19 ,-5 ,-18 ,12 ,-19 ,18 ,-18 ,17 ,-10 ,-19 ,14 ,-2 ,-7 ,-17 ,-14 ,-4 ,-16 ,18 ,-6 ,-18 ,1 ,-15 ,-12}); - - auto expQR = NDArrayFactory::create('c', {6,6}, {38.1707, -3.03898, 5.16103, 23.0805, -7.57126, -13.885, -0.41519, 34.3623, 3.77403, 2.62327, -8.17784, 9.10312, 0.394431, 0.509952,-30.2179, -6.78341, 12.8421, 28.5491, -0.290633, 0.111912,0.450367, 28.1139, 15.5195, 2.60562, 0.332152, 0.405161,0.308163,0.0468127, 22.294,-2.94931, 0.249114,0.0627956,0.657873, 0.76767,-0.752594,-7.46986}); - auto expCoeffs = NDArrayFactory::create('c', {1,6}, {1.26198, 1.38824, 1.15567, 1.25667, 1.27682, 0}); - auto expPermut = NDArrayFactory::create('c', {6,6}, {0,0,1,0,0,0, 0,0,0,0,1,0, 0,0,0,1,0,0, 0,1,0,0,0,0, 0,0,0,0,0,1, 1,0,0,0,0,0}); - - ops::helpers::HHcolPivQR qr(matrix1); - - ASSERT_TRUE(expQR.equalsTo(&qr._qr)); - ASSERT_TRUE(expCoeffs.equalsTo(&qr._coeffs)); - ASSERT_TRUE(expPermut.equalsTo(&qr._permut)); - - ASSERT_TRUE(expQR.isSameShapeStrict(qr._qr)); - ASSERT_TRUE(expCoeffs.isSameShapeStrict(qr._coeffs)); - ASSERT_TRUE(expPermut.isSameShapeStrict(qr._permut)); -} - - +#ifndef __CUDABLAS__ 
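+// Note: this single compile-time guard stands in for the per-test
+// "#ifdef __CUDABLAS__ return;" early exits that are removed throughout this
+// hunk: the CPU-only helper tests below are now excluded from the CUDA build
+// entirely instead of being compiled and then skipped at runtime.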
/////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, JacobiSVD_test1) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); auto left = NDArrayFactory::create('c', {2,2}); auto right = NDArrayFactory::create('c', {2,2}); @@ -968,9 +559,6 @@ TEST_F(HelpersTests1, JacobiSVD_test1) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, JacobiSVD_test2) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); auto matrix4 = NDArrayFactory::create('c', {5,5}, {12 ,20 ,19 ,-18 ,-6 ,3 ,6 ,2 ,-7 ,-7 ,14 ,8 ,18 ,-17 ,18 ,-14 ,-15 ,1 ,2 ,2 ,-3 ,-18 ,8 ,-17 ,-19}); auto matrix5 = NDArrayFactory::create('c', {5,5}, {3 ,-8 ,5 ,7 ,-8 ,4 ,-19 ,-12 ,-4 ,-5 ,-11 ,19 ,-2 ,-7 ,1 ,16 ,-5 ,10 ,19 ,-19 ,0 ,-20 ,0 ,-8 ,-13}); @@ -998,9 +586,6 @@ TEST_F(HelpersTests1, JacobiSVD_test2) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, JacobiSVD_test3) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); auto rotation = NDArrayFactory::create('c', {2,2}, {0.2, math::nd4j_sqrt(0.6), -math::nd4j_sqrt(0.6), 0.2}); @@ -1014,9 +599,6 @@ TEST_F(HelpersTests1, JacobiSVD_test3) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, JacobiSVD_test4) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); auto rotation = NDArrayFactory::create('c', {2,2}, {0.2, math::nd4j_sqrt(0.6), -math::nd4j_sqrt(0.6), 0.2}); @@ -1030,9 +612,6 @@ TEST_F(HelpersTests1, JacobiSVD_test4) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, JacobiSVD_test5) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); auto rotation = NDArrayFactory::create('c', {2,2}, {0.2, math::nd4j_sqrt(0.6), -math::nd4j_sqrt(0.6), 0.2}); @@ -1046,9 +625,6 @@ TEST_F(HelpersTests1, JacobiSVD_test5) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, JacobiSVD_test6) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); auto rotation = NDArrayFactory::create('c', {2,2}, {0.2, math::nd4j_sqrt(0.6), -math::nd4j_sqrt(0.6), 0.2}); @@ -1062,9 +638,6 @@ TEST_F(HelpersTests1, JacobiSVD_test6) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, JacobiSVD_test7) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); auto rotation = NDArrayFactory::create('c', {2,2}, {0.2, math::nd4j_sqrt(0.6), -math::nd4j_sqrt(0.6), 0.2}); @@ -1078,9 +651,6 @@ TEST_F(HelpersTests1, JacobiSVD_test7) { ////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, 
JacobiSVD_test8) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); auto rotation = NDArrayFactory::create('c', {2,2}, {0.2, math::nd4j_sqrt(0.6), -math::nd4j_sqrt(0.6), 0.2}); @@ -1094,9 +664,6 @@ TEST_F(HelpersTests1, JacobiSVD_test8) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, JacobiSVD_test9) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); auto expS = NDArrayFactory::create('c', {5,1}, {35.7975, 29.1924, 11.1935, 9.2846, 6.77071}); @@ -1113,9 +680,6 @@ TEST_F(HelpersTests1, JacobiSVD_test9) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, JacobiSVD_test10) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); auto expS = NDArrayFactory::create('c', {5,1}, {35.7975, 29.1924, 11.1935, 9.2846, 6.77071}); @@ -1132,9 +696,6 @@ TEST_F(HelpersTests1, JacobiSVD_test10) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, JacobiSVD_test11) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {6,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0, 3, -11, 2, 12, 10}); auto expS = NDArrayFactory::create('c', {5,1}, {36.27, 32.1997, 15.9624, 10.6407, 6.9747}); @@ -1151,9 +712,6 @@ TEST_F(HelpersTests1, JacobiSVD_test11) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, JacobiSVD_test12) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {6,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0, 3, -11, 2, 12, 10}); auto expS = NDArrayFactory::create('c', {5,1}, {36.27, 32.1997, 15.9624, 10.6407, 6.9747}); @@ -1170,9 +728,6 @@ TEST_F(HelpersTests1, JacobiSVD_test12) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, JacobiSVD_test13) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,6}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0, 3, -11, 2, 12, 10}); auto expS = NDArrayFactory::create('c', {5,1}, {40.499, 23.5079, 17.8139, 14.4484, 7.07957}); @@ -1189,9 +744,6 @@ TEST_F(HelpersTests1, JacobiSVD_test13) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, JacobiSVD_test14) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,6}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0, 3, -11, 2, 12, 10}); auto expS = NDArrayFactory::create('c', {5,1}, {40.499, 23.5079, 17.8139, 14.4484, 7.07957}); @@ -1208,9 +760,6 @@ TEST_F(HelpersTests1, JacobiSVD_test14) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, JacobiSVD_test15) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,6}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0, 3, -11, 2, 12, 10}); auto expS = NDArrayFactory::create('c', {5,1}, {40.499, 23.5079, 
17.8139, 14.4484, 7.07957});
@@ -1222,13 +771,314 @@ TEST_F(HelpersTests1, JacobiSVD_test15) {
     ASSERT_TRUE(expS.equalsTo(&jac._s));
 }
 
+///////////////////////////////////////////////////////////////////
+TEST_F(HelpersTests1, JacobiSVD_test16) {
+
+    NDArray rotation('c', {2,2}, sd::DataType::DOUBLE);
+
+    NDArray exp1('c', {2,2}, {1,0,0,1}, sd::DataType::DOUBLE);
+    NDArray exp2('c', {2,2}, {0,1,-1,0}, sd::DataType::DOUBLE);
+    NDArray exp3('c', {2,2}, {-1,0,0,-1}, sd::DataType::DOUBLE);
+    NDArray exp4('c', {2,2}, {0.983282, 0.182089, -0.182089, 0.983282}, sd::DataType::DOUBLE);
+    NDArray exp5('c', {2,2}, {0.249041, 0.968493, -0.968493, 0.249041}, sd::DataType::DOUBLE);
+
+    ops::helpers::JacobiSVD<double>::createJacobiRotationGivens(0, 0, rotation);
+    ASSERT_TRUE(rotation.equalsTo(exp1));
+    ASSERT_TRUE(rotation.isSameShapeStrict(exp1));
+
+    ops::helpers::JacobiSVD<double>::createJacobiRotationGivens(0, -0.5, rotation);
+    ASSERT_TRUE(rotation.equalsTo(exp2));
+    ASSERT_TRUE(rotation.isSameShapeStrict(exp2));
+
+    ops::helpers::JacobiSVD<double>::createJacobiRotationGivens(-0.5, 0, rotation);
+    ASSERT_TRUE(rotation.equalsTo(exp3));
+    ASSERT_TRUE(rotation.isSameShapeStrict(exp3));
+
+    ops::helpers::JacobiSVD<double>::createJacobiRotationGivens(2.7, -0.5, rotation);
+    ASSERT_TRUE(rotation.equalsTo(exp4));
+    ASSERT_TRUE(rotation.isSameShapeStrict(exp4));
+
+    ops::helpers::JacobiSVD<double>::createJacobiRotationGivens(2.7, -10.5, rotation);
+    ASSERT_TRUE(rotation.equalsTo(exp5));
+    ASSERT_TRUE(rotation.isSameShapeStrict(exp5));
+}
+
+TEST_F(HelpersTests1, test_binary_search_1) {
+    std::array<int, 10> array = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+
+    auto idx = sd::ops::helpers::binarySearch(array.data(), 2, 10);
+    ASSERT_EQ(2, idx);
+}
+
+TEST_F(HelpersTests1, test_binary_search_2) {
+    std::array<int, 10> array = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+
+    auto idx = sd::ops::helpers::binarySearch(array.data(), 18, 10);
+    ASSERT_EQ(-1, idx);
+}
+
+///////////////////////////////////////////////////////////////////
+TEST_F(HelpersTests1, SVD_test1) {
+
+    auto matrix  = NDArrayFactory::create<double>('c', {5,5}, {-17 ,14 ,9 ,-12 ,-12 ,5 ,-4 ,-19 ,-7 ,-12 ,15 ,16 ,17 ,-6 ,8 ,-10 ,14 ,-15 ,6 ,-10 ,-14 ,12 ,-1 ,-16 ,3});
+    auto matrix2 = NDArrayFactory::create<double>('c', {5,5}, {18 ,3 ,2 ,7 ,-11 ,7 ,7 ,10 ,-13 ,-8 ,13 ,20 ,-4 ,-16 ,-9 ,-17 ,-5 ,-7 ,-19 ,-8 ,-9 ,9 ,6 ,14 ,-11});
+    auto expM = NDArrayFactory::create<double>('c', {5,5}, {-17,14,9,-12,-12, 5,-4, -19, -7,-12, 15,16,17.0294, -6, 8, -10,14, -15, 6,-10, -14,12, 0,-16, 0});
+    auto expU = NDArrayFactory::create<double>('c', {5,5}, {18,3, 2,7,-11, 7, 7.75131,10,-12.5665, -8, 13, 20.905,-4,-14.7979, -9, -17,-3.87565,-7,-19.2608, -8, -9, 9, 6, 14,-11});
+
+    ops::helpers::SVD<double> svd(matrix, 4, true, true, true, 't');
+    svd._m = matrix;
+    svd._u = matrix2;
+    svd.deflation1(1,1,2,2);
+
+    ASSERT_TRUE(expM.equalsTo(&svd._m));
+    ASSERT_TRUE(expU.equalsTo(&svd._u));
+}
+
+///////////////////////////////////////////////////////////////////
+TEST_F(HelpersTests1, SVD_test2) {
+
+    auto matrix  = NDArrayFactory::create<double>('c', {5,5}, {-17 ,14 ,9 ,-12 ,-12 ,5 ,-4 ,-19 ,-7 ,-12 ,15 ,16 ,17 ,-6 ,8 ,-10 ,14 ,-15 ,6 ,-10 ,-14 ,12 ,-1 ,-16 ,3});
+    auto matrix2 = NDArrayFactory::create<double>('c', {5,5}, {18 ,3 ,2 ,7 ,-11 ,7 ,7 ,10 ,-13 ,-8 ,13 ,20 ,-4 ,-16 ,-9 ,-17 ,-5 ,-7 ,-19 ,-8 ,-9 ,9 ,6 ,14 ,-11});
+    auto expM = NDArrayFactory::create<double>('c', {5,5}, {22.6716,14, 9,-12,-12, 5,-4,-19, -7,-12, 0,16, 0, -6, 8, -10,14,-15, 6,-10, -14,12, -1,-16, 3});
+    auto expU = NDArrayFactory::create<double>('c', {5,5}, {-12.1738, 3, -13.4089, 7,-11, 1.36735, 7, -12.1297,-13, -8, -12.3944,20,
-5.60173,-16, -9, -17,-5,-7,-19, -8, -9, 9, 6, 14,-11}); + + ops::helpers::SVD svd(matrix, 4, true, true, true); + svd._m = matrix; + svd._u = matrix2; + svd.deflation1(0,0,2,2); + + ASSERT_TRUE(expM.equalsTo(&svd._m)); + ASSERT_TRUE(expU.equalsTo(&svd._u)); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests1, SVD_test3) { + + auto matrix= NDArrayFactory::create('c', {5,5}, {-17 ,14 ,9 ,-12 ,-12 ,5 ,-4 ,-19 ,-7 ,-12 ,15 ,16 ,17 ,-6 ,8 ,-10 ,14 ,-15 ,6 ,-10 ,-14 ,12 ,-1 ,-16 ,3}); + auto matrix2 = NDArrayFactory::create('c', {2,6}, {18 ,3 ,2 ,7 ,-11 ,7 ,7 ,10 ,-13 ,-8 ,13 ,20}); + auto expM = NDArrayFactory::create('c', {5,5}, {-17,14,9,-12,-12, 5,-4, -19, -7,-12, 15,16,17.0294, -6, 8, -10,14, -15, 6,-10, -14,12, 0,-16, 0}); + auto expU = NDArrayFactory::create('c', {2,6}, {18, 2.58377, 2, 7.16409,-11, 7, 7 ,10.4525 ,-13, -7.39897 ,13 ,20}); + + ops::helpers::SVD svd(matrix, 4, false, true, true, 't'); + svd._m = matrix; + svd._u = matrix2; + svd.deflation1(1,1,2,2); + + ASSERT_TRUE(expM.equalsTo(&svd._m)); + ASSERT_TRUE(expU.equalsTo(&svd._u)); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests1, SVD_test4) { + + auto matrix1 = NDArrayFactory::create('c', {6,5}, {12 ,20 ,19 ,-18 ,-6 ,3 ,6 ,2 ,-7 ,-7 ,14 ,8 ,18 ,-17 ,18 ,-14 ,-15 ,1 ,2 ,2 ,-3 ,-18 ,8 ,-17 ,-19 ,12 ,18 ,6 ,-2 ,-17}); + auto matrix2 = NDArrayFactory::create('c', {6,6}, {-10 ,-16 ,-20 ,13 ,20 ,-10 ,-9 ,-1 ,-7 ,-20 ,-4 ,20 ,-11 ,19 ,-5 ,-18 ,12 ,-19 ,18 ,-18 ,17 ,-10 ,-19 ,14 ,-2 ,-7 ,-17 ,-14 ,-4 ,-16 ,18 ,-6 ,-18 ,1 ,-15 ,-12}); + auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); + auto expM = NDArrayFactory::create('c', {6,5}, {12, 20, 19,-18, -6, 3, 6, 2, -7, -7, 14, 8, 18,-17, 18, -14,-15,8.06226, 2, 2, -3,-18, 0,-17, 2, 12, 18, 6, -2,-17}); + auto expU = NDArrayFactory::create('c', {6,6}, {-10,-16, -20, 13, 20,-10, -9, -1,-20.7138,4.46525, -4, 20, -11, 19,-18.4812,2.72876, 12,-19, 18,-18, 17, -10,-19, 14, -2, -7, -17, -14, -4,-16, 18, -6, -18, 1,-15,-12}); + auto expV = NDArrayFactory::create('c', {5,5}, {-18, 1, 19, -7, 1, 2,-18, -13, 14, 2, -2,-11,2.97683,-7.69015,-6, -3, -8, 8, -2, 7, 16, 15, -3, 7, 0}); + + ops::helpers::SVD svd(matrix3, 4, true, true, true, 't'); + svd._m = matrix1; + svd._u = matrix2; + svd._v = matrix3; + svd.deflation2(1, 2, 2, 1, 1, 2, 1); + + ASSERT_TRUE(expM.equalsTo(&svd._m)); + ASSERT_TRUE(expU.equalsTo(&svd._u)); + ASSERT_TRUE(expV.equalsTo(&svd._v)); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests1, SVD_test5) { + + auto matrix1 = NDArrayFactory::create('c', {6,5}, {12 ,20 ,19 ,-18 ,-6 ,3 ,6 ,2 ,-7 ,-7 ,14 ,8 ,18 ,-17 ,18 ,-14 ,-15 ,1 ,2 ,2 ,-3 ,-18 ,8 ,-17 ,-19 ,12 ,18 ,6 ,-2 ,-17}); + auto matrix2 = NDArrayFactory::create('c', {6,6}, {-10 ,-16 ,-20 ,13 ,20 ,-10 ,-9 ,-1 ,-7 ,-20 ,-4 ,20 ,-11 ,19 ,-5 ,-18 ,12 ,-19 ,18 ,-18 ,17 ,-10 ,-19 ,14 ,-2 ,-7 ,-17 ,-14 ,-4 ,-16 ,18 ,-6 ,-18 ,1 ,-15 ,-12}); + auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); + auto expM = NDArrayFactory::create('c', {6,5}, {18.4391, 20, 19,-18, -6, 3, 6, 2, -7, -7, 0, 8,18.4391,-17, 18, -14,-15, 1, 2, 2, -3,-18, 8,-17,-19, 12, 18, 6, -2,-17}); + auto expU = NDArrayFactory::create('c', {6,6}, {-10,-16,-20,13, 20,-10, -9,-15.8359, -7,-12.2566, -4, 20, -11,-1.30158, -5,-26.1401, 12,-19, 18,-19.3068, 
17, 7.15871,-19, 14, -2, -7,-17, -14, -4,-16, 18, -6,-18, 1,-15,-12}); + auto expV = NDArrayFactory::create('c', {5,5}, {-18, 1, 19, -7, 1, 2,-1.08465,-13,22.7777, 2, -2,-5.64019, 8,9.65341,-6, -3, -8, 8, -2, 7, 16, 15, -3, 7, 0}); + + ops::helpers::SVD svd(matrix3, 4, true, true, true, 't'); + svd._m = matrix1; + svd._u = matrix2; + svd._v = matrix3; + svd.deflation2(1, 0, 1, 1, 0, 2, 2); + + ASSERT_TRUE(expM.equalsTo(&svd._m)); + ASSERT_TRUE(expU.equalsTo(&svd._u)); + ASSERT_TRUE(expV.equalsTo(&svd._v)); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests1, SVD_test6) { + + auto matrix1 = NDArrayFactory::create('c', {6,5}, {12 ,20 ,19 ,-18 ,-6 ,3 ,6 ,2 ,-7 ,-7 ,14 ,8 ,18 ,-17 ,18 ,-14 ,-15 ,1 ,2 ,2 ,-3 ,-18 ,8 ,-17 ,-19 ,12 ,18 ,6 ,-2 ,-17}); + auto matrix2 = NDArrayFactory::create('c', {2,6}, {-10 ,-16 ,-20 ,13 ,20 ,-10 ,-9 ,-1 ,-7 ,-20 ,-4 ,20}); + auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); + auto expM = NDArrayFactory::create('c', {6,5}, {18.4391, 20, 19,-18, -6, 3, 6, 2, -7, -7, 0, 8,18.4391,-17, 18, -14,-15, 1, 2, 2, -3,-18, 8,-17,-19, 12, 18, 6, -2,-17}); + auto expU = NDArrayFactory::create('c', {2,6}, {-10, -0.542326,-20, 20.6084,20,-10, -9, -15.8359, -7,-12.2566,-4, 20}); + auto expV = NDArrayFactory::create('c', {5,5}, {-18, 1, 19, -7, 1, 2,-1.08465,-13,22.7777, 2, -2,-5.64019, 8,9.65341,-6, -3, -8, 8, -2, 7, 16, 15, -3, 7, 0}); + + ops::helpers::SVD svd(matrix3, 4, false, true, true, 't'); + svd._m = matrix1; + svd._u = matrix2; + svd._v = matrix3; + svd.deflation2(1, 0, 1, 1, 0, 2, 2); + + ASSERT_TRUE(expM.equalsTo(&svd._m)); + ASSERT_TRUE(expU.equalsTo(&svd._u)); + ASSERT_TRUE(expV.equalsTo(&svd._v)); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests1, SVD_test7) { + + auto matrix1 = NDArrayFactory::create('c', {6,5}, {12 ,20 ,19 ,-18 ,-6 ,3 ,6 ,2 ,-7 ,-7 ,14 ,8 ,18 ,-17 ,18 ,-14 ,-15 ,1 ,2 ,2 ,-3 ,-18 ,8 ,-17 ,-19 ,12 ,18 ,6 ,-2 ,-17}); + auto matrix2 = NDArrayFactory::create('c', {6,6}, {-10 ,-16 ,-20 ,13 ,20 ,-10 ,-9 ,-1 ,-7 ,-20 ,-4 ,20 ,-11 ,19 ,-5 ,-18 ,12 ,-19 ,18 ,-18 ,17 ,-10 ,-19 ,14 ,-2 ,-7 ,-17 ,-14 ,-4 ,-16 ,18 ,-6 ,-18 ,1 ,-15 ,-12}); + auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); + + auto expM = NDArrayFactory::create('c', {6,5}, {12, 20, 19,-18, -6, 3, 6, 2, -7, -7, 14, 8,19.6977,-17, 18, -14,-15, 1, 2, 2, -3,-18, 0,-17, 0, 12, 18, 6, -2,-17}); + auto expU = NDArrayFactory::create('c', {6,6}, {-10, -16,-20, 13, 20,-10, -9,-9.03658, -7,-17.8701, -4, 20, -11, 10.0519, -5,-24.1652, 12,-19, 18, -20.51, 17,-1.82762,-19, 14, -2,-12.0826,-17,-9.95039, -4,-16, 18, -6,-18, 1,-15,-12}); + auto expV = NDArrayFactory::create('c', {5,5}, {-18, 1, 19,-7, 1, 2,-18,-13,14, 2, -2,-11, 8, 2,-6, -3, -8, 8,-2, 7, 16, 15, -3, 7, 0}); + + ops::helpers::SVD svd(matrix3, 4, true, true, true, 't'); + svd._m = matrix1; + svd._u = matrix2; + svd._v = matrix3; + svd.deflation(1, 3, 1, 1, 2, 1); + + ASSERT_TRUE(expM.equalsTo(&svd._m)); + ASSERT_TRUE(expU.equalsTo(&svd._u)); + ASSERT_TRUE(expV.equalsTo(&svd._v)); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests1, SVD_test8) { + + auto matrix1 = NDArrayFactory::create('c', {6,5}, {12 ,20 ,19 ,-18 ,-6 ,3 ,6 ,2 ,-7 ,-7 ,14 ,8 ,18 ,-17 ,18 ,-14 ,-15 ,1 ,2 ,2 ,-3 ,-18 ,8 ,-17 ,-19 ,12 ,18 ,6 ,-2 ,-17}); + auto 
matrix2 = NDArrayFactory::create('c', {6,6}, {-10 ,-16 ,-20 ,13 ,20 ,-10 ,-9 ,-1 ,-7 ,-20 ,-4 ,20 ,-11 ,19 ,-5 ,-18 ,12 ,-19 ,18 ,-18 ,17 ,-10 ,-19 ,14 ,-2 ,-7 ,-17 ,-14 ,-4 ,-16 ,18 ,-6 ,-18 ,1 ,-15 ,-12}); + auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); + + auto expM = NDArrayFactory::create('c', {6,5}, {12, 20,19,-18, -6, 3, 6, 2, -7, -7, 14,-15, 2,-17, 18, -14, 8, 1, 18, 2, -3,-18, 8,-17,-19, 12, 18, 6, -2,-17}); + auto expU = NDArrayFactory::create('c', {6,6}, {-10,-20,-16, 13, 20,-10, -9, -7, -1,-20, -4, 20, -11, -5, 19,-18, 12,-19, 18, 17,-18,-10,-19, 14, -2, -7,-17,-14, -4,-16, 18, -6,-18, 1,-15,-12}); + auto expV = NDArrayFactory::create('c', {5,5}, {-18, 1, 19,-7, 1, 2,-18,-13, 2,14, -2,-11, 8,-6, 2, -3, -8, 8, 7,-2, 16, 15, -3, 7, 0}); + + ops::helpers::SVD svd(matrix3, 4, true, true, true, 't'); + svd._m = matrix1; + svd._u = matrix2; + svd._v = matrix3; + svd.deflation(0, 2, 2, 1, 2, 1); + + ASSERT_TRUE(expM.equalsTo(&svd._m)); + ASSERT_TRUE(expU.equalsTo(&svd._u)); + ASSERT_TRUE(expV.equalsTo(&svd._v)); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests1, SVD_test9) { + + auto col0 = NDArrayFactory::create('c', {10,1}, {12 ,20 ,19 ,-18 ,-6 ,3 ,6 ,2 ,-7 ,14}); + auto diag = NDArrayFactory::create('c', {10,1}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2}); + auto permut = NDArrayFactory::create('c', {1,10}, {8 ,1 ,4 ,0, 5 ,2 ,9 ,3 ,7 ,6}); + auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); + + auto expSingVals = NDArrayFactory::create('c', {10,1}, {-2, 15.304323, 11.2, -1, 1.73489, -12, -15.3043, -12.862, 5.6, 41.4039}); + auto expShifts = NDArrayFactory::create('c', {10,1}, {1, 19, 19, 1, 2, -18, -18, -13, 2, 2}); + auto expMus = NDArrayFactory::create('c', {10,1}, {-3, -3.695677, -7.8, -2, -0.265108, 6, 2.69568, 0.138048, 3.6, 39.4039}); + + auto singVals = NDArrayFactory::create('c', {10,1}); + auto shifts = NDArrayFactory::create('c', {10,1}); + auto mus = NDArrayFactory::create('c', {10,1}); + + ops::helpers::SVD svd(matrix3, 4, true, true, true, 't'); + svd.calcSingVals(col0, diag, permut, singVals, shifts, mus); + + ASSERT_TRUE(expSingVals.equalsTo(&singVals)); + ASSERT_TRUE(expShifts.equalsTo(&shifts)); + ASSERT_TRUE(expMus.equalsTo(&mus)); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests1, SVD_test10) { + + auto singVals = NDArrayFactory::create('c', {4,1}, {1 ,1 ,1 ,1}); + auto col0 = NDArrayFactory::create('c', {4,1}, {1 ,1 ,1 ,1}); + auto diag = NDArrayFactory::create('c', {4,1}, {5 ,7 ,-13 ,14}); + auto permut = NDArrayFactory::create('c', {1,4}, {0 ,2 ,3 ,1 }); + auto mus = NDArrayFactory::create('c', {4,1}, {4,1,4,6}); + auto shifts = NDArrayFactory::create('c', {4,1}, {4,2,5,6}); + auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); + + auto expZhat = NDArrayFactory::create('c', {4,1}, {0, 0.278208, 72.501953, 0}); + + auto zhat = NDArrayFactory::create('c', {4,1}); + + ops::helpers::SVD svd(matrix3, 4, true, true, true, 't'); + svd.perturb(col0, diag, permut, singVals, shifts, mus, zhat); + + ASSERT_NEAR(expZhat.e(1), zhat.e(1), EPS); + ASSERT_NEAR(expZhat.e(2), zhat.e(2), EPS); +} + + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests1, SVD_test11) { 
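+    // calcSingVecs is the singular-vector assembly step of this divide-and-conquer
+    // SVD helper: from the perturbed column vector zhat, the diagonal entries, the
+    // permutation and the shift/mu split of the singular values (produced by
+    // calcSingVals and perturb in the preceding tests) it fills the U and V factors;
+    // the expected U and V below appear to be precomputed reference outputs.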
+ + auto singVals = NDArrayFactory::create('c', {4,1}, {1 ,1 ,1 ,1}); + auto zhat = NDArrayFactory::create('c', {4,1}, {2 ,1 ,2 ,1}); + auto diag = NDArrayFactory::create('c', {4,1}, {5 ,7 ,-13 ,14}); + auto permut = NDArrayFactory::create('c', {1,4}, {0 ,2 ,3 ,1 }); + auto mus = NDArrayFactory::create('c', {4,1}, {4,1,4,6}); + auto shifts = NDArrayFactory::create('c', {4,1}, {4,2,5,6}); + auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); + + auto expU = NDArrayFactory::create('c', {5,5}, {-0.662161, 0.980399,-0.791469,-0.748434, 0, -0.744931, 0.183825,-0.593602,-0.392928, 0, 0.0472972, 0.061275,0.0719517, 0.104781, 0, 0.0662161,0.0356509, 0.126635, 0.523904, 0, 0, 0, 0, 0, 1}); + auto expV = NDArrayFactory::create('c', {4,4}, {-0.745259,-0.965209, -0.899497, -0.892319, -0.652102, 0.21114, -0.39353, -0.156156, -0.0768918,-0.130705,-0.0885868,-0.0773343, 0.115929,0.0818966, 0.167906, 0.416415}); + auto U = NDArrayFactory::create('c', {5,5}); + auto V = NDArrayFactory::create('c', {4,4}); + + + ops::helpers::SVD svd(matrix3, 4, true, true, true, 't'); + svd.calcSingVecs(zhat, diag,permut, singVals, shifts, mus, U, V); + + ASSERT_TRUE(expU.equalsTo(&U)); + ASSERT_TRUE(expV.equalsTo(&V)); + +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests1, SVD_test12) { + + auto matrix1 = NDArrayFactory::create('c', {6,5}, {-2 ,-3 ,2 ,1 ,0 ,0 ,-4 ,5 ,-2 ,-3 ,-4 ,0 ,5 ,-1 ,-5 ,-3 ,-5 ,3 ,3 ,3 ,-5 ,5 ,-5 ,0 ,2 ,-2 ,-3 ,-4 ,-5 ,-3}); + auto matrix2 = NDArrayFactory::create('c', {6,6}, {-10 ,-16 ,-20 ,13 ,20 ,-10 ,-9 ,-1 ,-7 ,-20 ,-4 ,20 ,-11 ,19 ,-5 ,-18 ,12 ,-19 ,18 ,-18 ,17 ,-10 ,-19 ,14 ,-2 ,-7 ,-17 ,-14 ,-4 ,-16 ,18 ,-6 ,-18 ,1 ,-15 ,-12}); + auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); + auto matrix4 = NDArrayFactory::create('c', {5,5}, {3 ,-8 ,5 ,7 ,-8 ,4 ,-19 ,-12 ,-4 ,-5 ,-11 ,19 ,-2 ,-7 ,1 ,16 ,-5 ,10 ,19 ,-19 ,0 ,-20 ,0 ,-8 ,-13}); + + auto expSingVals = NDArrayFactory::create('c', {4,1}, {8.43282, 5, 2.3, 1.10167}); + auto expU = NDArrayFactory::create('c', {5,5}, {0.401972,0, 0.206791, 0.891995,0, 0,1, 0, 0,0, 0.816018,0,-0.522818,-0.246529,0, -0.415371,0,-0.826982, 0.378904,0, 0,0, 0, 0,1}); + auto expV = NDArrayFactory::create('c', {4,4}, {-0.951851,0,-0.133555,-0.275939, 0,1, 0, 0, 0.290301,0,-0.681937,-0.671333, -0.098513,0,-0.719114, 0.687873}); + + ops::helpers::SVD svd(matrix4, 4, true, true, true, 't'); + svd._m = matrix1; + svd._u = matrix2; + svd._v = matrix3; + NDArray U, singVals, V; + svd.calcBlockSVD(1, 4, U, singVals, V); + + ASSERT_TRUE(expSingVals.equalsTo(&singVals)); + ASSERT_TRUE(expU.equalsTo(&U)); + ASSERT_TRUE(expV.equalsTo(&V)); + + ASSERT_TRUE(expSingVals.isSameShapeStrict(singVals)); + ASSERT_TRUE(expU.isSameShapeStrict(U)); + ASSERT_TRUE(expV.isSameShapeStrict(V)); +} /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, SVD_test16) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix1 = NDArrayFactory::create('c', {6,5}, {-2 ,-3 ,2 ,1 ,0 ,0 ,-4 ,5 ,-2 ,-3 ,-4 ,0 ,5 ,-1 ,-5 ,-3 ,-5 ,3 ,3 ,3 ,-5 ,5 ,-5 ,0 ,2 ,-2 ,-3 ,-4 ,-5 ,-3}); auto matrix2 = NDArrayFactory::create('c', {6,6}, {-10 ,-16 ,-20 ,13 ,20 ,-10 ,-9 ,-1 ,-7 ,-20 ,-4 ,20 ,-11 ,19 ,-5 ,-18 ,12 ,-19 ,18 ,-18 ,17 ,-10 ,-19 ,14 ,-2 ,-7 ,-17 ,-14 ,-4 ,-16 ,18 ,-6 ,-18 ,1 ,-15 ,-12}); auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 
,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); @@ -1257,9 +1107,6 @@ TEST_F(HelpersTests1, SVD_test16) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, SVD_test17) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix1 = NDArrayFactory::create('c', {6,5}, {-2 ,-3 ,2 ,1 ,0 ,0 ,-4 ,5 ,-2 ,-3 ,-4 ,0 ,5 ,-1 ,-5 ,-3 ,-5 ,3 ,3 ,3 ,-5 ,5 ,-5 ,0 ,2 ,-2 ,-3 ,-4 ,-5 ,-3}); auto matrix2 = NDArrayFactory::create('c', {6,6}, {-10 ,-16 ,-20 ,13 ,20 ,-10 ,-9 ,-1 ,-7 ,-20 ,-4 ,20 ,-11 ,19 ,-5 ,-18 ,12 ,-19 ,18 ,-18 ,17 ,-10 ,-19 ,14 ,-2 ,-7 ,-17 ,-14 ,-4 ,-16 ,18 ,-6 ,-18 ,1 ,-15 ,-12}); auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); @@ -1893,7 +1740,7 @@ TEST_F(HelpersTests1, OpArgsHolder_test3) { ASSERT_EQ(Status::OK(), results.status()); ASSERT_TRUE(exp.isSameShape(tiled)); ASSERT_TRUE(exp.equalsTo(tiled)); - + OpArgsHolder holderBP = holderFF.createArgsHolderForBP({&gradO}, true); sd::ops::tile_bp opBP; results = opBP.execute(holderBP); @@ -2495,4 +2342,3 @@ TEST_F(HelpersTests1, lstmLayerCell_3) { ASSERT_TRUE(expC.isSameShape(c)); ASSERT_TRUE(expC.equalsTo(c)); } - diff --git a/libnd4j/tests_cpu/layers_tests/HelpersTests2.cpp b/libnd4j/tests_cpu/layers_tests/HelpersTests2.cpp new file mode 100644 index 000000000..8a0cc28bf --- /dev/null +++ b/libnd4j/tests_cpu/layers_tests/HelpersTests2.cpp @@ -0,0 +1,426 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +#include "testlayers.h" +#include +#include +#include +#include +#include + +using namespace sd; + +class HelpersTests2 : public testing::Test { +public: + + HelpersTests2() { + + std::cout< hess1(x1); + ASSERT_TRUE(hess1._H.isSameShape(&x1)); + ASSERT_TRUE(hess1._H.equalsTo(&x1)); + ASSERT_TRUE(hess1._Q.isSameShape(&expQ)); + ASSERT_TRUE(hess1._Q.equalsTo(&expQ)); + + ops::helpers::Hessenberg hess2(x2); + ASSERT_TRUE(hess2._H.isSameShape(&x2)); + ASSERT_TRUE(hess2._H.equalsTo(&x2)); + ASSERT_TRUE(hess2._Q.isSameShape(&expQ)); + ASSERT_TRUE(hess2._Q.equalsTo(&expQ)); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests2, Hessenberg_2) { + + NDArray x('c', {2,2}, {1.5,-2,17,5}, sd::DataType::DOUBLE); + NDArray expQ('c', {2,2}, {1,0,0,1}, sd::DataType::DOUBLE); + + ops::helpers::Hessenberg hess(x); + + // hess._H.printBuffer(); + + ASSERT_TRUE(hess._H.isSameShape(&x)); + ASSERT_TRUE(hess._H.equalsTo(&x)); + + ASSERT_TRUE(hess._Q.isSameShape(&expQ)); + ASSERT_TRUE(hess._Q.equalsTo(&expQ)); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests2, Hessenberg_3) { + + NDArray x('c', {3,3}, {33,24,-48,57,12.5,-3,1.1,10,-5.2}, sd::DataType::DOUBLE); + NDArray expH('c', {3,3}, {33, -23.06939, -48.45414, -57.01061, 12.62845, 3.344058, 0, -9.655942, -5.328448}, sd::DataType::DOUBLE); + NDArray expQ('c', {3,3}, {1,0,0,0, -0.99981, -0.019295, 0, -0.019295,0.99981}, sd::DataType::DOUBLE); + + ops::helpers::Hessenberg hess(x); + + ASSERT_TRUE(hess._H.isSameShape(&expH)); + ASSERT_TRUE(hess._H.equalsTo(&expH)); + + ASSERT_TRUE(hess._Q.isSameShape(&expQ)); + ASSERT_TRUE(hess._Q.equalsTo(&expQ)); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests2, Hessenberg_4) { + + NDArray x('c', {4,4}, {0.33 ,-7.25 ,1.71 ,6.20 ,1.34 ,5.38 ,-2.76 ,-8.51 ,7.59 ,3.44 ,2.24 ,-6.82 ,-1.15 ,4.80 ,-4.67 ,2.14}, sd::DataType::DOUBLE); + NDArray expH('c', {4,4}, {0.33, 0.4961181, 3.51599, 9.017665, -7.792702, 4.190221, 6.500328, 5.438888, 0, 3.646734, 0.4641911, -7.635502, 0,0, 5.873535, 5.105588}, sd::DataType::DOUBLE); + NDArray expQ('c', {4,4}, {1,0,0,0, 0,-0.171956, 0.336675, -0.925787, 0,-0.973988,0.0826795, 0.210976, 0, 0.147574, 0.937984,0.3137}, sd::DataType::DOUBLE); + + ops::helpers::Hessenberg hess(x); + + ASSERT_TRUE(hess._H.isSameShape(&expH)); + ASSERT_TRUE(hess._H.equalsTo(&expH)); + + ASSERT_TRUE(hess._Q.isSameShape(&expQ)); + ASSERT_TRUE(hess._Q.equalsTo(&expQ)); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests2, Hessenberg_5) { + + NDArray x('c', {10,10}, {6.9 ,4.8 ,9.5 ,3.1 ,6.5 ,5.8 ,-0.9 ,-7.3 ,-8.1 ,3.0 ,0.1 ,9.9 ,-3.2 ,6.4 ,6.2 ,-7.0 ,5.5 ,-2.2 ,-4.0 ,3.7 ,-3.6 ,9.0 ,-1.4 ,-2.4 ,1.7 , + -6.1 ,-4.2 ,-2.5 ,-5.6 ,-0.4 ,0.4 ,9.1 ,-2.1 ,-5.4 ,7.3 ,3.6 ,-1.7 ,-5.7 ,-8.0 ,8.8 ,-3.0 ,-0.5 ,1.1 ,10.0 ,8.0 ,0.8 ,1.0 ,7.5 ,3.5 ,-1.8 , + 0.3 ,-0.6 ,-6.3 ,-4.5 ,-1.1 ,1.8 ,0.6 ,9.6 ,9.2 ,9.7 ,-2.6 ,4.3 ,-3.4 ,0.0 ,-6.7 ,5.0 ,10.5 ,1.5 ,-7.8 ,-4.1 ,-5.3 ,-5.0 ,2.0 ,-4.4 ,-8.4 , + 6.0 ,-9.4 ,-4.8 ,8.2 ,7.8 ,5.2 ,-9.5 ,-3.9 ,0.2 ,6.8 ,5.7 ,-8.5 ,-1.9 ,-0.3 ,7.4 ,-8.7 ,7.2 ,1.3 ,6.3 ,-3.7 ,3.9 ,3.3 ,-6.0 ,-9.1 ,5.9}, sd::DataType::DOUBLE); + NDArray expH('c', {10,10}, {6.9, 6.125208, -8.070945, 7.219828, -9.363308, 2.181236, 5.995414, 3.892612, 4.982657, -2.088574,-12.6412, 1.212547, -6.449684, 5.162879, 0.4341714, -5.278079, 
-2.624011, -2.03615, 11.39619, -3.034842, + 0, -12.71931, 10.1146, 6.494434, -1.062934, 5.668906, -4.672953, -9.319893, -2.023392, 6.090341,0,0, 7.800521, -1.46286, 1.484626, -10.58252, -3.492978, 2.42187, 5.470045, 1.877265, + 0,0,0, 14.78259,-0.3147726, -5.74874, -0.377823, 3.310056, 2.242614, -5.111574,0,0,0,0, -9.709131, 3.885072, 6.762626, 4.509144, 2.390195, -4.991013, + 0,0,0,0,0, 8.126269, -12.32529, 9.030151, 1.390931, 0.8634045,0,0,0,0,0,0, -12.99477, 9.574299,-0.3098022, 4.910835,0,0,0,0,0,0,0, 14.75256, 18.95723, -5.054717,0,0,0,0,0,0,0,0, -4.577715, -5.440827,}, sd::DataType::DOUBLE); + NDArray expQ('c', {10,10}, {1,0,0,0,0,0,0,0,0,0,0,-0.0079106,-0.38175,-0.39287,-0.26002,-0.44102,-0.071516,0.12118,0.64392,0.057562, + 0,0.28478,0.0058784,0.3837,-0.47888,0.39477,0.0036847,-0.24678,0.3229,0.47042,0,-0.031643,-0.61277,0.087648,0.12014,0.47648,-0.5288,0.060599,0.021434,-0.30102, + 0,0.23732,-0.17801,-0.31809,-0.31267,0.27595,0.30134,0.64555,-0.33392,0.13363,0,-0.023732,-0.40236,0.43089,-0.38692,-0.5178,-0.03957,-0.081667,-0.47515,-0.0077949, + 0,0.20568,-0.0169,0.36962,0.49669,-0.22475,-0.22199,0.50075,0.10454,0.46112,0,0.41926,0.30243,-0.3714,-0.16795,-0.12969,-0.67572,-0.1205,-0.26047,0.10407, + 0,-0.41135,-0.28357,-0.33858,0.18836,0.083822,-0.0068213,-0.30161,-0.24956,0.66327,0,0.68823,-0.33616,-0.12129,0.36163,-0.063256,0.34198,-0.37564,-0.048196,-0.058948}, sd::DataType::DOUBLE); + + ops::helpers::Hessenberg hess(x); + + ASSERT_TRUE(hess._H.isSameShape(&expH)); + ASSERT_TRUE(hess._H.equalsTo(&expH)); + + ASSERT_TRUE(hess._Q.isSameShape(&expQ)); + ASSERT_TRUE(hess._Q.equalsTo(&expQ)); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests2, Schur_1) { + + NDArray x('c', {3,3}, sd::DataType::DOUBLE); + + NDArray expT('c', {3,3}, {-2.5, -2, 1, 0, 1.5, -2, 3, 4, 5}, sd::DataType::DOUBLE); + NDArray expU('c', {3,3}, {0.3, 0.2,-0.1, 0,-0.1, 0.2, -0.3,-0.4, 0.5}, sd::DataType::DOUBLE); + + ops::helpers::Schur schur(x); + schur._T.linspace(-3, 1); + schur._U.linspace(-0.3, 0.1); + + schur.splitTwoRows(1, 0.5); + + ASSERT_TRUE(schur._T.isSameShape(&expT)); + ASSERT_TRUE(schur._T.equalsTo(&expT)); + + ASSERT_TRUE(schur._U.isSameShape(&expU)); + ASSERT_TRUE(schur._U.equalsTo(&expU)); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests2, Schur_2) { + + NDArray x('c', {3,3}, sd::DataType::DOUBLE); + + NDArray shift('c', {3}, sd::DataType::DOUBLE); + NDArray exp1('c', {3}, {1,-3,0}, sd::DataType::DOUBLE); + NDArray exp2('c', {3}, {3, 3,-7}, sd::DataType::DOUBLE); + NDArray exp3('c', {3}, {0.964,0.964,0.964}, sd::DataType::DOUBLE); + NDArray exp1T('c', {3,3}, {-3,-2,-1,0,1,2,3,4,5}, sd::DataType::DOUBLE); + NDArray exp2T('c', {3,3}, {-8,-2,-1,0,-4,2,3,4,0}, sd::DataType::DOUBLE); + NDArray exp3T('c', {3,3}, {-9.464102,-2,-1,0,-5.464102,2,3,4,-1.464102,}, sd::DataType::DOUBLE); + + ops::helpers::Schur schur(x); + // schur._U.linspace(-0.3, 0.1); // doesn't matter + + schur._T.linspace(-3, 1); + double expShift =0; + schur.calcShift(1, 5, expShift, shift); + ASSERT_TRUE(schur._T.equalsTo(&exp1T)); + ASSERT_TRUE(shift.isSameShape(&exp1)); + ASSERT_TRUE(shift.equalsTo(&exp1)); + ASSERT_TRUE(expShift == 0); + + schur._T.linspace(-3, 1); + expShift = 0; + schur.calcShift(2, 10, expShift, shift); + ASSERT_TRUE(schur._T.equalsTo(&exp2T)); + ASSERT_TRUE(shift.isSameShape(&exp2)); + ASSERT_TRUE(shift.equalsTo(&exp2)); + ASSERT_TRUE(expShift == 5); + + schur._T.linspace(-3, 1); + expShift = 0; + schur.calcShift(2, 30, 
expShift, shift); + ASSERT_TRUE(schur._T.equalsTo(&exp3T)); + ASSERT_TRUE(shift.isSameShape(&exp3)); + ASSERT_TRUE(shift.equalsTo(&exp3)); + ASSERT_TRUE((6.4641-0.00001) < expShift && expShift < (6.4641+0.00001)); +} + + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests2, Schur_3) { + + NDArray x('c', {2,2}, {1.5,-2,17,5}, sd::DataType::DOUBLE); + NDArray expU('c', {2,2}, {1,0,0,1}, sd::DataType::DOUBLE); + + ops::helpers::Schur schur(x); + + ASSERT_TRUE(schur._T.isSameShape(&x)); + ASSERT_TRUE(schur._T.equalsTo(&x)); + + ASSERT_TRUE(schur._U.isSameShape(&expU)); + ASSERT_TRUE(schur._U.equalsTo(&expU)); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests2, Schur_4) { + + NDArray x('c', {3,3}, {33,24,-48,57,12.5,-3,1.1,10,-5.2}, sd::DataType::DOUBLE); + NDArray expT('c', {3,3}, {53.73337,-20.21406,-50.44809,0,-27.51557, 26.74307,0,0,14.0822}, sd::DataType::DOUBLE); + NDArray expU('c', {3,3}, {-0.5848506, 0.7185352, 0.3763734,-0.7978391,-0.5932709,-0.1071558,-0.1462962, 0.3629555,-0.9202504}, sd::DataType::DOUBLE); + + ops::helpers::Schur schur(x); + + ASSERT_TRUE(schur._T.isSameShape(&expT)); + ASSERT_TRUE(schur._T.equalsTo(&expT)); + + ASSERT_TRUE(schur._U.isSameShape(&expU)); + ASSERT_TRUE(schur._U.equalsTo(&expU)); +} + +/* +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests2, Schur_5) { + + NDArray x('c', {4,4}, {0.33 ,-7.25 ,1.71 ,6.20 ,1.34 ,5.38 ,-2.76 ,-8.51 ,7.59 ,3.44 ,2.24 ,-6.82 ,-1.15 ,4.80 ,-4.67 ,2.14}, sd::DataType::DOUBLE); + NDArray expT('c', {4,4}, {6.940177,7.201107,2.523849,-8.534745,-3.109643,5.289615,-2.940507,9.330303, 0,0,-0.1740346, 7.19851,0,0, -2.870214, -1.965758}, sd::DataType::DOUBLE); + NDArray expU('c', {4,4}, {-0.2602141, 0.8077556,-0.3352316,-0.4091935,0.3285353,-0.4395489,-0.4714875,-0.6903338,0.7536921, 0.3005626,-0.3910435, 0.4343908,-0.5062621, -0.252962,-0.7158242, 0.4090287}, sd::DataType::DOUBLE); + + ops::helpers::Schur schur(x); + + ASSERT_TRUE(schur._T.isSameShape(&expT)); + ASSERT_TRUE(schur._T.equalsTo(&expT)); + + ASSERT_TRUE(schur._U.isSameShape(&expU)); + ASSERT_TRUE(schur._U.equalsTo(&expU)); +} +*/ +/* +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests2, Schur_6) { + + NDArray x('c', {10,10}, {6.9 ,4.8 ,9.5 ,3.1 ,6.5 ,5.8 ,-0.9 ,-7.3 ,-8.1 ,3.0 ,0.1 ,9.9 ,-3.2 ,6.4 ,6.2 ,-7.0 ,5.5 ,-2.2 ,-4.0 ,3.7 ,-3.6 ,9.0 ,-1.4 ,-2.4 ,1.7 , + -6.1 ,-4.2 ,-2.5 ,-5.6 ,-0.4 ,0.4 ,9.1 ,-2.1 ,-5.4 ,7.3 ,3.6 ,-1.7 ,-5.7 ,-8.0 ,8.8 ,-3.0 ,-0.5 ,1.1 ,10.0 ,8.0 ,0.8 ,1.0 ,7.5 ,3.5 ,-1.8 , + 0.3 ,-0.6 ,-6.3 ,-4.5 ,-1.1 ,1.8 ,0.6 ,9.6 ,9.2 ,9.7 ,-2.6 ,4.3 ,-3.4 ,0.0 ,-6.7 ,5.0 ,10.5 ,1.5 ,-7.8 ,-4.1 ,-5.3 ,-5.0 ,2.0 ,-4.4 ,-8.4 , + 6.0 ,-9.4 ,-4.8 ,8.2 ,7.8 ,5.2 ,-9.5 ,-3.9 ,0.2 ,6.8 ,5.7 ,-8.5 ,-1.9 ,-0.3 ,7.4 ,-8.7 ,7.2 ,1.3 ,6.3 ,-3.7 ,3.9 ,3.3 ,-6.0 ,-9.1 ,5.9}, sd::DataType::DOUBLE); + NDArray expT('c', {10,10}, {-13.78982, 6.072464, 0.3021194, -8.455495,-0.3047058, 4.033153, 2.610364, 2.80607, -2.735616, 0.3040549,-2.188506, -12.38324, -1.167179, -4.539672, -19.08546, 1.752401,-0.1354974,-0.2747422,-0.3270464, -5.070936, + 0,0,0.5067366, 7.930223,-0.6465996, 8.659522, 1.283713, 4.551415, 12.7736, 3.4812,0,0,-9.858142, -2.905068, -6.474159, -6.247967, 0.4720073, -10.49523, 3.617189, -4.941627, + 0,0,0,0,9.461626, -4.896166, 9.339704, 4.640336, 16.8626, 2.056027,0,0,0,0,6.479812, 8.462862, 7.386285, -4.123457, -5.817095, -2.633641,0,0,0,0,0,0,13.46667, -4.907281, 4.602204, 5.198035, + 0,0,0,0,0,0, 
7.176822, 16.93311, 2.195036, 1.346086,0,0,0,0,0,0,0,0, 16.86979, -3.052473,0,0,0,0,0,0,0,0,0, -5.52268}, sd::DataType::DOUBLE); + + // NDArray expT('c', {10,10}, {-13.78982, 6.072464, 0.1926198, -8.458698,-0.3047363, 4.033151, 2.610336, 2.806096, -2.735616, 0.3040549,-2.188506, -12.38324, -1.225857, -4.52418, -19.08548, 1.752257,-0.1354946,-0.2747435,-0.3270464, -5.070936, + // 0,0, 0.4812058, 7.886377,-0.7304318, 8.577898, 1.289673, 4.415163, 12.81936, 3.416929,0,0, -9.901988, -2.879537, -6.465196, -6.359608, 0.455452, -10.55328, 3.451505, -4.986284, + // 0,0,0,0, 9.461614, -4.896159, 9.339602, 4.64046, 16.86265, 2.056047,0,0,0,0, 6.47982, 8.462874, 7.386396, -4.123349, -5.816967, -2.633626, + // 0,0,0,0,0,0, 13.46665, -4.907315, 4.602182, 5.198022,0,0,0,0,0,0, 7.176788, 16.93313, 2.195081, 1.346137,0,0,0,0,0,0,0,0, 16.86979, -3.052473,0,0,0,0,0,0,0,0,0, -5.52268}, sd::DataType::DOUBLE); + + NDArray expU('c', {10,10}, {0.1964177, 0.2165192, -0.2138164, 0.4083154, -0.1872303, -0.5087223, 0.5529025, -0.2996174,-0.08772947, 0.07126534,-0.1906247, -0.223588, 0.3574755, 0.4245914, -0.3885589,-0.07328949, -0.4176507, -0.1885168, -0.4476957, 0.1971104, + -0.2219015, 0.3084187, 0.1069209, -0.4905009, -0.3517786, 0.1446875, 0.121738, -0.3772941, 0.1232591, 0.5353205,-0.4766346, 0.6158252, -0.1529085, 0.04780914, 0.1274182, -0.1219211, -0.3123289, -0.2219282,-0.07613826, -0.429201, + 0.2577533, -0.3356205, -0.225358, -0.1540796, 0.3155174, -0.1904664, -0.3567101, -0.6831458, 0.1244646, 0.03383783, -0.45597, -0.3350697, 0.06824276, -0.2861978,-0.06724917, -0.7046481, 0.01664764, 0.2270567, 0.2003283,-0.01544937, + 0.122865, 0.1516775, -0.4446453, -0.2338583, 0.1633447, -0.193498, -0.198088, 0.3170272, -0.5869794, 0.4013553, 0.347383, 0.3666581, 0.6890763,-0.05797414, 0.3630058, -0.319958, -0.1071812, 0.06162044, 0.03171228, 0.1275262, + -0.2986812, 0.05382598, -0.1484276, 0.4936468, 0.362756, 0.05858297, -0.1055183, 0.1090384, 0.4217073, 0.5534347, 0.3864388, 0.2085926, -0.204135, 0.05230855, -0.5290207, -0.1548485, -0.4670302, 0.2205726, 0.4380318,-0.01626632}, sd::DataType::DOUBLE); + + ops::helpers::Schur schur(x); + + ASSERT_TRUE(schur._T.isSameShape(&expT)); + ASSERT_TRUE(schur._T.equalsTo(&expT, 1e-3)); + + ASSERT_TRUE(schur._U.isSameShape(&expU)); + ASSERT_TRUE(schur._U.equalsTo(&expU)); +} +*/ + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests2, EigenValsAndVecs_1) { + + NDArray x('c', {2,2}, {1.5,-2,17,5}, sd::DataType::DOUBLE); + NDArray expVals('c', {2,2}, {3.25,5.562149, 3.25,-5.562149}, sd::DataType::DOUBLE); + NDArray expVecs('c', {2,2,2}, {-0.3094862,-0.0973726, -0.3094862,0.0973726,0,0.9459053, 0,-0.9459053}, sd::DataType::DOUBLE); + + ops::helpers::EigenValsAndVecs eig(x); + + ASSERT_TRUE(eig._Vals.isSameShape(&expVals)); + ASSERT_TRUE(eig._Vals.equalsTo(&expVals)); + + ASSERT_TRUE(eig._Vecs.isSameShape(&expVecs)); + ASSERT_TRUE(eig._Vecs.equalsTo(&expVecs)); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests2, EigenValsAndVecs_2) { + + NDArray x('c', {3,3}, {33,24,-48,57,12.5,-3,1.1,10,-5.2}, sd::DataType::DOUBLE); + NDArray expVals('c', {3,2}, {53.73337,0, -27.51557,0, 14.0822,0}, sd::DataType::DOUBLE); + NDArray expVecs('c', {3,3,2}, {-0.5848506,0,0.5560778,0,-0.04889745,0,-0.7978391,0,-0.7683444,0,-0.8855156,0,-0.1462962,0,0.3168979,0,-0.4620293,0}, sd::DataType::DOUBLE); + + ops::helpers::EigenValsAndVecs eig(x); + + ASSERT_TRUE(eig._Vals.isSameShape(&expVals)); + 
ASSERT_TRUE(eig._Vals.equalsTo(&expVals)); + + ASSERT_TRUE(eig._Vecs.isSameShape(&expVecs)); + ASSERT_TRUE(eig._Vecs.equalsTo(&expVecs)); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests2, EigenValsAndVecs_3) { + + NDArray x('c', {4,4}, {0.33 ,-7.25 ,1.71 ,6.20 ,1.34 ,5.38 ,-2.76 ,-8.51 ,7.59 ,3.44 ,2.24 ,-6.82 ,-1.15 ,4.80 ,-4.67 ,2.14}, sd::DataType::DOUBLE); + NDArray expVals('c', {4,2}, {6.114896,4.659591,6.114896,-4.659591, -1.069896,4.45631,-1.069896,-4.45631}, sd::DataType::DOUBLE); + NDArray expVecs('c', {4,4,2}, {-0.2141303,0.4815241,-0.2141303,-0.4815241, 0.1035092,-0.4270603, 0.1035092,0.4270603, 0.2703519,-0.2892722, 0.2703519,0.2892722, -0.5256817,0.044061, -0.5256817,-0.044061, + 0.6202137,0.05521234,0.6202137,-0.05521234, -0.5756007,0.3932209,-0.5756007,-0.3932209,-0.4166034,-0.0651337, -0.4166034,0.0651337, -0.1723716,0.1138941,-0.1723716,-0.1138941}, sd::DataType::DOUBLE); + + ops::helpers::EigenValsAndVecs eig(x); + + ASSERT_TRUE(eig._Vals.isSameShape(&expVals)); + ASSERT_TRUE(eig._Vals.equalsTo(&expVals)); + + ASSERT_TRUE(eig._Vecs.isSameShape(&expVecs)); + ASSERT_TRUE(eig._Vecs.equalsTo(&expVecs)); +} + +/* +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests2, EigenValsAndVecs_4) { + + NDArray x('c', {10,10}, {6.9 ,4.8 ,9.5 ,3.1 ,6.5 ,5.8 ,-0.9 ,-7.3 ,-8.1 ,3.0 ,0.1 ,9.9 ,-3.2 ,6.4 ,6.2 ,-7.0 ,5.5 ,-2.2 ,-4.0 ,3.7 ,-3.6 ,9.0 ,-1.4 ,-2.4 ,1.7 , + -6.1 ,-4.2 ,-2.5 ,-5.6 ,-0.4 ,0.4 ,9.1 ,-2.1 ,-5.4 ,7.3 ,3.6 ,-1.7 ,-5.7 ,-8.0 ,8.8 ,-3.0 ,-0.5 ,1.1 ,10.0 ,8.0 ,0.8 ,1.0 ,7.5 ,3.5 ,-1.8 , + 0.3 ,-0.6 ,-6.3 ,-4.5 ,-1.1 ,1.8 ,0.6 ,9.6 ,9.2 ,9.7 ,-2.6 ,4.3 ,-3.4 ,0.0 ,-6.7 ,5.0 ,10.5 ,1.5 ,-7.8 ,-4.1 ,-5.3 ,-5.0 ,2.0 ,-4.4 ,-8.4 , + 6.0 ,-9.4 ,-4.8 ,8.2 ,7.8 ,5.2 ,-9.5 ,-3.9 ,0.2 ,6.8 ,5.7 ,-8.5 ,-1.9 ,-0.3 ,7.4 ,-8.7 ,7.2 ,1.3 ,6.3 ,-3.7 ,3.9 ,3.3 ,-6.0 ,-9.1 ,5.9}, sd::DataType::DOUBLE); + NDArray expVals('c', {10,2}, { -13.08653,3.577011,-13.08653,-3.577011, -1.199166,8.675665,-1.199166,-8.675665,8.962244, + 5.610424, 8.962244,-5.610424, 15.19989,5.675794, 15.19989,-5.675794,16.86979,0,-5.52268,0}, sd::DataType::DOUBLE); + NDArray expVecs('c', {10,10,2}, {0.1652385,0.1439317, 0.1652385,-0.1439317, -0.198272,0.207306, -0.198272,-0.207306, 0.1861466,-0.4599919, 0.1861466,0.4599919, 0.09384053,-0.4889922, 0.09384053,0.4889922, -0.6153314,0, -0.2180209,0, + -0.1603652,-0.1466119, -0.1603652,0.1466119, 0.2817409,0.3301842, 0.2817409,-0.3301842, 0.09747303,-0.2218182, 0.09747303,0.2218182, 0.2318273,-0.3355113, 0.2318273,0.3355113, -0.4828878,0, -0.1451126,0, + -0.1866771,0.1220412, -0.1866771,-0.1220412, 0.08937842,-0.3025104, 0.08937842,0.3025104, 0.2783766,0.2258364, 0.2783766,-0.2258364, -0.1413997,-0.09596012, -0.1413997,0.09596012, -0.2286925,0, 0.3290011,0, + -0.4009741,0.238131, -0.4009741,-0.238131, -0.02772353,0.1338458, -0.02772353,-0.1338458, 0.09030543,-0.2222453, 0.09030543,0.2222453, 0.2565825,-0.2275446, 0.2565825,0.2275446, -0.2855937,0, -0.3950544,0, + 0.2168379,-0.1301121, 0.2168379,0.1301121, -0.165433,-0.1220125, -0.165433,0.1220125, -0.2685605,0.008133055,-0.2685605,-0.008133055, 0.1929395,-0.1194659, 0.1929395,0.1194659, 0.2206467,0, 0.3289105,0, + -0.3835898,-0.2478813, -0.3835898,0.2478813, 0.1923005,-0.01036433, 0.1923005,0.01036433, -0.1711637,-0.3548358, -0.1711637,0.3548358, 0.2888441,0.09625169, 0.2888441,-0.09625169, 0.2595426,0, -0.1288072,0, + 0.1033616,0.09839151, 0.1033616,-0.09839151, -0.3080167,-0.1624564, -0.3080167,0.1624564,-0.03972293,-0.03967309, 
-0.03972293,0.03967309, 0.1965443,0.3025898, 0.1965443,-0.3025898, 0.04587166,0, 0.499261,0, + 0.2922398,0.2461792, 0.2922398,-0.2461792, 0.2769633,-0.2745029, 0.2769633,0.2745029, 0.1034687,-0.002947149, 0.1034687,0.002947149, -0.02611308,0.1658046, -0.02611308,-0.1658046, 0.2351063,0, -0.3787892,0, + -0.2512689,-0.02169855, -0.2512689,0.02169855, -0.01481625,0.4376404, -0.01481625,-0.4376404, -0.2298635,-0.2360671, -0.2298635,0.2360671, 0.11004,-0.1467444, 0.11004,0.1467444, 0.1501568,0, 0.340117,0, + 0.325096,0.1712822, 0.325096,-0.1712822, -0.2412035,-0.09236849, -0.2412035,0.09236849, 0.3894343,-0.08673087, 0.3894343,0.08673087, 0.3125305,0.07128152, 0.3125305,-0.07128152, -0.2415555,0, 0.1841298,0,}, sd::DataType::DOUBLE); + + ops::helpers::EigenValsAndVecs eig(x); + + ASSERT_TRUE(eig._Vals.isSameShape(&expVals)); + ASSERT_TRUE(eig._Vals.equalsTo(&expVals)); + + ASSERT_TRUE(eig._Vecs.isSameShape(&expVecs)); + ASSERT_TRUE(eig._Vecs.equalsTo(&expVecs)); +} +*/ + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests2, fullPivLU_1) { + + NDArray a('c', {4,4}, {0.33 ,-7.25 ,1.71 ,6.20 ,1.34 ,5.38 ,-2.76 ,-8.51 ,7.59 ,3.44 ,2.24 ,-6.82 ,-1.15 ,4.80 ,-4.67 ,2.14}, sd::DataType::DOUBLE); + NDArray b('c', {4,1}, {-5.,10,9,1}, sd::DataType::DOUBLE); + + NDArray x = b.ulike(); + + NDArray expX('c', {4,1}, {0.8527251, -0.2545784, -1.076495, -0.8526268}, sd::DataType::DOUBLE); + + ops::helpers::FullPivLU::solve(a,b,x); + + ASSERT_TRUE(x.equalsTo(&expX)); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests2, fullPivLU_2) { + + NDArray a('c', {4,4}, {0.33 ,-7.25 ,1.71 ,6.20 ,1.34 ,5.38 ,-2.76 ,-8.51 ,7.59 ,3.44 ,2.24 ,-6.82 ,-1.15 ,4.80 ,-4.67 ,2.14}, sd::DataType::DOUBLE); + NDArray b('c', {4,2}, {-5.,10,9,1,1.5,-2,17,5}, sd::DataType::DOUBLE); + + NDArray x = b.ulike(); + + NDArray expX('c', {4,2}, {1.462913, 1.835338, 0.4083664, -2.163816, -3.344481, -3.739225, 0.5156383,0.01624954}, sd::DataType::DOUBLE); + + ops::helpers::FullPivLU::solve(a,b,x); + + ASSERT_TRUE(x.equalsTo(&expX)); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests2, fullPivLU_3) { + + NDArray a1('c', {4,3}, {0.33 ,1.71 ,6.20 ,1.34 ,5.38 ,-2.76 ,-8.51 ,2.24 ,-6.82 ,4.80 ,-4.67 ,2.14}, sd::DataType::DOUBLE); + NDArray a2('c', {3,4}, {0.33 ,1.71 ,6.20 ,1.34 ,5.38 ,-2.76 ,-8.51 ,2.24 ,-6.82 ,4.80 ,-4.67 ,2.14}, sd::DataType::DOUBLE); + NDArray b1('c', {4,2}, {-5.,10,9,1,1.5,-2,17,5}, sd::DataType::DOUBLE); + NDArray b2('c', {3,2}, {-5.,10,9,1,1.5,-2}, sd::DataType::DOUBLE); + + NDArray expX1('c', {3,2}, {0.9344955,-0.5841325, 0.8768102, 1.029137, -1.098021, 1.360152}, sd::DataType::DOUBLE); + NDArray expX2('c', {4,2}, {0.3536033,0.5270184,0,0,-0.8292221,0.967515,0.01827441,2.856337}, sd::DataType::DOUBLE); + + NDArray x1 = expX1.ulike(); + ops::helpers::FullPivLU::solve(a1,b1,x1); + ASSERT_TRUE(x1.equalsTo(&expX1)); + + NDArray x2 = expX2.ulike(); + ops::helpers::FullPivLU::solve(a2,b2,x2); + ASSERT_TRUE(x2.equalsTo(&expX2)); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests2, fullPivLU_4) { + + NDArray a('c', {10,10}, {6.9 ,4.8 ,9.5 ,3.1 ,6.5 ,5.8 ,-0.9 ,-7.3 ,-8.1 ,3.0 ,0.1 ,9.9 ,-3.2 ,6.4 ,6.2 ,-7.0 ,5.5 ,-2.2 ,-4.0 ,3.7 ,-3.6 ,9.0 ,-1.4 ,-2.4 ,1.7 , + -6.1 ,-4.2 ,-2.5 ,-5.6 ,-0.4 ,0.4 ,9.1 ,-2.1 ,-5.4 ,7.3 ,3.6 ,-1.7 ,-5.7 ,-8.0 ,8.8 ,-3.0 ,-0.5 ,1.1 ,10.0 ,8.0 ,0.8 ,1.0 ,7.5 ,3.5 ,-1.8 , + 0.3 ,-0.6 ,-6.3 ,-4.5 ,-1.1 ,1.8 ,0.6 ,9.6 ,9.2 ,9.7 ,-2.6 ,4.3 ,-3.4 
,0.0 ,-6.7 ,5.0 ,10.5 ,1.5 ,-7.8 ,-4.1 ,-5.3 ,-5.0 ,2.0 ,-4.4 ,-8.4 ,
+                  6.0 ,-9.4 ,-4.8 ,8.2 ,7.8 ,5.2 ,-9.5 ,-3.9 ,0.2 ,6.8 ,5.7 ,-8.5 ,-1.9 ,-0.3 ,7.4 ,-8.7 ,7.2 ,1.3 ,6.3 ,-3.7 ,3.9 ,3.3 ,-6.0 ,-9.1 ,5.9}, sd::DataType::DOUBLE);
+    NDArray b('c', {10,2}, {-5.,10,9,1,1.5,-2,17,5,3.6,0.12, -3.1,2.27,-0.5,27.3,8.9,5,-7,8,-9,10}, sd::DataType::DOUBLE);
+
+    NDArray x = b.ulike();
+
+    NDArray expX('c', {10,2}, {-0.697127, 2.58257, 2.109721,3.160622,-2.217796, -3.275736,-0.5752479, 2.475356,1.996841, -1.928947,
+                  2.213154,3.541014, 0.7104885, -1.981451,-3.297972,-0.4720612, 3.672657, 0.9161028, -2.322383, -1.784493}, sd::DataType::DOUBLE);
+
+    ops::helpers::FullPivLU<double>::solve(a,b,x);
+
+    ASSERT_TRUE(x.equalsTo(&expX));
+}
diff --git a/libnd4j/tests_cpu/layers_tests/NDArrayTests.cpp b/libnd4j/tests_cpu/layers_tests/NDArrayTests.cpp
index 669574fa7..8150976e1 100644
--- a/libnd4j/tests_cpu/layers_tests/NDArrayTests.cpp
+++ b/libnd4j/tests_cpu/layers_tests/NDArrayTests.cpp
@@ -90,6 +90,9 @@ TEST_F(NDArrayTest, NDArrayOrder1) {
     auto arrayF = new NDArray(arrayC->dup('f'));
     auto arrayC2 = new NDArray(arrayF->dup('c'));
 
+    arrayF->syncToHost();
+    arrayC2->syncToHost();
+
     ASSERT_EQ('c', arrayC->ordering());
     ASSERT_EQ('f', arrayF->ordering());
     ASSERT_EQ('c', arrayC2->ordering());
diff --git a/libnd4j/tests_cpu/layers_tests/NativeOpsTests.cpp b/libnd4j/tests_cpu/layers_tests/NativeOpsTests.cpp
index 3421edf95..3d0df208f 100644
--- a/libnd4j/tests_cpu/layers_tests/NativeOpsTests.cpp
+++ b/libnd4j/tests_cpu/layers_tests/NativeOpsTests.cpp
@@ -251,7 +251,7 @@ TEST_F(NativeOpsTests, ExecPairwise_2) {
     auto exp = NDArrayFactory::create('c', {5, 5});
     x.assign(true);
     y.assign(false);
-    y.t(5) = true;
+    y.r(5) = true;
 #ifdef __CUDABLAS__
     printf("Unsupported for cuda now.\n");
 #else
diff --git a/libnd4j/tests_cpu/layers_tests/PlaygroundTests.cpp b/libnd4j/tests_cpu/layers_tests/PlaygroundTests.cpp
index f8086c9fe..f4c8bd2fa 100644
--- a/libnd4j/tests_cpu/layers_tests/PlaygroundTests.cpp
+++ b/libnd4j/tests_cpu/layers_tests/PlaygroundTests.cpp
@@ -1168,6 +1168,529 @@ TEST_F(PlaygroundTests, lstmLayerCellBp_1) {
 }
 
+
+///////////////////////////////////////////////////////////////////
+TEST_F(DeclarableOpsTests13, lstmLayer_bp_1) {
+
+    const int sL   = 3;
+    const int bS   = 2;
+    const int nIn  = 2;
+    const int nOut = 3;
+
+    const int dataFormat    = 0;    // [sL,bS,nIn]
+    const int directionMode = 0;    // forward
+    const int gateAct       = 2;    // sigmoid activation for input (i), forget (f) and output (o) gates
+    const int cellAct       = 0;    // tanh activation for cell state
+    const int outAct        = 0;    // tanh activation for output
+
+    const bool hasBiases  = true;   // biases array is provided
+    const bool hasSeqLen  = false;  // seqLen array is not provided
+    const auto hasInitH   = true;   // initial output is provided
+    const auto hasInitC   = true;   // initial cell state is provided
+    const auto hasPH      = true;   // peephole connections are provided
+    const auto retFullSeq = true;   // dLdh per each time step
+    const auto retLastH   = true;   // output at last time step
+    const auto retLastC   = true;   // cells state at last time step
+
+    const double cellClip = 0.5;    // clipping
+
+    NDArray x('c', {sL, bS, nIn}, sd::DataType::DOUBLE);
+    NDArray Wx('c', {nIn, 4*nOut}, sd::DataType::DOUBLE);
+    NDArray Wr('c', {nOut, 4*nOut}, sd::DataType::DOUBLE);
+    NDArray b('c', {4*nOut}, sd::DataType::DOUBLE);
+    NDArray hI('c', {bS, nOut}, sd::DataType::DOUBLE);
+    NDArray cI('c', {bS, nOut}, sd::DataType::DOUBLE);
+    NDArray Wp('c', {3*nOut}, sd::DataType::DOUBLE);
+    NDArray dLdh('c', {sL, bS, nOut}, sd::DataType::DOUBLE);
+    NDArray dLdhL('c', {bS, nOut}, sd::DataType::DOUBLE);
+    NDArray dLdcL('c', {bS, nOut}, sd::DataType::DOUBLE);
+
+    x.linspace(-2,0.1);
+    hI.linspace(-1.5,0.1);
+    cI.linspace(0.7,-0.1);
+    Wx.linspace(1,-0.1);
+    Wr.linspace(-1,0.1);
+    Wp.linspace(0.2,0.2);
+    b.linspace(1,-0.15);
+
+    std::vector<double> tArgs = {cellClip};
+    std::vector<Nd4jLong> iArgs = {dataFormat, directionMode, gateAct, cellAct, outAct};
+    std::vector<bool> bArgs = {hasBiases, hasSeqLen, hasInitH, hasInitC, hasPH, retFullSeq, retLastH, retLastC};
+
+    const OpArgsHolder argsHolderFF({&x, &Wx, &Wr, &b, &hI, &cI, &Wp}, tArgs, iArgs, bArgs);
+    const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &hI, &cI, &Wp, &dLdh, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs);
+
+    sd::ops::lstmLayer opFF;
+    sd::ops::lstmLayer_bp opBP;
+
+    const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP);
+
+    ASSERT_TRUE(isGradCorrect);
+}
+
+///////////////////////////////////////////////////////////////////
+TEST_F(DeclarableOpsTests13, lstmLayer_bp_2) {
+
+    const int sL   = 3;
+    const int bS   = 2;
+    const int nIn  = 2;
+    const int nOut = 3;
+
+    const int dataFormat    = 1;    // [bS,sL,nIn]
+    const int directionMode = 0;    // forward
+    const int gateAct       = 2;    // sigmoid activation for input (i), forget (f) and output (o) gates
+    const int cellAct       = 0;    // tanh activation for cell state
+    const int outAct        = 0;    // tanh activation for output
+
+    const bool hasBiases  = true;   // biases array is provided
+    const bool hasSeqLen  = false;  // seqLen array is not provided
+    const auto hasInitH   = true;   // initial output is provided
+    const auto hasInitC   = true;   // initial cell state is provided
+    const auto hasPH      = true;   // peephole connections are provided
+    const auto retFullSeq = true;   // return whole h {h_0, h_1, ... , h_sL-1}, [sL,bS,nOut]
+    const auto retLastH   = false;  // output at last time step is not returned
+    const auto retLastC   = true;   // cells state at last time step
+
+    const double cellClip = 0.5;    // clipping
+
+    NDArray x('c', {bS, sL, nIn}, sd::DataType::DOUBLE);
+    NDArray Wx('c', {nIn, 4*nOut}, sd::DataType::DOUBLE);
+    NDArray Wr('c', {nOut, 4*nOut}, sd::DataType::DOUBLE);
+    NDArray b('c', {4*nOut}, sd::DataType::DOUBLE);
+    NDArray hI('c', {bS, nOut}, sd::DataType::DOUBLE);
+    NDArray cI('c', {bS, nOut}, sd::DataType::DOUBLE);
+    NDArray Wp('c', {3*nOut}, sd::DataType::DOUBLE);
+    NDArray dLdh('c', {bS, sL, nOut}, sd::DataType::DOUBLE);
+    NDArray dLdcL('c', {bS, nOut}, sd::DataType::DOUBLE);
+
+    x.linspace(-2,0.1);
+    hI.linspace(-1.5,0.1);
+    cI.linspace(0.7,-0.1);
+    Wx.linspace(1,-0.1);
+    Wr.linspace(-1,0.1);
+    Wp.linspace(0.2,0.2);
+    b.linspace(1,-0.15);
+
+    std::vector<double> tArgs = {cellClip};
+    std::vector<Nd4jLong> iArgs = {dataFormat, directionMode, gateAct, cellAct, outAct};
+    std::vector<bool> bArgs = {hasBiases, hasSeqLen, hasInitH, hasInitC, hasPH, retFullSeq, retLastH, retLastC};
+
+    const OpArgsHolder argsHolderFF({&x, &Wx, &Wr, &b, &hI, &cI, &Wp}, tArgs, iArgs, bArgs);
+    const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &hI, &cI, &Wp, &dLdh, &dLdcL}, tArgs, iArgs, bArgs);
+
+    sd::ops::lstmLayer opFF;
+    sd::ops::lstmLayer_bp opBP;
+
+    const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP, std::vector<bool>(), {0., 1.}, GradCheck::LossFunc::MEAN);
+
+    ASSERT_TRUE(isGradCorrect);
+}
+
+///////////////////////////////////////////////////////////////////
+TEST_F(DeclarableOpsTests13, lstmLayer_bp_3) {
+
+    const int sL   = 4;
+    const int bS   = 3;
+    const int nIn  = 3;
+    const int nOut = 2;
+
+    const int dataFormat    = 2;    // [bS, nIn, sL]
+    const int directionMode = 0;    // forward
+    const int gateAct       = 2;    // sigmoid activation for input (i), forget (f) and output (o) gates
+    const int cellAct       = 0;    // tanh activation for cell state
+    const int outAct        = 0;    // tanh activation for output
+
+    const bool hasBiases  = true;   // biases array is provided
+    const bool hasSeqLen  = true;   // seqLen array is provided
+    const auto hasInitH   = true;   // initial output is provided
+    const auto hasInitC   = true;   // initial cell state is provided
+    const auto hasPH      = true;   // peephole connections are provided
+    const auto retFullSeq = true;   // dLdh per each time step
+    const auto retLastH   = true;   // output at last time step
+    const auto retLastC   = true;   // cells state at last time step
+
+    const double cellClip = 0.5;    // clipping
+
+    NDArray x('c', {bS, nIn, sL}, sd::DataType::DOUBLE);
+    NDArray Wx('c', {nIn, 4*nOut}, sd::DataType::DOUBLE);
+    NDArray Wr('c', {nOut, 4*nOut}, sd::DataType::DOUBLE);
+    NDArray b('c', {4*nOut}, sd::DataType::DOUBLE);
+    NDArray seqLen('c', {bS}, {2,0,4}, sd::DataType::DOUBLE);
+    NDArray hI('c', {bS, nOut}, sd::DataType::DOUBLE);
+    NDArray cI('c', {bS, nOut}, sd::DataType::DOUBLE);
+    NDArray Wp('c', {3*nOut}, sd::DataType::DOUBLE);
+    NDArray dLdh('c', {bS, nOut, sL}, sd::DataType::DOUBLE);
+    NDArray dLdhL('c', {bS, nOut}, sd::DataType::DOUBLE);
+    NDArray dLdcL('c', {bS, nOut}, sd::DataType::DOUBLE);
+
+    x.linspace(-2,0.1);
+    hI.linspace(-1.5,0.1);
+    cI.linspace(0.7,-0.1);
+    Wx.linspace(1,-0.1);
+    Wr.linspace(-1,0.1);
+    Wp.linspace(0.2,0.2);
+    b.linspace(1,-0.15);
+
+    std::vector<double> tArgs = {cellClip};
+    std::vector<Nd4jLong> iArgs = {dataFormat, directionMode, gateAct, cellAct, outAct};
+    std::vector<bool> bArgs = {hasBiases, hasSeqLen, hasInitH, hasInitC, hasPH, retFullSeq, retLastH, retLastC};
+
+    const OpArgsHolder argsHolderFF({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp}, tArgs, iArgs, bArgs);
+    const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs);
+
+    sd::ops::lstmLayer opFF;
+    sd::ops::lstmLayer_bp opBP;
+
+    const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP, {true, true, true, true, false, true, true, true});
+
+    ASSERT_TRUE(isGradCorrect);
+}
+
+///////////////////////////////////////////////////////////////////
+TEST_F(DeclarableOpsTests13, lstmLayer_bp_4) {
+
+    const int sL   = 3;
+    const int bS   = 2;
+    const int nIn  = 2;
+    const int nOut = 3;
+
+    const int dataFormat    = 1;    // [bS,sL,nIn]
+    const int directionMode = 1;    // backward
+    const int gateAct       = 2;    // sigmoid activation for input (i), forget (f) and output (o) gates
+    const int cellAct       = 0;    // tanh activation for cell state
+    const int outAct        = 0;    // tanh activation for output
+
+    const bool hasBiases  = true;   // biases array is provided
+    const bool hasSeqLen  = false;  // seqLen array is not provided
+    const auto hasInitH   = true;   // initial output is provided
+    const auto hasInitC   = true;   // initial cell state is provided
+    const auto hasPH      = true;   // peephole connections are provided
+    const auto retFullSeq = true;   // dLdh per each time step
+    const auto retLastH   = true;   // output at last time step
+    const auto retLastC   = true;   // cells state at last time step
+
+    const double cellClip = 0.5;    // clipping
+
+    NDArray x('c', {bS, sL, nIn}, sd::DataType::DOUBLE);
+    NDArray Wx('c', {nIn, 4*nOut}, sd::DataType::DOUBLE);
+    NDArray Wr('c', {nOut, 4*nOut}, sd::DataType::DOUBLE);
+    NDArray b('c', {4*nOut}, sd::DataType::DOUBLE);
+    NDArray hI('c', {bS, nOut}, sd::DataType::DOUBLE);
+    NDArray cI('c', {bS, nOut}, sd::DataType::DOUBLE);
+    NDArray Wp('c', {3*nOut}, sd::DataType::DOUBLE);
+    NDArray dLdh('c', {bS, sL, nOut}, sd::DataType::DOUBLE);
+    NDArray dLdhL('c', {bS, nOut}, sd::DataType::DOUBLE);
+    NDArray dLdcL('c', {bS, nOut}, sd::DataType::DOUBLE);
+
+    x.linspace(-2,0.1);
+    hI.linspace(-1.5,0.1);
+    cI.linspace(0.7,-0.1);
+    Wx.linspace(1,-0.1);
+    Wr.linspace(-1,0.1);
+    Wp.linspace(0.2,0.2);
+    b.linspace(1,-0.15);
+
+    std::vector<double> tArgs = {cellClip};
+    std::vector<Nd4jLong> iArgs = {dataFormat, directionMode, gateAct, cellAct, outAct};
+    std::vector<bool> bArgs = {hasBiases, hasSeqLen, hasInitH, hasInitC, hasPH, retFullSeq, retLastH, retLastC};
+
+    const OpArgsHolder argsHolderFF({&x, &Wx, &Wr, &b, &hI, &cI, &Wp}, tArgs, iArgs, bArgs);
+    const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &hI, &cI, &Wp, &dLdh, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs);
+
+    sd::ops::lstmLayer opFF;
+    sd::ops::lstmLayer_bp opBP;
+
+    const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP);
+
+    ASSERT_TRUE(isGradCorrect);
+}
+
+///////////////////////////////////////////////////////////////////
+TEST_F(DeclarableOpsTests13, lstmLayer_bp_5) {
+
+    const int sL   = 3;
+    const int bS   = 2;
+    const int nIn  = 2;
+    const int nOut = 2;
+
+    const int dataFormat    = 2;    // [bS, nIn, sL]
+    const int directionMode = 1;    // backward
+    const int gateAct       = 2;    // sigmoid activation for input (i), forget (f) and output (o) gates
+    const int cellAct       = 0;    // tanh activation for cell state
+    const int outAct        = 0;    // tanh activation for output
+
+    const bool hasBiases  = true;   // biases array is provided
+    const bool hasSeqLen  = true;   // seqLen array is provided
+    const auto hasInitH   = true;   // initial output is provided
+    const auto hasInitC   = true;   // initial cell state is
provided + const auto hasPH = true; // peephole connections are provided + const auto retFullSeq = true; // dLdh for each time step + const auto retLastH = true; // output at last time step + const auto retLastC = true; // cell state at last time step + + const double cellClip = 0.5; // clipping + + NDArray x('c', {bS, nIn, sL}, sd::DataType::DOUBLE); + NDArray Wx('c', {nIn, 4*nOut}, sd::DataType::DOUBLE); + NDArray Wr('c', {nOut, 4*nOut}, sd::DataType::DOUBLE); + NDArray b('c', {4*nOut}, sd::DataType::DOUBLE); + NDArray seqLen('c', {bS}, {0,2}, sd::DataType::DOUBLE); + NDArray hI('c', {bS, nOut}, sd::DataType::DOUBLE); + NDArray cI('c', {bS, nOut}, sd::DataType::DOUBLE); + NDArray Wp('c', {3*nOut}, sd::DataType::DOUBLE); + NDArray dLdh('c', {bS, nOut, sL}, sd::DataType::DOUBLE); + NDArray dLdhL('c', {bS, nOut}, sd::DataType::DOUBLE); + NDArray dLdcL('c', {bS, nOut}, sd::DataType::DOUBLE); + + x.linspace(-2,0.1); + hI.linspace(-1.5,0.1); + cI.linspace(0.7,-0.1); + Wx.linspace(1,-0.1); + Wr.linspace(-1,0.1); + Wp.linspace(0.2,0.2); + b.linspace(1,-0.15); + + std::vector tArgs = {cellClip}; + std::vector iArgs = {dataFormat, directionMode, gateAct, cellAct, outAct}; + std::vector bArgs = {hasBiases, hasSeqLen, hasInitH, hasInitC, hasPH, retFullSeq, retLastH, retLastC}; + + const OpArgsHolder argsHolderFF({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp}, tArgs, iArgs, bArgs); + const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); + + sd::ops::lstmLayer opFF; + sd::ops::lstmLayer_bp opBP; + + const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP, {true, true, true, true, false, true, true, true}); + + ASSERT_TRUE(isGradCorrect); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests13, lstmLayer_bp_6) { + + const int sL = 3; + const int bS = 2; + const int nIn = 2; + const int nOut = 2; + + const int dataFormat = 2; // [bS, nIn, sL] + const int directionMode = 2; // bidirectional sum + const int gateAct = 2; // sigmoid activation for input (i), forget (f) and output (o) gates + const int cellAct = 0; // tanh activation for cell state + const int outAct = 0; // tanh activation for output + + const bool hasBiases = true; // biases array is provided + const bool hasSeqLen = true; // seqLen array is provided + const auto hasInitH = true; // initial output is provided + const auto hasInitC = true; // initial cell state is provided + const auto hasPH = true; // peephole connections are provided + const auto retFullSeq = true; // dLdh for each time step + const auto retLastH = true; // output at last time step + const auto retLastC = true; // cell state at last time step + + const double cellClip = 0.5; // clipping + + NDArray x('c', {bS, nIn, sL}, sd::DataType::DOUBLE); + NDArray Wx('c', {2, nIn, 4*nOut}, sd::DataType::DOUBLE); + NDArray Wr('c', {2, nOut, 4*nOut}, sd::DataType::DOUBLE); + NDArray b('c', {2, 4*nOut}, sd::DataType::DOUBLE); + NDArray seqLen('c', {bS}, {0,2}, sd::DataType::DOUBLE); + NDArray hI('c', {2, bS, nOut}, sd::DataType::DOUBLE); + NDArray cI('c', {2, bS, nOut}, sd::DataType::DOUBLE); + NDArray Wp('c', {2, 3*nOut}, sd::DataType::DOUBLE); + NDArray dLdh('c', {bS, nOut, sL}, sd::DataType::DOUBLE); + NDArray dLdhL('c', {2, bS, nOut}, sd::DataType::DOUBLE); + NDArray dLdcL('c', {2, bS, nOut}, sd::DataType::DOUBLE); + + x.linspace(-2,0.1); + hI.linspace(-1.5,0.1); + cI.linspace(0.7,-0.1); + Wx.linspace(1,-0.1); + Wr.linspace(-1,0.1); + 
Wp.linspace(0.2,0.2); + b.linspace(1,-0.15); + + std::vector tArgs = {cellClip}; + std::vector iArgs = {dataFormat, directionMode, gateAct, cellAct, outAct}; + std::vector bArgs = {hasBiases, hasSeqLen, hasInitH, hasInitC, hasPH, retFullSeq, retLastH, retLastC}; + + const OpArgsHolder argsHolderFF({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp}, tArgs, iArgs, bArgs); + const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); + // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdhL}, tArgs, iArgs, bArgs); + // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdcL}, tArgs, iArgs, bArgs); + // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); + // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh}, tArgs, iArgs, bArgs); + // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdhL}, tArgs, iArgs, bArgs); + // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdcL}, tArgs, iArgs, bArgs); + + sd::ops::lstmLayer opFF; + sd::ops::lstmLayer_bp opBP; + + const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP, {true, true, true, true, false, true, true, true}); + + ASSERT_TRUE(isGradCorrect); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests13, lstmLayer_bp_7) { + + const int sL = 3; + const int bS = 2; + const int nIn = 2; + const int nOut = 2; + + const int dataFormat = 1; // [bS,sL,nIn] + const int directionMode = 3; // bidirectional concat + const int gateAct = 2; // sigmoid activation for input (i), forget (f) and output (o) gates + const int cellAct = 0; // tanh activation for cell state + const int outAct = 0; // tanh activation for output + + const bool hasBiases = true; // biases array is provided + const bool hasSeqLen = true; // seqLen array is not provided + const auto hasInitH = true; // initial output is provided + const auto hasInitC = true; // initial cell state is provided + const auto hasPH = true; // peephole connections are absent + const auto retFullSeq = true; // dLdh per each time step + const auto retLastH = true; // output at last time step + const auto retLastC = true; // cells state at last time step + + const double cellClip = 0.5; // clipping + + NDArray x('c', {bS,sL,nIn}, sd::DataType::DOUBLE); + NDArray Wx('c', {2, nIn, 4*nOut}, sd::DataType::DOUBLE); + NDArray Wr('c', {2, nOut, 4*nOut}, sd::DataType::DOUBLE); + NDArray b('c', {2, 4*nOut}, sd::DataType::DOUBLE); + NDArray seqLen('c', {bS}, {0,2}, sd::DataType::DOUBLE); + NDArray hI('c', {2, bS, nOut}, sd::DataType::DOUBLE); + NDArray cI('c', {2, bS, nOut}, sd::DataType::DOUBLE); + NDArray Wp('c', {2, 3*nOut}, sd::DataType::DOUBLE); + NDArray dLdh('c', {bS,sL,2*nOut}, sd::DataType::DOUBLE); + NDArray dLdhL('c', {2, bS, nOut}, sd::DataType::DOUBLE); + NDArray dLdcL('c', {2, bS, nOut}, sd::DataType::DOUBLE); + + x.linspace(-2,0.1); + hI.linspace(-1.5,0.1); + cI.linspace(0.7,-0.1); + Wx.linspace(1,-0.1); + Wr.linspace(-1,0.1); + Wp.linspace(0.2,0.2); + b.linspace(1,-0.15); + + std::vector tArgs = {cellClip}; + std::vector iArgs = {dataFormat, directionMode, gateAct, cellAct, outAct}; + std::vector bArgs = {hasBiases, hasSeqLen, hasInitH, hasInitC, hasPH, retFullSeq, retLastH, retLastC}; + + const OpArgsHolder argsHolderFF({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp}, 
tArgs, iArgs, bArgs); + const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); + // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdhL}, tArgs, iArgs, bArgs); + // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdcL}, tArgs, iArgs, bArgs); + // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); + // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh}, tArgs, iArgs, bArgs); + // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdhL}, tArgs, iArgs, bArgs); + // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdcL}, tArgs, iArgs, bArgs); + + sd::ops::lstmLayer opFF; + sd::ops::lstmLayer_bp opBP; + + const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP, {true, true, true, true, false, true, true, true}); + + ASSERT_TRUE(isGradCorrect); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests13, lstmLayer_bp_8) { + + const int sL = 3; + const int bS = 2; + const int nIn = 2; + const int nOut = 2; + + const int dataFormat = 3; // [sL, bS, nIn] + const int directionMode = 4; // bidirectional extra output dim + const int gateAct = 2; // sigmoid activation for input (i), forget (f) and output (o) gates + const int cellAct = 0; // tanh activation for cell state + const int outAct = 0; // tanh activation for output + + const bool hasBiases = true; // biases array is provided + const bool hasSeqLen = true; // seqLen array is not provided + const auto hasInitH = true; // initial output is provided + const auto hasInitC = true; // initial cell state is provided + const auto hasPH = true; // peephole connections are absent + const auto retFullSeq = true; // dLdh per each time step + const auto retLastH = true; // output at last time step + const auto retLastC = true; // cells state at last time step + + const double cellClip = 0.5; // clipping + + NDArray x('c', {sL, bS, nIn}, sd::DataType::DOUBLE); + NDArray Wx('c', {2, nIn, 4*nOut}, sd::DataType::DOUBLE); + NDArray Wr('c', {2, nOut, 4*nOut}, sd::DataType::DOUBLE); + NDArray b('c', {2, 4*nOut}, sd::DataType::DOUBLE); + NDArray seqLen('c', {bS}, {0,2}, sd::DataType::DOUBLE); + NDArray hI('c', {2, bS, nOut}, sd::DataType::DOUBLE); + NDArray cI('c', {2, bS, nOut}, sd::DataType::DOUBLE); + NDArray Wp('c', {2, 3*nOut}, sd::DataType::DOUBLE); + NDArray dLdh('c', {sL, 2, bS, nOut}, sd::DataType::DOUBLE); + NDArray dLdhL('c', {2, bS, nOut}, sd::DataType::DOUBLE); + NDArray dLdcL('c', {2, bS, nOut}, sd::DataType::DOUBLE); + + x.linspace(-2,0.1); + hI.linspace(-1.5,0.1); + cI.linspace(0.7,-0.1); + Wx.linspace(1,-0.1); + Wr.linspace(-1,0.1); + Wp.linspace(0.2,0.2); + b.linspace(1,-0.15); + + std::vector tArgs = {cellClip}; + std::vector iArgs = {dataFormat, directionMode, gateAct, cellAct, outAct}; + std::vector bArgs = {hasBiases, hasSeqLen, hasInitH, hasInitC, hasPH, retFullSeq, retLastH, retLastC}; + + const OpArgsHolder argsHolderFF({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp}, tArgs, iArgs, bArgs); + const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); + // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdhL}, tArgs, iArgs, bArgs); + // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, 
&hI, &cI, &Wp, &dLdh, &dLdcL}, tArgs, iArgs, bArgs); + // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); + // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh}, tArgs, iArgs, bArgs); + // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdhL}, tArgs, iArgs, bArgs); + // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdcL}, tArgs, iArgs, bArgs); + + sd::ops::lstmLayer opFF; + sd::ops::lstmLayer_bp opBP; + + const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP, {true, true, true, true, false, true, true, true}); + + ASSERT_TRUE(isGradCorrect); +} + +////////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests15, gru_bp_1) { + + const int sL = 3; + const int bS = 2; + const int nIn = 5; + const int nOut = 4; + + + NDArray x('c', {sL, bS, nIn}, {0.5, 1. , 1.5, 2. , 2.5, 3. , 3.5, 4. , 4.5, 5. , 5.5, 6. , 6.5, 7. , 7.5, 8. , 8.5, 9. , 9.5, 10. , 10.5, 11. , 11.5, 12. , 12.5, 13. , 13.5, 14. , 14.5, 15.}, sd::DataType::DOUBLE); + NDArray hI('c', {bS, nOut}, {-3,-2,-1,0,1,2,3,4}, sd::DataType::DOUBLE); + NDArray Wx('c', {nIn, 3*nOut}, sd::DataType::DOUBLE); + NDArray Wh('c', {nOut, 3*nOut}, sd::DataType::DOUBLE); + NDArray b('c', {3*nOut}, sd::DataType::DOUBLE); + + NDArray dLdh('c', {sL, bS, nOut}, sd::DataType::DOUBLE); + + Wx.linspace(1,-0.1); + Wh.linspace(0.2,0.2); + b.linspace(1,-0.15); + + const OpArgsHolder argsHolderFF({&x, &hI, &Wx, &Wh, &b}, {}, {}); + const OpArgsHolder argsHolderBP({&x, &hI, &Wx, &Wh, &b, &dLdh}, {}, {}); + + sd::ops::gru opFF; + sd::ops::gru_bp opBP; + + const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP); +} */ diff --git a/libnd4j/tests_cpu/layers_tests/RNGTests.cpp b/libnd4j/tests_cpu/layers_tests/RNGTests.cpp index c4c1806bd..37facc43c 100644 --- a/libnd4j/tests_cpu/layers_tests/RNGTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/RNGTests.cpp @@ -100,7 +100,7 @@ TEST_F(RNGTests, TestGenerator_SGA_1) { for (auto idx = 0; idx < array.lengthOf(); idx++) { float x = generator.relativeT(idx, -sd::DataTypeUtils::template max() / 10, sd::DataTypeUtils::template max() / 10); - array.t(idx) = x; + array.r(idx) = x; } auto minimum = array.reduceNumber(reduce::AMin); minimum.printBuffer("Randomly float min on 1M array"); @@ -285,7 +285,7 @@ TEST_F(RNGTests, Test_Gaussian_21) { ASSERT_NEAR(sd::math::nd4j_abs(mean->e(0)), 0.f, 0.2f); ASSERT_NEAR(variance->e(0), 1.0f, 0.2f); - + } #ifdef DEBUG_BUILD @@ -315,7 +315,7 @@ TEST_F(RNGTests, Test_Gaussian_22) { //variance0->printIndexedBuffer("Variance"); ASSERT_NEAR(sd::math::nd4j_abs(mean0->e(0)), 0.f, 1.0e-3f); ASSERT_NEAR(variance0->e(0), 1.0f, 1.e-3f); - + } TEST_F(RNGTests, Test_Gaussian_3) { @@ -431,7 +431,7 @@ TEST_F(RNGTests, Test_Truncated_21) { // result.at(0)->printBuffer("MEAN"); // result.at(1)->printBuffer("VARIANCE"); - + sd::ops::reduce_min minOp; sd::ops::reduce_max maxOp; @@ -585,7 +585,7 @@ TEST_F(RNGTests, Test_Uniform_2) { ASSERT_TRUE(x1.equalsTo(z)); delete op; - + } TEST_F(RNGTests, Test_Uniform_SGA_3) { @@ -614,7 +614,7 @@ TEST_F(RNGTests, Test_Gaussian_2) { ASSERT_TRUE(x1.equalsTo(z)); delete op; - + } TEST_F(RNGTests, Test_LogNorm_2) { @@ -634,7 +634,7 @@ TEST_F(RNGTests, Test_LogNorm_2) { ASSERT_TRUE(x1.equalsTo(z)); delete op; - + } TEST_F(RNGTests, Test_TruncatedNorm_2) { @@ -653,7 +653,7 @@ TEST_F(RNGTests, Test_TruncatedNorm_2) { 
ASSERT_TRUE(x1.isSameShape(z)); ASSERT_TRUE(x1.equalsTo(z)); delete op; - + } @@ -674,7 +674,7 @@ TEST_F(RNGTests, Test_Binomial_2) { ASSERT_TRUE(x1.equalsTo(z)); delete op; - + } @@ -695,7 +695,7 @@ TEST_F(RNGTests, Test_Bernoulli_2) { ASSERT_TRUE(x1.equalsTo(z)); delete op; - + } TEST_F(RNGTests, Test_GaussianDistribution_1) { @@ -716,7 +716,7 @@ TEST_F(RNGTests, Test_GaussianDistribution_1) { ASSERT_FALSE(nexp1->equalsTo(z)); ASSERT_FALSE(nexp2->equalsTo(z)); - + } TEST_F(RNGTests, Test_BernoulliDistribution_1) { @@ -736,7 +736,7 @@ TEST_F(RNGTests, Test_BernoulliDistribution_1) { ASSERT_FALSE(nexp1->equalsTo(z)); ASSERT_FALSE(nexp2->equalsTo(z)); - + } @@ -787,7 +787,7 @@ TEST_F(RNGTests, Test_ExponentialDistribution_1_SGA) { ASSERT_FALSE(nexp1->equalsTo(z)); ASSERT_FALSE(nexp2->equalsTo(z)); - + } TEST_F(RNGTests, Test_ExponentialDistribution_2_SGA) { @@ -880,7 +880,7 @@ TEST_F(RNGTests, Test_ExponentialDistribution_2) { ASSERT_FALSE(nexp1->equalsTo(z)); ASSERT_FALSE(nexp2->equalsTo(z)); - + } TEST_F(RNGTests, Test_PoissonDistribution_1) { @@ -900,7 +900,7 @@ TEST_F(RNGTests, Test_PoissonDistribution_1) { ASSERT_TRUE(exp0.isSameShape(z)); ASSERT_FALSE(exp0.equalsTo(z)); - + } TEST_F(RNGTests, Test_GammaDistribution_1) { @@ -920,7 +920,7 @@ TEST_F(RNGTests, Test_GammaDistribution_1) { ASSERT_TRUE(exp0.isSameShape(z)); ASSERT_FALSE(exp0.equalsTo(z)); - + } TEST_F(RNGTests, Test_GammaDistribution_2) { @@ -941,7 +941,7 @@ TEST_F(RNGTests, Test_GammaDistribution_2) { ASSERT_TRUE(exp0.isSameShape(z)); ASSERT_FALSE(exp0.equalsTo(z)); - + } TEST_F(RNGTests, Test_GammaDistribution_3) { @@ -962,7 +962,7 @@ TEST_F(RNGTests, Test_GammaDistribution_3) { ASSERT_TRUE(exp0.isSameShape(z)); ASSERT_FALSE(exp0.equalsTo(z)); - + } TEST_F(RNGTests, Test_UniformDistribution_04) { @@ -980,7 +980,7 @@ TEST_F(RNGTests, Test_UniformDistribution_04) { ASSERT_TRUE(exp0.isSameShape(z)); ASSERT_FALSE(exp0.equalsTo(z)); - + } namespace sd { @@ -1142,7 +1142,7 @@ TEST_F(RNGTests, test_multinomial_1) { ASSERT_EQ(Status::OK(), result.status()); ASSERT_TRUE(expectedZ.isSameShape(outputZ)); ASSERT_TRUE(expectedZ.equalsTo(outputZ)); - + } TEST_F(RNGTests, test_multinomial_2) { @@ -1219,7 +1219,7 @@ TEST_F(RNGTests, test_multinomial_5) { RandomGenerator rng(1234, 1234); ASSERT_EQ(Status::OK(), op.execute(rng, { &probs, &samples }, { &output }, {}, { 1 }, {}, {}, false)); - + auto deviation = output.varianceNumber(variance::SummaryStatsStandardDeviation, false); auto mean = output.meanNumber(); // printf("Var: %f Mean: %f \n", deviation.e(0), mean.e(0)); @@ -1290,7 +1290,7 @@ TEST_F(RNGTests, test_multinomial_6) { ASSERT_NEAR(1.2175, deviation.e(0), 45e-3); // 1000000 35e-3); ASSERT_NEAR(2.906, mean.e(0), 45e-3); // 1000000 35e-3); - + RandomGenerator rng(1234, 1234); NDArray probs('c', { batchValue, ClassValue }, { 1., 1.5, 2., 2.5, 3. 
}, sd::DataType::FLOAT32); diff --git a/libnd4j/tests_cpu/libnd4j_tests/CMakeLists.txt b/libnd4j/tests_cpu/libnd4j_tests/CMakeLists.txt index 7d3073b58..92084ef74 100644 --- a/libnd4j/tests_cpu/libnd4j_tests/CMakeLists.txt +++ b/libnd4j/tests_cpu/libnd4j_tests/CMakeLists.txt @@ -115,7 +115,7 @@ elseif(WIN32) set(CMAKE_CXX_FLAGS " -g -fPIC -std=c++11 -Wa,-mbig-obj") endif() else() - set(CMAKE_CXX_FLAGS " ${CMAKE_CXX_FLAGS} -DLINUX_BUILD=true") + set(CMAKE_CXX_FLAGS " ${CMAKE_CXX_FLAGS} -ffast-math -DFFAST_MATH=true -DLINUX_BUILD=true") if ("${_RELEASE}" OR CMAKE_BUILD_TYPE STREQUAL "Release") message("Release build for tests") @@ -225,6 +225,17 @@ if (CMAKE_BUILD_TYPE STREQUAL "Debug" AND NOT(MINGW) AND NOT(APPLE)) SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -export-dynamic") endif() +file(GLOB_RECURSE COMPILATION_UNITS false ../../include/ops/declarable/helpers/cpu/compilation_units/*.cpp.in) +foreach(FL_ITEM ${COMPILATION_UNITS}) + string(REGEX MATCH "^(.*)\\.cpp\.in$" dummy ${FL_ITEM}) + set(FL_ITEM_WLE ${CMAKE_MATCH_1}) + foreach(FL_TYPE_INDEX RANGE 0 9) + #message( "${FL_ITEM_WLE}_${FL_TYPE_INDEX}.cpp") + configure_file( "${FL_ITEM}" "${FL_ITEM_WLE}_${FL_TYPE_INDEX}.cpp" @ONLY) + LIST(APPEND CUSTOMOPS_GENERIC_SOURCES ${FL_ITEM_WLE}_${FL_TYPE_INDEX}.cpp ) + endforeach() +endforeach() + # this function strips path from file name, basically making up short file name, i.e. file.cpp function(SHORTNAME LONG_NAME OUTPUT) From deb87b04f7aeaba748d57fc6182e8f1cb9507e20 Mon Sep 17 00:00:00 2001 From: Alex Black Date: Fri, 15 May 2020 15:34:08 +1000 Subject: [PATCH 04/21] Assorted fixes (#466) * Timeouts and fixes Signed-off-by: Alex Black * Increase default timeout to 90s due to slow PPC CI machines Signed-off-by: Alex Black * Another timeout tweak Signed-off-by: Alex Black * Svhn Signed-off-by: Alex Black --- .../main/java/org/deeplearning4j/BaseDL4JTest.java | 2 +- .../datasets/fetchers/SvhnDataFetcherTest.java | 9 +++++++-- .../optimizer/listener/TestCheckpointListener.java | 11 ++++++----- .../optimizer/listener/TestListeners.java | 5 +++++ .../nn/multilayer/MultiLayerNetwork.java | 7 +++++++ .../train/GradientSharingTrainingTest.java | 2 +- .../main/java/org/nd4j/common/tests/BaseND4JTest.java | 2 +- 7 files changed, 28 insertions(+), 10 deletions(-) diff --git a/deeplearning4j/deeplearning4j-common-tests/src/main/java/org/deeplearning4j/BaseDL4JTest.java b/deeplearning4j/deeplearning4j-common-tests/src/main/java/org/deeplearning4j/BaseDL4JTest.java index 46daaa5f5..b74df2d2c 100644 --- a/deeplearning4j/deeplearning4j-common-tests/src/main/java/org/deeplearning4j/BaseDL4JTest.java +++ b/deeplearning4j/deeplearning4j-common-tests/src/main/java/org/deeplearning4j/BaseDL4JTest.java @@ -68,7 +68,7 @@ public abstract class BaseDL4JTest { * Override this method to set the default timeout for methods in the test class */ public long getTimeoutMilliseconds(){ - return 60_000; + return 90_000; } /** diff --git a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/datasets/fetchers/SvhnDataFetcherTest.java b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/datasets/fetchers/SvhnDataFetcherTest.java index 1815dff73..58587615d 100644 --- a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/datasets/fetchers/SvhnDataFetcherTest.java +++ b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/datasets/fetchers/SvhnDataFetcherTest.java @@ -24,17 +24,22 @@ import org.junit.rules.Timeout; import java.io.File; import static 
org.junit.Assert.assertTrue; +import static org.junit.Assume.assumeTrue; /** * @author saudet */ public class SvhnDataFetcherTest extends BaseDL4JTest { - @Rule - public Timeout timeout = Timeout.seconds(600); + @Override + public long getTimeoutMilliseconds() { + return 480_000L; //Shouldn't take this long but slow download or drive access on CI machines may need extra time. + } @Test public void testSvhnDataFetcher() throws Exception { + assumeTrue(isIntegrationTests()); //Ignore unless integration tests - CI can get caught up on slow disk access + SvhnDataFetcher fetch = new SvhnDataFetcher(); File path = fetch.getDataSetPath(DataSetType.TRAIN); File path2 = fetch.getDataSetPath(DataSetType.TEST); diff --git a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/optimizer/listener/TestCheckpointListener.java b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/optimizer/listener/TestCheckpointListener.java index 5c5f9e385..131930623 100644 --- a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/optimizer/listener/TestCheckpointListener.java +++ b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/optimizer/listener/TestCheckpointListener.java @@ -183,11 +183,11 @@ public class TestCheckpointListener extends BaseDL4JTest { CheckpointListener l = new CheckpointListener.Builder(f) .keepLast(3) - .saveEvery(4, TimeUnit.SECONDS) + .saveEvery(4900, TimeUnit.MILLISECONDS) .build(); net.setListeners(l); - for(int i=0; i<5; i++ ){ //10 iterations total + for(int i=0; i<3; i++ ){ //6 iterations total (2 iterations per fit call) net.fit(iter); Thread.sleep(5000); } @@ -211,9 +211,10 @@ public class TestCheckpointListener extends BaseDL4JTest { ns.add(n.getIterationCount()); } - assertEquals(3, l.availableCheckpoints().size()); - assertEquals(ns.toString(), 3, ns.size()); - assertTrue(ns.containsAll(Arrays.asList(4,6,8))); + assertEquals(2, l.availableCheckpoints().size()); + assertEquals(ns.toString(), 2, ns.size()); + System.out.println(ns); + assertTrue(ns.containsAll(Arrays.asList(2,4))); } @Test diff --git a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/optimizer/listener/TestListeners.java b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/optimizer/listener/TestListeners.java index 8cd72e770..cac30a7e4 100644 --- a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/optimizer/listener/TestListeners.java +++ b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/optimizer/listener/TestListeners.java @@ -70,6 +70,11 @@ public class TestListeners extends BaseDL4JTest { @Rule public TemporaryFolder tempDir = new TemporaryFolder(); + @Override + public long getTimeoutMilliseconds() { + return 90000L; + } + @Test public void testSettingListenersUnsupervised() { //Pretrain layers should get copies of the listeners, in addition to the diff --git a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java index 54acb31d7..2091babb0 100755 --- a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java +++ b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java @@ -767,6 +767,13 @@ public class MultiLayerNetwork implements Serializable, Classifier, Layer, Neura if (!isInitCalled()) init(); + if (solver == null) { + try (MemoryWorkspace wsO = 
Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { + solver = new Solver.Builder().configure(conf()).listeners(getListeners()).model(this) + .build(); + } + } + solver.getOptimizer().setGradientsAccumulator(accumulator); } diff --git a/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark-parameterserver/src/test/java/org/deeplearning4j/spark/parameterserver/train/GradientSharingTrainingTest.java b/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark-parameterserver/src/test/java/org/deeplearning4j/spark/parameterserver/train/GradientSharingTrainingTest.java index ab034604e..68a012b72 100644 --- a/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark-parameterserver/src/test/java/org/deeplearning4j/spark/parameterserver/train/GradientSharingTrainingTest.java +++ b/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark-parameterserver/src/test/java/org/deeplearning4j/spark/parameterserver/train/GradientSharingTrainingTest.java @@ -75,7 +75,7 @@ public class GradientSharingTrainingTest extends BaseSparkTest { @Override public long getTimeoutMilliseconds() { - return 90000L; + return 180000L; } @Test diff --git a/nd4j/nd4j-common-tests/src/main/java/org/nd4j/common/tests/BaseND4JTest.java b/nd4j/nd4j-common-tests/src/main/java/org/nd4j/common/tests/BaseND4JTest.java index 54bad9876..eceec6216 100644 --- a/nd4j/nd4j-common-tests/src/main/java/org/nd4j/common/tests/BaseND4JTest.java +++ b/nd4j/nd4j-common-tests/src/main/java/org/nd4j/common/tests/BaseND4JTest.java @@ -55,7 +55,7 @@ public abstract class BaseND4JTest { * Override this method to set the default timeout for methods in the test class */ public long getTimeoutMilliseconds(){ - return 60_000; + return 90_000; } /** From 51ce6927fd03f4fc42f498b02dd28475ecab1df5 Mon Sep 17 00:00:00 2001 From: raver119 Date: Sat, 16 May 2020 10:44:58 +0300 Subject: [PATCH 05/21] FP Mod (#468) * mod Signed-off-by: raver119@gmail.com * couple of tests for updated mod Signed-off-by: raver119@gmail.com --- libnd4j/include/ops/ops.h | 13 +-- .../layers_tests/PlaygroundTests.cpp | 2 - .../layers_tests/PrimitivesTests.cpp | 92 +++++++++++++++++++ 3 files changed, 96 insertions(+), 11 deletions(-) create mode 100644 libnd4j/tests_cpu/layers_tests/PrimitivesTests.cpp diff --git a/libnd4j/include/ops/ops.h b/libnd4j/include/ops/ops.h index 21cd07c40..ea52e9ba0 100644 --- a/libnd4j/include/ops/ops.h +++ b/libnd4j/include/ops/ops.h @@ -919,17 +919,12 @@ namespace simdOps { template class Mod { public: - /* - - // just a optional note, feel free to remove later - - op_def static half op(half d1, half d2, half *params) { - return __float2half(simdOps::Mod::op(__half2float(d1), __half2float(d2), nullptr)); - } - */ op_def static Z op(X d1, Y d2) { - return static_cast(d1) % static_cast(d2); + auto dx = static_cast(d2); + auto f = sd::math::nd4j_floor(d1 / dx); + auto r = f * dx; + return d1 - r; } op_def static Z op(X d1, Y d2, Z *params) { diff --git a/libnd4j/tests_cpu/layers_tests/PlaygroundTests.cpp b/libnd4j/tests_cpu/layers_tests/PlaygroundTests.cpp index f4c8bd2fa..91ddcbd30 100644 --- a/libnd4j/tests_cpu/layers_tests/PlaygroundTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/PlaygroundTests.cpp @@ -59,8 +59,6 @@ public: int poolSize = 10; PlaygroundTests() { - printf("\n"); - fflush(stdout); } }; diff --git a/libnd4j/tests_cpu/layers_tests/PrimitivesTests.cpp b/libnd4j/tests_cpu/layers_tests/PrimitivesTests.cpp new file mode 100644 index 000000000..f131a1520 --- /dev/null +++ b/libnd4j/tests_cpu/layers_tests/PrimitivesTests.cpp @@ -0,0 +1,92 @@ 
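For reference, the `Mod` change in ops.h above replaces the old integer `%` with a floored-division remainder: `r = d1 - floor(d1 / d2) * d2`. A minimal Java sketch of the same arithmetic (illustrative only, not part of the patch) shows how it differs from the truncated remainder that `%`/`fmod` compute, and why a zero divisor no longer risks a SIGFPE:

```
public class FlooredModDemo {
    // Floored-division remainder, mirroring the updated simdOps::Mod:
    // the result takes the sign of the divisor, not the dividend.
    static double flooredMod(double d1, double d2) {
        return d1 - Math.floor(d1 / d2) * d2;
    }

    public static void main(String[] args) {
        System.out.println(flooredMod(7.0, 3.0));   // 1.0  -- same as 7 % 3 and fmod(7, 3)
        System.out.println(flooredMod(-7.0, 3.0));  // 2.0  -- floored: sign follows the divisor
        System.out.println(-7.0 % 3.0);             // -1.0 -- truncated: sign follows the dividend
        System.out.println(flooredMod(7.0, 0.0));   // NaN  -- float division by zero, no trap
    }
}
```

For the positive operands used in `test_mod_1` and `test_mod_2` below, the floored and truncated results coincide, which is why those tests can compare against `7 % 3` and `nd4j_fmod`; `test_mod_3` only relies on the zero-divisor path not raising SIGFPE.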
+/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "testlayers.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace sd; +using namespace sd::graph; + +class PrimitivesTests : public testing::Test { + public: + + PrimitivesTests() { + } +}; + +TEST_F(PrimitivesTests, test_mod_1) { + int ix = 7; + int iy = 3; + + + auto v = simdOps::Mod::op(ix, iy); + + ASSERT_EQ(7 % 3, v); +} + +TEST_F(PrimitivesTests, test_mod_2) { + float ix = 7.f; + float iy = 3.f; + + + auto e = sd::math::nd4j_fmod(ix, iy); + auto v = simdOps::Mod::op(ix, iy); + + ASSERT_NEAR(e, v, 1e-5f); +} + +TEST_F(PrimitivesTests, test_mod_3) { + float ix = 7.f; + float iy = 0.f; + + + auto e = sd::math::nd4j_fmod(ix, iy); + auto v = simdOps::Mod::op(ix, iy); + + // absence of SIGFPE is a good enough check here +} \ No newline at end of file From 4bdd5cb8ff4b08b80031abebfd88d4da8307f7f9 Mon Sep 17 00:00:00 2001 From: Alex Black Date: Sat, 16 May 2020 22:44:31 +1000 Subject: [PATCH 06/21] Add SameDiff file format ADR [WIP] (#467) * Add SameDiff file format ADR Signed-off-by: Alex Black * Update 0001-SameDiff_File_Format.md * Update Signed-off-by: Alex Black --- nd4j/ADRs/0001-SameDiff_File_Format.md | 100 +++++++++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 nd4j/ADRs/0001-SameDiff_File_Format.md diff --git a/nd4j/ADRs/0001-SameDiff_File_Format.md b/nd4j/ADRs/0001-SameDiff_File_Format.md new file mode 100644 index 000000000..84dcfb6c9 --- /dev/null +++ b/nd4j/ADRs/0001-SameDiff_File_Format.md @@ -0,0 +1,100 @@ +# SameDiff file format proposal + +## Status +Accepted + +Proposed by: Alex Black (15-05-2020) + +Discussed with: raver119 + +## Context + +SameDiff models need to be serializable - i.e., something we can save to disk or send over the network. +Additionally, we need to be able to save and load model files in C++, and have those be readable in other languages (mainly Java). + +Currently, we have a FlatBuffers-based format for SameDiff graph serialization, but it has a number of problems, as discussed in this issue: https://github.com/eclipse/deeplearning4j/issues/8312 + + +## Decision + +We will transition from a pure FlatBuffers format to a Zip + FlatBuffers model format. + +FlatBuffers will be used for the graph structure only. Parameters will be stored separately from the graph structure, also within the zip. + +We will introduce the ability to support multiple versions of a graph in the model files. 
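As a sketch of the tag-selection rule described below (tags listed one per line in `tags.txt`, oldest first and newest last, with the newest loaded by default and matching done case-insensitively) — the class and method names here are hypothetical, not the actual SameDiff API:

```
import java.util.Arrays;
import java.util.List;

public class TagResolutionSketch {
    // Hypothetical helper: pick the tag to load from the contents of tags.txt.
    static String resolveTag(String tagsTxt, String requested) {
        List<String> tags = Arrays.asList(tagsTxt.trim().split("\\R"));
        if (requested == null)
            return tags.get(tags.size() - 1);   // default: newest tag (last line)
        for (String t : tags)
            if (t.equalsIgnoreCase(requested))  // tags are not case sensitive
                return t;
        throw new IllegalArgumentException("Unknown tag: " + requested);
    }

    public static void main(String[] args) {
        String tagsTxt = "fp32_checkpoint1000\nfp32_checkpoint5000\nint8_quantized";
        System.out.println(resolveTag(tagsTxt, null));                  // int8_quantized
        System.out.println(resolveTag(tagsTxt, "FP32_CHECKPOINT5000")); // case-insensitive match
    }
}
```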
+This will enable the model file to support storing +* Multiple data types (for example, an FP32 version and a quantized INT8 version) +* Multiple different checkpoints (parameters after 1000 iterations, after 5000, and so on) +* Multiple versions of a given model (English vs. Chinese, or cased/uncased, etc.) + +By default when loading a graph (unless it is otherwise specified) we will load the most recent model tag. +Tags must be valid file/folder identifiers, and are not case sensitive. + + +The structure of the zip file will be as follows: +``` +tags.txt //List of graph tags, one per line, in UTF8 format, no duplicates. Oldest first, newest last +/graph.fb //The graph structure, in FlatBuffers format +/params.txt //The mapping between variable names and parameter file names +/params/*.fb //The set of NDArrays that are the parameters, in FlatBuffers format +/trainingConfig.fb //The training configuration - updater, learning rate, etc +/updater.txt //The mapping between variable names and the updater state file names +/updater/*.fb //The set of NDArrays that are the updater state +``` + +Note that params.txt will allow for parameter sharing via references to other parameters: +``` +my_normal_param 0 +shared_param /7 +``` +This means the parameter values for parameter "my_normal_param" are present at `/params/0.fb` within the zip file, and the parameter values for "shared_param" are available at `/params/7.fb` + +Note also that the motivation for using the params.txt file (instead of the raw parameter name as the file name) is that some parameters will have invalid or ambiguous file names - "my/param/name", "&MyParam*" etc. + +In terms of updater state, it will be stored in a similar format. For example, for the Adam updater with the M and V state arrays (each of the same shape as the parameter): +``` +my_param 0 1 +other_param 2 3 +``` +That means my_param(M) is at `/updater/0.fb` and my_param(V) is at `/updater/1.fb` +This format also allows for updater state sharing, if we need it. + + +**Graph Structure** + +The graph structure will be encoded in FlatBuffers format using a schema with 2 parts: +1. A list of variables - each with name, datatype, and (for placeholders, constants and parameters) a shape +2. A list of operations - each with a name, op name/type, input variable names, output variable names, and arguments + +Note that both legacy and custom ops will be encoded in the same way. For legacy ops, we simply need the operation type, and the operation number. + +Operation argument encoding will be done using named arguments: essentially, a `Map<String, T>` structure, where T is one of `{long, double, boolean, datatype}`. +This allows for improved backward compatibility (no ambiguity as ops are modified after a graph file was written) and improved interpretability compared to using simple arrays of iargs, bargs, targs and dargs. +One consequence/downside of this is that we need to define a mapping between our named arguments and iargs/bargs/targs/dargs. In Java we have essentially done this manually, though we clearly don't want to replicate this work in C++ (or any future languages). + +To avoid the need to do a significant amount of work (such as moving the name/arg mapping to code generation) the following is proposed: +The `Map<String, T>` is split up in the FlatBuffers schema into 4 pairs of fields. 
+* `String[] iArgNames`, `long[] iArgs` +* `String[] tArgNames`, `double[] tArgs` +* `String[] bArgNames`, `boolean[] bArgs` +* `String[] dArgNames`, `DataType[] dArgs` + +Clearly the name and value arrays (for each pair) would each be the same length, and name/value correspondence is by array index. + +This is essentially equivalent to the `Map<String, T>` representation, but has the benefit of not needing us to define the mapping from named args to array-style args any time soon in C++, while still allowing us to add it in the future (mainly before we can write graphs from C++, or have better/proper backward compatibility after op changes) + + +**Extensibility to Other Types** + +Suppose in the future we want to store other data for a variable, not just an array? +Examples include lists and maps (for example, for NLP applications). + +While we will not implement this right now, there are a number of options for adding this without breaking backward compatibility. + +First: we can enhance the params.txt file format, perhaps using something like the following: +``` +map_param 0 MAP +``` + +Second: We can add a similar text file for other types. For example, a params_maps.txt, same format as params.txt, with content at `/params_maps/*.fb` + From 6e9c849e4a4e6ae87c8876b6b3929d6581c20f39 Mon Sep 17 00:00:00 2001 From: Paul Dubs Date: Mon, 18 May 2020 07:46:46 +0200 Subject: [PATCH 07/21] Fix typo (#469) --- .../org/deeplearning4j/nn/modelimport/keras/KerasModel.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasModel.java b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasModel.java index 8aa38439c..b57171a14 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasModel.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasModel.java @@ -171,7 +171,7 @@ public class KerasModel { importTrainingConfiguration(trainingJson); else log.warn("If enforceTrainingConfig is true, a training " + "configuration object has to be provided. Usually the only practical way to do this is to store" + - " your keras model with `model.save('model_path.h5'. If you store model config and weights" + + " your keras model with `model.save('model_path.h5')`. 
If you store model config and weights" + " separately no training configuration is attached."); } From ec757f654d92e9d4199adf17690420525fd5bd53 Mon Sep 17 00:00:00 2001 From: Andrii T <39699084+atuzhykov@users.noreply.github.com> Date: Tue, 19 May 2020 17:18:52 +0300 Subject: [PATCH 08/21] Tensorflow import tests and fixes (#435) * ignored ops checked Signed-off-by: Andrii Tuzhykov * reconfigured AdjustContrast + commented primitive_gru Signed-off-by: Andrii Tuzhykov * minor changes + exception ops commented Signed-off-by: Andrii Tuzhykov * figured out non existent tf ops and random ops check Signed-off-by: Andrii Tuzhykov * minor changes to tensorflowop and randomness cheks Signed-off-by: Andrii Tuzhykov * deconv2d tensorfloname removed * Fix Flatbuffers ser/de with character fields Signed-off-by: Alex Black * TFGraphTestAllSameDiff tests passed except NonMaxSuppression Signed-off-by: Andrii Tuzhykov * minor changes Signed-off-by: Andrii Tuzhykov * temporary ignored section added Signed-off-by: Andrii Tuzhykov * ignores removed Signed-off-by: Andrii Tuzhykov * org.nd4j.base.Preconditions -> org.nd4j.common.base.Preconditions Signed-off-by: Andrii Tuzhykov * temsorflownames reverts and replace CopyHost * ignored mod op tests due to known issue Signed-off-by: Andrii Tuzhykov * rsestored mod after fixing in cpp level Signed-off-by: Andrii Tuzhykov * ignored random_shuffle op test due to known issue Signed-off-by: Andrii Tuzhykov * increased random_uniform mean/std comparator sensitivity Signed-off-by: Andrii Tuzhykov * igmored random tests due to SameDiff RNG seed is not set. Signed-off-by: Andrii Tuzhykov Co-authored-by: Alex Black --- .../functions/DifferentialFunction.java | 4 + .../samediff/serde/FlatBuffersMapper.java | 2 + .../autodiff/validation/OpValidation.java | 7 +- .../converters/ImportClassMapping.java | 1 - .../linalg/api/ops/custom/AdjustContrast.java | 37 ++++++-- .../api/ops/custom/AdjustContrastV2.java | 44 --------- .../api/ops/custom/BaseAdjustContrast.java | 52 ----------- .../api/ops/custom/CompareAndBitpack.java | 12 +++ .../linalg/api/ops/custom/RgbToGrayscale.java | 4 - .../nd4j/linalg/api/ops/custom/RgbToYiq.java | 5 - .../nd4j/linalg/api/ops/custom/RgbToYuv.java | 5 - .../nd4j/linalg/api/ops/custom/YiqToRgb.java | 5 - .../nd4j/linalg/api/ops/custom/YuvToRgb.java | 4 - .../ops/impl/image/NonMaxSuppressionV3.java | 2 +- .../ops/impl/layers/convolution/DeConv2D.java | 5 - .../transforms/custom/IsNonDecreasing.java | 6 -- .../pairwise/arithmetic/CopyOp.java | 2 +- .../ops/impl/transforms/same/Identity.java | 2 +- .../segment/UnsortedSegmentMean.java | 4 - .../segment/UnsortedSegmentSqrtN.java | 5 - .../api/ops/random/custom/RandomGamma.java | 4 +- .../api/ops/random/impl/DropOutInverted.java | 6 -- .../ops/random/impl/UniformDistribution.java | 6 -- .../TFGraphs/TFGraphTestAllHelper.java | 63 ++++++++++++- .../TFGraphs/TFGraphTestAllSameDiff.java | 93 ++++++++----------- .../nd4j/linalg/custom/CustomOpsTests.java | 16 ---- 26 files changed, 158 insertions(+), 238 deletions(-) delete mode 100644 nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/AdjustContrastV2.java delete mode 100644 nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/BaseAdjustContrast.java diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/functions/DifferentialFunction.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/functions/DifferentialFunction.java 
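The `DifferentialFunction` and `FlatBuffersMapper` hunks below store `char` op properties as integers instead of adding a dedicated field type to the FlatBuffers schema. A standalone Java sketch of that round-trip (illustrative, not the actual mapper code):

```
public class CharFieldRoundTrip {
    public static void main(String[] args) {
        char original = 'c';                 // e.g. an ordering/format property

        // Serialization side: the char is widened and written into the int args.
        int[] iArgs = new int[]{ original };

        // Deserialization side: the value comes back boxed as an Integer, so a
        // char-typed field needs the narrowing cast added in the hunk below.
        Object value = iArgs[0];             // autoboxed to Integer
        char restored = (char) ((Integer) value).intValue();

        System.out.println(original == restored);  // true
    }
}
```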
index 54707887f..f4f2d6c6b 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/functions/DifferentialFunction.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/functions/DifferentialFunction.java @@ -261,6 +261,10 @@ public abstract class DifferentialFunction { if(target.getType() == float.class && value instanceof Double){ value = ((Double) value).floatValue(); } + //Edge case: we store char fields as integers, rather than introduce an extra property + if(target.getType() == char.class && value instanceof Integer){ + value = (char)((Integer)value).intValue(); + } target.set(this,value); } catch (IllegalAccessException e) { diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/samediff/serde/FlatBuffersMapper.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/samediff/serde/FlatBuffersMapper.java index 7f44962f0..6253c700d 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/samediff/serde/FlatBuffersMapper.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/samediff/serde/FlatBuffersMapper.java @@ -483,6 +483,8 @@ public class FlatBuffersMapper { //No op } else if (v instanceof Boolean) { b = new boolean[]{(Boolean) v}; + } else if(v instanceof Character){ + i = new int[]{(Character)v}; } else if (v instanceof Number) { if (v instanceof Double) { d = new double[]{(Double) v}; diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/validation/OpValidation.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/validation/OpValidation.java index 386ead0b3..21154d8ac 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/validation/OpValidation.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/validation/OpValidation.java @@ -1220,7 +1220,12 @@ public class OpValidation { "absargmax", "absargmin", "entropy_shannon", //This is a thing, but quite different from our op: https://www.tensorflow.org/versions/r1.2/api_docs/python/tf/contrib/bayesflow/entropy/entropy_shannon - "count_zero" + "count_zero", + + "SaveV2", + "LoadV2", + "RestoreV2", + "RandomCrop" // NotImplementedError: Op RandomCrop is not available in GraphDef version 134. It has been removed in version 8. Random crop is now pure Python. 
); return new HashSet<>(list); diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/imports/converters/ImportClassMapping.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/imports/converters/ImportClassMapping.java index 63138719c..630b5986d 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/imports/converters/ImportClassMapping.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/imports/converters/ImportClassMapping.java @@ -625,7 +625,6 @@ public class ImportClassMapping { org.nd4j.linalg.api.ops.compat.CompatSparseToDense.class, org.nd4j.linalg.api.ops.compat.CompatStringSplit.class, org.nd4j.linalg.api.ops.custom.AdjustContrast.class, - org.nd4j.linalg.api.ops.custom.AdjustContrastV2.class, org.nd4j.linalg.api.ops.custom.HsvToRgb.class, org.nd4j.linalg.api.ops.custom.RgbToHsv.class, org.nd4j.linalg.api.ops.custom.RgbToYiq.class, diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/AdjustContrast.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/AdjustContrast.java index f842303ca..1dfeca5dc 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/AdjustContrast.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/AdjustContrast.java @@ -1,4 +1,3 @@ - /* ****************************************************************************** * Copyright (c) 2019 Konduit K.K. * @@ -19,14 +18,27 @@ package org.nd4j.linalg.api.ops.custom; import lombok.NonNull; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; +import org.nd4j.common.base.Preconditions; +import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.api.ops.DynamicCustomOp; -public class AdjustContrast extends BaseAdjustContrast { +import java.util.Collections; +import java.util.List; - public AdjustContrast() {super();} +public class AdjustContrast extends DynamicCustomOp { + + public AdjustContrast() { + super(); + } public AdjustContrast(@NonNull INDArray in, double factor, INDArray out) { - super(in, factor, out); + Preconditions.checkArgument(in.rank() >= 3, + "AdjustContrast: op expects rank of input array to be >= 3, but got %s instead", in.rank()); + inputArguments.add(in); + outputArguments.add(out); + + addTArgument(factor); } public AdjustContrast(@NonNull INDArray in, double factor) { @@ -34,21 +46,28 @@ public class AdjustContrast extends BaseAdjustContrast { } public AdjustContrast(@NonNull SameDiff sameDiff, @NonNull SDVariable in, @NonNull SDVariable factor) { - super(sameDiff,new SDVariable[]{in,factor}); + super(sameDiff, new SDVariable[]{in, factor}); } public AdjustContrast(@NonNull SameDiff sameDiff, @NonNull SDVariable in, double factor) { - super(sameDiff,new SDVariable[]{in}); + super(sameDiff, new SDVariable[]{in}); addTArgument(factor); } @Override public String opName() { - return "adjust_contrast"; + return "adjust_contrast_v2"; } @Override - public String tensorflowName() { - return "AdjustContrast"; + public String[] tensorflowNames() { + return new String[]{"AdjustContrast", "AdjustContrastv2"}; + } + + @Override + public List calculateOutputDataTypes(List inputDataTypes) { + int n = args().length; + Preconditions.checkState(inputDataTypes != null && inputDataTypes.size() == n, "Expected %s input data types for %s, got %s", n, 
getClass(), inputDataTypes); + return Collections.singletonList(inputDataTypes.get(0)); } } \ No newline at end of file diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/AdjustContrastV2.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/AdjustContrastV2.java deleted file mode 100644 index 34b495970..000000000 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/AdjustContrastV2.java +++ /dev/null @@ -1,44 +0,0 @@ -/* ****************************************************************************** - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ -package org.nd4j.linalg.api.ops.custom; - -import lombok.NonNull; -import org.nd4j.autodiff.samediff.SDVariable; -import org.nd4j.autodiff.samediff.SameDiff; -import org.nd4j.linalg.api.ndarray.INDArray; - -public class AdjustContrastV2 extends BaseAdjustContrast { - - public AdjustContrastV2() {super();} - - public AdjustContrastV2(@NonNull INDArray in, double factor, INDArray out) { - super(in, factor, out); - } - - public AdjustContrastV2(@NonNull SameDiff sameDiff, @NonNull SDVariable in, @NonNull SDVariable factor) { - super( sameDiff,new SDVariable[]{in,factor}); - } - - @Override - public String opName() { - return "adjust_contrast_v2"; - } - - @Override - public String tensorflowName() { - return "AdjustContrastv2"; - } -} \ No newline at end of file diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/BaseAdjustContrast.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/BaseAdjustContrast.java deleted file mode 100644 index 80c344fe2..000000000 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/BaseAdjustContrast.java +++ /dev/null @@ -1,52 +0,0 @@ -/* ****************************************************************************** - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ -package org.nd4j.linalg.api.ops.custom; - -import lombok.NonNull; -import org.nd4j.autodiff.samediff.SDVariable; -import org.nd4j.autodiff.samediff.SameDiff; -import org.nd4j.common.base.Preconditions; -import org.nd4j.linalg.api.buffer.DataType; -import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.api.ops.DynamicCustomOp; - -import java.util.Collections; -import java.util.List; - -public abstract class BaseAdjustContrast extends DynamicCustomOp { - public BaseAdjustContrast() { - } - - public BaseAdjustContrast(@NonNull INDArray in, double factor, INDArray out) { - Preconditions.checkArgument(in.rank() >= 3, - "AdjustContrast: op expects rank of input array to be >= 3, but got %s instead", in.rank()); - inputArguments.add(in); - outputArguments.add(out); - - addTArgument(factor); - } - - public BaseAdjustContrast(@NonNull SameDiff sameDiff, @NonNull SDVariable[] vars) { - super("", sameDiff, vars); - } - - @Override - public List calculateOutputDataTypes(List inputDataTypes){ - int n = args().length; - Preconditions.checkState(inputDataTypes != null && inputDataTypes.size() == n, "Expected %s input data types for %s, got %s", n, getClass(), inputDataTypes); - return Collections.singletonList(inputDataTypes.get(0)); - } -} \ No newline at end of file diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/CompareAndBitpack.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/CompareAndBitpack.java index e8285fe9b..d30c0fe80 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/CompareAndBitpack.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/CompareAndBitpack.java @@ -17,10 +17,15 @@ package org.nd4j.linalg.api.ops.custom; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; +import org.nd4j.common.base.Preconditions; +import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ops.DynamicCustomOp; import org.nd4j.linalg.factory.Nd4j; +import java.util.Collections; +import java.util.List; + public class CompareAndBitpack extends DynamicCustomOp { public CompareAndBitpack() {} @@ -47,4 +52,11 @@ public class CompareAndBitpack extends DynamicCustomOp { public String tensorflowName() { return "CompareAndBitpack"; } + + @Override + public List calculateOutputDataTypes(List dataTypes){ + Preconditions.checkState(dataTypes != null && dataTypes.size() == 2, "Expected exactly 2 input datatypes for %s, got input %s", getClass(), dataTypes); + Preconditions.checkState(dataTypes.get(0) == dataTypes.get(1), "Input data types must be the same: got %s", dataTypes); + return Collections.singletonList(DataType.UINT8); + } } \ No newline at end of file diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/RgbToGrayscale.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/RgbToGrayscale.java index 6b71ba17f..f0e8c3022 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/RgbToGrayscale.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/RgbToGrayscale.java @@ -37,8 +37,4 @@ public class RgbToGrayscale 
extends DynamicCustomOp { return "rgb_to_grs"; } - @Override - public String tensorflowName() { - return "RgbToGrs"; - } } diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/RgbToYiq.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/RgbToYiq.java index 628e770ee..3a2ca46cf 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/RgbToYiq.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/RgbToYiq.java @@ -42,11 +42,6 @@ public class RgbToYiq extends DynamicCustomOp { return "rgb_to_yiq"; } - @Override - public String tensorflowName() { - return "RgbToYiq"; - } - @Override public List calculateOutputDataTypes(List inputDataTypes){ int n = args().length; diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/RgbToYuv.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/RgbToYuv.java index 8c7ed7353..679e1d3e5 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/RgbToYuv.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/RgbToYuv.java @@ -42,11 +42,6 @@ public class RgbToYuv extends DynamicCustomOp { return "rgb_to_yuv"; } - @Override - public String tensorflowName() { - return "RgbToYuv"; - } - @Override public List calculateOutputDataTypes(List inputDataTypes){ int n = args().length; diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/YiqToRgb.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/YiqToRgb.java index bca9999a8..3f647dfbe 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/YiqToRgb.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/YiqToRgb.java @@ -41,11 +41,6 @@ public class YiqToRgb extends DynamicCustomOp { return "yiq_to_rgb"; } - @Override - public String tensorflowName() { - return "YiqToRgb"; - } - @Override public List calculateOutputDataTypes(List inputDataTypes){ int n = args().length; diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/YuvToRgb.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/YuvToRgb.java index d6e52771c..1776a7b85 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/YuvToRgb.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/YuvToRgb.java @@ -42,10 +42,6 @@ public class YuvToRgb extends DynamicCustomOp { return "yuv_to_rgb"; } - @Override - public String tensorflowName() { - return "YuvToRgb"; - } @Override public List calculateOutputDataTypes(List inputDataTypes){ diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/image/NonMaxSuppressionV3.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/image/NonMaxSuppressionV3.java index d087287cf..77c8642cf 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/image/NonMaxSuppressionV3.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/image/NonMaxSuppressionV3.java @@ 
-53,7 +53,7 @@ public class NonMaxSuppressionV3 extends DynamicCustomOp { @Override public String[] tensorflowNames() { - return new String[]{"NonMaxSuppressionV3","NonMaxSuppressionV4"}; + return new String[]{"NonMaxSuppressionV3","NonMaxSuppressionV4","NonMaxSuppressionV5"}; } @Override diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/convolution/DeConv2D.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/convolution/DeConv2D.java index 1e082f6f3..8e2d82105 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/convolution/DeConv2D.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/convolution/DeConv2D.java @@ -306,11 +306,6 @@ public class DeConv2D extends DynamicCustomOp { return "ConvTranspose"; } - @Override - public String tensorflowName() { - return "Conv2DTranspose"; - } - @Override public List doDiff(List f1) { diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/custom/IsNonDecreasing.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/custom/IsNonDecreasing.java index 0a9360670..b3627f2db 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/custom/IsNonDecreasing.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/custom/IsNonDecreasing.java @@ -62,12 +62,6 @@ public class IsNonDecreasing extends DynamicCustomOp { return "is_non_decreasing"; } - - @Override - public String tensorflowName() { - return "IsNonDecreasing"; - } - @Override public List doDiff(List f1) { return Collections.singletonList(sameDiff.zerosLike(arg())); diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/pairwise/arithmetic/CopyOp.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/pairwise/arithmetic/CopyOp.java index 3ee75d23d..3f3c4754c 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/pairwise/arithmetic/CopyOp.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/pairwise/arithmetic/CopyOp.java @@ -78,7 +78,7 @@ public class CopyOp extends BaseTransformSameOp { @Override public String[] tensorflowNames() { - return new String[]{"Copy","DeepCopy","CopyHost"}; + return new String[]{"Copy"}; } @Override diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/same/Identity.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/same/Identity.java index f9744d8ce..d555e27e7 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/same/Identity.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/same/Identity.java @@ -64,7 +64,7 @@ public class Identity extends BaseDynamicTransformOp { @Override public String[] tensorflowNames() { - return new String[]{"Identity"}; + return new String[]{"Identity", "DeepCopy", "CopyHost"}; } @Override diff --git 
a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/segment/UnsortedSegmentMean.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/segment/UnsortedSegmentMean.java index 637eff3bb..84c7e6ab1 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/segment/UnsortedSegmentMean.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/segment/UnsortedSegmentMean.java @@ -55,10 +55,6 @@ public class UnsortedSegmentMean extends DynamicCustomOp { return "unsorted_segment_mean"; } - @Override - public String tensorflowName() { - return "UnsortedSegmentMean"; - } @Override public List doDiff(List gradients){ diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/segment/UnsortedSegmentSqrtN.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/segment/UnsortedSegmentSqrtN.java index 64b6d2427..9fa88b788 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/segment/UnsortedSegmentSqrtN.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/segment/UnsortedSegmentSqrtN.java @@ -55,11 +55,6 @@ public class UnsortedSegmentSqrtN extends DynamicCustomOp { return "unsorted_segment_sqrt_n"; } - @Override - public String tensorflowName() { - return "UnsortedSegmentSqrtN"; - } - @Override public List doDiff(List gradients){ return new UnsortedSegmentSqrtNBp(sameDiff, arg(0), arg(1), gradients.get(0), numSegments).outputs(); diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/random/custom/RandomGamma.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/random/custom/RandomGamma.java index 7be70e218..bb2676ba9 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/random/custom/RandomGamma.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/random/custom/RandomGamma.java @@ -71,9 +71,7 @@ public class RandomGamma extends DynamicCustomOp { @Override public void initFromTensorFlow(NodeDef nodeDef, SameDiff initWith, Map attributesForNode, GraphDef graph) { - if(attributesForNode.containsKey("alpha")) { - outputDataType = DataTypeAdapter.dtypeConv(attributesForNode.get("alpha").getType()); - } + outputDataType = DataTypeAdapter.dtypeConv(attributesForNode.get("T").getType()); } @Override diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/random/impl/DropOutInverted.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/random/impl/DropOutInverted.java index 6b174ae63..53de3559f 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/random/impl/DropOutInverted.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/random/impl/DropOutInverted.java @@ -84,12 +84,6 @@ public class DropOutInverted extends BaseRandomOp { return "Dropout"; } - @Override - public String tensorflowName() { - return "Dropout"; - } - - @Override public List doDiff(List f1) { return null; diff --git 
a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/random/impl/UniformDistribution.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/random/impl/UniformDistribution.java index 84fade263..bf1863dda 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/random/impl/UniformDistribution.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/random/impl/UniformDistribution.java @@ -100,12 +100,6 @@ public class UniformDistribution extends BaseRandomOp { throw new NoOpNameFoundException("No onnx op opName found for " + opName()); } - @Override - public String tensorflowName() { - return "RandomUniformGG"; - } - - @Override public List doDiff(List f1) { return Collections.emptyList(); diff --git a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/imports/TFGraphs/TFGraphTestAllHelper.java b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/imports/TFGraphs/TFGraphTestAllHelper.java index 1cc3baa13..c93b3deb7 100644 --- a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/imports/TFGraphs/TFGraphTestAllHelper.java +++ b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/imports/TFGraphs/TFGraphTestAllHelper.java @@ -851,7 +851,68 @@ public class TFGraphTestAllHelper { return (t, s) -> Nd4j.sort(t, true).equals(Nd4j.sort(s, true)); } - if(modelName.startsWith("alpha_dropout") || modelName.startsWith("layers_dropout") || modelName.equals("dropout")) + if(modelName.startsWith("empty")){ + return (t, s) -> { + boolean areEqualShapes = t.equalShapes(s); + boolean areEqualDataTypes = t.dataType() == s.dataType(); + return areEqualShapes && areEqualDataTypes; + }; } + + // the elements before and after the shuffle must match up to ordering, so compare sorted copies + if(modelName.startsWith("random_shuffle")){ + return (t, s) -> Nd4j.sort(t, true).equals(Nd4j.sort(s, true)); + } + + if(modelName.startsWith("random_normal")){ + return (t, s) -> { + boolean areEqualShapes = t.equalShapes(s); + double meanS = s.meanNumber().doubleValue(); + double meanT = t.meanNumber().doubleValue(); + double stdS = s.stdNumber().doubleValue(); + double stdT = t.stdNumber().doubleValue(); + double eps = 1; + return areEqualShapes && (Math.abs(meanS-meanT) < eps) && (Math.abs(stdS-stdT) < eps); + }; } + + if(modelName.startsWith("random_gamma")){ + return (t, s) -> { + boolean areEqualShapes = t.equalShapes(s); + boolean nonNegativeValues = (t.minNumber().doubleValue() > 0) && (s.minNumber().doubleValue() > 0); + double meanS = s.meanNumber().doubleValue(); + double meanT = t.meanNumber().doubleValue(); + double stdS = s.stdNumber().doubleValue(); + double stdT = t.stdNumber().doubleValue(); + double eps = 1; + return areEqualShapes && nonNegativeValues && (Math.abs(meanS-meanT) < eps) && (Math.abs(stdS-stdT) < eps); + }; + } + + if(modelName.startsWith("random_poisson") || modelName.startsWith("random_poisson_v2")){ + return (t, s) -> { + boolean areEqualShapes = t.equalShapes(s); + boolean nonNegativeValues = (t.minNumber().doubleValue() >= 0) && (s.minNumber().doubleValue() >= 0); + double meanS = s.meanNumber().doubleValue(); + double meanT = t.meanNumber().doubleValue(); + double stdS = s.stdNumber().doubleValue(); + double stdT = t.stdNumber().doubleValue(); + double eps = 1; + return areEqualShapes && nonNegativeValues && (Math.abs(meanS-meanT) < eps) && (Math.abs(stdS-stdT) < eps); + }; + } + + if(modelName.startsWith("random_uniform")||
modelName.startsWith("random_uniform_int")){ + return (t, s) -> { + boolean areEqualShapes = t.equalShapes(s); + double meanS = s.meanNumber().doubleValue(); + double meanT = t.meanNumber().doubleValue(); + double stdS = s.stdNumber().doubleValue(); + double stdT = t.stdNumber().doubleValue(); + double eps = 1; + return areEqualShapes && (Math.abs(stdS-stdT) < eps) && (Math.abs(meanS-meanT) < eps); + }; + } + + if(modelName.startsWith("alpha_dropout") || modelName.startsWith("layers_dropout") || modelName.startsWith("dropout")) //We can't compare dropout using simple equality due to randomness return (t, s) -> { double[] tfNums = t.ravel().toDoubleVector(); diff --git a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/imports/TFGraphs/TFGraphTestAllSameDiff.java b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/imports/TFGraphs/TFGraphTestAllSameDiff.java index 72c705852..92ba319ed 100644 --- a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/imports/TFGraphs/TFGraphTestAllSameDiff.java +++ b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/imports/TFGraphs/TFGraphTestAllSameDiff.java @@ -66,23 +66,29 @@ public class TFGraphTestAllSameDiff { //Note: Can't extend BaseNd4jTest here a public static final String[] IGNORE_REGEXES = new String[]{ //Failing 2019/07/01 - Issue 10, https://github.com/deeplearning4j/deeplearning4j/issues/6958 //Still failing 2019/09/11 + //Still failing 2020/04/27 + //java.lang.IllegalStateException: Requested output variable LogMatrixDeterminant:1 does not exist in SameDiff instance "slogdet/.*", //Failing 2019/09/11 - https://github.com/eclipse/deeplearning4j/issues/7965 + // Still failing 2020/04/27 java.lang.IllegalStateException: Requested output variable Bincount does not exist in SameDiff instance "bincount/.*", // Failing 2019/11/14 https://github.com/eclipse/deeplearning4j/issues/8393 "is_strictly_increasing/emptyArrayTest/.*", //TODO floormod and truncatemod behave differently - i.e., "c" vs. "python" semantics. 
Need to check implementations too + // Still failing 2020/04/27 java.lang.IllegalStateException: Could not find class for TF Ops: TruncateMod "truncatemod/.*", //Still failing as of 2019/09/11 - https://github.com/deeplearning4j/deeplearning4j/issues/6464 - not sure if related to: https://github.com/deeplearning4j/deeplearning4j/issues/6447 "cnn2d_nn/nhwc_b1_k12_s12_d12_SAME", //2019/09/11 - No tensorflow op found for SparseTensorDenseAdd + // 2020/04/27 java.lang.IllegalStateException: Could not find class for TF Ops: SparseTensorDenseAdd "confusion/.*", //2019/09/11 - Couple of tests failing (InferenceSession issues) + // Still failing 2020/04/27 Requested output variable concat does not exist in SameDiff instance "rnn/bstack/d_.*", //2019/05/21 - Failing on AVX2/512 intermittently (Linux, OSX), passing elsewhere @@ -97,87 +103,68 @@ public class TFGraphTestAllSameDiff { //Note: Can't extend BaseNd4jTest here a "g_11", //2019/07/09 - Need "Multinomial" op - https://github.com/eclipse/deeplearning4j/issues/7913 + // Still failing 2020/04/27 java.lang.IllegalStateException: Could not find class for TF Ops: Multinomial "multinomial/.*", //2019/11/04 AB - disabled, pending libnd4j deconv3d_tf implementation + // Still failing 2020/04/27 java.lang.IllegalStateException: Could not find descriptor for op: deconv3d_tf - class: org.nd4j.linalg.api.ops.impl.layers.convolution.DeConv3DTF "conv3d_transpose.*", //2019/11/15 - mapping is not present yet https://github.com/eclipse/deeplearning4j/issues/8397 + // Still failing 2020/04/27 java.lang.AssertionError: Predictions do not match on ragged/reduce_mean/2d_a1, node RaggedReduceMean/truediv "ragged/reduce_mean/.*", // 2019/11/15 - missing dtype argument in nd4j, tests are useless https://github.com/eclipse/deeplearning4j/issues/8398 - "zeros_like/rank2_float32_dtype_int.*", + // Still failing 2020/04/27 java.lang.IndexOutOfBoundsException: 1 + "zeros_like/rank2_float32_dtype_int.*", // 11.26.2019 failing - https://github.com/eclipse/deeplearning4j/issues/8453 + // Still failing 2020/04/27 java.lang.AssertionError: Predictions do not match on roll/rank2_float32_zeroshift, node Roll "roll/.*", // 11.26.2019 failing https://github.com/eclipse/deeplearning4j/issues/8455 + // still failing 2020/04/27 + // java.lang.IllegalStateException: Failed to calculate output shapes for op matrix_band_part (MatrixBandPart) - no shapes were returned by calculateOutputShape() "matrix_band_part/.*", // 12.20.2019 - https://github.com/eclipse/deeplearning4j/issues/8559 + // Still failing 2020/04/27 java.lang.AssertionError: Predictions do not match on fused_batch_norm/float32_nhcw, node FusedBatchNormV3 "fused_batch_norm/.*", - // AB 2020/01/04 - https://github.com/eclipse/deeplearning4j/issues/8592 - "emptyArrayTests/reshape/rank2_shape2-0_2-0--1", + // 01.05.2020 - https://github.com/eclipse/deeplearning4j/issues/8898 + "primitive_gru", - //AB 2020/01/07 - Known issues - "bitcast/from_float64_to_int64", - "bitcast/from_rank2_float64_to_int64", - "bitcast/from_float64_to_uint64", + // 05.05.2020 - https://github.com/eclipse/deeplearning4j/issues/8921 + "random_poisson/rank1_float16", "random_poisson/rank1_float32", "random_poisson/rank1_half", + "random_poisson_v2/rank1_float64", "random_poisson_v2/rank1_float16", "random_poisson_v2/rank1_half", - - //NEWLY ADDED TESTCASES from 27/04/2020 - "non_max_suppression_v2/.*", "non_max_suppression/.*", + //08.05.2020 - https://github.com/eclipse/deeplearning4j/issues/8927 "random_gamma/.*", -
"non_max_suppression_v5/.*", - "non_max_suppression_v4/.*", - "non_max_suppression_v3/.*", - "dropout/.*", - "max_pool_with_argmax/.*", - "conv2d_transpose/.*", + + //08.05.2020 - https://github.com/eclipse/deeplearning4j/issues/8928 "Conv3DBackpropInputV2/.*", - "Conv3DBackpropInput/.*", - "mod/.*", - "leaky_relu/.*", - "DeepCopy/.*", - "empty/.*", - "ones_like/.*", - "is_non_decreasing/.*", - "div/.*", - "lgamma/.*", + + //12.05.2020 - https://github.com/eclipse/deeplearning4j/issues/8940 + "compare_and_bitpack/.*", + + //12.05.2020 - https://github.com/eclipse/deeplearning4j/issues/8943 + "max_pool_with_argmax/int64_int64_padding_SAME", "max_pool_with_argmax/int32_int64_padding_SAME", + + //12.05.2020 - https://github.com/eclipse/deeplearning4j/issues/8946 + "non_max_suppression_v4/.*","non_max_suppression_v5/.*", + + // 18.05.2020 - https://github.com/eclipse/deeplearning4j/issues/8960 + "random_shuffle/.*", + // 18.05.2020 - https://github.com/eclipse/deeplearning4j/issues/8963 "random_uniform/.*", "random_uniform_int/.*", - "resize_area/.*", - "zeros_like_tf1/.*", - "Conv2DTranspose/.*", - "rgb_to_yuv/.*", - "rgb_to_grayscale/.*", - "rgb_to_yiq/.*", - "losses/.*", - "yiq_to_rgb/.*", - "yuv_to_rgb/.*", - "emptyArrayTests/.*", "random_normal/.*", - "random_shuffle/.*", - "random_poisson_v2/.*", "random_poisson/.*", - "random_crop/.*", - "compare_and_bitpack/.*", - "adjust_contrast/.*", - "confusion/.*", - "bitcast/.*", - "roll/.*", - "matrix_band_part/.*", - "conv3d_transpose_layers/.*", - "multinomial/.*", - "unsorted_segment/.*", - "cnn2d_nn/.*", - "truncatemod/.*", - "bincount/.*", - "slogdet/.*", - "adjust_contrast_v2/.*" + "random_poisson_v2/.*", -}; + }; /* As per TFGraphTestList.printArraysDebugging - this field defines a set of regexes for test cases that should have all arrays printed during execution.
diff --git a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/custom/CustomOpsTests.java b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/custom/CustomOpsTests.java index 12658ede8..534b08e25 100644 --- a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/custom/CustomOpsTests.java +++ b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/custom/CustomOpsTests.java @@ -847,22 +847,6 @@ public class CustomOpsTests extends BaseNd4jTest { assertArrayEquals(new long[]{256, 256, 3}, lsd.get(0).getShape()); } - @Test - public void testAdjustContrastV2() { - INDArray in = Nd4j.linspace(DataType.DOUBLE,1.0,1.0, 4*4*3).reshape(4,4,3); - INDArray out = Nd4j.createUninitialized(4,4,3); - - INDArray expected = Nd4j.createFromArray(new double[]{-21.5, -20.5, -19.5, -15.5, -14.5, -13.5, -9.5, -8.5, -7.5, -3.5, -2.5, -1.5, - 2.5, 3.5, 4.5, 8.5, 9.5, 10.5, 14.5, 15.5, 16.5, 20.5, 21.5, 22.5, - 26.5, 27.5, 28.5, 32.5, 33.5, 34.5, 38.5, 39.5, 40.5, 44.5, 45.5, 46.5, - 50.5, 51.5, 52.5, 56.5, 57.5, 58.5, 62.5, 63.5, 64.5, 68.5, 69.5, 70.5 - }).reshape(4,4,3); - - Nd4j.exec(new AdjustContrastV2(in, 2.0, out)); - - assertArrayEquals(out.shape(), in.shape()); - assertEquals(expected, out); - } @Ignore("AS 11/13/2019 https://github.com/eclipse/deeplearning4j/issues/8374") @Test From 0bc9785508b17e13ca7f58dfe8f3bc061bca89e1 Mon Sep 17 00:00:00 2001 From: Yurii Shyrma Date: Tue, 19 May 2020 21:56:41 +0300 Subject: [PATCH 09/21] mkldnn concat call cases correction (#471) * - disable mkldnn concat when number of input arrays > 3072 Signed-off-by: Yurii * - get rid of the loop when calculating the number of input arrays Signed-off-by: Yurii --- libnd4j/include/ops/declarable/platform/mkldnn/concat.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/libnd4j/include/ops/declarable/platform/mkldnn/concat.cpp b/libnd4j/include/ops/declarable/platform/mkldnn/concat.cpp index 9df63556e..3bf97e586 100644 --- a/libnd4j/include/ops/declarable/platform/mkldnn/concat.cpp +++ b/libnd4j/include/ops/declarable/platform/mkldnn/concat.cpp @@ -178,7 +178,11 @@ PLATFORM_CHECK(concat, ENGINE_CPU) { const auto zType = z->dataType(); - return z->rankOf() < 7 && (zType==DataType::FLOAT32 || zType==DataType::HALF || zType==DataType::BFLOAT16 || zType==DataType::UINT8 || zType==DataType::INT8); + const bool isAxisInLastArr = block.getBArguments()->size() == 0 ? false : B_ARG(0); + const int numOfInArrs = isAxisInLastArr ? block.width() - 1 : block.width(); + + return z->rankOf() < 7 && numOfInArrs <= 3072 + && (zType==DataType::FLOAT32 || zType==DataType::HALF || zType==DataType::BFLOAT16 || zType==DataType::UINT8 || zType==DataType::INT8); } } From bde0a4ec98d9a20cda19b25bb214ae49356647ec Mon Sep 17 00:00:00 2001 From: Fariz Rahman Date: Thu, 21 May 2020 05:47:12 +0400 Subject: [PATCH 10/21] Python4j (#422) * types * pom fix * basic exec + tests * safe exec * exec fixes + tests * prim tests * lists and dicts * collections tests * list test * api * exec and return all vars * context manager + fixes * leak fixes * jobs tests * gc basic working * more gc fixed * copyright headers * try-catch-finally * gc fixes * validate var name (startswith _collapsed..)
* try block refac * pythonexecutioner nits * hashset->set * call() gc fix * gc fixes * type check fix * types fixes * refacs * rem numpyarray * threadsafety check * private->public * threadsafe checks * pythonGC test * threading fixes + tests * threading tests+ * threading test fixes * make PythonException unchecked * nits * docstrings * path fixes --- pom.xml | 1 + python4j/pom.xml | 66 ++ python4j/python4j-core/pom.xml | 44 ++ .../java/org/eclipse/python4j/Python.java | 611 ++++++++++++++++++ .../python4j/PythonContextManager.java | 241 +++++++ .../org/eclipse/python4j/PythonException.java | 52 ++ .../eclipse/python4j/PythonExecutioner.java | 342 ++++++++++ .../java/org/eclipse/python4j/PythonGC.java | 137 ++++ .../java/org/eclipse/python4j/PythonGIL.java | 93 +++ .../java/org/eclipse/python4j/PythonJob.java | 175 +++++ .../org/eclipse/python4j/PythonObject.java | 244 +++++++ .../java/org/eclipse/python4j/PythonType.java | 47 ++ .../org/eclipse/python4j/PythonTypes.java | 344 ++++++++++ .../org/eclipse/python4j/PythonVariable.java | 64 ++ .../eclipse/python4j/pythonexec/pythonexec.py | 36 ++ .../test/java/PythonBasicExecutionTest.java | 108 ++++ .../src/test/java/PythonCollectionsTest.java | 62 ++ .../test/java/PythonContextManagerTest.java | 51 ++ .../src/test/java/PythonGCTest.java | 54 ++ .../src/test/java/PythonJobTest.java | 287 ++++++++ .../src/test/java/PythonMultiThreadTest.java | 169 +++++ .../test/java/PythonPrimitiveTypesTest.java | 82 +++ python4j/python4j-numpy/pom.xml | 42 ++ 23 files changed, 3352 insertions(+) create mode 100644 python4j/pom.xml create mode 100644 python4j/python4j-core/pom.xml create mode 100644 python4j/python4j-core/src/main/java/org/eclipse/python4j/Python.java create mode 100644 python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonContextManager.java create mode 100644 python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonException.java create mode 100644 python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonExecutioner.java create mode 100644 python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonGC.java create mode 100644 python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonGIL.java create mode 100644 python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonJob.java create mode 100644 python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonObject.java create mode 100644 python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonType.java create mode 100644 python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonTypes.java create mode 100644 python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonVariable.java create mode 100644 python4j/python4j-core/src/main/resources/org/eclipse/python4j/pythonexec/pythonexec.py create mode 100644 python4j/python4j-core/src/test/java/PythonBasicExecutionTest.java create mode 100644 python4j/python4j-core/src/test/java/PythonCollectionsTest.java create mode 100644 python4j/python4j-core/src/test/java/PythonContextManagerTest.java create mode 100644 python4j/python4j-core/src/test/java/PythonGCTest.java create mode 100644 python4j/python4j-core/src/test/java/PythonJobTest.java create mode 100644 python4j/python4j-core/src/test/java/PythonMultiThreadTest.java create mode 100644 python4j/python4j-core/src/test/java/PythonPrimitiveTypesTest.java create mode 100644 python4j/python4j-numpy/pom.xml diff --git a/pom.xml b/pom.xml index ab9f80b92..184eeb11f 100644 --- a/pom.xml +++ b/pom.xml @@ -137,6 +137,7 @@ jumpy pydatavec pydl4j + 
python4j diff --git a/python4j/pom.xml b/python4j/pom.xml new file mode 100644 index 000000000..57af8f1bb --- /dev/null +++ b/python4j/pom.xml @@ -0,0 +1,66 @@ + +<project xmlns="http://maven.apache.org/POM/4.0.0"> + + <parent> + <artifactId>deeplearning4j</artifactId> + <groupId>org.deeplearning4j</groupId> + <version>1.0.0-SNAPSHOT</version> + </parent> + <modelVersion>4.0.0</modelVersion> + + <groupId>org.eclipse</groupId> + <artifactId>python4j-parent</artifactId> + <packaging>pom</packaging> + + <modules> + <module>python4j-core</module> + <module>python4j-numpy</module> + </modules> + + <dependencies> + <dependency> + <groupId>org.projectlombok</groupId> + <artifactId>lombok</artifactId> + <version>${lombok.version}</version> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>ch.qos.logback</groupId> + <artifactId>logback-classic</artifactId> + <version>${logback.version}</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId>junit</groupId> + <artifactId>junit</artifactId> + <version>${junit.version}</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId>commons-io</groupId> + <artifactId>commons-io</artifactId> + <version>${commons-io.version}</version> + </dependency> + <dependency> + <groupId>com.google.code.findbugs</groupId> + <artifactId>jsr305</artifactId> + <version>3.0.2</version> + </dependency> + </dependencies> +</project> \ No newline at end of file diff --git a/python4j/python4j-core/pom.xml b/python4j/python4j-core/pom.xml new file mode 100644 index 000000000..b429d8272 --- /dev/null +++ b/python4j/python4j-core/pom.xml @@ -0,0 +1,44 @@ + +<project xmlns="http://maven.apache.org/POM/4.0.0"> + + <parent> + <artifactId>python4j-parent</artifactId> + <groupId>org.eclipse</groupId> + <version>1.0.0-SNAPSHOT</version> + </parent> + <packaging>jar</packaging> + <modelVersion>4.0.0</modelVersion> + + <artifactId>python4j-core</artifactId> + + <dependencies> + <dependency> + <groupId>org.json</groupId> + <artifactId>json</artifactId> + <version>20190722</version> + </dependency> + <dependency> + <groupId>org.bytedeco</groupId> + <artifactId>cpython-platform</artifactId> + <version>${cpython-platform.version}</version> + </dependency> + </dependencies> +</project> \ No newline at end of file diff --git a/python4j/python4j-core/src/main/java/org/eclipse/python4j/Python.java b/python4j/python4j-core/src/main/java/org/eclipse/python4j/Python.java new file mode 100644 index 000000000..fd6fff112 --- /dev/null +++ b/python4j/python4j-core/src/main/java/org/eclipse/python4j/Python.java @@ -0,0 +1,611 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + + +package org.eclipse.python4j; + +import org.bytedeco.cpython.PyObject; + +import java.util.Collections; +import java.util.List; + +import static org.bytedeco.cpython.global.python.*; + + +public class Python { + + static { + new PythonExecutioner(); + } + + /** + * Imports a python module, similar to python import statement. + * + * @param moduleName name of the module to be imported + * @return reference to the module object + */ + public static PythonObject importModule(String moduleName) { + PythonGIL.assertThreadSafe(); + PythonObject module = new PythonObject(PyImport_ImportModule(moduleName)); + if (module.isNone()) { + throw new PythonException("Error importing module: " + moduleName); + } + return module; + } + + /** + * Gets a builtins attribute + * + * @param attrName Attribute name + * @return + */ + public static PythonObject attr(String attrName) { + PythonGIL.assertThreadSafe(); + PyObject builtins = PyImport_ImportModule("builtins"); + try { + return new PythonObject(PyObject_GetAttrString(builtins, attrName)); + } finally { + Py_DecRef(builtins); + } + } + + + /** + * Gets the size of a PythonObject, similar to len() in python.
+ * + * @param pythonObject + * @return + */ + public static PythonObject len(PythonObject pythonObject) { + PythonGIL.assertThreadSafe(); + long n = PyObject_Size(pythonObject.getNativePythonObject()); + if (n < 0) { + throw new PythonException("Object has no length: " + pythonObject); + } + return PythonTypes.INT.toPython(n); + } + + /** + * Gets the string representation of an object. + * + * @param pythonObject + * @return + */ + public static PythonObject str(PythonObject pythonObject) { + PythonGIL.assertThreadSafe(); + try { + return PythonTypes.STR.toPython(pythonObject.toString()); + } catch (Exception e) { + throw new RuntimeException(e); + } + + + } + + /** + * Returns an empty string + * + * @return + */ + public static PythonObject str() { + PythonGIL.assertThreadSafe(); + try { + return PythonTypes.STR.toPython(""); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + /** + * Returns the str type object + * @return + */ + public static PythonObject strType() { + return attr("str"); + } + + /** + * Returns a floating point number from a number or a string. + * @param pythonObject + * @return + */ + public static PythonObject float_(PythonObject pythonObject) { + return PythonTypes.FLOAT.toPython(PythonTypes.FLOAT.toJava(pythonObject)); + } + + /** + * Returns 0. + * @return + */ + public static PythonObject float_() { + try { + return PythonTypes.FLOAT.toPython(0d); + } catch (Exception e) { + throw new RuntimeException(e); + } + + } + + /** + * Returns the float type object + * @return + */ + public static PythonObject floatType() { + return attr("float"); + } + + + /** + * Converts a value to a Boolean value i.e., True or False, using the standard truth testing procedure. + * @param pythonObject + * @return + */ + public static PythonObject bool(PythonObject pythonObject) { + return PythonTypes.BOOL.toPython(PythonTypes.BOOL.toJava(pythonObject)); + + } + + /** + * Returns False. + * @return + */ + public static PythonObject bool() { + return PythonTypes.BOOL.toPython(false); + + } + + /** + * Returns the bool type object + * @return + */ + public static PythonObject boolType() { + return attr("bool"); + } + + /** + * Returns an integer from a number or a string. + * @param pythonObject + * @return + */ + public static PythonObject int_(PythonObject pythonObject) { + return PythonTypes.INT.toPython(PythonTypes.INT.toJava(pythonObject)); + } + + /** + * Returns 0. + * @return + */ + public static PythonObject int_() { + return PythonTypes.INT.toPython(0L); + + } + + /** + * Returns the int type object + * @return + */ + public static PythonObject intType() { + return attr("int"); + } + + /** + * Takes sequence types and converts them to lists. + * @param pythonObject + * @return + */ + public static PythonObject list(PythonObject pythonObject) { + PythonGIL.assertThreadSafe(); + try (PythonGC _ = PythonGC.watch()) { + PythonObject listF = attr("list"); + PythonObject ret = listF.call(pythonObject); + if (ret.isNone()) { + throw new PythonException("Object is not iterable: " + pythonObject.toString()); + } + return ret; + } + } + + /** + * Returns empty list. + * @return + */ + public static PythonObject list() { + return PythonTypes.LIST.toPython(Collections.emptyList()); + } + + /** + * Returns list type object. + * @return + */ + public static PythonObject listType() { + return attr("list"); + } + + /** + * Creates a dictionary.
+ * @param pythonObject + * @return + */ + public static PythonObject dict(PythonObject pythonObject) { + PythonObject dictF = attr("dict"); + PythonObject ret = dictF.call(pythonObject); + if (ret.isNone()) { + throw new PythonException("Cannot build dict from object: " + pythonObject.toString()); + } + dictF.del(); + return ret; + } + + /** + * Returns empty dict + * @return + */ + public static PythonObject dict() { + return PythonTypes.DICT.toPython(Collections.emptyMap()); + } + + /** + * Returns dict type object. + * @return + */ + public static PythonObject dictType() { + return attr("dict"); + } + + /** + * Creates a set. + * @param pythonObject + * @return + */ + public static PythonObject set(PythonObject pythonObject) { + PythonObject setF = attr("set"); + PythonObject ret = setF.call(pythonObject); + if (ret.isNone()) { + throw new PythonException("Cannot build set from object: " + pythonObject.toString()); + } + setF.del(); + return ret; + } + + /** + * Returns empty set. + * @return + */ + public static PythonObject set() { + PythonObject setF = attr("set"); + PythonObject ret; + ret = setF.call(); + setF.del(); + return ret; + } + + /** + * Returns the set type object. + * @return + */ + public static PythonObject setType() { + return attr("set"); + } + + /** + * Creates a bytearray. + * @param pythonObject + * @return + */ + public static PythonObject bytearray(PythonObject pythonObject) { + PythonObject baF = attr("bytearray"); + PythonObject ret = baF.call(pythonObject); + if (ret.isNone()) { + throw new PythonException("Cannot build bytearray from object: " + pythonObject.toString()); + } + baF.del(); + return ret; + } + + /** + * Returns empty bytearray. + * @return + */ + public static PythonObject bytearray() { + PythonObject baF = attr("bytearray"); + PythonObject ret; + ret = baF.call(); + baF.del(); + return ret; + } + + /** + * Returns bytearray type object + * @return + */ + public static PythonObject bytearrayType() { + return attr("bytearray"); + } + + /** + * Creates a memoryview. + * @param pythonObject + * @return + */ + public static PythonObject memoryview(PythonObject pythonObject) { + PythonObject mvF = attr("memoryview"); + PythonObject ret = mvF.call(pythonObject); + if (ret.isNone()) { + throw new PythonException("Cannot build memoryview from object: " + pythonObject.toString()); + } + mvF.del(); + return ret; + } + + /** + * Returns memoryview type object. + * @return + */ + public static PythonObject memoryviewType() { + return attr("memoryview"); + } + + /** + * Creates a byte string. + * @param pythonObject + * @return + */ + public static PythonObject bytes(PythonObject pythonObject) { + PythonObject bytesF = attr("bytes"); + PythonObject ret = bytesF.call(pythonObject); + if (ret.isNone()) { + throw new PythonException("Cannot build bytes from object: " + pythonObject.toString()); + } + bytesF.del(); + return ret; + } + + /** + * Returns empty byte string. + * @return + */ + public static PythonObject bytes() { + PythonObject bytesF = attr("bytes"); + PythonObject ret; + ret = bytesF.call(); + bytesF.del(); + return ret; + } + + /** + * Returns bytes type object + * @return + */ + public static PythonObject bytesType() { + return attr("bytes"); + } + + /** + * Creates a tuple.
+ * @param pythonObject + * @return + */ + public static PythonObject tuple(PythonObject pythonObject) { + PythonObject tupleF = attr("tuple"); + PythonObject ret = tupleF.call(pythonObject); + if (ret.isNone()) { + throw new PythonException("Cannot build tuple from object: " + pythonObject.toString()); + } + tupleF.del(); + return ret; + } + + /** + * Returns empty tuple. + * @return + */ + public static PythonObject tuple() { + PythonObject tupleF = attr("tuple"); + PythonObject ret; + ret = tupleF.call(); + tupleF.del(); + return ret; + } + + /** + * Returns tuple type object + * @return + */ + public static PythonObject tupleType() { + return attr("tuple"); + } + + /** + * Creates an Exception + * @param pythonObject + * @return + */ + public static PythonObject Exception(PythonObject pythonObject) { + PythonObject excF = attr("Exception"); + PythonObject ret = excF.call(pythonObject); + excF.del(); + return ret; + } + + /** + * Creates an Exception + * @return + */ + public static PythonObject Exception() { + PythonObject excF = attr("Exception"); + PythonObject ret; + ret = excF.call(); + excF.del(); + return ret; + } + + /** + * Returns Exception type object + * @return + */ + public static PythonObject ExceptionType() { + return attr("Exception"); + } + + + /** + * Returns the globals dictionary. + * @return + */ + public static PythonObject globals() { + PythonGIL.assertThreadSafe(); + PyObject main = PyImport_ImportModule("__main__"); + PyObject globals = PyModule_GetDict(main); + Py_DecRef(main); + return new PythonObject(globals, false); + } + + /** + * Returns the type of an object. + * @param pythonObject + * @return + */ + public static PythonObject type(PythonObject pythonObject) { + PythonObject typeF = attr("type"); + PythonObject ret = typeF.call(pythonObject); + typeF.del(); + return ret; + } + + /** + * Returns True if the specified object is of the specified type, otherwise False. + * @param obj + * @param type + * @return + */ + public static boolean isinstance(PythonObject obj, PythonObject... type) { + PythonGIL.assertThreadSafe(); + PyObject argsTuple = PyTuple_New(type.length); + try { + for (int i = 0; i < type.length; i++) { + PythonObject x = type[i]; + Py_IncRef(x.getNativePythonObject()); + PyTuple_SetItem(argsTuple, i, x.getNativePythonObject()); + } + return PyObject_IsInstance(obj.getNativePythonObject(), argsTuple) != 0; + } finally { + Py_DecRef(argsTuple); + } + + } + + /** + * Evaluates the specified expression. + * @param expression + * @return + */ + public static PythonObject eval(String expression) { + + PythonGIL.assertThreadSafe(); + PyObject compiledCode = Py_CompileString(expression, "", Py_eval_input); + PyObject main = PyImport_ImportModule("__main__"); + PyObject globals = PyModule_GetDict(main); + PyObject locals = PyDict_New(); + try { + return new PythonObject(PyEval_EvalCode(compiledCode, globals, locals)); + } finally { + Py_DecRef(main); + Py_DecRef(locals); + Py_DecRef(compiledCode); + } + + } + + /** + * Returns the builtins module + * @return + */ + public static PythonObject builtins() { + return importModule("builtins"); + + } + + /** + * Returns None. + * @return + */ + public static PythonObject None() { + return eval("None"); + } + + /** + * Returns True. + * @return + */ + public static PythonObject True() { + return eval("True"); + } + + /** + * Returns False.
+ * @return + */ + public static PythonObject False() { + return eval("False"); + } + + /** + * Returns True if the object passed is callable, otherwise False. + * @param pythonObject + * @return + */ + public static boolean callable(PythonObject pythonObject) { + PythonGIL.assertThreadSafe(); + return PyCallable_Check(pythonObject.getNativePythonObject()) == 1; + } + + + public static void setContext(String context){ + PythonContextManager.setContext(context); + } + + public static String getCurrentContext() { + return PythonContextManager.getCurrentContext(); + } + + public static void deleteContext(String context){ + PythonContextManager.deleteContext(context); + } + public static void resetContext() { + PythonContextManager.reset(); + } + + /** + * Executes a string of code. + * @param code + * @throws PythonException + */ + public static void exec(String code) throws PythonException { + PythonExecutioner.exec(code); + } + + /** + * Executes a string of code. + * @param code + * @param inputs + * @param outputs + */ + public static void exec(String code, List inputs, List outputs){ + PythonExecutioner.exec(code, inputs, outputs); + } + + +} diff --git a/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonContextManager.java b/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonContextManager.java new file mode 100644 index 000000000..a34d8a239 --- /dev/null +++ b/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonContextManager.java @@ -0,0 +1,241 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +package org.eclipse.python4j; + +import javax.lang.model.SourceVersion; + + +import java.util.HashSet; +import java.util.Set; +import java.util.concurrent.atomic.AtomicBoolean; + +/** + * Emulates multiple interpreters in a single interpreter. + * This works by simply obfuscating/de-obfuscating variable names + * such that only the required subset of the global namespace is "visible" + * at any given time. + * By default, there exists a "main" context emulating the default interpreter. + * + * @author Fariz Rahman + */ + + +public class PythonContextManager { + + private static Set contexts = new HashSet<>(); + private static AtomicBoolean init = new AtomicBoolean(false); + private static String currentContext; + private static final String MAIN_CONTEXT = "main"; + private static final String COLLAPSED_KEY = "__collapsed__"; + + static { + init(); + } + + private static void init() { + if (init.get()) return; + new PythonExecutioner(); + init.set(true); + currentContext = MAIN_CONTEXT; + contexts.add(currentContext); + } + + + /** + * Adds a new context.
+ * @param contextName + */ + public static void addContext(String contextName) { + if (!validateContextName(contextName)) { + throw new PythonException("Invalid context name: " + contextName); + } + contexts.add(contextName); + } + + /** + * Returns true if context exists, else false. + * @param contextName + * @return + */ + public static boolean hasContext(String contextName) { + return contexts.contains(contextName); + } + + private static boolean validateContextName(String s) { + return SourceVersion.isIdentifier(s) && !s.startsWith(COLLAPSED_KEY); + } + + private static String getContextPrefix(String contextName) { + return COLLAPSED_KEY + contextName + "__"; + } + + private static String getCollapsedVarNameForContext(String varName, String contextName) { + return getContextPrefix(contextName) + varName; + } + + private static String expandCollapsedVarName(String varName, String contextName) { + String prefix = COLLAPSED_KEY + contextName + "__"; + return varName.substring(prefix.length()); + + } + + private static void collapseContext(String contextName) { + try (PythonGC _ = PythonGC.watch()) { + PythonObject globals = Python.globals(); + PythonObject pop = globals.attr("pop"); + PythonObject keysF = globals.attr("keys"); + PythonObject keys = keysF.call(); + PythonObject keysList = Python.list(keys); + int numKeys = Python.len(keysList).toInt(); + for (int i = 0; i < numKeys; i++) { + PythonObject key = keysList.get(i); + String keyStr = key.toString(); + if (!((keyStr.startsWith("__") && keyStr.endsWith("__")) || keyStr.startsWith("__collapsed_"))) { + String collapsedKey = getCollapsedVarNameForContext(keyStr, contextName); + PythonObject val = pop.call(key); + + PythonObject pyNewKey = new PythonObject(collapsedKey); + globals.set(pyNewKey, val); + } + } + } catch (Exception pe) { + throw new RuntimeException(pe); + } + } + + private static void expandContext(String contextName) { + try (PythonGC _ = PythonGC.watch()) { + String prefix = getContextPrefix(contextName); + PythonObject globals = Python.globals(); + PythonObject pop = globals.attr("pop"); + PythonObject keysF = globals.attr("keys"); + + PythonObject keys = keysF.call(); + + PythonObject keysList = Python.list(keys); + try (PythonGC __ = PythonGC.pause()) { + int numKeys = Python.len(keysList).toInt(); + + for (int i = 0; i < numKeys; i++) { + PythonObject key = keysList.get(i); + String keyStr = key.toString(); + if (keyStr.startsWith(prefix)) { + String expandedKey = expandCollapsedVarName(keyStr, contextName); + PythonObject val = pop.call(key); + PythonObject newKey = new PythonObject(expandedKey); + globals.set(newKey, val); + } + } + } + } + } + + + /** + * Activates the specified context + * @param contextName + */ + public static void setContext(String contextName) { + if (contextName.equals(currentContext)) { + return; + } + if (!hasContext(contextName)) { + addContext(contextName); + } + + + collapseContext(currentContext); + + expandContext(contextName); + currentContext = contextName; + + } + + /** + * Activates the main context + */ + public static void setMainContext() { + setContext(MAIN_CONTEXT); + + } + + /** + * Returns the current context's name. + * @return + */ + public static String getCurrentContext() { + return currentContext; + } + + /** + * Resets the current context. 
+ */ + public static void reset() { + String tempContext = "___temp__context___"; + String currContext = currentContext; + setContext(tempContext); + deleteContext(currContext); + setContext(currContext); + } + + /** + * Deletes the specified context. + * @param contextName + */ + public static void deleteContext(String contextName) { + if (contextName.equals(currentContext)) { + throw new PythonException("Cannot delete current context!"); + } + if (!contexts.contains(contextName)) { + return; + } + String prefix = getContextPrefix(contextName); + PythonObject globals = Python.globals(); + PythonObject keysList = Python.list(globals.attr("keys").call()); + int numKeys = Python.len(keysList).toInt(); + for (int i = 0; i < numKeys; i++) { + PythonObject key = keysList.get(i); + String keyStr = key.toString(); + if (keyStr.startsWith(prefix)) { + globals.attr("__delitem__").call(key); + } + } + contexts.remove(contextName); + } + + /** + * Deletes all contexts except the main context. + */ + public static void deleteNonMainContexts() { + setContext(MAIN_CONTEXT); // will never fail + for (String c : contexts.toArray(new String[0])) { + if (!c.equals(MAIN_CONTEXT)) { + deleteContext(c); // will never fail + } + } + + } + + /** + * Returns the names of all contexts. + * @return + */ + public String[] getContexts() { + return contexts.toArray(new String[0]); + } + +} diff --git a/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonException.java b/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonException.java new file mode 100644 index 000000000..a9bbf596c --- /dev/null +++ b/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonException.java @@ -0,0 +1,52 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +package org.eclipse.python4j; + + +/** + * Thrown when an exception occurs in python land + */ +public class PythonException extends RuntimeException { + public PythonException(String message) { + super(message); + } + + private static String getExceptionString(PythonObject exception) { + try (PythonGC gc = PythonGC.watch()) { + if (Python.isinstance(exception, Python.ExceptionType())) { + String exceptionClass = Python.type(exception).attr("__name__").toString(); + String message = exception.toString(); + return exceptionClass + ": " + message; + } + return exception.toString(); + } catch (Exception e) { + throw new RuntimeException("An error occurred while trying to create a PythonException.", e); + } + } + + public PythonException(PythonObject exception) { + this(getExceptionString(exception)); + } + + public PythonException(String message, Throwable cause) { + super(message, cause); + } + + public PythonException(Throwable cause) { + super(cause); + } +} diff --git a/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonExecutioner.java b/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonExecutioner.java new file mode 100644 index 000000000..57e1a22ae --- /dev/null +++ b/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonExecutioner.java @@ -0,0 +1,342 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + + +package org.eclipse.python4j; + +import org.bytedeco.cpython.PyObject; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.concurrent.atomic.AtomicBoolean; + +import org.apache.commons.io.IOUtils; +import org.bytedeco.cpython.global.python; + +import static org.bytedeco.cpython.global.python.*; +import static org.bytedeco.cpython.global.python.PyImport_ImportModule; +import static org.bytedeco.cpython.helper.python.Py_SetPath; + + +public class PythonExecutioner { + private final static String PYTHON_EXCEPTION_KEY = "__python_exception__"; + private static AtomicBoolean init = new AtomicBoolean(false); + private final static String DEFAULT_PYTHON_PATH_PROPERTY = "org.eclipse.python4j.path"; + private final static String JAVACPP_PYTHON_APPEND_TYPE = "org.eclipse.python4j.path.append"; + private final static String DEFAULT_APPEND_TYPE = "before"; + + static { + init(); + } + + private static synchronized void init() { + if (init.get()) { + return; + } + init.set(true); + initPythonPath(); + PyEval_InitThreads(); + Py_InitializeEx(0); + } + + /** + * Sets a variable. 
+ * + * @param name + * @param value + */ + public static void setVariable(String name, PythonObject value) { + PythonGIL.assertThreadSafe(); + PyObject main = PyImport_ImportModule("__main__"); + PyObject globals = PyModule_GetDict(main); + PyDict_SetItemString(globals, name, value.getNativePythonObject()); + Py_DecRef(main); + + } + + /** + * Sets given list of PythonVariables in the interpreter. + * + * @param pyVars + */ + public static void setVariables(List pyVars) { + for (PythonVariable pyVar : pyVars) + setVariable(pyVar.getName(), pyVar.getPythonObject()); + } + + /** + * Sets given list of PythonVariables in the interpreter. + * + * @param pyVars + */ + public static void setVariables(PythonVariable... pyVars) { + setVariables(Arrays.asList(pyVars)); + } + + /** + * Gets the given list of PythonVariables from the interpreter. + * + * @param pyVars + */ + public static void getVariables(List pyVars) { + for (PythonVariable pyVar : pyVars) + pyVar.setValue(getVariable(pyVar.getName(), pyVar.getType()).getValue()); + } + + /** + * Gets the given list of PythonVariables from the interpreter. + * + * @param pyVars + */ + public static void getVariables(PythonVariable... pyVars) { + getVariables(Arrays.asList(pyVars)); + } + + /** + * Gets the variable with the given name from the interpreter. + * + * @param name + * @return + */ + public static PythonObject getVariable(String name) { + PythonGIL.assertThreadSafe(); + PyObject main = PyImport_ImportModule("__main__"); + PyObject globals = PyModule_GetDict(main); + PyObject pyName = PyUnicode_FromString(name); + try { + if (PyDict_Contains(globals, pyName) == 1) { + return new PythonObject(PyObject_GetItem(globals, pyName), false); + } + } finally { + Py_DecRef(main); + //Py_DecRef(globals); + Py_DecRef(pyName); + } + return new PythonObject(null); + } + + /** + * Gets the variable with the given name from the interpreter. + * + * @param name + * @return + */ + public static PythonVariable getVariable(String name, PythonType type) { + PythonObject val = getVariable(name); + return new PythonVariable<>(name, type, type.toJava(val)); + } + + /** + * Executes a string of code + * + * @param code + */ + public static synchronized void simpleExec(String code) { + PythonGIL.assertThreadSafe(); + int result = PyRun_SimpleStringFlags(code, null); + if (result != 0) { + throw new PythonException("Execution failed, unable to retrieve python exception."); + } + } + + private static void throwIfExecutionFailed() { + PythonObject ex = getVariable(PYTHON_EXCEPTION_KEY); + if (ex != null && !ex.isNone() && !ex.toString().isEmpty()) { + setVariable(PYTHON_EXCEPTION_KEY, PythonTypes.STR.toPython("")); + throw new PythonException(ex); + } + } + + + private static String getWrappedCode(String code) { + + try (InputStream is = PythonExecutioner.class + .getResourceAsStream("pythonexec/pythonexec.py")) { + String base = IOUtils.toString(is, StandardCharsets.UTF_8); + String indentedCode = " " + code.replace("\n", "\n "); + String out = base.replace(" pass", indentedCode); + return out; + } catch (IOException e) { + throw new IllegalStateException("Unable to read python code!", e); + } + + } + + /** + * Executes a string of code. Throws PythonException if execution fails. 
+     *
+     * @param code
+     */
+    public static void exec(String code) {
+        simpleExec(getWrappedCode(code));
+        throwIfExecutionFailed();
+    }
+
+    public static void exec(String code, List<PythonVariable> inputs, List<PythonVariable> outputs) {
+        if (inputs != null) {
+            setVariables(inputs.toArray(new PythonVariable[0]));
+        }
+        exec(code);
+        if (outputs != null) {
+            getVariables(outputs.toArray(new PythonVariable[0]));
+        }
+    }
+
+    /**
+     * Returns a list of all supported variables in the interpreter.
+     *
+     * @return
+     */
+    public static List<PythonVariable> getAllVariables() {
+        PythonGIL.assertThreadSafe();
+        List<PythonVariable> ret = new ArrayList<>();
+        PyObject main = PyImport_ImportModule("__main__");
+        PyObject globals = PyModule_GetDict(main);
+        PyObject keys = PyDict_Keys(globals);
+        PyObject keysIter = PyObject_GetIter(keys);
+        try {
+            long n = PyObject_Size(globals);
+            for (int i = 0; i < n; i++) {
+                PyObject pyKey = PyIter_Next(keysIter);
+                try {
+                    // Skip private/internal names such as __builtins__.
+                    if (!new PythonObject(pyKey, false).toString().startsWith("_")) {
+                        PyObject pyVal = PyObject_GetItem(globals, pyKey); // TODO check ref count
+                        PythonType pt;
+                        try {
+                            pt = PythonTypes.getPythonTypeForPythonObject(new PythonObject(pyVal, false));
+                        } catch (PythonException pe) {
+                            pt = null; // unsupported type: skip this variable
+                        }
+                        if (pt != null) {
+                            ret.add(
+                                    new PythonVariable<>(
+                                            new PythonObject(pyKey, false).toString(),
+                                            pt,
+                                            pt.toJava(new PythonObject(pyVal, false))
+                                    )
+                            );
+                        }
+                    }
+                } finally {
+                    Py_DecRef(pyKey);
+                }
+            }
+        } finally {
+            Py_DecRef(keysIter);
+            Py_DecRef(keys);
+            Py_DecRef(main);
+        }
+        // Return outside the finally block: returning from finally would
+        // silently discard any exception thrown in the try block above.
+        return ret;
+    }
+
+
+    /**
+     * Executes a string of code and returns a list of all supported variables.
+     *
+     * @param code
+     * @param inputs
+     * @return
+     */
+    public static List<PythonVariable> execAndReturnAllVariables(String code, List<PythonVariable> inputs) {
+        setVariables(inputs);
+        simpleExec(getWrappedCode(code));
+        return getAllVariables();
+    }
+
+    /**
+     * Executes a string of code and returns a list of all supported variables.
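+     *
+     * <p>A sketch of the expected behavior (names and values illustrative;
+     * ordering follows the interpreter's globals dict, which preserves
+     * insertion order on CPython 3.7+):</p>
+     * <pre>{@code
+     * List<PythonVariable> vars = PythonExecutioner.execAndReturnAllVariables("a = 1\nb = 'x'");
+     * // expected: ("a", int, 1L) and ("b", str, "x")
+     * }</pre>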
+     *
+     * @param code
+     * @return
+     */
+    public static List<PythonVariable> execAndReturnAllVariables(String code) {
+        simpleExec(getWrappedCode(code));
+        return getAllVariables();
+    }
+
+    private static synchronized void initPythonPath() {
+        try {
+            String path = System.getProperty(DEFAULT_PYTHON_PATH_PROPERTY);
+            if (path == null) {
+                File[] packages = cachePackages();
+
+                //// TODO: fix in javacpp
+                File sitePackagesWindows = new File(python.cachePackage(), "site-packages");
+                File[] packages2 = new File[packages.length + 1];
+                for (int i = 0; i < packages.length; i++) {
+                    //System.out.println(packages[i].getAbsolutePath());
+                    packages2[i] = packages[i];
+                }
+                packages2[packages.length] = sitePackagesWindows;
+                //System.out.println(sitePackagesWindows.getAbsolutePath());
+                packages = packages2;
+                //////////
+
+                Py_SetPath(packages);
+            } else {
+                StringBuilder sb = new StringBuilder();
+                File[] packages = cachePackages();
+                JavaCppPathType pathAppendValue = JavaCppPathType.valueOf(
+                        System.getProperty(JAVACPP_PYTHON_APPEND_TYPE, DEFAULT_APPEND_TYPE).toUpperCase());
+                switch (pathAppendValue) {
+                    case BEFORE:
+                        for (File cacheDir : packages) {
+                            sb.append(cacheDir);
+                            sb.append(java.io.File.pathSeparator);
+                        }
+                        sb.append(path);
+                        break;
+                    case AFTER:
+                        sb.append(path);
+                        for (File cacheDir : packages) {
+                            sb.append(cacheDir);
+                            sb.append(java.io.File.pathSeparator);
+                        }
+                        break;
+                    case NONE:
+                        sb.append(path);
+                        break;
+                }
+                Py_SetPath(sb.toString());
+            }
+        } catch (IOException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    private enum JavaCppPathType {
+        BEFORE, AFTER, NONE
+    }
+
+    private static File[] cachePackages() throws IOException {
+        File[] path = org.bytedeco.cpython.global.python.cachePackages();
+        path = Arrays.copyOf(path, path.length + 1);
+        path[path.length - 1] = cachePackage();
+        return path;
+    }
+
+}
diff --git a/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonGC.java b/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonGC.java
new file mode 100644
index 000000000..5531b67d3
--- /dev/null
+++ b/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonGC.java
@@ -0,0 +1,137 @@
+/*******************************************************************************
+ * Copyright (c) 2020 Konduit K.K.
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Apache License, Version 2.0 which is available at
+ * https://www.apache.org/licenses/LICENSE-2.0.
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ ******************************************************************************/
+
+
+package org.eclipse.python4j;
+
+import org.bytedeco.cpython.PyObject;
+import org.bytedeco.javacpp.Pointer;
+
+import java.io.Closeable;
+import java.util.HashSet;
+import java.util.Set;
+
+import static org.bytedeco.cpython.global.python.*;
+
+/**
+ * Wrap your code in a try-with-PythonGC block for automatic GC:
+ * ```
+ * try(PythonGC gc = PythonGC.watch()){
+ *     // your code here
+ * }
+ * ```
+ *
+ * If a PythonObject created inside such a block has to be used outside
+ * the block, use PythonGC.keep() to exclude that object from GC.
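+ * Collection can also be suspended temporarily with PythonGC.pause() and
+ * re-enabled with PythonGC.resume().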
+ *
+ * ```
+ * PythonObject pyObj;
+ *
+ * try(PythonGC gc = PythonGC.watch()){
+ *     // do stuff
+ *     pyObj = someFunction();
+ *     PythonGC.keep(pyObj);
+ * }
+ * ```
+ */
+public class PythonGC implements Closeable {
+
+    private PythonGC previousFrame = null;
+    private boolean active = true;
+    private static PythonGC currentFrame = new PythonGC();
+
+    private Set<PyObject> objects = new HashSet<>();
+
+    private boolean alreadyRegistered(PyObject pyObject) {
+        if (objects.contains(pyObject)) {
+            return true;
+        }
+        if (previousFrame == null) {
+            return false;
+        }
+        return previousFrame.alreadyRegistered(pyObject);
+    }
+
+    private void addObject(PythonObject pythonObject) {
+        if (!active) return;
+        if (Pointer.isNull(pythonObject.getNativePythonObject())) return;
+        if (alreadyRegistered(pythonObject.getNativePythonObject())) {
+            return;
+        }
+        objects.add(pythonObject.getNativePythonObject());
+    }
+
+    public static void register(PythonObject pythonObject) {
+        currentFrame.addObject(pythonObject);
+    }
+
+    public static void keep(PythonObject pythonObject) {
+        currentFrame.objects.remove(pythonObject.getNativePythonObject());
+        if (currentFrame.previousFrame != null) {
+            currentFrame.previousFrame.addObject(pythonObject);
+        }
+    }
+
+    private PythonGC() {
+    }
+
+    public static PythonGC watch() {
+        PythonGC ret = new PythonGC();
+        ret.previousFrame = currentFrame;
+        ret.active = currentFrame.active;
+        currentFrame = ret;
+        return ret;
+    }
+
+    private void collect() {
+        for (PyObject pyObject : objects) {
+            // TODO find out how globals gets collected here
+            if (pyObject.equals(Python.globals().getNativePythonObject())) continue;
+//            try{
+//                System.out.println(PythonTypes.STR.toJava(new PythonObject(pyObject, false)));
+//            }catch (Exception e){}
+            Py_DecRef(pyObject);
+        }
+        this.objects = new HashSet<>();
+    }
+
+    @Override
+    public void close() {
+        if (active) collect();
+        currentFrame = previousFrame;
+    }
+
+    public static boolean isWatching() {
+        if (!currentFrame.active) return false;
+        return currentFrame.previousFrame != null;
+    }
+
+    public static PythonGC pause() {
+        PythonGC pausedFrame = new PythonGC();
+        pausedFrame.active = false;
+        pausedFrame.previousFrame = currentFrame;
+        currentFrame = pausedFrame;
+        return pausedFrame;
+    }
+
+    public static void resume() {
+        if (currentFrame.active) {
+            throw new RuntimeException("GC not paused!");
+        }
+        currentFrame = currentFrame.previousFrame;
+    }
+}
diff --git a/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonGIL.java b/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonGIL.java
new file mode 100644
index 000000000..46b3db431
--- /dev/null
+++ b/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonGIL.java
@@ -0,0 +1,93 @@
+/*******************************************************************************
+ * Copyright (c) 2020 Konduit K.K.
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Apache License, Version 2.0 which is available at
+ * https://www.apache.org/licenses/LICENSE-2.0.
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ ******************************************************************************/
+
+package org.eclipse.python4j;
+
+
+import org.bytedeco.cpython.PyThreadState;
+
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import static org.bytedeco.cpython.global.python.*;
+
+
+public class PythonGIL implements AutoCloseable {
+    private static PyThreadState mainThreadState;
+    private static final AtomicBoolean acquired = new AtomicBoolean();
+    private boolean acquiredByMe = false;
+    private static long defaultThreadId = -1;
+
+    public static void assertThreadSafe() {
+        if (acquired.get()) {
+            return;
+        }
+        if (defaultThreadId == -1) {
+            defaultThreadId = Thread.currentThread().getId();
+        } else if (defaultThreadId != Thread.currentThread().getId()) {
+            throw new RuntimeException("Attempt to use Python4j from multiple threads without " +
+                    "acquiring GIL. Enclose your code in a try(PythonGIL gil = PythonGIL.lock()){...}" +
+                    " block to ensure that GIL is acquired in multi-threaded environments.");
+        }
+    }
+
+    static {
+        new PythonExecutioner();
+    }
+
+    private PythonGIL() {
+        while (acquired.get()) {
+            try {
+                Thread.sleep(10);
+            } catch (Exception e) {
+                throw new RuntimeException(e);
+            }
+        }
+        acquire();
+        acquired.set(true);
+        acquiredByMe = true;
+    }
+
+    @Override
+    public void close() {
+        if (acquiredByMe) {
+            release();
+            acquired.set(false);
+            acquiredByMe = false;
+        }
+    }
+
+    public static synchronized PythonGIL lock() {
+        return new PythonGIL();
+    }
+
+    private static synchronized void acquire() {
+        // Detach the current thread state and attach a fresh thread state
+        // for the calling thread.
+        mainThreadState = PyEval_SaveThread();
+        PyThreadState ts = PyThreadState_New(mainThreadState.interp());
+        PyEval_RestoreThread(ts);
+        PyThreadState_Swap(ts);
+    }
+
+    private static void release() { // do not synchronize!
+        PyEval_SaveThread();
+        PyEval_RestoreThread(mainThreadState);
+    }
+}
diff --git a/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonJob.java b/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonJob.java
new file mode 100644
index 000000000..cdbb1b81d
--- /dev/null
+++ b/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonJob.java
@@ -0,0 +1,175 @@
+/*******************************************************************************
+ * Copyright (c) 2020 Konduit K.K.
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Apache License, Version 2.0 which is available at
+ * https://www.apache.org/licenses/LICENSE-2.0.
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ ******************************************************************************/
+
+package org.eclipse.python4j;
+
+
+import lombok.Builder;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+
+import javax.annotation.Nonnull;
+import java.util.List;
+
+
+@Data
+@NoArgsConstructor
+/**
+ * PythonJob is an abstraction for executing multiple Python scripts in a
+ * multi-threaded, stateful environment. The setup-and-run mode allows your
+ * "setup" code (imports, model loading etc.) to be executed only once.
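+ *
+ * <p>A usage sketch in setup-and-run mode (script and variable names are
+ * illustrative only):</p>
+ * <pre>{@code
+ * String code = "def setup():\n    pass\n\ndef run(a):\n    return {'b': a + 1}";
+ * PythonJob job = new PythonJob("myJob", code, true);
+ *
+ * List<PythonVariable> inputs = new ArrayList<>();
+ * inputs.add(new PythonVariable<>("a", PythonTypes.INT, 1));
+ * List<PythonVariable> outputs = new ArrayList<>();
+ * outputs.add(new PythonVariable<>("b", PythonTypes.INT));
+ * job.exec(inputs, outputs); // outputs.get(0).getValue() is expected to be 2L
+ * }</pre>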
+ */ +public class PythonJob { + + private String code; + private String name; + private String context; + private boolean setupRunMode; + private PythonObject runF; + + static { + new PythonExecutioner(); + } + + @Builder + /** + * @param name Name for the python job. + * @param code Python code. + * @param setupRunMode If true, the python code is expected to have two methods: setup(), which takes no arguments, + * and run() which takes some or no arguments. setup() method is executed once, + * and the run() method is called with the inputs(if any) per transaction, and is expected to return a dictionary + * mapping from output variable names (str) to output values. + * If false, the full script is run on each transaction and the output variables are obtained from the global namespace + * after execution. + */ + public PythonJob(@Nonnull String name, @Nonnull String code, boolean setupRunMode){ + this.name = name; + this.code = code; + this.setupRunMode = setupRunMode; + context = "__job_" + name; + if (PythonContextManager.hasContext(context)) { + throw new PythonException("Unable to create python job " + name + ". Context " + context + " already exists!"); + } + if (setupRunMode) setup(); + } + + + /** + * Clears all variables in current context and calls setup() + */ + public void clearState(){ + String context = this.context; + PythonContextManager.setContext("main"); + PythonContextManager.deleteContext(context); + this.context = context; + setup(); + } + + public void setup(){ + try (PythonGIL gil = PythonGIL.lock()) { + PythonContextManager.setContext(context); + PythonObject runF = PythonExecutioner.getVariable("run"); + if (runF == null || runF.isNone() || !Python.callable(runF)) { + PythonExecutioner.exec(code); + runF = PythonExecutioner.getVariable("run"); + } + if (runF.isNone() || !Python.callable(runF)) { + throw new PythonException("run() method not found! 
" + + "If a PythonJob is created with 'setup and run' " + + "mode enabled, the associated python code is " + + "expected to contain a run() method " + + "(with or without arguments)."); + } + this.runF = runF; + PythonObject setupF = PythonExecutioner.getVariable("setup"); + if (!setupF.isNone()) { + setupF.call(); + } + } + } + + public void exec(List inputs, List outputs) { + try (PythonGIL gil = PythonGIL.lock()) { + try (PythonGC _ = PythonGC.watch()) { + PythonContextManager.setContext(context); + + if (!setupRunMode) { + + PythonExecutioner.exec(code, inputs, outputs); + + return; + } + PythonExecutioner.setVariables(inputs); + + PythonObject inspect = Python.importModule("inspect"); + PythonObject getfullargspec = inspect.attr("getfullargspec"); + PythonObject argspec = getfullargspec.call(runF); + PythonObject argsList = argspec.attr("args"); + PythonObject runargs = Python.dict(); + int argsCount = Python.len(argsList).toInt(); + for (int i = 0; i < argsCount; i++) { + PythonObject arg = argsList.get(i); + PythonObject val = Python.globals().get(arg); + if (val.isNone()) { + throw new PythonException("Input value not received for run() argument: " + arg.toString()); + } + runargs.set(arg, val); + } + PythonObject outDict = runF.callWithKwargs(runargs); + PythonObject globals = Python.globals(); + PythonObject updateF = globals.attr("update"); + updateF.call(outDict); + PythonExecutioner.getVariables(outputs); + } + } + + } + + public List execAndReturnAllVariables(List inputs){ + try (PythonGIL gil = PythonGIL.lock()) { + try (PythonGC _ = PythonGC.watch()) { + PythonContextManager.setContext(context); + if (!setupRunMode) { + return PythonExecutioner.execAndReturnAllVariables(code, inputs); + } + PythonExecutioner.setVariables(inputs); + PythonObject inspect = Python.importModule("inspect"); + PythonObject getfullargspec = inspect.attr("getfullargspec"); + PythonObject argspec = getfullargspec.call(runF); + PythonObject argsList = argspec.attr("args"); + PythonObject runargs = Python.dict(); + int argsCount = Python.len(argsList).toInt(); + for (int i = 0; i < argsCount; i++) { + PythonObject arg = argsList.get(i); + PythonObject val = Python.globals().get(arg); + if (val.isNone()) { + throw new PythonException("Input value not received for run() argument: " + arg.toString()); + } + runargs.set(arg, val); + } + + PythonObject outDict = runF.callWithKwargs(runargs); + PythonObject globals = Python.globals(); + PythonObject updateF = globals.attr("update"); + updateF.call(outDict); + return PythonExecutioner.getAllVariables(); + } + + } + } + + +} diff --git a/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonObject.java b/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonObject.java new file mode 100644 index 000000000..f8ec17ed9 --- /dev/null +++ b/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonObject.java @@ -0,0 +1,244 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ ******************************************************************************/
+
+package org.eclipse.python4j;
+
+
+import org.bytedeco.cpython.PyObject;
+import org.bytedeco.javacpp.Pointer;
+
+import java.util.*;
+
+import static org.bytedeco.cpython.global.python.*;
+
+public class PythonObject {
+
+    static {
+        new PythonExecutioner();
+    }
+
+    private boolean owned = true;
+    private PyObject nativePythonObject;
+
+
+    public PythonObject(PyObject nativePythonObject, boolean owned) {
+        PythonGIL.assertThreadSafe();
+        this.nativePythonObject = nativePythonObject;
+        this.owned = owned;
+        if (owned && nativePythonObject != null) {
+            PythonGC.register(this);
+        }
+    }
+
+    public PythonObject(PyObject nativePythonObject) {
+        PythonGIL.assertThreadSafe();
+        this.nativePythonObject = nativePythonObject;
+        if (nativePythonObject != null) {
+            PythonGC.register(this);
+        }
+    }
+
+    public PyObject getNativePythonObject() {
+        return nativePythonObject;
+    }
+
+    public String toString() {
+        return PythonTypes.STR.toJava(this);
+    }
+
+    public boolean isNone() {
+        if (nativePythonObject == null || Pointer.isNull(nativePythonObject)) {
+            return true;
+        }
+        try (PythonGC _ = PythonGC.pause()) {
+            PythonObject type = Python.type(this);
+            // Compare against the string form of NoneType; reuse the already
+            // fetched type object so only one reference has to be released.
+            boolean ret = type.toString().equals("<class 'NoneType'>") && toString().equals("None");
+            Py_DecRef(type.nativePythonObject);
+            return ret;
+        }
+    }
+
+    public void del() {
+        PythonGIL.assertThreadSafe();
+        if (owned && nativePythonObject != null && !PythonGC.isWatching()) {
+            Py_DecRef(nativePythonObject);
+            nativePythonObject = null;
+        }
+    }
+
+    public PythonObject callWithArgs(PythonObject args) {
+        return callWithArgsAndKwargs(args, null);
+    }
+
+    public PythonObject callWithKwargs(PythonObject kwargs) {
+        if (!Python.callable(this)) {
+            throw new PythonException("Object is not callable: " + toString());
+        }
+        PyObject tuple = PyTuple_New(0);
+        PyObject dict = kwargs.nativePythonObject;
+        if (PyObject_IsInstance(dict, new PyObject(PyDict_Type())) != 1) {
+            throw new PythonException("Expected kwargs to be dict. Received: " + kwargs.toString());
+        }
+        PythonObject ret = new PythonObject(PyObject_Call(nativePythonObject, tuple, dict));
+        Py_DecRef(tuple);
+        return ret;
+    }
+
+    public PythonObject callWithArgsAndKwargs(PythonObject args, PythonObject kwargs) {
+        PythonGIL.assertThreadSafe();
+        PyObject tuple = null;
+        boolean ownsTuple = false;
+        try {
+            if (!Python.callable(this)) {
+                throw new PythonException("Object is not callable: " + toString());
+            }
+            if (PyObject_IsInstance(args.nativePythonObject, new PyObject(PyTuple_Type())) == 1) {
+                tuple = args.nativePythonObject;
+            } else if (PyObject_IsInstance(args.nativePythonObject, new PyObject(PyList_Type())) == 1) {
+                tuple = PyList_AsTuple(args.nativePythonObject);
+                ownsTuple = true;
+            } else {
+                throw new PythonException("Expected args to be tuple or list. Received: " + args.toString());
+            }
+            if (kwargs != null && PyObject_IsInstance(kwargs.nativePythonObject, new PyObject(PyDict_Type())) != 1) {
+                throw new PythonException("Expected kwargs to be dict. Received: " + kwargs.toString());
+            }
+            return new PythonObject(PyObject_Call(nativePythonObject, tuple, kwargs == null ? null : kwargs.nativePythonObject));
+        } finally {
+            if (ownsTuple) Py_DecRef(tuple);
+        }
+    }
+
+
+    public PythonObject call(Object...
args) {
+        return callWithArgsAndKwargs(Arrays.asList(args), null);
+    }
+
+    public PythonObject callWithArgs(List args) {
+        // Delegate to the List/Map overload directly; routing through the
+        // varargs call(...) would wrap the list itself as a single argument.
+        return callWithArgsAndKwargs(args, null);
+    }
+
+    public PythonObject callWithKwargs(Map kwargs) {
+        // Same as above: delegate directly to the List/Map overload.
+        return callWithArgsAndKwargs(null, kwargs);
+    }
+
+    public PythonObject callWithArgsAndKwargs(List args, Map kwargs) {
+        PythonGIL.assertThreadSafe();
+        try (PythonGC _ = PythonGC.watch()) {
+            if (!Python.callable(this)) {
+                throw new PythonException("Object is not callable: " + toString());
+            }
+            PythonObject pyArgs;
+            PythonObject pyKwargs;
+            if (args == null) {
+                pyArgs = new PythonObject(PyTuple_New(0));
+            } else {
+                PythonObject argsList = PythonTypes.convert(args);
+                pyArgs = new PythonObject(PyList_AsTuple(argsList.getNativePythonObject()));
+            }
+            if (kwargs == null) {
+                pyKwargs = null;
+            } else {
+                pyKwargs = PythonTypes.convert(kwargs);
+            }
+            PythonObject ret = new PythonObject(
+                    PyObject_Call(
+                            nativePythonObject,
+                            pyArgs.nativePythonObject,
+                            pyKwargs == null ? null : pyKwargs.nativePythonObject
+                    )
+            );
+            PythonGC.keep(ret);
+            return ret;
+        }
+    }
+
+
+    public PythonObject attr(String attrName) {
+        PythonGIL.assertThreadSafe();
+        return new PythonObject(PyObject_GetAttrString(nativePythonObject, attrName));
+    }
+
+
+    public PythonObject(Object javaObject) {
+        PythonGIL.assertThreadSafe();
+        if (javaObject instanceof PythonObject) {
+            owned = false;
+            nativePythonObject = ((PythonObject) javaObject).nativePythonObject;
+        } else {
+            try (PythonGC _ = PythonGC.pause()) {
+                nativePythonObject = PythonTypes.convert(javaObject).getNativePythonObject();
+            }
+            PythonGC.register(this);
+        }
+    }
+
+    public int toInt() {
+        return PythonTypes.INT.toJava(this).intValue();
+    }
+
+    public long toLong() {
+        return PythonTypes.INT.toJava(this);
+    }
+
+    public float toFloat() {
+        return PythonTypes.FLOAT.toJava(this).floatValue();
+    }
+
+    public double toDouble() {
+        return PythonTypes.FLOAT.toJava(this);
+    }
+
+    public boolean toBoolean() {
+        return PythonTypes.BOOL.toJava(this);
+    }
+
+    public List toList() {
+        return PythonTypes.LIST.toJava(this);
+    }
+
+    public Map toMap() {
+        return PythonTypes.DICT.toJava(this);
+    }
+
+    public PythonObject get(int key) {
+        PythonGIL.assertThreadSafe();
+        return new PythonObject(PyObject_GetItem(nativePythonObject, PyLong_FromLong(key)));
+    }
+
+    public PythonObject get(String key) {
+        PythonGIL.assertThreadSafe();
+        return new PythonObject(PyObject_GetItem(nativePythonObject, PyUnicode_FromString(key)));
+    }
+
+    public PythonObject get(PythonObject key) {
+        PythonGIL.assertThreadSafe();
+        return new PythonObject(PyObject_GetItem(nativePythonObject, key.nativePythonObject));
+    }
+
+    public void set(PythonObject key, PythonObject value) {
+        PythonGIL.assertThreadSafe();
+        PyObject_SetItem(nativePythonObject, key.nativePythonObject, value.nativePythonObject);
+    }
+
+}
diff --git a/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonType.java b/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonType.java
new file mode 100644
index 000000000..b4806aa37
--- /dev/null
+++ b/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonType.java
@@ -0,0 +1,47 @@
+/*******************************************************************************
+ * Copyright (c) 2020 Konduit K.K.
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Apache License, Version 2.0 which is available at
+ * https://www.apache.org/licenses/LICENSE-2.0.
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ ******************************************************************************/
+
+package org.eclipse.python4j;
+
+
+public abstract class PythonType<T> {
+
+    private final String name;
+    private final Class<T> javaType;
+
+    public PythonType(String name, Class<T> javaType) {
+        this.name = name;
+        this.javaType = javaType;
+    }
+
+    public T adapt(Object javaObject) throws PythonException {
+        return (T) javaObject;
+    }
+
+    public abstract T toJava(PythonObject pythonObject);
+
+    public abstract PythonObject toPython(T javaObject);
+
+    public boolean accepts(Object javaObject) {
+        return javaType.isAssignableFrom(javaObject.getClass());
+    }
+
+    public String getName() {
+        return name;
+    }
+
+}
diff --git a/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonTypes.java b/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonTypes.java
new file mode 100644
index 000000000..0dc20f712
--- /dev/null
+++ b/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonTypes.java
@@ -0,0 +1,344 @@
+/*******************************************************************************
+ * Copyright (c) 2020 Konduit K.K.
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Apache License, Version 2.0 which is available at
+ * https://www.apache.org/licenses/LICENSE-2.0.
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ ******************************************************************************/
+
+package org.eclipse.python4j;
+
+
+import org.bytedeco.cpython.PyObject;
+
+import java.util.*;
+
+import static org.bytedeco.cpython.global.python.*;
+
+public class PythonTypes {
+
+
+    private static List<PythonType> getPrimitiveTypes() {
+        return Arrays.<PythonType>asList(STR, INT, FLOAT, BOOL);
+    }
+
+    private static List<PythonType> getCollectionTypes() {
+        return Arrays.<PythonType>asList(LIST, DICT);
+    }
+
+    private static List<PythonType> getExternalTypes() {
+        //TODO service loader
+        return new ArrayList<>();
+    }
+
+    public static List<PythonType> get() {
+        List<PythonType> ret = new ArrayList<>();
+        ret.addAll(getPrimitiveTypes());
+        ret.addAll(getCollectionTypes());
+        ret.addAll(getExternalTypes());
+        return ret;
+    }
+
+    public static PythonType get(String name) {
+        for (PythonType pt : get()) {
+            if (pt.getName().equals(name)) { // TODO use map instead?
+                return pt;
+            }
+        }
+        throw new PythonException("Unknown python type: " + name);
+    }
+
+    public static PythonType getPythonTypeForJavaObject(Object javaObject) {
+        for (PythonType pt : get()) {
+            if (pt.accepts(javaObject)) {
+                return pt;
+            }
+        }
+        throw new PythonException("Unable to find python type for java type: " + javaObject.getClass());
+    }
+
+    public static PythonType getPythonTypeForPythonObject(PythonObject pythonObject) {
+        PyObject pyType = PyObject_Type(pythonObject.getNativePythonObject());
+        try {
+            String pyTypeStr = PythonTypes.STR.toJava(new PythonObject(pyType, false));
+
+            for (PythonType pt : get()) {
+                // str(type(x)) has the form "<class 'name'>"
+                String pyTypeStr2 = "<class '" + pt.getName() + "'>";
+                if (pyTypeStr.equals(pyTypeStr2)) {
+                    return pt;
+                }
+            }
+            throw new PythonException("Unable to find converter for python object of type " + pyTypeStr);
+        } finally {
+            Py_DecRef(pyType);
+        }
+    }
+
+    public static PythonObject convert(Object javaObject) {
+        PythonType pt = getPythonTypeForJavaObject(javaObject);
+        return pt.toPython(pt.adapt(javaObject));
+    }
+
+    public static final PythonType<String> STR = new PythonType<String>("str", String.class) {
+
+        @Override
+        public String adapt(Object javaObject) {
+            if (javaObject instanceof String) {
+                return (String) javaObject;
+            }
+            throw new PythonException("Cannot cast object of type " + javaObject.getClass().getName() + " to String");
+        }
+
+        @Override
+        public String toJava(PythonObject pythonObject) {
+            PythonGIL.assertThreadSafe();
+            PyObject repr = PyObject_Str(pythonObject.getNativePythonObject());
+            PyObject str = PyUnicode_AsEncodedString(repr, "utf-8", "~E~");
+            String jstr = PyBytes_AsString(str).getString();
+            Py_DecRef(repr);
+            Py_DecRef(str);
+            return jstr;
+        }
+
+        @Override
+        public PythonObject toPython(String javaObject) {
+            return new PythonObject(PyUnicode_FromString(javaObject));
+        }
+    };
+
+    public static final PythonType<Long> INT = new PythonType<Long>("int", Long.class) {
+        @Override
+        public Long adapt(Object javaObject) {
+            if (javaObject instanceof Number) {
+                return ((Number) javaObject).longValue();
+            }
+            throw new PythonException("Cannot cast object of type " + javaObject.getClass().getName() + " to Long");
+        }
+
+        @Override
+        public Long toJava(PythonObject pythonObject) {
+            PythonGIL.assertThreadSafe();
+            long val = PyLong_AsLong(pythonObject.getNativePythonObject());
+            if (val == -1 && PyErr_Occurred() != null) {
+                throw new PythonException("Could not convert value to int: " + pythonObject.toString());
+            }
+            return val;
+        }
+
+        @Override
+        public boolean accepts(Object javaObject) {
+            return (javaObject instanceof Integer) || (javaObject instanceof Long);
+        }
+
+        @Override
+        public PythonObject toPython(Long javaObject) {
+            return new PythonObject(PyLong_FromLong(javaObject));
+        }
+    };
+
+    public static final PythonType<Double> FLOAT = new PythonType<Double>("float", Double.class) {
+
+        @Override
+        public Double adapt(Object javaObject) {
+            if (javaObject instanceof Number) {
+                return ((Number) javaObject).doubleValue();
+            }
+            throw new PythonException("Cannot cast object of type " + javaObject.getClass().getName() + " to Double");
+        }
+
+        @Override
+        public Double toJava(PythonObject pythonObject) {
+            PythonGIL.assertThreadSafe();
+            double val = PyFloat_AsDouble(pythonObject.getNativePythonObject());
+            if (val == -1 && PyErr_Occurred() != null) {
+                throw new PythonException("Could not convert value to float: " + pythonObject.toString());
+            }
+            return val;
+        }
+
+        @Override
+        public boolean accepts(Object javaObject) {
+            return (javaObject instanceof Float) || (javaObject instanceof Double);
+        }
+
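+        // PyFloat_FromDouble returns a new reference; the PythonObject
+        // constructor registers it with PythonGC for collection.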
@Override + public PythonObject toPython(Double javaObject) { + return new PythonObject(PyFloat_FromDouble(javaObject)); + } + }; + + + public static final PythonType BOOL = new PythonType("bool", Boolean.class) { + + @Override + public Boolean adapt(Object javaObject) { + if (javaObject instanceof Boolean) { + return (Boolean) javaObject; + } + throw new PythonException("Cannot cast object of type " + javaObject.getClass().getName() + " to Boolean"); + } + + @Override + public Boolean toJava(PythonObject pythonObject) { + PythonGIL.assertThreadSafe(); + PyObject builtins = PyImport_ImportModule("builtins"); + PyObject boolF = PyObject_GetAttrString(builtins, "bool"); + + PythonObject bool = new PythonObject(boolF, false).call(pythonObject); + boolean ret = PyLong_AsLong(bool.getNativePythonObject()) > 0; + bool.del(); + Py_DecRef(boolF); + Py_DecRef(builtins); + return ret; + } + + @Override + public PythonObject toPython(Boolean javaObject) { + return new PythonObject(PyBool_FromLong(javaObject ? 1 : 0)); + } + }; + + + public static final PythonType LIST = new PythonType("list", List.class) { + + @Override + public List adapt(Object javaObject) { + if (javaObject instanceof List) { + return (List) javaObject; + } else if (javaObject instanceof Object[]) { + return Arrays.asList((Object[]) javaObject); + } else { + throw new PythonException("Cannot cast object of type " + javaObject.getClass().getName() + " to List"); + } + } + + @Override + public List toJava(PythonObject pythonObject) { + PythonGIL.assertThreadSafe(); + List ret = new ArrayList(); + long n = PyObject_Size(pythonObject.getNativePythonObject()); + if (n < 0) { + throw new PythonException("Object cannot be interpreted as a List"); + } + for (long i = 0; i < n; i++) { + PyObject pyIndex = PyLong_FromLong(i); + PyObject pyItem = PyObject_GetItem(pythonObject.getNativePythonObject(), + pyIndex); + Py_DecRef(pyIndex); + PythonType pyItemType = getPythonTypeForPythonObject(new PythonObject(pyItem, false)); + ret.add(pyItemType.toJava(new PythonObject(pyItem, false))); + Py_DecRef(pyItem); + } + return ret; + } + + @Override + public PythonObject toPython(List javaObject) { + PythonGIL.assertThreadSafe(); + PyObject pyList = PyList_New(javaObject.size()); + for (int i = 0; i < javaObject.size(); i++) { + Object item = javaObject.get(i); + PythonObject pyItem; + boolean owned; + if (item instanceof PythonObject) { + pyItem = (PythonObject) item; + owned = false; + } else if (item instanceof PyObject) { + pyItem = new PythonObject((PyObject) item, false); + owned = false; + } else { + pyItem = PythonTypes.convert(item); + owned = true; + } + Py_IncRef(pyItem.getNativePythonObject()); // reference will be stolen by PyList_SetItem() + PyList_SetItem(pyList, i, pyItem.getNativePythonObject()); + if (owned) pyItem.del(); + } + return new PythonObject(pyList); + } + }; + + public static final PythonType DICT = new PythonType("dict", Map.class) { + + @Override + public Map adapt(Object javaObject) { + if (javaObject instanceof Map) { + return (Map) javaObject; + } + throw new PythonException("Cannot cast object of type " + javaObject.getClass().getName() + " to Map"); + } + + @Override + public Map toJava(PythonObject pythonObject) { + PythonGIL.assertThreadSafe(); + HashMap ret = new HashMap(); + PyObject dictType = new PyObject(PyDict_Type()); + if (PyObject_IsInstance(pythonObject.getNativePythonObject(), dictType) != 1) { + throw new PythonException("Expected dict, received: " + pythonObject.toString()); + } + + PyObject keys = 
PyDict_Keys(pythonObject.getNativePythonObject()); + PyObject keysIter = PyObject_GetIter(keys); + PyObject vals = PyDict_Values(pythonObject.getNativePythonObject()); + PyObject valsIter = PyObject_GetIter(vals); + try { + long n = PyObject_Size(pythonObject.getNativePythonObject()); + for (long i = 0; i < n; i++) { + PythonObject pyKey = new PythonObject(PyIter_Next(keysIter), false); + PythonObject pyVal = new PythonObject(PyIter_Next(valsIter), false); + PythonType pyKeyType = getPythonTypeForPythonObject(pyKey); + PythonType pyValType = getPythonTypeForPythonObject(pyVal); + ret.put(pyKeyType.toJava(pyKey), pyValType.toJava(pyVal)); + Py_DecRef(pyKey.getNativePythonObject()); + Py_DecRef(pyVal.getNativePythonObject()); + } + } finally { + Py_DecRef(keysIter); + Py_DecRef(valsIter); + Py_DecRef(keys); + Py_DecRef(vals); + } + return ret; + } + + @Override + public PythonObject toPython(Map javaObject) { + PythonGIL.assertThreadSafe(); + PyObject pyDict = PyDict_New(); + for (Object k : javaObject.keySet()) { + PythonObject pyKey; + if (k instanceof PythonObject) { + pyKey = (PythonObject) k; + } else if (k instanceof PyObject) { + pyKey = new PythonObject((PyObject) k); + } else { + pyKey = PythonTypes.convert(k); + } + Object v = javaObject.get(k); + PythonObject pyVal; + pyVal = PythonTypes.convert(v); + int errCode = PyDict_SetItem(pyDict, pyKey.getNativePythonObject(), pyVal.getNativePythonObject()); + if (errCode != 0) { + String keyStr = pyKey.toString(); + pyKey.del(); + pyVal.del(); + throw new PythonException("Unable to create python dictionary. Unhashable key: " + keyStr); + } + pyKey.del(); + pyVal.del(); + } + return new PythonObject(pyDict); + } + }; +} diff --git a/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonVariable.java b/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonVariable.java new file mode 100644 index 000000000..3deb4d2e7 --- /dev/null +++ b/python4j/python4j-core/src/main/java/org/eclipse/python4j/PythonVariable.java @@ -0,0 +1,64 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +package org.eclipse.python4j; + +@lombok.Data +public class PythonVariable { + + private String name; + private String type; + private T value; + + private static boolean validateVariableName(String s) { + if (s.isEmpty()) return false; + if (!Character.isJavaIdentifierStart(s.charAt(0))) return false; + for (int i = 1; i < s.length(); i++) + if (!Character.isJavaIdentifierPart(s.charAt(i))) + return false; + return true; + } + + public PythonVariable(String name, PythonType type, Object value) { + if (!validateVariableName(name)) { + throw new PythonException("Invalid identifier: " + name); + } + this.name = name; + this.type = type.getName(); + setValue(value); + } + + public PythonVariable(String name, PythonType type) { + this(name, type, null); + } + + public PythonType getType() { + return PythonTypes.get(this.type); + } + + public T getValue() { + return this.value; + } + + public void setValue(Object value) { + this.value = value == null ? null : getType().adapt(value); + } + + public PythonObject getPythonObject() { + return getType().toPython(value); + } + +} diff --git a/python4j/python4j-core/src/main/resources/org/eclipse/python4j/pythonexec/pythonexec.py b/python4j/python4j-core/src/main/resources/org/eclipse/python4j/pythonexec/pythonexec.py new file mode 100644 index 000000000..7ae8f6734 --- /dev/null +++ b/python4j/python4j-core/src/main/resources/org/eclipse/python4j/pythonexec/pythonexec.py @@ -0,0 +1,36 @@ +# /******************************************************************************* +# * Copyright (c) 2019 Konduit K.K. +# * +# * This program and the accompanying materials are made available under the +# * terms of the Apache License, Version 2.0 which is available at +# * https://www.apache.org/licenses/LICENSE-2.0. +# * +# * Unless required by applicable law or agreed to in writing, software +# * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# * License for the specific language governing permissions and limitations +# * under the License. +# * +# * SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************/ + +import sys +import traceback +import json +import inspect + +__python_exception__ = "" +try: + pass + sys.stdout.flush() + sys.stderr.flush() +except Exception as ex: + __python_exception__ = ex + try: + exc_info = sys.exc_info() + finally: + print(ex) + traceback.print_exception(*exc_info) + sys.stdout.flush() + sys.stderr.flush() + diff --git a/python4j/python4j-core/src/test/java/PythonBasicExecutionTest.java b/python4j/python4j-core/src/test/java/PythonBasicExecutionTest.java new file mode 100644 index 000000000..9f5b43dba --- /dev/null +++ b/python4j/python4j-core/src/test/java/PythonBasicExecutionTest.java @@ -0,0 +1,108 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + + +import org.eclipse.python4j.*; +import org.junit.Assert; +import org.junit.Test; + +import javax.annotation.concurrent.NotThreadSafe; +import java.util.*; + +@NotThreadSafe +public class PythonBasicExecutionTest { + + @Test + public void testSimpleExec() { + String code = "print('Hello World')"; + PythonExecutioner.exec(code); + } + + @Test + public void testBadCode() throws Exception { + try { + String code = "printx('Hello world')"; + PythonExecutioner.exec(code); + } catch (Exception e) { + Assert.assertEquals("NameError: name 'printx' is not defined", e.getMessage()); + return; + } + throw new Exception("Bad code did not throw!"); + } + + @Test + public void testExecWithInputs() { + List inputs = new ArrayList<>(); + inputs.add(new PythonVariable<>("x", PythonTypes.STR, "Hello ")); + inputs.add(new PythonVariable<>("y", PythonTypes.STR, "World")); + String code = "print(x + y)"; + PythonExecutioner.exec(code, inputs, null); + + } + + @Test + public void testExecWithInputsAndOutputs() { + List inputs = new ArrayList<>(); + inputs.add(new PythonVariable<>("x", PythonTypes.STR, "Hello ")); + inputs.add(new PythonVariable<>("y", PythonTypes.STR, "World")); + PythonVariable out = new PythonVariable<>("z", PythonTypes.STR); + String code = "z = x + y"; + PythonExecutioner.exec(code, inputs, Collections.singletonList(out)); + Assert.assertEquals("Hello World", out.getValue()); + + } + + @Test + public void testExecAndReturnAllVariables() { + PythonContextManager.reset(); + String code = "a = 5\nb = '10'\nc = 20.0"; + List vars = PythonExecutioner.execAndReturnAllVariables(code); + + Assert.assertEquals("a", vars.get(0).getName()); + Assert.assertEquals(PythonTypes.INT, vars.get(0).getType()); + Assert.assertEquals(5L, (long) vars.get(0).getValue()); + + Assert.assertEquals("b", vars.get(1).getName()); + Assert.assertEquals(PythonTypes.STR, vars.get(1).getType()); + Assert.assertEquals("10", vars.get(1).getValue().toString()); + + Assert.assertEquals("c", vars.get(2).getName()); + Assert.assertEquals(PythonTypes.FLOAT, vars.get(2).getType()); + Assert.assertEquals(20.0, (double) vars.get(2).getValue(), 1e-5); + } + + @Test + public void testExecWithInputsAndReturnAllVariables() { + PythonContextManager.reset(); + List inputs = new ArrayList<>(); + inputs.add(new PythonVariable<>("a", PythonTypes.INT, 5)); + String code = "b = '10'\nc = 20.0 + a"; + List vars = PythonExecutioner.execAndReturnAllVariables(code, inputs); + + Assert.assertEquals("a", vars.get(0).getName()); + Assert.assertEquals(PythonTypes.INT, vars.get(0).getType()); + Assert.assertEquals(5L, (long) vars.get(0).getValue()); + + Assert.assertEquals("b", vars.get(1).getName()); + Assert.assertEquals(PythonTypes.STR, vars.get(1).getType()); + Assert.assertEquals("10", vars.get(1).getValue().toString()); + + Assert.assertEquals("c", vars.get(2).getName()); + Assert.assertEquals(PythonTypes.FLOAT, vars.get(2).getType()); + Assert.assertEquals(25.0, (double) vars.get(2).getValue(), 1e-5); + } + +} diff --git a/python4j/python4j-core/src/test/java/PythonCollectionsTest.java b/python4j/python4j-core/src/test/java/PythonCollectionsTest.java new file mode 100644 index 000000000..7e63d9d28 --- /dev/null +++ b/python4j/python4j-core/src/test/java/PythonCollectionsTest.java @@ -0,0 +1,62 @@ 
+/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + + +import org.eclipse.python4j.PythonException; +import org.eclipse.python4j.PythonObject; +import org.eclipse.python4j.PythonTypes; +import org.junit.Assert; +import org.junit.Test; + +import java.util.*; + + +@javax.annotation.concurrent.NotThreadSafe +public class PythonCollectionsTest { + + + @Test + public void testPythonDictFromMap() throws PythonException { + Map map = new HashMap(); + map.put("a", 1); + map.put(1, "a"); + map.put("list1", Arrays.asList(1, 2.0, 3, 4f)); + Map innerMap = new HashMap(); + innerMap.put("b", 2); + innerMap.put(2, "b"); + map.put("innermap", innerMap); + map.put("list2", Arrays.asList(4, "5", innerMap, false, true)); + PythonObject dict = PythonTypes.convert(map); + Map map2 = PythonTypes.DICT.toJava(dict); + Assert.assertEquals(map.toString(), map2.toString()); + } + + @Test + public void testPythonListFromList() throws PythonException{ + List list = new ArrayList<>(); + list.add(1); + list.add("2"); + list.add(Arrays.asList("a", 1.0, 2f, 10, true, false)); + Map map = new HashMap(); + map.put("a", 1); + map.put(1, "a"); + map.put("list1", Arrays.asList(1, 2.0, 3, 4f)); + list.add(map); + PythonObject dict = PythonTypes.convert(list); + List list2 = PythonTypes.LIST.toJava(dict); + Assert.assertEquals(list.toString(), list2.toString()); + } +} diff --git a/python4j/python4j-core/src/test/java/PythonContextManagerTest.java b/python4j/python4j-core/src/test/java/PythonContextManagerTest.java new file mode 100644 index 000000000..a4451764c --- /dev/null +++ b/python4j/python4j-core/src/test/java/PythonContextManagerTest.java @@ -0,0 +1,51 @@ + +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + + +import org.eclipse.python4j.Python; +import org.eclipse.python4j.PythonContextManager; +import org.eclipse.python4j.PythonExecutioner; +import org.junit.Assert; +import org.junit.Test; +import javax.annotation.concurrent.NotThreadSafe; + +@NotThreadSafe +public class PythonContextManagerTest { + + @Test + public void testInt() throws Exception{ + Python.setContext("context1"); + Python.exec("a = 1"); + Python.setContext("context2"); + Python.exec("a = 2"); + Python.setContext("context3"); + Python.exec("a = 3"); + + + Python.setContext("context1"); + Assert.assertEquals(1, PythonExecutioner.getVariable("a").toInt()); + + Python.setContext("context2"); + Assert.assertEquals(2, PythonExecutioner.getVariable("a").toInt()); + + Python.setContext("context3"); + Assert.assertEquals(3, PythonExecutioner.getVariable("a").toInt()); + + PythonContextManager.deleteNonMainContexts(); + } + +} diff --git a/python4j/python4j-core/src/test/java/PythonGCTest.java b/python4j/python4j-core/src/test/java/PythonGCTest.java new file mode 100644 index 000000000..f8c6ecba5 --- /dev/null +++ b/python4j/python4j-core/src/test/java/PythonGCTest.java @@ -0,0 +1,54 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +import org.eclipse.python4j.Python; +import org.eclipse.python4j.PythonGC; +import org.eclipse.python4j.PythonObject; +import org.junit.Assert; +import org.junit.Test; + +import javax.annotation.concurrent.NotThreadSafe; + + +@NotThreadSafe +public class PythonGCTest { + + @Test + public void testGC() throws Exception{ + PythonObject gcModule = Python.importModule("gc"); + PythonObject getObjects = gcModule.attr("get_objects"); + PythonObject pyObjCount1 = Python.len(getObjects.call()); + long objCount1 = pyObjCount1.toLong(); + PythonObject pyList = Python.list(); + pyList.attr("append").call("a"); + pyList.attr("append").call(1.0); + pyList.attr("append").call(true); + PythonObject pyObjCount2 = Python.len(getObjects.call()); + long objCount2 = pyObjCount2.toLong(); + long diff = objCount2 - objCount1; + Assert.assertTrue(diff > 2); + try(PythonGC gc = PythonGC.watch()){ + PythonObject pyList2 = Python.list(); + pyList2.attr("append").call("a"); + pyList2.attr("append").call(1.0); + pyList2.attr("append").call(true); + } + PythonObject pyObjCount3 = Python.len(getObjects.call()); + long objCount3 = pyObjCount3.toLong(); + diff = objCount3 - objCount2; + Assert.assertEquals(2, diff);// 2 objects created during function call + } +} diff --git a/python4j/python4j-core/src/test/java/PythonJobTest.java b/python4j/python4j-core/src/test/java/PythonJobTest.java new file mode 100644 index 000000000..016045a25 --- /dev/null +++ b/python4j/python4j-core/src/test/java/PythonJobTest.java @@ -0,0 +1,287 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +import org.eclipse.python4j.PythonContextManager; +import org.eclipse.python4j.PythonJob; +import org.eclipse.python4j.PythonTypes; +import org.eclipse.python4j.PythonVariable; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.List; + +import static org.junit.Assert.assertEquals; + + +@javax.annotation.concurrent.NotThreadSafe +public class PythonJobTest { + + @Test + public void testPythonJobBasic() throws Exception{ + PythonContextManager.deleteNonMainContexts(); + + String code = "c = a + b"; + PythonJob job = new PythonJob("job1", code, false); + + List inputs = new ArrayList<>(); + inputs.add(new PythonVariable<>("a", PythonTypes.INT, 2)); + inputs.add(new PythonVariable<>("b", PythonTypes.INT, 3)); + + List outputs = new ArrayList<>(); + outputs.add(new PythonVariable<>("c", PythonTypes.INT)); + + + job.exec(inputs, outputs); + assertEquals("c", outputs.get(0).getName()); + assertEquals(5L, (long)outputs.get(0).getValue()); + + inputs = new ArrayList<>(); + inputs.add(new PythonVariable<>("a", PythonTypes.FLOAT, 3.0)); + inputs.add(new PythonVariable<>("b", PythonTypes.FLOAT, 4.0)); + + outputs = new ArrayList<>(); + outputs.add(new PythonVariable<>("c", PythonTypes.FLOAT)); + + + job.exec(inputs, outputs); + + assertEquals("c", outputs.get(0).getName()); + assertEquals(7.0, (double)outputs.get(0).getValue(), 1e-5); + + + } + + @Test + public void testPythonJobReturnAllVariables()throws Exception{ + PythonContextManager.deleteNonMainContexts(); + + String code = "c = a + b"; + PythonJob job = new PythonJob("job1", code, false); + + List inputs = new ArrayList<>(); + inputs.add(new PythonVariable<>("a", PythonTypes.INT, 2)); + inputs.add(new PythonVariable<>("b", PythonTypes.INT, 3)); + + + List outputs = job.execAndReturnAllVariables(inputs); + + + assertEquals("a", outputs.get(0).getName()); + assertEquals(2L, (long)outputs.get(0).getValue()); + assertEquals("b", outputs.get(1).getName()); + assertEquals(3L, (long)outputs.get(1).getValue()); + assertEquals("c", outputs.get(2).getName()); + assertEquals(5L, (long)outputs.get(2).getValue()); + + inputs = new ArrayList<>(); + inputs.add(new PythonVariable<>("a", PythonTypes.FLOAT, 3.0)); + inputs.add(new PythonVariable<>("b", PythonTypes.FLOAT, 4.0)); + outputs = job.execAndReturnAllVariables(inputs); + assertEquals("a", outputs.get(0).getName()); + assertEquals(3.0, (double)outputs.get(0).getValue(), 1e-5); + assertEquals("b", outputs.get(1).getName()); + assertEquals(4.0, (double)outputs.get(1).getValue(), 1e-5); + assertEquals("c", outputs.get(2).getName()); + assertEquals(7.0, (double)outputs.get(2).getValue(), 1e-5); + + } + + + @Test + public void testMultiplePythonJobsParallel()throws Exception{ + PythonContextManager.deleteNonMainContexts(); + String code1 = "c = a + b"; + PythonJob job1 = new PythonJob("job1", code1, false); + + String code2 = "c = a - b"; + PythonJob job2 = new PythonJob("job2", code2, false); + + List inputs = new ArrayList<>(); + inputs.add(new PythonVariable<>("a", PythonTypes.INT, 2)); + inputs.add(new PythonVariable<>("b", PythonTypes.INT, 3)); + + + List outputs = new ArrayList<>(); + outputs.add(new PythonVariable<>("c", PythonTypes.INT)); + + job1.exec(inputs, outputs); + + assertEquals("c", outputs.get(0).getName()); + assertEquals(5L, (long)outputs.get(0).getValue()); + + + job2.exec(inputs, outputs); + + assertEquals("c", outputs.get(0).getName()); 
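+        // job2 runs c = a - b, so with a = 2 and b = 3 the expected value is -1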
+ assertEquals(-1L, (long)outputs.get(0).getValue()); + + inputs = new ArrayList<>(); + inputs.add(new PythonVariable<>("a", PythonTypes.FLOAT, 3.0)); + inputs.add(new PythonVariable<>("b", PythonTypes.FLOAT, 4.0)); + + outputs = new ArrayList<>(); + outputs.add(new PythonVariable<>("c", PythonTypes.FLOAT)); + + + job1.exec(inputs, outputs); + + assertEquals("c", outputs.get(0).getName()); + assertEquals(7.0, (double)outputs.get(0).getValue(), 1e-5); + + job2.exec(inputs, outputs); + + assertEquals("c", outputs.get(0).getName()); + assertEquals(-1., (double)outputs.get(0).getValue(), 1e-5); + + } + + + @Test + public void testPythonJobSetupRun()throws Exception{ + + PythonContextManager.deleteNonMainContexts(); + String code = "five=None\n" + + "def setup():\n" + + " global five\n"+ + " five = 5\n\n" + + "def run(a, b):\n" + + " c = a + b + five\n"+ + " return {'c':c}\n\n"; + PythonJob job = new PythonJob("job1", code, true); + + List inputs = new ArrayList<>(); + inputs.add(new PythonVariable<>("a", PythonTypes.INT, 2)); + inputs.add(new PythonVariable<>("b", PythonTypes.INT, 3)); + + List outputs = new ArrayList<>(); + outputs.add(new PythonVariable<>("c", PythonTypes.INT)); + job.exec(inputs, outputs); + + assertEquals("c", outputs.get(0).getName()); + assertEquals(10L, (long)outputs.get(0).getValue()); + + + inputs = new ArrayList<>(); + inputs.add(new PythonVariable<>("a", PythonTypes.FLOAT, 3.0)); + inputs.add(new PythonVariable<>("b", PythonTypes.FLOAT, 4.0)); + + + outputs = new ArrayList<>(); + outputs.add(new PythonVariable<>("c", PythonTypes.FLOAT)); + + job.exec(inputs, outputs); + + assertEquals("c", outputs.get(0).getName()); + assertEquals(12.0, (double)outputs.get(0).getValue(), 1e-5); + + } + @Test + public void testPythonJobSetupRunAndReturnAllVariables()throws Exception{ + PythonContextManager.deleteNonMainContexts(); + String code = "five=None\n" + + "c=None\n"+ + "def setup():\n" + + " global five\n"+ + " five = 5\n\n" + + "def run(a, b):\n" + + " global c\n" + + " c = a + b + five\n"; + PythonJob job = new PythonJob("job1", code, true); + + List inputs = new ArrayList<>(); + inputs.add(new PythonVariable<>("a", PythonTypes.INT, 2)); + inputs.add(new PythonVariable<>("b", PythonTypes.INT, 3)); + + List outputs = job.execAndReturnAllVariables(inputs); + + assertEquals("c", outputs.get(1).getName()); + assertEquals(10L, (long)outputs.get(1).getValue()); + + inputs = new ArrayList<>(); + inputs.add(new PythonVariable<>("a", PythonTypes.FLOAT, 3.0)); + inputs.add(new PythonVariable<>("b", PythonTypes.FLOAT, 4.0)); + + outputs = job.execAndReturnAllVariables(inputs); + + + assertEquals("c", outputs.get(1).getName()); + assertEquals(12.0, (double)outputs.get(1).getValue(), 1e-5); + + + + } + + @Test + public void testMultiplePythonJobsSetupRunParallel()throws Exception{ + PythonContextManager.deleteNonMainContexts(); + + String code1 = "five=None\n" + + "def setup():\n" + + " global five\n"+ + " five = 5\n\n" + + "def run(a, b):\n" + + " c = a + b + five\n"+ + " return {'c':c}\n\n"; + PythonJob job1 = new PythonJob("job1", code1, true); + + String code2 = "five=None\n" + + "def setup():\n" + + " global five\n"+ + " five = 5\n\n" + + "def run(a, b):\n" + + " c = a + b - five\n"+ + " return {'c':c}\n\n"; + PythonJob job2 = new PythonJob("job2", code2, true); + + List inputs = new ArrayList<>(); + inputs.add(new PythonVariable<>("a", PythonTypes.INT, 2)); + inputs.add(new PythonVariable<>("b", PythonTypes.INT, 3)); + + + List outputs = new ArrayList<>(); + outputs.add(new 
PythonVariable<>("c", PythonTypes.INT)); + + job1.exec(inputs, outputs); + + assertEquals("c", outputs.get(0).getName()); + assertEquals(10L, (long)outputs.get(0).getValue()); + + job2.exec(inputs, outputs); + + assertEquals("c", outputs.get(0).getName()); + assertEquals(0L, (long)outputs.get(0).getValue()); + + inputs = new ArrayList<>(); + inputs.add(new PythonVariable<>("a", PythonTypes.FLOAT, 3.0)); + inputs.add(new PythonVariable<>("b", PythonTypes.FLOAT, 4.0)); + + outputs = new ArrayList<>(); + outputs.add(new PythonVariable<>("c", PythonTypes.FLOAT)); + + + job1.exec(inputs, outputs); + + assertEquals("c", outputs.get(0).getName()); + assertEquals(12.0, (double)outputs.get(0).getValue(), 1e-5); + + job2.exec(inputs, outputs); + + assertEquals("c", outputs.get(0).getName()); + assertEquals(2.0, (double)outputs.get(0).getValue(), 1e-5); + + } + +} diff --git a/python4j/python4j-core/src/test/java/PythonMultiThreadTest.java b/python4j/python4j-core/src/test/java/PythonMultiThreadTest.java new file mode 100644 index 000000000..ec544b65f --- /dev/null +++ b/python4j/python4j-core/src/test/java/PythonMultiThreadTest.java @@ -0,0 +1,169 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +import org.eclipse.python4j.*; +import org.junit.Assert; +import org.junit.Test; + +import javax.annotation.concurrent.NotThreadSafe; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + + +@NotThreadSafe +public class PythonMultiThreadTest { + + @Test + public void testMultiThreading1()throws Throwable{ + final List exceptions = Collections.synchronizedList(new ArrayList()); + Runnable runnable = new Runnable() { + @Override + public void run() { + try(PythonGIL gil = PythonGIL.lock()){ + try(PythonGC gc = PythonGC.watch()){ + List inputs = new ArrayList<>(); + inputs.add(new PythonVariable<>("x", PythonTypes.STR, "Hello ")); + inputs.add(new PythonVariable<>("y", PythonTypes.STR, "World")); + PythonVariable out = new PythonVariable<>("z", PythonTypes.STR); + String code = "z = x + y"; + PythonExecutioner.exec(code, inputs, Collections.singletonList(out)); + Assert.assertEquals("Hello World", out.getValue()); + System.out.println(out.getValue() + " From thread " + Thread.currentThread().getId()); + } + }catch (Throwable e){ + exceptions.add(e); + } + } + }; + + int numThreads = 10; + Thread[] threads = new Thread[numThreads]; + for (int i = 0; i < threads.length; i++){ + threads[i] = new Thread(runnable); + } + for (int i = 0; i < threads.length; i++){ + threads[i].start(); + } + Thread.sleep(100); + for (int i = 0; i < threads.length; i++){ + threads[i].join(); + } + if (!exceptions.isEmpty()){ + throw(exceptions.get(0)); + } + + } + @Test + public void testMultiThreading2()throws Throwable{ + final List exceptions = Collections.synchronizedList(new ArrayList()); + Runnable runnable = new Runnable() { + @Override + public void run() { + try(PythonGIL gil = PythonGIL.lock()){ + try(PythonGC gc = PythonGC.watch()){ + PythonContextManager.reset(); + PythonContextManager.reset(); + List inputs = new ArrayList<>(); + inputs.add(new PythonVariable<>("a", PythonTypes.INT, 5)); + String code = "b = '10'\nc = 20.0 + a"; + List vars = PythonExecutioner.execAndReturnAllVariables(code, inputs); + + Assert.assertEquals("a", vars.get(0).getName()); + Assert.assertEquals(PythonTypes.INT, vars.get(0).getType()); + Assert.assertEquals(5L, (long)vars.get(0).getValue()); + + Assert.assertEquals("b", vars.get(1).getName()); + Assert.assertEquals(PythonTypes.STR, vars.get(1).getType()); + Assert.assertEquals("10", vars.get(1).getValue().toString()); + + Assert.assertEquals("c", vars.get(2).getName()); + Assert.assertEquals(PythonTypes.FLOAT, vars.get(2).getType()); + Assert.assertEquals(25.0, (double)vars.get(2).getValue(), 1e-5); + } + }catch (Throwable e){ + exceptions.add(e); + } + } + }; + + int numThreads = 10; + Thread[] threads = new Thread[numThreads]; + for (int i = 0; i < threads.length; i++){ + threads[i] = new Thread(runnable); + } + for (int i = 0; i < threads.length; i++){ + threads[i].start(); + } + Thread.sleep(100); + for (int i = 0; i < threads.length; i++){ + threads[i].join(); + } + if (!exceptions.isEmpty()){ + throw(exceptions.get(0)); + } + } + + @Test + public void testMultiThreading3() throws Throwable{ + PythonContextManager.deleteNonMainContexts(); + + String code = "c = a + b"; + final PythonJob job = new PythonJob("job1", code, false); + + final List exceptions = Collections.synchronizedList(new ArrayList()); + + class JobThread extends Thread{ + private int a, b, c; + public JobThread(int a, 
int b, int c){ + this.a = a; + this.b = b; + this.c = c; + } + @Override + public void run(){ + try{ + PythonVariable out = new PythonVariable<>("c", PythonTypes.INT); + job.exec(Arrays.asList(new PythonVariable<>("a", PythonTypes.INT, a), + new PythonVariable<>("b", PythonTypes.INT, b)), + Collections.singletonList(out)); + Assert.assertEquals(c, out.getValue().intValue()); + }catch (Exception e){ + exceptions.add(e); + } + + } + } + int numThreads = 10; + JobThread[] threads = new JobThread[numThreads]; + for (int i=0; i < threads.length; i++){ + threads[i] = new JobThread(i, i + 3, 2 * i +3); + } + + for (int i = 0; i < threads.length; i++){ + threads[i].start(); + } + Thread.sleep(100); + for (int i = 0; i < threads.length; i++){ + threads[i].join(); + } + + if (!exceptions.isEmpty()){ + throw(exceptions.get(0)); + } + } +} diff --git a/python4j/python4j-core/src/test/java/PythonPrimitiveTypesTest.java b/python4j/python4j-core/src/test/java/PythonPrimitiveTypesTest.java new file mode 100644 index 000000000..ae10ed8dc --- /dev/null +++ b/python4j/python4j-core/src/test/java/PythonPrimitiveTypesTest.java @@ -0,0 +1,82 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + + +import org.eclipse.python4j.PythonException; +import org.eclipse.python4j.PythonObject; +import org.eclipse.python4j.PythonTypes; +import org.junit.Assert; +import org.junit.Test; + +public class PythonPrimitiveTypesTest { + + @Test + public void testInt() throws PythonException { + long j = 3; + PythonObject p = PythonTypes.INT.toPython(j); + long j2 = PythonTypes.INT.toJava(p); + + Assert.assertEquals(j, j2); + + PythonObject p2 = PythonTypes.convert(j); + long j3 = PythonTypes.INT.toJava(p2); + + Assert.assertEquals(j, j3); + } + + @Test + public void testStr() throws PythonException{ + String s = "abcd"; + PythonObject p = PythonTypes.STR.toPython(s); + String s2 = PythonTypes.STR.toJava(p); + + Assert.assertEquals(s, s2); + + PythonObject p2 = PythonTypes.convert(s); + String s3 = PythonTypes.STR.toJava(p2); + + Assert.assertEquals(s, s3); + } + + @Test + public void testFloat() throws PythonException{ + double f = 7; + PythonObject p = PythonTypes.FLOAT.toPython(f); + double f2 = PythonTypes.FLOAT.toJava(p); + + Assert.assertEquals(f, f2, 1e-5); + + PythonObject p2 = PythonTypes.convert(f); + double f3 = PythonTypes.FLOAT.toJava(p2); + + Assert.assertEquals(f, f3, 1e-5); + } + + @Test + public void testBool() throws PythonException{ + boolean b = true; + PythonObject p = PythonTypes.BOOL.toPython(b); + boolean b2 = PythonTypes.BOOL.toJava(p); + + Assert.assertEquals(b, b2); + + PythonObject p2 = PythonTypes.convert(b); + boolean b3 = PythonTypes.BOOL.toJava(p2); + + Assert.assertEquals(b, b3); + } + +} diff --git a/python4j/python4j-numpy/pom.xml b/python4j/python4j-numpy/pom.xml new file mode 100644 index 000000000..527a9343f --- /dev/null +++ b/python4j/python4j-numpy/pom.xml @@ -0,0 +1,42 @@ + + + + python4j-parent + org.eclipse + 1.0.0-SNAPSHOT + + 4.0.0 + + python4j-numpy + + + + org.bytedeco + numpy-platform + ${numpy.javacpp.version} + + + org.nd4j + nd4j-native-api + ${project.version} + + + org.nd4j + nd4j-common-tests + ${nd4j.version} + test + + + + + + test-nd4j-native + + + test-nd4j-cuda-10.2 + + + + \ No newline at end of file From 880080312395c7827814fbe7619e976a248631a4 Mon Sep 17 00:00:00 2001 From: Serhii Shepel <9946053+sshepel@users.noreply.github.com> Date: Thu, 21 May 2020 10:04:05 +0300 Subject: [PATCH 11/21] Switch to static linking for Android (#472) --- libnd4j/cmake/android-arm.cmake | 2 +- libnd4j/cmake/android-arm64.cmake | 2 +- libnd4j/cmake/android-x86.cmake | 2 +- libnd4j/cmake/android-x86_64.cmake | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/libnd4j/cmake/android-arm.cmake b/libnd4j/cmake/android-arm.cmake index 427bc6a34..4db515400 100644 --- a/libnd4j/cmake/android-arm.cmake +++ b/libnd4j/cmake/android-arm.cmake @@ -3,7 +3,7 @@ set(CMAKE_SYSTEM_NAME Android) set(CMAKE_ANDROID_ARCH_ABI armeabi-v7a) set(CMAKE_ANDROID_NDK "$ENV{ANDROID_NDK}") -set(CMAKE_ANDROID_STL_TYPE c++_shared) +set(CMAKE_ANDROID_STL_TYPE c++_static) set(CMAKE_SYSTEM_VERSION "$ENV{ANDROID_VERSION}") set(CMAKE_ANDROID_NDK_TOOLCHAIN_VERSION clang) diff --git a/libnd4j/cmake/android-arm64.cmake b/libnd4j/cmake/android-arm64.cmake index 33ee454e7..68a4e60a5 100644 --- a/libnd4j/cmake/android-arm64.cmake +++ b/libnd4j/cmake/android-arm64.cmake @@ -3,7 +3,7 @@ set(CMAKE_SYSTEM_NAME Android) set(CMAKE_ANDROID_ARCH_ABI arm64-v8a) set(CMAKE_ANDROID_NDK "$ENV{ANDROID_NDK}") -set(CMAKE_ANDROID_STL_TYPE c++_shared) 
+set(CMAKE_ANDROID_STL_TYPE c++_static) set(CMAKE_SYSTEM_VERSION "$ENV{ANDROID_VERSION}") set(CMAKE_ANDROID_NDK_TOOLCHAIN_VERSION clang) diff --git a/libnd4j/cmake/android-x86.cmake b/libnd4j/cmake/android-x86.cmake index 7290b0b8d..be6600bcc 100644 --- a/libnd4j/cmake/android-x86.cmake +++ b/libnd4j/cmake/android-x86.cmake @@ -3,7 +3,7 @@ set(CMAKE_SYSTEM_NAME Android) set(CMAKE_ANDROID_ARCH_ABI x86) set(CMAKE_ANDROID_NDK "$ENV{ANDROID_NDK}") -set(CMAKE_ANDROID_STL_TYPE c++_shared) +set(CMAKE_ANDROID_STL_TYPE c++_static) set(CMAKE_SYSTEM_VERSION "$ENV{ANDROID_VERSION}") set(CMAKE_ANDROID_NDK_TOOLCHAIN_VERSION clang) diff --git a/libnd4j/cmake/android-x86_64.cmake b/libnd4j/cmake/android-x86_64.cmake index 5ff797910..ea9b5e356 100644 --- a/libnd4j/cmake/android-x86_64.cmake +++ b/libnd4j/cmake/android-x86_64.cmake @@ -3,7 +3,7 @@ set(CMAKE_SYSTEM_NAME Android) set(CMAKE_ANDROID_ARCH_ABI x86_64) set(CMAKE_ANDROID_NDK "$ENV{ANDROID_NDK}") -set(CMAKE_ANDROID_STL_TYPE c++_shared) +set(CMAKE_ANDROID_STL_TYPE c++_static) set(CMAKE_SYSTEM_VERSION "$ENV{ANDROID_VERSION}") set(CMAKE_ANDROID_NDK_TOOLCHAIN_VERSION clang) From ecdee6369dde14883ea753aa5d080d329816f6cc Mon Sep 17 00:00:00 2001 From: Fariz Rahman Date: Sun, 24 May 2020 14:47:17 +0400 Subject: [PATCH 12/21] IntIndexer -> UIntIndexer (#476) --- .../nd4j-api/src/main/java/org/nd4j/linalg/factory/Nd4j.java | 1 + 1 file changed, 1 insertion(+) diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/factory/Nd4j.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/factory/Nd4j.java index 88d0cbe44..12a76f2b9 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/factory/Nd4j.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/factory/Nd4j.java @@ -1131,6 +1131,7 @@ public class Nd4j { case LONG: return LongIndexer.create((LongPointer) pointer); case UINT32: + return UIntIndexer.create((IntPointer) pointer); case INT: return IntIndexer.create((IntPointer) pointer); case UINT16: From a18417193d2d0c9f0de516bc325f9c43c92be55e Mon Sep 17 00:00:00 2001 From: shugeo Date: Tue, 26 May 2020 14:13:48 +0300 Subject: [PATCH 13/21] Shugeo resize area fix4 (#465) * Restore resize_area test suite. Signed-off-by: shugeo * Fixed resize_area kernel for cuda platform to avoid range violation. Signed-off-by: shugeo * Fixed resizeAreaKernel start. Signed-off-by: shugeo * Fixed potential error handling with resize area cuda implementation. 
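A side note on the IntIndexer -> UIntIndexer patch above: routing the UINT32 case through IntIndexer made Java read unsigned 32-bit buffers through a signed int, so any stored value at or above 2^31 surfaced as a negative number, whereas UIntIndexer returns the unsigned value widened to a long. The sketch below illustrates the difference; it assumes JavaCPP's IntPointer/IntIndexer/UIntIndexer API as used in that patch, and the class name and values are illustrative only, not part of the change.

    import org.bytedeco.javacpp.IntPointer;
    import org.bytedeco.javacpp.indexer.IntIndexer;
    import org.bytedeco.javacpp.indexer.UIntIndexer;

    public class UIntIndexerSketch {
        public static void main(String[] args) {
            // One 32-bit slot with every bit set, i.e. 0xFFFFFFFF (4294967295 unsigned)
            IntPointer pointer = new IntPointer(1);
            pointer.put(0, -1);

            // Signed view: the UINT32 value comes back as -1
            System.out.println(IntIndexer.create(pointer).get(0));

            // Unsigned view: widened to long, the value 4294967295 is preserved
            System.out.println(UIntIndexer.create(pointer).get(0));
        }
    }

This is why the UINT32 case above now returns a UIntIndexer instead of falling through to the signed IntIndexer branch.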
Signed-off-by: shugeo --- .../declarable/helpers/cuda/image_resize.cu | 35 +++++++++++++++++-- .../layers_tests/DeclarableOpsTests11.cpp | 4 +-- 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/libnd4j/include/ops/declarable/helpers/cuda/image_resize.cu b/libnd4j/include/ops/declarable/helpers/cuda/image_resize.cu index d483f87b3..180c8ad0e 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/image_resize.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/image_resize.cu @@ -1066,7 +1066,7 @@ namespace helpers { const Nd4jLong yStart = math::nd4j_floor(inY); const Nd4jLong yEnd = math::nd4j_ceil(inY1); auto scalesDim = yEnd - yStart; - auto yScaleCache = cachePool + (batch * pSt->outWidth + y) * scalesDim * sizeof(ScaleCache); + auto yScaleCache = cachePool + (batch * pSt->outHeight + y) * pSt->outWidth; //auto startPtr = sharedPtr + y * scalesDim * sizeof(float); //float* yScales = yScalesShare + y * sizeof(float) * scalesDim;//reinterpret_cast(startPtr); //shared + y * scalesDim * y + scalesDim * sizeof(T const *) [scalesDim]; @@ -1113,14 +1113,34 @@ namespace helpers { auto outputPtr = reinterpret_cast(output->specialBuffer()); // output is always float. TO DO: provide another float types also with template declaration ImageResizerState* pSt; auto err = cudaMalloc(&pSt, sizeof(ImageResizerState)); + if (err != 0) { + throw cuda_exception::build("helpers::resizeArea: Cannot allocate memory for ImageResizerState", err); + } + err = cudaMemcpyAsync(pSt, &st, sizeof(ImageResizerState), cudaMemcpyHostToDevice, *stream); + if (err != 0) { + throw cuda_exception::build("helpers::resizeArea: Cannot copy to device memory", err); + } ScaleCache* cachePool; - err = cudaMalloc(&cachePool, sizeof(ScaleCache) * st.batchSize * st.outWidth * st.outHeight); - resizeAreaKernel<<<128, 2, 2048, *stream>>>(pSt, cache, scale, inputPtr, input->specialShapeInfo(), outputPtr, + auto cachePoolSize = sizeof(ScaleCache) * st.batchSize * st.outWidth * st.outHeight; + err = cudaMalloc(&cachePool, cachePoolSize); + if (err != 0) { + throw cuda_exception::build("helpers::resizeArea: Cannot allocate memory for cache", err); + } + resizeAreaKernel<<<128, 128, 2048, *stream>>>(pSt, cache, scale, inputPtr, input->specialShapeInfo(), outputPtr, output->specialShapeInfo(), cachePool); err = cudaStreamSynchronize(*stream); + if (err != 0) { + throw cuda_exception::build("helpers::resizeArea: An error occurred while running the kernel", err); + } err = cudaFree(cachePool); + if (err != 0) { + throw cuda_exception::build("helpers::resizeArea: Cannot deallocate memory for cache", err); + } err = cudaFree(pSt); + if (err != 0) { + throw cuda_exception::build("helpers::resizeArea: Cannot deallocate memory for ImageResizerState", err); + } } // ------------------------------------------------------------------------------------------------------------------ // template @@ -1134,11 +1154,20 @@ CachedInterpolation* xCached; //(st.outWidth); auto err = cudaMalloc(&xCached, sizeof(CachedInterpolation) * st.outWidth); + if (err != 0) { + throw cuda_exception::build("helpers::resizeAreaFunctor_: Cannot allocate memory for cached interpolations", err); + } NDArray::prepareSpecialUse({output}, {image}); fillInterpolationCache<<<128, 128, 256, *stream>>>(xCached, st.outWidth, st.inWidth, st.widthScale); resizeArea(stream, st, xCached, image, output); err = cudaStreamSynchronize(*stream); + if (err != 0) { + throw cuda_exception::build("helpers::resizeAreaFunctor_: An error occurred while running the kernel",
err); + } err = cudaFree(xCached); + if (err != 0) { + throw cuda_exception::build("helpers::resizeAreaFunctor_: Cannot deallocate memory for cached interpolations", err); + } NDArray::registerSpecialUse({output}, {image}); } diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests11.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests11.cpp index e4391c688..23c40ebae 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests11.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests11.cpp @@ -1054,7 +1054,7 @@ TEST_F(DeclarableOpsTests11, ImageResizeBicubic_Test8) { ASSERT_TRUE(testData.equalsTo(result)); } -/* + TEST_F(DeclarableOpsTests11, ImageResizeArea_Test1) { NDArray input = NDArrayFactory::create('c', {1, 3, 3, 4}); @@ -1532,7 +1532,7 @@ TEST_F(DeclarableOpsTests11, ImageResizeArea_Test15) { ASSERT_TRUE(expected.isSameShape(result)); ASSERT_TRUE(expected.equalsTo(result)); } - */ + /////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests11, summaryStatsData_test1) { From 5568b9d72ff519a3cdaa4db773a02cf72726582e Mon Sep 17 00:00:00 2001 From: Alexandre Boulanger <44292157+aboulang2002@users.noreply.github.com> Date: Wed, 27 May 2020 07:41:02 -0400 Subject: [PATCH 14/21] RL4J: Add AgentLearner (#470) Signed-off-by: Alexandre Boulanger --- .../org/deeplearning4j/rl4j/agent/Agent.java | 113 +++++++--- .../rl4j/agent/AgentLearner.java | 115 ++++++++++ .../org/deeplearning4j/rl4j/agent/IAgent.java | 55 +++++ .../rl4j/agent/IAgentLearner.java | 24 ++ .../agent/learning/ILearningBehavior.java | 49 ++++ .../rl4j/agent/learning/LearningBehavior.java | 59 +++++ .../rl4j/agent/listener/AgentListener.java | 47 +++- .../agent/listener/AgentListenerList.java | 39 ++++ .../agent/update/DQNNeuralNetUpdateRule.java | 62 +++++ .../rl4j/agent/update/Gradients.java | 26 +++ .../rl4j/agent/update/IUpdateRule.java | 37 +++ .../rl4j/environment/ActionSchema.java | 9 - .../rl4j/environment/Environment.java | 43 ++++ .../rl4j/environment/IActionSchema.java | 26 +++ .../rl4j/environment/IntegerActionSchema.java | 47 ++++ .../rl4j/environment/Schema.java | 18 +- .../rl4j/environment/StepResult.java | 15 ++ .../rl4j/experience/ExperienceHandler.java | 5 + .../ReplayMemoryExperienceHandler.java | 7 + .../StateActionExperienceHandler.java | 17 +- .../rl4j/helper/INDArrayHelper.java | 31 ++- .../learning/async/AsyncThreadDiscrete.java | 14 +- .../AsyncNStepQLearningThreadDiscrete.java | 3 +- .../discrete/QLearningUpdateAlgorithm.java | 24 +- .../rl4j/learning/sync/ExpReplay.java | 5 + .../rl4j/learning/sync/IExpReplay.java | 5 + .../learning/sync/qlearning/QLearning.java | 19 +- .../qlearning/discrete/QLearningDiscrete.java | 70 +++--- .../rl4j/mdp/CartpoleEnvironment.java | 17 +- .../deeplearning4j/rl4j/policy/EpsGreedy.java | 87 +++++++- .../rl4j/policy/INeuralNetPolicy.java | 7 + .../deeplearning4j/rl4j/policy/Policy.java | 2 +- .../rl4j/agent/AgentLearnerTest.java | 211 ++++++++++++++++++ .../deeplearning4j/rl4j/agent/AgentTest.java | 44 ++-- .../agent/learning/LearningBehaviorTest.java | 133 +++++++++++ .../ReplayMemoryExperienceHandlerTest.java | 100 ++++++--- .../StateActionExperienceHandlerTest.java | 70 +++++- .../rl4j/helper/INDArrayHelperTest.java | 21 ++ .../QLearningUpdateAlgorithmTest.java | 75 ++++--- .../discrete/QLearningDiscreteTest.java | 34 ++- 40 files changed, 1541 insertions(+), 244 deletions(-) create mode 100644 rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/AgentLearner.java create mode 100644 
rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/IAgent.java create mode 100644 rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/IAgentLearner.java create mode 100644 rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/learning/ILearningBehavior.java create mode 100644 rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/learning/LearningBehavior.java create mode 100644 rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/update/DQNNeuralNetUpdateRule.java create mode 100644 rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/update/Gradients.java create mode 100644 rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/update/IUpdateRule.java delete mode 100644 rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/environment/ActionSchema.java create mode 100644 rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/environment/IActionSchema.java create mode 100644 rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/environment/IntegerActionSchema.java create mode 100644 rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/policy/INeuralNetPolicy.java create mode 100644 rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/agent/AgentLearnerTest.java create mode 100644 rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/agent/learning/LearningBehaviorTest.java diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/Agent.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/Agent.java index 999f12e8c..198c2a1ca 100644 --- a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/Agent.java +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/Agent.java @@ -1,3 +1,18 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ package org.deeplearning4j.rl4j.agent; import lombok.AccessLevel; @@ -14,7 +29,13 @@ import org.nd4j.common.base.Preconditions; import java.util.Map; -public class Agent { +/** + * An agent implementation. The Agent will use a {@link IPolicy} to interact with an {@link Environment} and receive + * a reward. + * + * @param The type of action + */ +public class Agent implements IAgent { @Getter private final String id; @@ -37,19 +58,28 @@ public class Agent { private ACTION lastAction; @Getter - private int episodeStepNumber; + private int episodeStepCount; @Getter private double reward; protected boolean canContinue; - private Agent(Builder builder) { - this.environment = builder.environment; - this.transformProcess = builder.transformProcess; - this.policy = builder.policy; - this.maxEpisodeSteps = builder.maxEpisodeSteps; - this.id = builder.id; + /** + * @param environment The {@link Environment} to be used + * @param transformProcess The {@link TransformProcess} to be used to transform the raw observations into usable ones. 
+ * @param policy The {@link IPolicy} to be used + * @param maxEpisodeSteps The maximum number of steps an episode can have before being interrupted. Use null to have no max. + * @param id A user-supplied id to identify the instance. + */ + public Agent(@NonNull Environment environment, @NonNull TransformProcess transformProcess, @NonNull IPolicy policy, Integer maxEpisodeSteps, String id) { + Preconditions.checkArgument(maxEpisodeSteps == null || maxEpisodeSteps > 0, "maxEpisodeSteps must be null (no maximum) or greater than 0, got", maxEpisodeSteps); + + this.environment = environment; + this.transformProcess = transformProcess; + this.policy = policy; + this.maxEpisodeSteps = maxEpisodeSteps; + this.id = id; listeners = buildListenerList(); } @@ -58,10 +88,17 @@ return new AgentListenerList(); } + /** + * Add an {@link AgentListener} that will be notified when agent events happen + * @param listener The listener to add + */ public void addListener(AgentListener listener) { listeners.add(listener); } + /** + * This will run a single episode + */ public void run() { runEpisode(); } @@ -80,7 +117,7 @@ canContinue = listeners.notifyBeforeEpisode(this); - while (canContinue && !environment.isEpisodeFinished() && (maxEpisodeSteps == null || episodeStepNumber < maxEpisodeSteps)) { + while (canContinue && !environment.isEpisodeFinished() && (maxEpisodeSteps == null || episodeStepCount < maxEpisodeSteps)) { performStep(); } @@ -100,9 +137,9 @@ } protected void resetEnvironment() { - episodeStepNumber = 0; + episodeStepCount = 0; Map channelsData = environment.reset(); - this.observation = transformProcess.transform(channelsData, episodeStepNumber, false); + this.observation = transformProcess.transform(channelsData, episodeStepCount, false); } protected void resetPolicy() { @@ -125,7 +162,6 @@ } StepResult stepResult = act(action); - handleStepResult(stepResult); onAfterStep(stepResult); @@ -134,11 +170,11 @@ return; } - incrementEpisodeStepNumber(); + incrementEpisodeStepCount(); } - protected void incrementEpisodeStepNumber() { - ++episodeStepNumber; + protected void incrementEpisodeStepCount() { + ++episodeStepCount; } protected ACTION decideAction(Observation observation) { @@ -150,12 +186,15 @@ } protected StepResult act(ACTION action) { - return environment.step(action); - } + Observation observationBeforeAction = observation; - protected void handleStepResult(StepResult stepResult) { - observation = convertChannelDataToObservation(stepResult, episodeStepNumber + 1); - reward +=computeReward(stepResult); + StepResult stepResult = environment.step(action); + observation = convertChannelDataToObservation(stepResult, episodeStepCount + 1); + reward += computeReward(stepResult); + + onAfterAction(observationBeforeAction, action, stepResult); + + return stepResult; } protected Observation convertChannelDataToObservation(StepResult stepResult, int episodeStepNumberOfObs) { @@ -166,6 +205,10 @@ return stepResult.getReward(); } + protected void onAfterAction(Observation observationBeforeAction, ACTION action, StepResult stepResult) { + // Do Nothing + } + protected void onAfterStep(StepResult stepResult) { // Do Nothing } @@ -174,16 +217,24 @@ // Do Nothing } - public static Builder builder(@NonNull Environment environment, @NonNull TransformProcess transformProcess, @NonNull IPolicy policy) { + /** + * + * @param environment + * @param
transformProcess + * @param policy + * @param + * @return + */ + public static Builder builder(@NonNull Environment environment, @NonNull TransformProcess transformProcess, @NonNull IPolicy policy) { return new Builder<>(environment, transformProcess, policy); } - public static class Builder { - private final Environment environment; - private final TransformProcess transformProcess; - private final IPolicy policy; - private Integer maxEpisodeSteps = null; // Default, no max - private String id; + public static class Builder { + protected final Environment environment; + protected final TransformProcess transformProcess; + protected final IPolicy policy; + protected Integer maxEpisodeSteps = null; // Default, no max + protected String id; public Builder(@NonNull Environment environment, @NonNull TransformProcess transformProcess, @NonNull IPolicy policy) { this.environment = environment; @@ -191,20 +242,20 @@ this.policy = policy; } - public Builder maxEpisodeSteps(int maxEpisodeSteps) { + public Builder maxEpisodeSteps(int maxEpisodeSteps) { Preconditions.checkArgument(maxEpisodeSteps > 0, "maxEpisodeSteps must be greater than 0, got", maxEpisodeSteps); this.maxEpisodeSteps = maxEpisodeSteps; return this; } - public Builder id(String id) { + public Builder id(String id) { this.id = id; return this; } - public Agent build() { - return new Agent(this); + public AGENT_TYPE build() { + return (AGENT_TYPE)new Agent(environment, transformProcess, policy, maxEpisodeSteps, id); } } } \ No newline at end of file diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/AgentLearner.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/AgentLearner.java new file mode 100644 index 000000000..8fd963cda --- /dev/null +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/AgentLearner.java @@ -0,0 +1,115 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ +package org.deeplearning4j.rl4j.agent; + +import lombok.Getter; +import lombok.NonNull; +import org.deeplearning4j.rl4j.agent.learning.ILearningBehavior; +import org.deeplearning4j.rl4j.environment.Environment; +import org.deeplearning4j.rl4j.environment.StepResult; +import org.deeplearning4j.rl4j.observation.Observation; +import org.deeplearning4j.rl4j.observation.transform.TransformProcess; +import org.deeplearning4j.rl4j.policy.IPolicy; + +/** + * The AgentLearner is an {@link Agent} that delegates the learning to a {@link ILearningBehavior}.
+ * @param The type of the action + */ +public class AgentLearner extends Agent implements IAgentLearner { + + @Getter + private int totalStepCount = 0; + + private final ILearningBehavior learningBehavior; + private double rewardAtLastExperience; + + /** + * + * @param environment The {@link Environment} to be used + * @param transformProcess The {@link TransformProcess} to be used to transform the raw observations into usable ones. + * @param policy The {@link IPolicy} to be used + * @param maxEpisodeSteps The maximum number of steps an episode can have before being interrupted. Use null to have no max. + * @param id A user-supplied id to identify the instance. + * @param learningBehavior The {@link ILearningBehavior} that will be used to supervise the learning. + */ + public AgentLearner(Environment environment, TransformProcess transformProcess, IPolicy policy, Integer maxEpisodeSteps, String id, @NonNull ILearningBehavior learningBehavior) { + super(environment, transformProcess, policy, maxEpisodeSteps, id); + + this.learningBehavior = learningBehavior; + } + + @Override + protected void reset() { + super.reset(); + + rewardAtLastExperience = 0; + } + + @Override + protected void onBeforeEpisode() { + super.onBeforeEpisode(); + + learningBehavior.handleEpisodeStart(); + } + + @Override + protected void onAfterAction(Observation observationBeforeAction, ACTION action, StepResult stepResult) { + if(!observationBeforeAction.isSkipped()) { + double rewardSinceLastExperience = getReward() - rewardAtLastExperience; + learningBehavior.handleNewExperience(observationBeforeAction, action, rewardSinceLastExperience, stepResult.isTerminal()); + + rewardAtLastExperience = getReward(); + } + } + + @Override + protected void onAfterEpisode() { + learningBehavior.handleEpisodeEnd(getObservation()); + } + + @Override + protected void incrementEpisodeStepCount() { + super.incrementEpisodeStepCount(); + ++totalStepCount; + } + + // FIXME: parent is still visible + public static AgentLearner.Builder> builder(Environment environment, + TransformProcess transformProcess, + IPolicy policy, + ILearningBehavior learningBehavior) { + return new AgentLearner.Builder>(environment, transformProcess, policy, learningBehavior); + } + + public static class Builder> extends Agent.Builder { + + private final ILearningBehavior learningBehavior; + + public Builder(@NonNull Environment environment, + @NonNull TransformProcess transformProcess, + @NonNull IPolicy policy, + @NonNull ILearningBehavior learningBehavior) { + super(environment, transformProcess, policy); + + this.learningBehavior = learningBehavior; + } + + @Override + public AGENT_TYPE build() { + return (AGENT_TYPE)new AgentLearner(environment, transformProcess, policy, maxEpisodeSteps, id, learningBehavior); + } + } +} diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/IAgent.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/IAgent.java new file mode 100644 index 000000000..7cbd68a70 --- /dev/null +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/IAgent.java @@ -0,0 +1,55 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. 
+ * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ +package org.deeplearning4j.rl4j.agent; + +import org.deeplearning4j.rl4j.environment.Environment; +import org.deeplearning4j.rl4j.policy.IPolicy; + +/** + * The interface of {@link Agent} + * @param + */ +public interface IAgent { + /** + * Will play a single episode + */ + void run(); + + /** + * @return A user-supplied id to identify the IAgent instance. + */ + String getId(); + + /** + * @return The {@link Environment} instance being used by the agent. + */ + Environment getEnvironment(); + + /** + * @return The {@link IPolicy} instance being used by the agent. + */ + IPolicy getPolicy(); + + /** + * @return The step count taken in the current episode. + */ + int getEpisodeStepCount(); + + /** + * @return The cumulative reward received in the current episode. + */ + double getReward(); +} diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/IAgentLearner.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/IAgentLearner.java new file mode 100644 index 000000000..b1bdd1646 --- /dev/null +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/IAgentLearner.java @@ -0,0 +1,24 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ +package org.deeplearning4j.rl4j.agent; + +public interface IAgentLearner extends IAgent { + + /** + * @return The total count of steps taken by this AgentLearner, for all episodes. + */ + int getTotalStepCount(); +} diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/learning/ILearningBehavior.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/learning/ILearningBehavior.java new file mode 100644 index 000000000..0187d8c3a --- /dev/null +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/learning/ILearningBehavior.java @@ -0,0 +1,49 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ +package org.deeplearning4j.rl4j.agent.learning; + +import org.deeplearning4j.rl4j.observation.Observation; + +/** + * The ILearningBehavior implementations are in charge of the training. Through this interface, they are + * notified as new experience is generated. + * + * @param The type of action + */ +public interface ILearningBehavior { + + /** + * This method is called when a new episode has been started. + */ + void handleEpisodeStart(); + + /** + * This method is called when new experience is generated. + * + * @param observation The observation prior to taking the action + * @param action The action that has been taken + * @param reward The reward received by taking the action + * @param isTerminal True if the episode ended after taking the action + */ + void handleNewExperience(Observation observation, ACTION action, double reward, boolean isTerminal); + + /** + * This method is called when the episode ends or the maximum number of episode steps is reached. + * + * @param finalObservation The observation after the last action of the episode has been taken. + */ + void handleEpisodeEnd(Observation finalObservation); +} diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/learning/LearningBehavior.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/learning/LearningBehavior.java new file mode 100644 index 000000000..85c7ec4ce --- /dev/null +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/learning/LearningBehavior.java @@ -0,0 +1,59 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ +package org.deeplearning4j.rl4j.agent.learning; + +import lombok.Builder; +import org.deeplearning4j.rl4j.agent.update.IUpdateRule; +import org.deeplearning4j.rl4j.experience.ExperienceHandler; +import org.deeplearning4j.rl4j.observation.Observation; + +/** + * A generic {@link ILearningBehavior} that delegates the handling of experience to a {@link ExperienceHandler} and + * the update logic to a {@link IUpdateRule} + * + * @param The type of the action + * @param The type of experience the ExperienceHandler needs + */ +@Builder +public class LearningBehavior implements ILearningBehavior { + + @Builder.Default + private int experienceUpdateSize = 64; + + private final ExperienceHandler experienceHandler; + private final IUpdateRule updateRule; + + @Override + public void handleEpisodeStart() { + experienceHandler.reset(); + } + + @Override + public void handleNewExperience(Observation observation, ACTION action, double reward, boolean isTerminal) { + experienceHandler.addExperience(observation, action, reward, isTerminal); + if(experienceHandler.isTrainingBatchReady()) { + updateRule.update(experienceHandler.generateTrainingBatch()); + } + } + + @Override + public void handleEpisodeEnd(Observation finalObservation) { + experienceHandler.setFinalObservation(finalObservation); + if(experienceHandler.isTrainingBatchReady()) { + updateRule.update(experienceHandler.generateTrainingBatch()); + } + } +} diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/listener/AgentListener.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/listener/AgentListener.java index 898f89241..f176da144 100644 --- a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/listener/AgentListener.java +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/listener/AgentListener.java @@ -1,23 +1,66 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ package org.deeplearning4j.rl4j.agent.listener; import org.deeplearning4j.rl4j.agent.Agent; import org.deeplearning4j.rl4j.environment.StepResult; import org.deeplearning4j.rl4j.observation.Observation; +/** + * The base definition of all {@link Agent} event listeners + */ public interface AgentListener { enum ListenerResponse { /** - * Tell the learning process to continue calling the listeners and the training. + * Tell the {@link Agent} to continue calling the listeners and the processing. */ CONTINUE, /** - * Tell the learning process to stop calling the listeners and terminate the training. + * Tell the {@link Agent} to interrupt calling the listeners and stop the processing. */ STOP, } + /** + * Called when a new episode is about to start. 
+ * @param agent The agent that generated the event + * + * @return A {@link ListenerResponse}. + */ AgentListener.ListenerResponse onBeforeEpisode(Agent agent); + + /** + * Called when a step is about to be taken. + * + * @param agent The agent that generated the event + * @param observation The observation before the action is taken + * @param action The action that will be performed + * + * @return A {@link ListenerResponse}. + */ AgentListener.ListenerResponse onBeforeStep(Agent agent, Observation observation, ACTION action); + + /** + * Called after a step has been taken. + * + * @param agent The agent that generated the event + * @param stepResult The {@link StepResult} result of the step. + * + * @return A {@link ListenerResponse}. + */ AgentListener.ListenerResponse onAfterStep(Agent agent, StepResult stepResult); } diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/listener/AgentListenerList.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/listener/AgentListenerList.java index e003934d4..48538aeaf 100644 --- a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/listener/AgentListenerList.java +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/listener/AgentListenerList.java @@ -1,3 +1,18 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ package org.deeplearning4j.rl4j.agent.listener; import org.deeplearning4j.rl4j.agent.Agent; @@ -7,6 +22,10 @@ import org.deeplearning4j.rl4j.observation.Observation; import java.util.ArrayList; import java.util.List; +/** + * A class that manages a list of {@link AgentListener AgentListeners} listening to an {@link Agent}. + * @param + */ public class AgentListenerList { protected final List> listeners = new ArrayList<>(); @@ -18,6 +37,13 @@ public class AgentListenerList { listeners.add(listener); } + /** + * This method will notify all listeners that an episode is about to start. If a listener returns + * {@link AgentListener.ListenerResponse STOP}, any following listener is skipped. + * + * @param agent The agent that generated the event. + * @return False if the processing should be stopped + */ public boolean notifyBeforeEpisode(Agent agent) { for (AgentListener listener : listeners) { if (listener.onBeforeEpisode(agent) == AgentListener.ListenerResponse.STOP) { @@ -28,6 +54,13 @@ public class AgentListenerList { return true; } + /** + * + * @param agent The agent that generated the event. 
+ * @param observation The observation before the action is taken + * @param action The action that will be performed + * @return False if the processing should be stopped + */ public boolean notifyBeforeStep(Agent agent, Observation observation, ACTION action) { for (AgentListener listener : listeners) { if (listener.onBeforeStep(agent, observation, action) == AgentListener.ListenerResponse.STOP) { @@ -38,6 +71,12 @@ public class AgentListenerList { return true; } + /** + * + * @param agent The agent that generated the event. + * @param stepResult The {@link StepResult} result of the step. + * @return False if the processing should be stopped + */ public boolean notifyAfterStep(Agent agent, StepResult stepResult) { for (AgentListener listener : listeners) { if (listener.onAfterStep(agent, stepResult) == AgentListener.ListenerResponse.STOP) { diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/update/DQNNeuralNetUpdateRule.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/update/DQNNeuralNetUpdateRule.java new file mode 100644 index 000000000..46123d645 --- /dev/null +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/update/DQNNeuralNetUpdateRule.java @@ -0,0 +1,62 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ +package org.deeplearning4j.rl4j.agent.update; + +import lombok.Getter; +import org.deeplearning4j.rl4j.learning.sync.Transition; +import org.deeplearning4j.rl4j.learning.sync.qlearning.TargetQNetworkSource; +import org.deeplearning4j.rl4j.learning.sync.qlearning.discrete.TDTargetAlgorithm.DoubleDQN; +import org.deeplearning4j.rl4j.learning.sync.qlearning.discrete.TDTargetAlgorithm.ITDTargetAlgorithm; +import org.deeplearning4j.rl4j.learning.sync.qlearning.discrete.TDTargetAlgorithm.StandardDQN; +import org.deeplearning4j.rl4j.network.dqn.IDQN; +import org.nd4j.linalg.dataset.api.DataSet; + +import java.util.List; + +// Temporary class that will be replaced with a more generic class that delegates gradient computation +// and network update to sub components. +public class DQNNeuralNetUpdateRule implements IUpdateRule>, TargetQNetworkSource { + + @Getter + private final IDQN qNetwork; + + @Getter + private IDQN targetQNetwork; + private final int targetUpdateFrequency; + + private final ITDTargetAlgorithm tdTargetAlgorithm; + + @Getter + private int updateCount = 0; + + public DQNNeuralNetUpdateRule(IDQN qNetwork, int targetUpdateFrequency, boolean isDoubleDQN, double gamma, double errorClamp) { + this.qNetwork = qNetwork; + this.targetQNetwork = qNetwork.clone(); + this.targetUpdateFrequency = targetUpdateFrequency; + tdTargetAlgorithm = isDoubleDQN + ? 
new DoubleDQN(this, gamma, errorClamp) + : new StandardDQN(this, gamma, errorClamp); + } + + @Override + public void update(List> trainingBatch) { + DataSet targets = tdTargetAlgorithm.computeTDTargets(trainingBatch); + qNetwork.fit(targets.getFeatures(), targets.getLabels()); + if(++updateCount % targetUpdateFrequency == 0) { + targetQNetwork = qNetwork.clone(); + } + } +} diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/update/Gradients.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/update/Gradients.java new file mode 100644 index 000000000..4307efe1e --- /dev/null +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/update/Gradients.java @@ -0,0 +1,26 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ +package org.deeplearning4j.rl4j.agent.update; + +import lombok.Value; +import org.deeplearning4j.nn.gradient.Gradient; + +// Work in progress +@Value +public class Gradients { + private Gradient[] gradients; // Temporary: we'll need something better than a Gradient[] + private int batchSize; +} diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/update/IUpdateRule.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/update/IUpdateRule.java new file mode 100644 index 000000000..d679cba24 --- /dev/null +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/agent/update/IUpdateRule.java @@ -0,0 +1,37 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ +package org.deeplearning4j.rl4j.agent.update; + +import java.util.List; + +/** + * The role of IUpdateRule implementations is to use an experience batch to improve the accuracy of the policy. + * Used by {@link org.deeplearning4j.rl4j.agent.AgentLearner AgentLearner} + * @param The type of the experience + */ +public interface IUpdateRule { + /** + * Perform the update + * @param trainingBatch A batch of experience + */ + void update(List trainingBatch); + + /** + * @return The total number of times the policy has been updated. In a multi-agent learning context, this total is + * for all the agents. 
+ */ + int getUpdateCount(); +} diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/environment/ActionSchema.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/environment/ActionSchema.java deleted file mode 100644 index f6521e734..000000000 --- a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/environment/ActionSchema.java +++ /dev/null @@ -1,9 +0,0 @@ -package org.deeplearning4j.rl4j.environment; - -import lombok.Value; - -@Value -public class ActionSchema { - private ACTION noOp; - //FIXME ACTION randomAction(); -} diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/environment/Environment.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/environment/Environment.java index 95ff7d2b6..7fa84cc51 100644 --- a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/environment/Environment.java +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/environment/Environment.java @@ -1,11 +1,54 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ package org.deeplearning4j.rl4j.environment; import java.util.Map; +/** + * An interface for environments used by the {@link org.deeplearning4j.rl4j.agent.Agent Agents}. + * @param The type of actions + */ public interface Environment { + + /** + * @return The {@link Schema} of the environment + */ Schema getSchema(); + + /** + * Reset the environment's state to start a new episode. + * @return + */ Map reset(); + + /** + * Perform a single step. + * + * @param action The action taken + * @return A {@link StepResult} describing the result of the step. + */ StepResult step(ACTION action); + + /** + * @return True if the episode is finished + */ boolean isEpisodeFinished(); + + /** + * Called when the agent is finished using this environment instance. + */ void close(); } diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/environment/IActionSchema.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/environment/IActionSchema.java new file mode 100644 index 000000000..9e6e81a7b --- /dev/null +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/environment/IActionSchema.java @@ -0,0 +1,26 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ +package org.deeplearning4j.rl4j.environment; + +import lombok.Value; + +// Work in progress +public interface IActionSchema { + ACTION getNoOp(); + + // Review: A schema should be data-only and not have behavior + ACTION getRandomAction(); +} diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/environment/IntegerActionSchema.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/environment/IntegerActionSchema.java new file mode 100644 index 000000000..cdf172da6 --- /dev/null +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/environment/IntegerActionSchema.java @@ -0,0 +1,47 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ +package org.deeplearning4j.rl4j.environment; + +import org.nd4j.linalg.api.rng.Random; +import org.nd4j.linalg.factory.Nd4j; + +// Work in progress +public class IntegerActionSchema implements IActionSchema { + + private final int numActions; + private final int noOpAction; + private final Random rnd; + + public IntegerActionSchema(int numActions, int noOpAction) { + this(numActions, noOpAction, Nd4j.getRandom()); + } + + public IntegerActionSchema(int numActions, int noOpAction, Random rnd) { + this.numActions = numActions; + this.noOpAction = noOpAction; + this.rnd = rnd; + } + + @Override + public Integer getNoOp() { + return noOpAction; + } + + @Override + public Integer getRandomAction() { + return rnd.nextInt(numActions); + } +} diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/environment/Schema.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/environment/Schema.java index 5ddea24cd..7768c0553 100644 --- a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/environment/Schema.java +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/environment/Schema.java @@ -1,8 +1,24 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ package org.deeplearning4j.rl4j.environment; import lombok.Value; +// Work in progress @Value public class Schema { - private ActionSchema actionSchema; + private IActionSchema actionSchema; } diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/environment/StepResult.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/environment/StepResult.java index b64dd08f5..4936625db 100644 --- a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/environment/StepResult.java +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/environment/StepResult.java @@ -1,3 +1,18 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ package org.deeplearning4j.rl4j.environment; import lombok.Value; diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/experience/ExperienceHandler.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/experience/ExperienceHandler.java index 0017925df..e15c08415 100644 --- a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/experience/ExperienceHandler.java +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/experience/ExperienceHandler.java @@ -41,6 +41,11 @@ public interface ExperienceHandler { */ int getTrainingBatchSize(); + /** + * @return True if a batch is ready for training. + */ + boolean isTrainingBatchReady(); + /** * The elements are returned in the historical order (i.e. in the order they happened) * @return The list of experience elements diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/experience/ReplayMemoryExperienceHandler.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/experience/ReplayMemoryExperienceHandler.java index 74b7e3f05..c7f7d51ae 100644 --- a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/experience/ReplayMemoryExperienceHandler.java +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/experience/ReplayMemoryExperienceHandler.java @@ -36,6 +36,7 @@ import java.util.List; public class ReplayMemoryExperienceHandler implements ExperienceHandler> { private static final int DEFAULT_MAX_REPLAY_MEMORY_SIZE = 150000; private static final int DEFAULT_BATCH_SIZE = 32; + private final int batchSize; private IExpReplay expReplay; @@ -43,6 +44,7 @@ public class ReplayMemoryExperienceHandler implements ExperienceHandler expReplay) { this.expReplay = expReplay; + this.batchSize = expReplay.getDesignatedBatchSize(); } public ReplayMemoryExperienceHandler(int maxReplayMemorySize, int batchSize, Random random) { @@ -64,6 +66,11 @@ public class ReplayMemoryExperienceHandler implements ExperienceHandler= batchSize; + } + /** * @return A batch of experience selected from the replay memory. The replay memory is unchanged after the call. 
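     * <p>Callers are expected to check {@link #isTrainingBatchReady()} before
     * generating a batch. A usage sketch (the {@code handler} and
     * {@code updateRule} names are illustrative, mirroring how LearningBehavior
     * wires the two together elsewhere in this patch):</p>
     * <pre>{@code
     * if (handler.isTrainingBatchReady()) {
     *     updateRule.update(handler.generateTrainingBatch());
     * }
     * }</pre>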
*/ diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/experience/StateActionExperienceHandler.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/experience/StateActionExperienceHandler.java index 4c6b95c89..a8fae47bc 100644 --- a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/experience/StateActionExperienceHandler.java +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/experience/StateActionExperienceHandler.java @@ -30,10 +30,18 @@ import java.util.List; */ public class StateActionExperienceHandler implements ExperienceHandler> { + private final int batchSize; + + private boolean isFinalObservationSet; + + public StateActionExperienceHandler(int batchSize) { + this.batchSize = batchSize; + } + private List> stateActionPairs = new ArrayList<>(); public void setFinalObservation(Observation observation) { - // Do nothing + isFinalObservationSet = true; } public void addExperience(Observation observation, A action, double reward, boolean isTerminal) { @@ -45,6 +53,12 @@ public class StateActionExperienceHandler implements ExperienceHandler= batchSize + || (isFinalObservationSet && stateActionPairs.size() > 0); + } + /** * The elements are returned in the historical order (i.e. in the order they happened) * Note: the experience store is cleared after calling this method. @@ -62,6 +76,7 @@ public class StateActionExperienceHandler implements ExperienceHandler(); + isFinalObservationSet = false; } } diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/helper/INDArrayHelper.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/helper/INDArrayHelper.java index b42a7c503..9c35ed6f4 100644 --- a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/helper/INDArrayHelper.java +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/helper/INDArrayHelper.java @@ -24,17 +24,38 @@ import org.nd4j.linalg.factory.Nd4j; * @author Alexandre Boulanger */ public class INDArrayHelper { - /** - * MultiLayerNetwork and ComputationGraph expects input data to be in NCHW in the case of pixels and NS in case of other data types. - * - * We must have either shape 2 (NK) or shape 4 (NCHW) + * Force the input source to have the correct shape: + *
+ * <ul>
+ *     <li>DL4J requires it to be at least 2D</li>
+ *     <li>RL4J has a convention to have the batch size on dimension 0 for all INDArrays</li>
+ * </ul>
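+ * <p>For example, a rank-1 observation of shape { 4 } becomes { 1, 4 }, an
+ * input that already has shape { 1, 4 } is returned as-is, and a { 2, 4 }
+ * input (batch-size dimension missing) becomes { 1, 2, 4 }.</p>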
+ * @param source The {@link INDArray} to be corrected. + * @return The corrected INDArray */ public static INDArray forceCorrectShape(INDArray source) { - return source.shape()[0] == 1 && source.shape().length > 1 + return source.shape()[0] == 1 && source.rank() > 1 ? source : Nd4j.expandDims(source, 0); } + + /** + * This will create a INDArray with batchSize as dimension 0 and shape as other dimensions. + * For example, if batchSize is 10 and shape is { 1, 3, 4 }, the resulting INDArray shape will be { 10, 3, 4} + * @param batchSize The size of the batch to create + * @param shape The shape of individual elements. + * Note: all shapes in RL4J should have a batch size as dimension 0; in this case the batch size should be 1. + * @return A INDArray + */ + public static INDArray createBatchForShape(long batchSize, long... shape) { + long[] batchShape; + + batchShape = new long[shape.length]; + System.arraycopy(shape, 0, batchShape, 0, shape.length); + + batchShape[0] = batchSize; + return Nd4j.create(batchShape); + } } diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/learning/async/AsyncThreadDiscrete.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/learning/async/AsyncThreadDiscrete.java index c32be6906..bf8838424 100644 --- a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/learning/async/AsyncThreadDiscrete.java +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/learning/async/AsyncThreadDiscrete.java @@ -25,6 +25,7 @@ import org.deeplearning4j.gym.StepReply; import org.deeplearning4j.rl4j.experience.ExperienceHandler; import org.deeplearning4j.rl4j.experience.StateActionExperienceHandler; import org.deeplearning4j.rl4j.learning.IHistoryProcessor; +import org.deeplearning4j.rl4j.learning.configuration.IAsyncLearningConfiguration; import org.deeplearning4j.rl4j.learning.listener.TrainingListenerList; import org.deeplearning4j.rl4j.mdp.MDP; import org.deeplearning4j.rl4j.network.NeuralNet; @@ -49,7 +50,7 @@ public abstract class AsyncThreadDiscrete asyncGlobal, MDP mdp, @@ -60,6 +61,17 @@ public abstract class AsyncThreadDiscrete ex @Override protected UpdateAlgorithm buildUpdateAlgorithm() { - int[] shape = getHistoryProcessor() == null ? 
getMdp().getObservationSpace().getShape() : getHistoryProcessor().getConf().getShape(); - return new QLearningUpdateAlgorithm(shape, getMdp().getActionSpace().getSize(), configuration.getGamma()); + return new QLearningUpdateAlgorithm(getMdp().getActionSpace().getSize(), configuration.getGamma()); } } diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/learning/async/nstep/discrete/QLearningUpdateAlgorithm.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/learning/async/nstep/discrete/QLearningUpdateAlgorithm.java index 79c9666a2..f935240dc 100644 --- a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/learning/async/nstep/discrete/QLearningUpdateAlgorithm.java +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/learning/async/nstep/discrete/QLearningUpdateAlgorithm.java @@ -17,7 +17,7 @@ package org.deeplearning4j.rl4j.learning.async.nstep.discrete; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.rl4j.experience.StateActionPair; -import org.deeplearning4j.rl4j.learning.Learning; +import org.deeplearning4j.rl4j.helper.INDArrayHelper; import org.deeplearning4j.rl4j.learning.async.UpdateAlgorithm; import org.deeplearning4j.rl4j.network.dqn.IDQN; import org.nd4j.linalg.api.ndarray.INDArray; @@ -27,15 +27,12 @@ import java.util.List; public class QLearningUpdateAlgorithm implements UpdateAlgorithm { - private final int[] shape; private final int actionSpaceSize; private final double gamma; - public QLearningUpdateAlgorithm(int[] shape, - int actionSpaceSize, + public QLearningUpdateAlgorithm(int actionSpaceSize, double gamma) { - this.shape = shape; this.actionSpaceSize = actionSpaceSize; this.gamma = gamma; } @@ -44,33 +41,34 @@ public class QLearningUpdateAlgorithm implements UpdateAlgorithm { public Gradient[] computeGradients(IDQN current, List> experience) { int size = experience.size(); - int[] nshape = Learning.makeShape(size, shape); - INDArray input = Nd4j.create(nshape); - INDArray targets = Nd4j.create(size, actionSpaceSize); - StateActionPair stateActionPair = experience.get(size - 1); + INDArray data = stateActionPair.getObservation().getData(); + INDArray features = INDArrayHelper.createBatchForShape(size, data.shape()); + INDArray targets = Nd4j.create(size, actionSpaceSize); + double r; if (stateActionPair.isTerminal()) { r = 0; } else { INDArray[] output = null; - output = current.outputAll(stateActionPair.getObservation().getData()); + output = current.outputAll(data); r = Nd4j.max(output[0]).getDouble(0); } for (int i = size - 1; i >= 0; i--) { stateActionPair = experience.get(i); + data = stateActionPair.getObservation().getData(); - input.putRow(i, stateActionPair.getObservation().getData()); + features.putRow(i, data); r = stateActionPair.getReward() + gamma * r; - INDArray[] output = current.outputAll(stateActionPair.getObservation().getData()); + INDArray[] output = current.outputAll(data); INDArray row = output[0]; row = row.putScalar(stateActionPair.getAction(), r); targets.putRow(i, row); } - return current.gradient(input, targets); + return current.gradient(features, targets); } } diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/learning/sync/ExpReplay.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/learning/sync/ExpReplay.java index 93b4d1bb5..7bfcad53d 100644 --- a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/learning/sync/ExpReplay.java +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/learning/sync/ExpReplay.java @@ -80,6 +80,11 @@ public class ExpReplay
implements IExpReplay { //log.info("size: "+storage.size()); } + @Override + public int getDesignatedBatchSize() { + return batchSize; + } + public int getBatchSize() { int storageSize = storage.size(); return Math.min(storageSize, batchSize); diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/learning/sync/IExpReplay.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/learning/sync/IExpReplay.java index eaef5f0f8..8b2133806 100644 --- a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/learning/sync/IExpReplay.java +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/learning/sync/IExpReplay.java @@ -47,4 +47,9 @@ public interface IExpReplay { * @param transition a new transition to store */ void store(Transition transition); + + /** + * @return The desired size of batches + */ + int getDesignatedBatchSize(); } diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/learning/sync/qlearning/QLearning.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/learning/sync/qlearning/QLearning.java index b2e06dc9c..d9c955e17 100644 --- a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/learning/sync/qlearning/QLearning.java +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/learning/sync/qlearning/QLearning.java @@ -51,25 +51,16 @@ import java.util.List; @Slf4j public abstract class QLearning> extends SyncLearning - implements TargetQNetworkSource, IEpochTrainer { + implements IEpochTrainer { protected abstract LegacyMDPWrapper getLegacyMDPWrapper(); - protected abstract EpsGreedy getEgPolicy(); + protected abstract EpsGreedy getEgPolicy(); public abstract MDP getMdp(); public abstract IDQN getQNetwork(); - public abstract IDQN getTargetQNetwork(); - - protected abstract void setTargetQNetwork(IDQN dqn); - - protected void updateTargetNetwork() { - log.info("Update target network"); - setTargetQNetwork(getQNetwork().clone()); - } - public IDQN getNeuralNet() { return getQNetwork(); } @@ -101,11 +92,6 @@ public abstract class QLearning scores = new ArrayList<>(); while (currentEpisodeStepCount < getConfiguration().getMaxEpochStep() && !getMdp().isDone()) { - - if (this.getStepCount() % getConfiguration().getTargetDqnUpdateFreq() == 0) { - updateTargetNetwork(); - } - QLStepReturn stepR = trainStep(obs); if (!stepR.getMaxQ().isNaN()) { @@ -146,7 +132,6 @@ public abstract class QLearning refacInitMdp() { diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/learning/sync/qlearning/discrete/QLearningDiscrete.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/learning/sync/qlearning/discrete/QLearningDiscrete.java index 771650340..4e357584d 100644 --- a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/learning/sync/qlearning/discrete/QLearningDiscrete.java +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/learning/sync/qlearning/discrete/QLearningDiscrete.java @@ -21,6 +21,10 @@ import lombok.AccessLevel; import lombok.Getter; import lombok.Setter; import org.deeplearning4j.gym.StepReply; +import org.deeplearning4j.rl4j.agent.learning.ILearningBehavior; +import org.deeplearning4j.rl4j.agent.learning.LearningBehavior; +import org.deeplearning4j.rl4j.agent.update.DQNNeuralNetUpdateRule; +import org.deeplearning4j.rl4j.agent.update.IUpdateRule; import org.deeplearning4j.rl4j.experience.ExperienceHandler; import org.deeplearning4j.rl4j.experience.ReplayMemoryExperienceHandler; import org.deeplearning4j.rl4j.learning.IHistoryProcessor; @@ -28,9 +32,6 @@ import org.deeplearning4j.rl4j.learning.Learning; import 
org.deeplearning4j.rl4j.learning.configuration.QLearningConfiguration; import org.deeplearning4j.rl4j.learning.sync.Transition; import org.deeplearning4j.rl4j.learning.sync.qlearning.QLearning; -import org.deeplearning4j.rl4j.learning.sync.qlearning.discrete.TDTargetAlgorithm.DoubleDQN; -import org.deeplearning4j.rl4j.learning.sync.qlearning.discrete.TDTargetAlgorithm.ITDTargetAlgorithm; -import org.deeplearning4j.rl4j.learning.sync.qlearning.discrete.TDTargetAlgorithm.StandardDQN; import org.deeplearning4j.rl4j.mdp.MDP; import org.deeplearning4j.rl4j.network.dqn.IDQN; import org.deeplearning4j.rl4j.space.Encodable; @@ -41,12 +42,8 @@ import org.deeplearning4j.rl4j.space.DiscreteSpace; import org.deeplearning4j.rl4j.util.LegacyMDPWrapper; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.rng.Random; -import org.nd4j.linalg.dataset.api.DataSet; import org.nd4j.linalg.factory.Nd4j; -import java.util.List; - - /** * @author rubenfiszel (ruben.fiszel@epfl.ch) 7/18/16. @@ -63,22 +60,15 @@ public abstract class QLearningDiscrete extends QLearning policy; @Getter - private EpsGreedy egPolicy; + private EpsGreedy egPolicy; @Getter final private IDQN qNetwork; - @Getter - @Setter(AccessLevel.PROTECTED) - private IDQN targetQNetwork; private int lastAction; private double accuReward = 0; - ITDTargetAlgorithm tdTargetAlgorithm; - - // TODO: User a builder and remove the setter - @Getter(AccessLevel.PROTECTED) @Setter - private ExperienceHandler> experienceHandler; + private final ILearningBehavior learningBehavior; protected LegacyMDPWrapper getLegacyMDPWrapper() { return mdp; @@ -88,21 +78,31 @@ public abstract class QLearningDiscrete extends QLearning mdp, IDQN dqn, QLearningConfiguration conf, int epsilonNbStep, Random random) { + this(mdp, dqn, conf, epsilonNbStep, buildLearningBehavior(dqn, conf, random), random); + } + public QLearningDiscrete(MDP mdp, IDQN dqn, QLearningConfiguration conf, - int epsilonNbStep, Random random) { + int epsilonNbStep, ILearningBehavior learningBehavior, Random random) { this.configuration = conf; this.mdp = new LegacyMDPWrapper<>(mdp, null); qNetwork = dqn; - targetQNetwork = dqn.clone(); policy = new DQNPolicy(getQNetwork()); egPolicy = new EpsGreedy(policy, mdp, conf.getUpdateStart(), epsilonNbStep, random, conf.getMinEpsilon(), this); - tdTargetAlgorithm = conf.isDoubleDQN() - ? new DoubleDQN(this, conf.getGamma(), conf.getErrorClamp()) - : new StandardDQN(this, conf.getGamma(), conf.getErrorClamp()); + this.learningBehavior = learningBehavior; + } + + private static ILearningBehavior buildLearningBehavior(IDQN qNetwork, QLearningConfiguration conf, Random random) { + IUpdateRule> updateRule = new DQNNeuralNetUpdateRule(qNetwork, conf.getTargetDqnUpdateFreq(), conf.isDoubleDQN(), conf.getGamma(), conf.getErrorClamp()); + ExperienceHandler> experienceHandler = new ReplayMemoryExperienceHandler(conf.getExpRepMaxSize(), conf.getBatchSize(), random); + return LearningBehavior.>builder() + .experienceHandler(experienceHandler) + .updateRule(updateRule) + .experienceUpdateSize(conf.getBatchSize()) + .build(); - experienceHandler = new ReplayMemoryExperienceHandler(conf.getExpRepMaxSize(), conf.getBatchSize(), random); } public MDP getMdp() { @@ -119,7 +119,7 @@ public abstract class QLearningDiscrete extends QLearning extends QLearning trainStep(Observation obs) { - boolean isHistoryProcessor = getHistoryProcessor() != null; - int skipFrame = isHistoryProcessor ? 
getHistoryProcessor().getConf().getSkipFrame() : 1; - int historyLength = isHistoryProcessor ? getHistoryProcessor().getConf().getHistoryLength() : 1; - int updateStart = this.getConfiguration().getUpdateStart() - + ((this.getConfiguration().getBatchSize() + historyLength) * skipFrame); - Double maxQ = Double.NaN; //ignore if Nan for stats //if step of training, just repeat lastAction @@ -160,29 +154,15 @@ public abstract class QLearningDiscrete extends QLearning updateStart) { - DataSet targets = setTarget(experienceHandler.generateTrainingBatch()); - getQNetwork().fit(targets.getFeatures(), targets.getLabels()); - } } return new QLStepReturn<>(maxQ, getQNetwork().getLatestScore(), stepReply); } - protected DataSet setTarget(List> transitions) { - if (transitions.size() == 0) - throw new IllegalArgumentException("too few transitions"); - - return tdTargetAlgorithm.computeTDTargets(transitions); - } - @Override protected void finishEpoch(Observation observation) { - experienceHandler.setFinalObservation(observation); + learningBehavior.handleEpisodeEnd(observation); } } diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/mdp/CartpoleEnvironment.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/mdp/CartpoleEnvironment.java index 1e1348b4a..86907017b 100644 --- a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/mdp/CartpoleEnvironment.java +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/mdp/CartpoleEnvironment.java @@ -2,21 +2,19 @@ package org.deeplearning4j.rl4j.mdp; import lombok.Getter; import lombok.Setter; -import org.deeplearning4j.rl4j.environment.ActionSchema; -import org.deeplearning4j.rl4j.environment.Environment; -import org.deeplearning4j.rl4j.environment.Schema; -import org.deeplearning4j.rl4j.environment.StepResult; +import org.deeplearning4j.rl4j.environment.*; +import org.nd4j.linalg.api.rng.Random; +import org.nd4j.linalg.factory.Nd4j; import java.util.HashMap; import java.util.Map; -import java.util.Random; public class CartpoleEnvironment implements Environment { private static final int NUM_ACTIONS = 2; private static final int ACTION_LEFT = 0; private static final int ACTION_RIGHT = 1; - private static final Schema schema = new Schema<>(new ActionSchema<>(ACTION_LEFT)); + private final Schema schema; public enum KinematicsIntegrators { Euler, SemiImplicitEuler }; @@ -48,11 +46,12 @@ public class CartpoleEnvironment implements Environment { private Integer stepsBeyondDone; public CartpoleEnvironment() { - rnd = new Random(); + this(Nd4j.getRandom()); } - public CartpoleEnvironment(int seed) { - rnd = new Random(seed); + public CartpoleEnvironment(Random rnd) { + this.rnd = rnd; + this.schema = new Schema(new IntegerActionSchema(NUM_ACTIONS, ACTION_LEFT, rnd)); } @Override diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/policy/EpsGreedy.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/policy/EpsGreedy.java index a7282f139..f7422be92 100644 --- a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/policy/EpsGreedy.java +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/policy/EpsGreedy.java @@ -17,16 +17,19 @@ package org.deeplearning4j.rl4j.policy; -import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.NonNull; import lombok.extern.slf4j.Slf4j; +import org.deeplearning4j.rl4j.environment.IActionSchema; import org.deeplearning4j.rl4j.learning.IEpochTrainer; import org.deeplearning4j.rl4j.mdp.MDP; import org.deeplearning4j.rl4j.network.NeuralNet; -import 
org.deeplearning4j.rl4j.space.Encodable; import org.deeplearning4j.rl4j.observation.Observation; import org.deeplearning4j.rl4j.space.ActionSpace; +import org.deeplearning4j.rl4j.space.Encodable; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.rng.Random; +import org.nd4j.linalg.factory.Nd4j; /** * @author rubenfiszel (ruben.fiszel@epfl.ch) 7/24/16. @@ -38,18 +41,60 @@ import org.nd4j.linalg.api.rng.Random; * epislon is annealed to minEpsilon over epsilonNbStep steps * */ -@AllArgsConstructor @Slf4j -public class EpsGreedy> extends Policy { +public class EpsGreedy extends Policy { - final private Policy policy; - final private MDP mdp; + final private INeuralNetPolicy policy; final private int updateStart; final private int epsilonNbStep; final private Random rnd; final private double minEpsilon; + + private final IActionSchema actionSchema; + + final private MDP> mdp; final private IEpochTrainer learning; + // Using agent's (learning's) step count is incorrect; frame skipping makes epsilon's value decrease too quickly + private int annealingStep = 0; + + @Deprecated + public > EpsGreedy(Policy policy, + MDP> mdp, + int updateStart, + int epsilonNbStep, + Random rnd, + double minEpsilon, + IEpochTrainer learning) { + this.policy = policy; + this.mdp = mdp; + this.updateStart = updateStart; + this.epsilonNbStep = epsilonNbStep; + this.rnd = rnd; + this.minEpsilon = minEpsilon; + this.learning = learning; + + this.actionSchema = null; + } + + public EpsGreedy(@NonNull Policy policy, @NonNull IActionSchema actionSchema, double minEpsilon, int updateStart, int epsilonNbStep) { + this(policy, actionSchema, minEpsilon, updateStart, epsilonNbStep, null); + } + + @Builder + public EpsGreedy(@NonNull INeuralNetPolicy policy, @NonNull IActionSchema actionSchema, double minEpsilon, int updateStart, int epsilonNbStep, Random rnd) { + this.policy = policy; + + this.rnd = rnd == null ? Nd4j.getRandom() : rnd; + this.minEpsilon = minEpsilon; + this.updateStart = updateStart; + this.epsilonNbStep = epsilonNbStep; + this.actionSchema = actionSchema; + + this.mdp = null; + this.learning = null; + } + public NeuralNet getNeuralNet() { return policy.getNeuralNet(); } @@ -57,6 +102,11 @@ public class EpsGreedy ep) @@ -66,10 +116,31 @@ public class EpsGreedy ep) { + result = policy.nextAction(observation); + } + else { + result = actionSchema.getRandomAction(); + } + + ++annealingStep; + + return result; } public double getEpsilon() { - return Math.min(1.0, Math.max(minEpsilon, 1.0 - (learning.getStepCount() - updateStart) * 1.0 / epsilonNbStep)); + int step = actionSchema != null ? 
annealingStep : learning.getStepCount(); + return Math.min(1.0, Math.max(minEpsilon, 1.0 - (step - updateStart) * 1.0 / epsilonNbStep)); } } diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/policy/INeuralNetPolicy.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/policy/INeuralNetPolicy.java new file mode 100644 index 000000000..c213396c6 --- /dev/null +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/policy/INeuralNetPolicy.java @@ -0,0 +1,7 @@ +package org.deeplearning4j.rl4j.policy; + +import org.deeplearning4j.rl4j.network.NeuralNet; + +public interface INeuralNetPolicy extends IPolicy { + NeuralNet getNeuralNet(); +} diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/policy/Policy.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/policy/Policy.java index 6a4146c94..cf369e359 100644 --- a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/policy/Policy.java +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/policy/Policy.java @@ -34,7 +34,7 @@ import org.deeplearning4j.rl4j.util.LegacyMDPWrapper; * * A Policy responsability is to choose the next action given a state */ -public abstract class Policy implements IPolicy { +public abstract class Policy implements INeuralNetPolicy { public abstract NeuralNet getNeuralNet(); diff --git a/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/agent/AgentLearnerTest.java b/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/agent/AgentLearnerTest.java new file mode 100644 index 000000000..e0c0685bf --- /dev/null +++ b/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/agent/AgentLearnerTest.java @@ -0,0 +1,211 @@ +package org.deeplearning4j.rl4j.agent; + +import org.deeplearning4j.rl4j.agent.learning.LearningBehavior; +import org.deeplearning4j.rl4j.environment.Environment; +import org.deeplearning4j.rl4j.environment.IntegerActionSchema; +import org.deeplearning4j.rl4j.environment.Schema; +import org.deeplearning4j.rl4j.environment.StepResult; +import org.deeplearning4j.rl4j.observation.Observation; +import org.deeplearning4j.rl4j.observation.transform.TransformProcess; +import org.deeplearning4j.rl4j.policy.IPolicy; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.ArgumentCaptor; +import org.mockito.Mock; +import org.mockito.invocation.InvocationOnMock; +import org.mockito.junit.MockitoJUnitRunner; +import org.mockito.stubbing.Answer; +import org.nd4j.linalg.factory.Nd4j; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static org.mockito.ArgumentMatchers.*; +import static org.mockito.Mockito.*; +import static org.junit.Assert.*; + +@RunWith(MockitoJUnitRunner.class) +public class AgentLearnerTest { + + @Mock + Environment environmentMock; + + @Mock + TransformProcess transformProcessMock; + + @Mock + IPolicy policyMock; + + @Mock + LearningBehavior learningBehaviorMock; + + @Test + public void when_episodeIsStarted_expect_learningBehaviorHandleEpisodeStartCalled() { + // Arrange + AgentLearner sut = AgentLearner.builder(environmentMock, transformProcessMock, policyMock, learningBehaviorMock) + .maxEpisodeSteps(3) + .build(); + + Schema schema = new Schema(new IntegerActionSchema(0, -1)); + when(environmentMock.reset()).thenReturn(new HashMap<>()); + when(environmentMock.getSchema()).thenReturn(schema); + StepResult stepResult = new StepResult(new HashMap<>(), 234.0, false); + when(environmentMock.step(any(Integer.class))).thenReturn(stepResult); + + when(transformProcessMock.transform(any(Map.class), 
anyInt(), anyBoolean())).thenReturn(new Observation(Nd4j.create(new double[] { 123.0 }))); + + when(policyMock.nextAction(any(Observation.class))).thenReturn(123); + + // Act + sut.run(); + + // Assert + verify(learningBehaviorMock, times(1)).handleEpisodeStart(); + } + + @Test + public void when_runIsCalled_expect_experienceHandledWithLearningBehavior() { + // Arrange + AgentLearner sut = AgentLearner.builder(environmentMock, transformProcessMock, policyMock, learningBehaviorMock) + .maxEpisodeSteps(4) + .build(); + + Schema schema = new Schema(new IntegerActionSchema(0, -1)); + when(environmentMock.getSchema()).thenReturn(schema); + when(environmentMock.reset()).thenReturn(new HashMap<>()); + + double[] reward = new double[] { 0.0 }; + when(environmentMock.step(any(Integer.class))) + .thenAnswer(a -> new StepResult(new HashMap<>(), ++reward[0], reward[0] == 4.0)); + + when(environmentMock.isEpisodeFinished()).thenAnswer(x -> reward[0] == 4.0); + + when(transformProcessMock.transform(any(Map.class), anyInt(), anyBoolean())) + .thenAnswer(new Answer() { + public Observation answer(InvocationOnMock invocation) throws Throwable { + int step = (int)invocation.getArgument(1); + boolean isTerminal = (boolean)invocation.getArgument(2); + return (step % 2 == 0 || isTerminal) + ? new Observation(Nd4j.create(new double[] { step * 1.1 })) + : Observation.SkippedObservation; + } + }); + + when(policyMock.nextAction(any(Observation.class))).thenAnswer(x -> (int)reward[0]); + + // Act + sut.run(); + + // Assert + ArgumentCaptor observationCaptor = ArgumentCaptor.forClass(Observation.class); + ArgumentCaptor actionCaptor = ArgumentCaptor.forClass(Integer.class); + ArgumentCaptor rewardCaptor = ArgumentCaptor.forClass(Double.class); + ArgumentCaptor isTerminalCaptor = ArgumentCaptor.forClass(Boolean.class); + + verify(learningBehaviorMock, times(2)).handleNewExperience(observationCaptor.capture(), actionCaptor.capture(), rewardCaptor.capture(), isTerminalCaptor.capture()); + List observations = observationCaptor.getAllValues(); + List actions = actionCaptor.getAllValues(); + List rewards = rewardCaptor.getAllValues(); + List isTerminalList = isTerminalCaptor.getAllValues(); + + assertEquals(0.0, observations.get(0).getData().getDouble(0), 0.00001); + assertEquals(0, (int)actions.get(0)); + assertEquals(0.0 + 1.0, rewards.get(0), 0.00001); + assertFalse(isTerminalList.get(0)); + + assertEquals(2.2, observations.get(1).getData().getDouble(0), 0.00001); + assertEquals(2, (int)actions.get(1)); + assertEquals(2.0 + 3.0, rewards.get(1), 0.00001); + assertFalse(isTerminalList.get(1)); + + ArgumentCaptor finalObservationCaptor = ArgumentCaptor.forClass(Observation.class); + verify(learningBehaviorMock, times(1)).handleEpisodeEnd(finalObservationCaptor.capture()); + assertEquals(4.4, finalObservationCaptor.getValue().getData().getDouble(0), 0.00001); + } + + @Test + public void when_runIsCalledMultipleTimes_expect_totalStepCountCorrect() { + // Arrange + AgentLearner sut = AgentLearner.builder(environmentMock, transformProcessMock, policyMock, learningBehaviorMock) + .maxEpisodeSteps(4) + .build(); + + Schema schema = new Schema(new IntegerActionSchema(0, -1)); + when(environmentMock.getSchema()).thenReturn(schema); + when(environmentMock.reset()).thenReturn(new HashMap<>()); + + double[] reward = new double[] { 0.0 }; + when(environmentMock.step(any(Integer.class))) + .thenAnswer(a -> new StepResult(new HashMap<>(), ++reward[0], reward[0] == 4.0)); + + when(environmentMock.isEpisodeFinished()).thenAnswer(x -> 
reward[0] == 4.0); + + when(transformProcessMock.transform(any(Map.class), anyInt(), anyBoolean())) + .thenAnswer(new Answer() { + public Observation answer(InvocationOnMock invocation) throws Throwable { + int step = (int)invocation.getArgument(1); + boolean isTerminal = (boolean)invocation.getArgument(2); + return (step % 2 == 0 || isTerminal) + ? new Observation(Nd4j.create(new double[] { step * 1.1 })) + : Observation.SkippedObservation; + } + }); + + when(policyMock.nextAction(any(Observation.class))).thenAnswer(x -> (int)reward[0]); + + // Act + sut.run(); + reward[0] = 0.0; + sut.run(); + + // Assert + assertEquals(8, sut.getTotalStepCount()); + } + + @Test + public void when_runIsCalledMultipleTimes_expect_rewardSentToLearningBehaviorToBeCorrect() { + // Arrange + AgentLearner sut = AgentLearner.builder(environmentMock, transformProcessMock, policyMock, learningBehaviorMock) + .maxEpisodeSteps(4) + .build(); + + Schema schema = new Schema(new IntegerActionSchema(0, -1)); + when(environmentMock.getSchema()).thenReturn(schema); + when(environmentMock.reset()).thenReturn(new HashMap<>()); + + double[] reward = new double[] { 0.0 }; + when(environmentMock.step(any(Integer.class))) + .thenAnswer(a -> new StepResult(new HashMap<>(), ++reward[0], reward[0] == 4.0)); + + when(environmentMock.isEpisodeFinished()).thenAnswer(x -> reward[0] == 4.0); + + when(transformProcessMock.transform(any(Map.class), anyInt(), anyBoolean())) + .thenAnswer(new Answer() { + public Observation answer(InvocationOnMock invocation) throws Throwable { + int step = (int)invocation.getArgument(1); + boolean isTerminal = (boolean)invocation.getArgument(2); + return (step % 2 == 0 || isTerminal) + ? new Observation(Nd4j.create(new double[] { step * 1.1 })) + : Observation.SkippedObservation; + } + }); + + when(policyMock.nextAction(any(Observation.class))).thenAnswer(x -> (int)reward[0]); + + // Act + sut.run(); + reward[0] = 0.0; + sut.run(); + + // Assert + ArgumentCaptor rewardCaptor = ArgumentCaptor.forClass(Double.class); + + verify(learningBehaviorMock, times(4)).handleNewExperience(any(Observation.class), any(Integer.class), rewardCaptor.capture(), any(Boolean.class)); + List rewards = rewardCaptor.getAllValues(); + + // rewardAtLastExperience at the end of 1st call to .run() should not leak into 2nd call. 
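+        // Four rewards are captured in total (two per run); indexes 2 and 3 belong
+        // to the second run and must repeat the first run's values instead of
+        // accumulating across runs.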
+ assertEquals(0.0 + 1.0, rewards.get(2), 0.00001); + assertEquals(2.0 + 3.0, rewards.get(3), 0.00001); + } +} \ No newline at end of file diff --git a/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/agent/AgentTest.java b/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/agent/AgentTest.java index a8beae640..0022e61f0 100644 --- a/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/agent/AgentTest.java +++ b/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/agent/AgentTest.java @@ -1,10 +1,7 @@ package org.deeplearning4j.rl4j.agent; import org.deeplearning4j.rl4j.agent.listener.AgentListener; -import org.deeplearning4j.rl4j.environment.ActionSchema; -import org.deeplearning4j.rl4j.environment.Environment; -import org.deeplearning4j.rl4j.environment.Schema; -import org.deeplearning4j.rl4j.environment.StepResult; +import org.deeplearning4j.rl4j.environment.*; import org.deeplearning4j.rl4j.observation.Observation; import org.deeplearning4j.rl4j.observation.transform.TransformProcess; import org.deeplearning4j.rl4j.policy.IPolicy; @@ -12,6 +9,7 @@ import org.junit.Rule; import org.junit.Test; import static org.junit.Assert.*; +import org.junit.runner.RunWith; import org.mockito.*; import org.mockito.junit.*; import org.nd4j.linalg.factory.Nd4j; @@ -23,8 +21,8 @@ import java.util.Map; import static org.mockito.ArgumentMatchers.*; import static org.mockito.Mockito.*; +@RunWith(MockitoJUnitRunner.class) public class AgentTest { - @Mock Environment environmentMock; @Mock TransformProcess transformProcessMock; @Mock IPolicy policyMock; @@ -102,7 +100,7 @@ public class AgentTest { public void when_runIsCalled_expect_agentIsReset() { // Arrange Map envResetResult = new HashMap<>(); - Schema schema = new Schema(new ActionSchema<>(-1)); + Schema schema = new Schema(new IntegerActionSchema(0, -1)); when(environmentMock.reset()).thenReturn(envResetResult); when(environmentMock.getSchema()).thenReturn(schema); @@ -119,7 +117,7 @@ public class AgentTest { sut.run(); // Assert - assertEquals(0, sut.getEpisodeStepNumber()); + assertEquals(0, sut.getEpisodeStepCount()); verify(transformProcessMock).transform(envResetResult, 0, false); verify(policyMock, times(1)).reset(); assertEquals(0.0, sut.getReward(), 0.00001); @@ -130,7 +128,7 @@ public class AgentTest { public void when_runIsCalled_expect_onBeforeAndAfterEpisodeCalled() { // Arrange Map envResetResult = new HashMap<>(); - Schema schema = new Schema(new ActionSchema<>(-1)); + Schema schema = new Schema(new IntegerActionSchema(0, -1)); when(environmentMock.reset()).thenReturn(envResetResult); when(environmentMock.getSchema()).thenReturn(schema); @@ -152,7 +150,7 @@ public class AgentTest { public void when_onBeforeEpisodeReturnsStop_expect_performStepAndOnAfterEpisodeNotCalled() { // Arrange Map envResetResult = new HashMap<>(); - Schema schema = new Schema(new ActionSchema<>(-1)); + Schema schema = new Schema(new IntegerActionSchema(0, -1)); when(environmentMock.reset()).thenReturn(envResetResult); when(environmentMock.getSchema()).thenReturn(schema); @@ -179,7 +177,7 @@ public class AgentTest { public void when_runIsCalledWithoutMaxStep_expect_agentRunUntilEpisodeIsFinished() { // Arrange Map envResetResult = new HashMap<>(); - Schema schema = new Schema(new ActionSchema<>(-1)); + Schema schema = new Schema(new IntegerActionSchema(0, -1)); when(environmentMock.reset()).thenReturn(envResetResult); when(environmentMock.getSchema()).thenReturn(schema); @@ -191,10 +189,10 @@ public class AgentTest { final Agent spy = Mockito.spy(sut); 
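        // Spying on the agent lets the test stub performStep() below while the
        // real run() loop still advances the episode-step counter.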
doAnswer(invocation -> { - ((Agent)invocation.getMock()).incrementEpisodeStepNumber(); + ((Agent)invocation.getMock()).incrementEpisodeStepCount(); return null; }).when(spy).performStep(); - when(environmentMock.isEpisodeFinished()).thenAnswer(invocation -> spy.getEpisodeStepNumber() >= 5 ); + when(environmentMock.isEpisodeFinished()).thenAnswer(invocation -> spy.getEpisodeStepCount() >= 5 ); // Act spy.run(); @@ -209,7 +207,7 @@ public class AgentTest { public void when_maxStepsIsReachedBeforeEposideEnds_expect_runTerminated() { // Arrange Map envResetResult = new HashMap<>(); - Schema schema = new Schema(new ActionSchema<>(-1)); + Schema schema = new Schema(new IntegerActionSchema(0, -1)); when(environmentMock.reset()).thenReturn(envResetResult); when(environmentMock.getSchema()).thenReturn(schema); @@ -222,7 +220,7 @@ public class AgentTest { final Agent spy = Mockito.spy(sut); doAnswer(invocation -> { - ((Agent)invocation.getMock()).incrementEpisodeStepNumber(); + ((Agent)invocation.getMock()).incrementEpisodeStepCount(); return null; }).when(spy).performStep(); @@ -239,7 +237,7 @@ public class AgentTest { public void when_initialObservationsAreSkipped_expect_performNoOpAction() { // Arrange Map envResetResult = new HashMap<>(); - Schema schema = new Schema(new ActionSchema<>(-1)); + Schema schema = new Schema(new IntegerActionSchema(0, -1)); when(environmentMock.reset()).thenReturn(envResetResult); when(environmentMock.getSchema()).thenReturn(schema); @@ -264,7 +262,7 @@ public class AgentTest { public void when_initialObservationsAreSkipped_expect_performNoOpActionAnd() { // Arrange Map envResetResult = new HashMap<>(); - Schema schema = new Schema(new ActionSchema<>(-1)); + Schema schema = new Schema(new IntegerActionSchema(0, -1)); when(environmentMock.reset()).thenReturn(envResetResult); when(environmentMock.getSchema()).thenReturn(schema); @@ -289,7 +287,7 @@ public class AgentTest { public void when_observationsIsSkipped_expect_performLastAction() { // Arrange Map envResetResult = new HashMap<>(); - Schema schema = new Schema(new ActionSchema<>(-1)); + Schema schema = new Schema(new IntegerActionSchema(0, -1)); when(environmentMock.reset()).thenReturn(envResetResult); when(environmentMock.step(any(Integer.class))).thenReturn(new StepResult(envResetResult, 0.0, false)); when(environmentMock.getSchema()).thenReturn(schema); @@ -331,7 +329,7 @@ public class AgentTest { @Test public void when_onBeforeStepReturnsStop_expect_performStepAndOnAfterEpisodeNotCalled() { // Arrange - Schema schema = new Schema(new ActionSchema<>(-1)); + Schema schema = new Schema(new IntegerActionSchema(0, -1)); when(environmentMock.reset()).thenReturn(new HashMap<>()); when(environmentMock.getSchema()).thenReturn(schema); @@ -358,7 +356,7 @@ public class AgentTest { @Test public void when_observationIsNotSkipped_expect_policyActionIsSentToEnvironment() { // Arrange - Schema schema = new Schema(new ActionSchema<>(-1)); + Schema schema = new Schema(new IntegerActionSchema(0, -1)); when(environmentMock.reset()).thenReturn(new HashMap<>()); when(environmentMock.getSchema()).thenReturn(schema); when(environmentMock.step(any(Integer.class))).thenReturn(new StepResult(new HashMap<>(), 0.0, false)); @@ -381,7 +379,7 @@ public class AgentTest { @Test public void when_stepResultIsReceived_expect_observationAndRewardUpdated() { // Arrange - Schema schema = new Schema(new ActionSchema<>(-1)); + Schema schema = new Schema(new IntegerActionSchema(0, -1)); when(environmentMock.reset()).thenReturn(new HashMap<>()); 
when(environmentMock.getSchema()).thenReturn(schema); when(environmentMock.step(any(Integer.class))).thenReturn(new StepResult(new HashMap<>(), 234.0, false)); @@ -405,7 +403,7 @@ public class AgentTest { @Test public void when_stepIsDone_expect_onAfterStepAndWithStepResult() { // Arrange - Schema schema = new Schema(new ActionSchema<>(-1)); + Schema schema = new Schema(new IntegerActionSchema(0, -1)); when(environmentMock.reset()).thenReturn(new HashMap<>()); when(environmentMock.getSchema()).thenReturn(schema); StepResult stepResult = new StepResult(new HashMap<>(), 234.0, false); @@ -430,7 +428,7 @@ public class AgentTest { @Test public void when_onAfterStepReturnsStop_expect_onAfterEpisodeNotCalled() { // Arrange - Schema schema = new Schema(new ActionSchema<>(-1)); + Schema schema = new Schema(new IntegerActionSchema(0, -1)); when(environmentMock.reset()).thenReturn(new HashMap<>()); when(environmentMock.getSchema()).thenReturn(schema); StepResult stepResult = new StepResult(new HashMap<>(), 234.0, false); @@ -458,7 +456,7 @@ public class AgentTest { @Test public void when_runIsCalled_expect_onAfterEpisodeIsCalled() { // Arrange - Schema schema = new Schema(new ActionSchema<>(-1)); + Schema schema = new Schema(new IntegerActionSchema(0, -1)); when(environmentMock.reset()).thenReturn(new HashMap<>()); when(environmentMock.getSchema()).thenReturn(schema); StepResult stepResult = new StepResult(new HashMap<>(), 234.0, false); diff --git a/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/agent/learning/LearningBehaviorTest.java b/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/agent/learning/LearningBehaviorTest.java new file mode 100644 index 000000000..1e39c63d5 --- /dev/null +++ b/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/agent/learning/LearningBehaviorTest.java @@ -0,0 +1,133 @@ +package org.deeplearning4j.rl4j.agent.learning; + +import org.deeplearning4j.rl4j.agent.update.IUpdateRule; +import org.deeplearning4j.rl4j.experience.ExperienceHandler; +import org.deeplearning4j.rl4j.observation.Observation; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.ArgumentCaptor; +import org.mockito.Mock; +import org.mockito.junit.MockitoJUnitRunner; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.factory.Nd4j; + +import java.util.ArrayList; +import java.util.List; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.*; + +@RunWith(MockitoJUnitRunner.class) +public class LearningBehaviorTest { + + @Mock + ExperienceHandler experienceHandlerMock; + + @Mock + IUpdateRule updateRuleMock; + + LearningBehavior sut; + + @Before + public void setup() { + sut = LearningBehavior.builder() + .experienceHandler(experienceHandlerMock) + .updateRule(updateRuleMock) + .build(); + } + + @Test + public void when_callingHandleEpisodeStart_expect_experienceHandlerResetCalled() { + // Arrange + LearningBehavior sut = LearningBehavior.builder() + .experienceHandler(experienceHandlerMock) + .updateRule(updateRuleMock) + .build(); + + // Act + sut.handleEpisodeStart(); + + // Assert + verify(experienceHandlerMock, times(1)).reset(); + } + + @Test + public void when_callingHandleNewExperience_expect_experienceHandlerAddExperienceCalled() { + // Arrange + INDArray observationData = Nd4j.rand(1, 1); + when(experienceHandlerMock.isTrainingBatchReady()).thenReturn(false); + + // Act + 
sut.handleNewExperience(new Observation(observationData), 1, 2.0, false); + + // Assert + ArgumentCaptor observationCaptor = ArgumentCaptor.forClass(Observation.class); + ArgumentCaptor actionCaptor = ArgumentCaptor.forClass(Integer.class); + ArgumentCaptor rewardCaptor = ArgumentCaptor.forClass(Double.class); + ArgumentCaptor isTerminatedCaptor = ArgumentCaptor.forClass(Boolean.class); + verify(experienceHandlerMock, times(1)).addExperience(observationCaptor.capture(), actionCaptor.capture(), rewardCaptor.capture(), isTerminatedCaptor.capture()); + + assertEquals(observationData.getDouble(0, 0), observationCaptor.getValue().getData().getDouble(0, 0), 0.00001); + assertEquals(1, (int)actionCaptor.getValue()); + assertEquals(2.0, (double)rewardCaptor.getValue(), 0.00001); + assertFalse(isTerminatedCaptor.getValue()); + + verify(updateRuleMock, never()).update(any(List.class)); + } + + @Test + public void when_callingHandleNewExperienceAndTrainingBatchIsReady_expect_updateRuleUpdateWithTrainingBatch() { + // Arrange + INDArray observationData = Nd4j.rand(1, 1); + when(experienceHandlerMock.isTrainingBatchReady()).thenReturn(true); + List trainingBatch = new ArrayList(); + when(experienceHandlerMock.generateTrainingBatch()).thenReturn(trainingBatch); + + // Act + sut.handleNewExperience(new Observation(observationData), 1, 2.0, false); + + // Assert + verify(updateRuleMock, times(1)).update(trainingBatch); + } + + @Test + public void when_callingHandleEpisodeEnd_expect_experienceHandlerSetFinalObservationCalled() { + // Arrange + INDArray observationData = Nd4j.rand(1, 1); + when(experienceHandlerMock.isTrainingBatchReady()).thenReturn(false); + + // Act + sut.handleEpisodeEnd(new Observation(observationData)); + + // Assert + ArgumentCaptor observationCaptor = ArgumentCaptor.forClass(Observation.class); + verify(experienceHandlerMock, times(1)).setFinalObservation(observationCaptor.capture()); + + assertEquals(observationData.getDouble(0, 0), observationCaptor.getValue().getData().getDouble(0, 0), 0.00001); + + verify(updateRuleMock, never()).update(any(List.class)); + } + + @Test + public void when_callingHandleEpisodeEndAndTrainingBatchIsNotEmpty_expect_updateRuleUpdateWithTrainingBatch() { + // Arrange + INDArray observationData = Nd4j.rand(1, 1); + when(experienceHandlerMock.isTrainingBatchReady()).thenReturn(true); + List trainingBatch = new ArrayList(); + when(experienceHandlerMock.generateTrainingBatch()).thenReturn(trainingBatch); + + // Act + sut.handleEpisodeEnd(new Observation(observationData)); + + // Assert + ArgumentCaptor observationCaptor = ArgumentCaptor.forClass(Observation.class); + verify(experienceHandlerMock, times(1)).setFinalObservation(observationCaptor.capture()); + + assertEquals(observationData.getDouble(0, 0), observationCaptor.getValue().getData().getDouble(0, 0), 0.00001); + + verify(updateRuleMock, times(1)).update(trainingBatch); + } +} diff --git a/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/experience/ReplayMemoryExperienceHandlerTest.java b/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/experience/ReplayMemoryExperienceHandlerTest.java index 765a14c8f..0d90e812d 100644 --- a/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/experience/ReplayMemoryExperienceHandlerTest.java +++ b/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/experience/ReplayMemoryExperienceHandlerTest.java @@ -4,34 +4,44 @@ import org.deeplearning4j.rl4j.learning.sync.IExpReplay; import org.deeplearning4j.rl4j.learning.sync.Transition; import 
org.deeplearning4j.rl4j.observation.Observation; import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.ArgumentCaptor; +import org.mockito.Mock; +import org.mockito.junit.MockitoJUnitRunner; import org.nd4j.linalg.factory.Nd4j; -import java.util.ArrayList; import java.util.List; -import static org.junit.Assert.assertEquals; +import static org.junit.Assert.*; +import static org.mockito.Mockito.*; +@RunWith(MockitoJUnitRunner.class) public class ReplayMemoryExperienceHandlerTest { + + @Mock + IExpReplay expReplayMock; + @Test public void when_addingFirstExperience_expect_notAddedToStoreBeforeNextObservationIsAdded() { // Arrange - TestExpReplay expReplayMock = new TestExpReplay(); + when(expReplayMock.getDesignatedBatchSize()).thenReturn(10); + ReplayMemoryExperienceHandler sut = new ReplayMemoryExperienceHandler(expReplayMock); // Act sut.addExperience(new Observation(Nd4j.create(new double[] { 1.0 })), 1, 1.0, false); - int numStoredTransitions = expReplayMock.addedTransitions.size(); + boolean isStoreCalledAfterFirstAdd = mockingDetails(expReplayMock).getInvocations().stream().anyMatch(x -> x.getMethod().getName() == "store"); sut.addExperience(new Observation(Nd4j.create(new double[] { 2.0 })), 2, 2.0, false); + boolean isStoreCalledAfterSecondAdd = mockingDetails(expReplayMock).getInvocations().stream().anyMatch(x -> x.getMethod().getName() == "store"); // Assert - assertEquals(0, numStoredTransitions); - assertEquals(1, expReplayMock.addedTransitions.size()); + assertFalse(isStoreCalledAfterFirstAdd); + assertTrue(isStoreCalledAfterSecondAdd); } @Test public void when_addingExperience_expect_transitionsAreCorrect() { // Arrange - TestExpReplay expReplayMock = new TestExpReplay(); ReplayMemoryExperienceHandler sut = new ReplayMemoryExperienceHandler(expReplayMock); // Act @@ -40,24 +50,25 @@ public class ReplayMemoryExperienceHandlerTest { sut.setFinalObservation(new Observation(Nd4j.create(new double[] { 3.0 }))); // Assert - assertEquals(2, expReplayMock.addedTransitions.size()); + ArgumentCaptor> argument = ArgumentCaptor.forClass(Transition.class); + verify(expReplayMock, times(2)).store(argument.capture()); + List> transitions = argument.getAllValues(); - assertEquals(1.0, expReplayMock.addedTransitions.get(0).getObservation().getData().getDouble(0), 0.00001); - assertEquals(1, (int)expReplayMock.addedTransitions.get(0).getAction()); - assertEquals(1.0, expReplayMock.addedTransitions.get(0).getReward(), 0.00001); - assertEquals(2.0, expReplayMock.addedTransitions.get(0).getNextObservation().getDouble(0), 0.00001); + assertEquals(1.0, transitions.get(0).getObservation().getData().getDouble(0), 0.00001); + assertEquals(1, (int)transitions.get(0).getAction()); + assertEquals(1.0, transitions.get(0).getReward(), 0.00001); + assertEquals(2.0, transitions.get(0).getNextObservation().getDouble(0), 0.00001); - assertEquals(2.0, expReplayMock.addedTransitions.get(1).getObservation().getData().getDouble(0), 0.00001); - assertEquals(2, (int)expReplayMock.addedTransitions.get(1).getAction()); - assertEquals(2.0, expReplayMock.addedTransitions.get(1).getReward(), 0.00001); - assertEquals(3.0, expReplayMock.addedTransitions.get(1).getNextObservation().getDouble(0), 0.00001); + assertEquals(2.0, transitions.get(1).getObservation().getData().getDouble(0), 0.00001); + assertEquals(2, (int)transitions.get(1).getAction()); + assertEquals(2.0, transitions.get(1).getReward(), 0.00001); + assertEquals(3.0, transitions.get(1).getNextObservation().getDouble(0), 0.00001); } @Test 
public void when_settingFinalObservation_expect_nextAddedExperienceDoNotUsePreviousObservation() { // Arrange - TestExpReplay expReplayMock = new TestExpReplay(); ReplayMemoryExperienceHandler sut = new ReplayMemoryExperienceHandler(expReplayMock); // Act @@ -66,42 +77,57 @@ public class ReplayMemoryExperienceHandlerTest { sut.addExperience(new Observation(Nd4j.create(new double[] { 3.0 })), 3, 3.0, false); // Assert - assertEquals(1, expReplayMock.addedTransitions.size()); - assertEquals(1, (int)expReplayMock.addedTransitions.get(0).getAction()); + ArgumentCaptor> argument = ArgumentCaptor.forClass(Transition.class); + verify(expReplayMock, times(1)).store(argument.capture()); + Transition transition = argument.getValue(); + + assertEquals(1, (int)transition.getAction()); } @Test public void when_addingExperience_expect_getTrainingBatchSizeReturnSize() { // Arrange - TestExpReplay expReplayMock = new TestExpReplay(); - ReplayMemoryExperienceHandler sut = new ReplayMemoryExperienceHandler(expReplayMock); + ReplayMemoryExperienceHandler sut = new ReplayMemoryExperienceHandler(10, 5, Nd4j.getRandom()); sut.addExperience(new Observation(Nd4j.create(new double[] { 1.0 })), 1, 1.0, false); sut.addExperience(new Observation(Nd4j.create(new double[] { 2.0 })), 2, 2.0, false); sut.setFinalObservation(new Observation(Nd4j.create(new double[] { 3.0 }))); // Act int size = sut.getTrainingBatchSize(); + // Assert assertEquals(2, size); } - private static class TestExpReplay implements IExpReplay { + @Test + public void when_experienceSizeIsSmallerThanBatchSize_expect_TrainingBatchIsNotReady() { + // Arrange + ReplayMemoryExperienceHandler sut = new ReplayMemoryExperienceHandler(10, 5, Nd4j.getRandom()); + sut.addExperience(new Observation(Nd4j.create(new double[] { 1.0 })), 1, 1.0, false); + sut.addExperience(new Observation(Nd4j.create(new double[] { 2.0 })), 2, 2.0, false); + sut.setFinalObservation(new Observation(Nd4j.create(new double[] { 3.0 }))); - public final List> addedTransitions = new ArrayList<>(); + // Act - @Override - public ArrayList> getBatch() { - return null; - } - - @Override - public void store(Transition transition) { - addedTransitions.add(transition); - } - - @Override - public int getBatchSize() { - return addedTransitions.size(); - } + // Assert + assertFalse(sut.isTrainingBatchReady()); } + + @Test + public void when_experienceSizeIsGreaterOrEqualToBatchSize_expect_TrainingBatchIsReady() { + // Arrange + ReplayMemoryExperienceHandler sut = new ReplayMemoryExperienceHandler(10, 5, Nd4j.getRandom()); + sut.addExperience(new Observation(Nd4j.create(new double[] { 1.0 })), 1, 1.0, false); + sut.addExperience(new Observation(Nd4j.create(new double[] { 2.0 })), 2, 2.0, false); + sut.addExperience(new Observation(Nd4j.create(new double[] { 3.0 })), 3, 3.0, false); + sut.addExperience(new Observation(Nd4j.create(new double[] { 4.0 })), 4, 4.0, false); + sut.addExperience(new Observation(Nd4j.create(new double[] { 5.0 })), 5, 5.0, false); + sut.setFinalObservation(new Observation(Nd4j.create(new double[] { 6.0 }))); + + // Act + + // Assert + assertTrue(sut.isTrainingBatchReady()); + } + } diff --git a/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/experience/StateActionExperienceHandlerTest.java b/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/experience/StateActionExperienceHandlerTest.java index 7334ff87a..2ce0d6659 100644 --- a/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/experience/StateActionExperienceHandlerTest.java +++ 
b/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/experience/StateActionExperienceHandlerTest.java @@ -13,7 +13,7 @@ public class StateActionExperienceHandlerTest { @Test public void when_addingExperience_expect_generateTrainingBatchReturnsIt() { // Arrange - StateActionExperienceHandler sut = new StateActionExperienceHandler(); + StateActionExperienceHandler sut = new StateActionExperienceHandler(Integer.MAX_VALUE); sut.reset(); Observation observation = new Observation(Nd4j.zeros(1)); sut.addExperience(observation, 123, 234.0, true); @@ -32,7 +32,7 @@ public class StateActionExperienceHandlerTest { @Test public void when_addingMultipleExperiences_expect_generateTrainingBatchReturnsItInSameOrder() { // Arrange - StateActionExperienceHandler sut = new StateActionExperienceHandler(); + StateActionExperienceHandler sut = new StateActionExperienceHandler(Integer.MAX_VALUE); sut.reset(); sut.addExperience(null, 1, 1.0, false); sut.addExperience(null, 2, 2.0, false); @@ -51,7 +51,7 @@ public class StateActionExperienceHandlerTest { @Test public void when_gettingExperience_expect_experienceStoreIsCleared() { // Arrange - StateActionExperienceHandler sut = new StateActionExperienceHandler(); + StateActionExperienceHandler sut = new StateActionExperienceHandler(Integer.MAX_VALUE); sut.reset(); sut.addExperience(null, 1, 1.0, false); @@ -67,7 +67,7 @@ public class StateActionExperienceHandlerTest { @Test public void when_addingExperience_expect_getTrainingBatchSizeReturnSize() { // Arrange - StateActionExperienceHandler sut = new StateActionExperienceHandler(); + StateActionExperienceHandler sut = new StateActionExperienceHandler(Integer.MAX_VALUE); sut.reset(); sut.addExperience(null, 1, 1.0, false); sut.addExperience(null, 2, 2.0, false); @@ -79,4 +79,66 @@ public class StateActionExperienceHandlerTest { // Assert assertEquals(3, size); } + + @Test + public void when_experienceIsEmpty_expect_TrainingBatchNotReady() { + // Arrange + StateActionExperienceHandler sut = new StateActionExperienceHandler(5); + sut.reset(); + + // Act + boolean isTrainingBatchReady = sut.isTrainingBatchReady(); + + // Assert + assertFalse(isTrainingBatchReady); + } + + @Test + public void when_experienceSizeIsGreaterOrEqualToThanBatchSize_expect_TrainingBatchIsReady() { + // Arrange + StateActionExperienceHandler sut = new StateActionExperienceHandler(5); + sut.reset(); + sut.addExperience(null, 1, 1.0, false); + sut.addExperience(null, 2, 2.0, false); + sut.addExperience(null, 3, 3.0, false); + sut.addExperience(null, 4, 4.0, false); + sut.addExperience(null, 5, 5.0, false); + + // Act + boolean isTrainingBatchReady = sut.isTrainingBatchReady(); + + // Assert + assertTrue(isTrainingBatchReady); + } + + @Test + public void when_experienceSizeIsSmallerThanBatchSizeButFinalObservationIsSet_expect_TrainingBatchIsReady() { + // Arrange + StateActionExperienceHandler sut = new StateActionExperienceHandler(5); + sut.reset(); + sut.addExperience(null, 1, 1.0, false); + sut.addExperience(null, 2, 2.0, false); + sut.setFinalObservation(null); + + // Act + boolean isTrainingBatchReady = sut.isTrainingBatchReady(); + + // Assert + assertTrue(isTrainingBatchReady); + } + + @Test + public void when_experienceSizeIsZeroAndFinalObservationIsSet_expect_TrainingBatchIsNotReady() { + // Arrange + StateActionExperienceHandler sut = new StateActionExperienceHandler(5); + sut.reset(); + sut.setFinalObservation(null); + + // Act + boolean isTrainingBatchReady = sut.isTrainingBatchReady(); + + // Assert + 
assertFalse(isTrainingBatchReady); + } + } diff --git a/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/helper/INDArrayHelperTest.java b/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/helper/INDArrayHelperTest.java index e1c5c64ed..7af15b8c4 100644 --- a/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/helper/INDArrayHelperTest.java +++ b/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/helper/INDArrayHelperTest.java @@ -49,4 +49,25 @@ public class INDArrayHelperTest { assertEquals(1, output.shape()[1]); } + @Test + public void when_callingCreateBatchForShape_expect_INDArrayWithCorrectShapeAndOriginalShapeUnchanged() { + // Arrange + long[] shape = new long[] { 1, 3, 4}; + + // Act + INDArray output = INDArrayHelper.createBatchForShape(2, shape); + + // Assert + // Output shape + assertEquals(3, output.shape().length); + assertEquals(2, output.shape()[0]); + assertEquals(3, output.shape()[1]); + assertEquals(4, output.shape()[2]); + + // Input should remain unchanged + assertEquals(1, shape[0]); + assertEquals(3, shape[1]); + assertEquals(4, shape[2]); + + } } diff --git a/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/learning/async/nstep/discrete/QLearningUpdateAlgorithmTest.java b/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/learning/async/nstep/discrete/QLearningUpdateAlgorithmTest.java index f44437d67..ae83bd1f0 100644 --- a/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/learning/async/nstep/discrete/QLearningUpdateAlgorithmTest.java +++ b/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/learning/async/nstep/discrete/QLearningUpdateAlgorithmTest.java @@ -19,10 +19,11 @@ package org.deeplearning4j.rl4j.learning.async.nstep.discrete; import org.deeplearning4j.rl4j.experience.StateActionPair; import org.deeplearning4j.rl4j.learning.async.AsyncGlobal; import org.deeplearning4j.rl4j.learning.async.UpdateAlgorithm; +import org.deeplearning4j.rl4j.network.dqn.IDQN; import org.deeplearning4j.rl4j.observation.Observation; -import org.deeplearning4j.rl4j.support.MockDQN; import org.junit.Test; import org.junit.runner.RunWith; +import org.mockito.ArgumentCaptor; import org.mockito.Mock; import org.mockito.junit.MockitoJUnitRunner; import org.nd4j.linalg.api.ndarray.INDArray; @@ -32,6 +33,9 @@ import java.util.ArrayList; import java.util.List; import static org.junit.Assert.assertEquals; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.argThat; +import static org.mockito.Mockito.*; @RunWith(MockitoJUnitRunner.class) public class QLearningUpdateAlgorithmTest { @@ -39,12 +43,24 @@ public class QLearningUpdateAlgorithmTest { @Mock AsyncGlobal mockAsyncGlobal; + @Mock + IDQN dqnMock; + + private UpdateAlgorithm sut; + + private void setup(double gamma) { + // mock a neural net output -- just invert the sign of the input + when(dqnMock.outputAll(any(INDArray.class))).thenAnswer(invocation -> new INDArray[] { invocation.getArgument(0, INDArray.class).mul(-1.0) }); + + sut = new QLearningUpdateAlgorithm(2, gamma); + } + @Test public void when_isTerminal_expect_initRewardIs0() { // Arrange - MockDQN dqnMock = new MockDQN(); - UpdateAlgorithm sut = new QLearningUpdateAlgorithm(new int[] { 1 }, 1, 1.0); - final Observation observation = new Observation(Nd4j.zeros(1)); + setup(1.0); + + final Observation observation = new Observation(Nd4j.zeros(1, 2)); List> experience = new ArrayList>() { { add(new StateActionPair(observation, 0, 0.0, true)); @@ -55,59 +71,68 @@ public class QLearningUpdateAlgorithmTest { 
sut.computeGradients(dqnMock, experience); // Assert - assertEquals(0.0, dqnMock.gradientParams.get(0).getRight().getDouble(0), 0.00001); + verify(dqnMock, times(1)).gradient(any(INDArray.class), argThat((INDArray x) -> x.getDouble(0) == 0.0)); } @Test public void when_terminalAndNoTargetUpdate_expect_initRewardWithMaxQFromCurrent() { // Arrange - UpdateAlgorithm sut = new QLearningUpdateAlgorithm(new int[] { 2 }, 2, 1.0); - final Observation observation = new Observation(Nd4j.create(new double[] { -123.0, -234.0 })); + setup(1.0); + + final Observation observation = new Observation(Nd4j.create(new double[] { -123.0, -234.0 }).reshape(1, 2)); List> experience = new ArrayList>() { { add(new StateActionPair(observation, 0, 0.0, false)); } }; - MockDQN dqnMock = new MockDQN(); // Act sut.computeGradients(dqnMock, experience); // Assert - assertEquals(2, dqnMock.outputAllParams.size()); - assertEquals(-123.0, dqnMock.outputAllParams.get(0).getDouble(0, 0), 0.00001); - assertEquals(234.0, dqnMock.gradientParams.get(0).getRight().getDouble(0), 0.00001); + ArgumentCaptor argument = ArgumentCaptor.forClass(INDArray.class); + + verify(dqnMock, times(2)).outputAll(argument.capture()); + List values = argument.getAllValues(); + assertEquals(-123.0, values.get(0).getDouble(0, 0), 0.00001); + assertEquals(-123.0, values.get(1).getDouble(0, 0), 0.00001); + + verify(dqnMock, times(1)).gradient(any(INDArray.class), argThat((INDArray x) -> x.getDouble(0) == 234.0)); } @Test public void when_callingWithMultipleExperiences_expect_gradientsAreValid() { // Arrange double gamma = 0.9; - UpdateAlgorithm sut = new QLearningUpdateAlgorithm(new int[] { 2 }, 2, gamma); + setup(gamma); + List> experience = new ArrayList>() { { - add(new StateActionPair(new Observation(Nd4j.create(new double[] { -1.1, -1.2 })), 0, 1.0, false)); - add(new StateActionPair(new Observation(Nd4j.create(new double[] { -2.1, -2.2 })), 1, 2.0, true)); + add(new StateActionPair(new Observation(Nd4j.create(new double[] { -1.1, -1.2 }).reshape(1, 2)), 0, 1.0, false)); + add(new StateActionPair(new Observation(Nd4j.create(new double[] { -2.1, -2.2 }).reshape(1, 2)), 1, 2.0, true)); } }; - MockDQN dqnMock = new MockDQN(); // Act sut.computeGradients(dqnMock, experience); // Assert + ArgumentCaptor features = ArgumentCaptor.forClass(INDArray.class); + ArgumentCaptor targets = ArgumentCaptor.forClass(INDArray.class); + verify(dqnMock, times(1)).gradient(features.capture(), targets.capture()); + // input side -- should be a stack of observations - INDArray input = dqnMock.gradientParams.get(0).getLeft(); - assertEquals(-1.1, input.getDouble(0, 0), 0.00001); - assertEquals(-1.2, input.getDouble(0, 1), 0.00001); - assertEquals(-2.1, input.getDouble(1, 0), 0.00001); - assertEquals(-2.2, input.getDouble(1, 1), 0.00001); + INDArray featuresValues = features.getValue(); + assertEquals(-1.1, featuresValues.getDouble(0, 0), 0.00001); + assertEquals(-1.2, featuresValues.getDouble(0, 1), 0.00001); + assertEquals(-2.1, featuresValues.getDouble(1, 0), 0.00001); + assertEquals(-2.2, featuresValues.getDouble(1, 1), 0.00001); // target side - INDArray target = dqnMock.gradientParams.get(0).getRight(); - assertEquals(1.0 + gamma * 2.0, target.getDouble(0, 0), 0.00001); - assertEquals(1.2, target.getDouble(0, 1), 0.00001); - assertEquals(2.1, target.getDouble(1, 0), 0.00001); - assertEquals(2.0, target.getDouble(1, 1), 0.00001); + INDArray targetsValues = targets.getValue(); + assertEquals(1.0 + gamma * 2.0, targetsValues.getDouble(0, 0), 0.00001); + 
assertEquals(1.2, targetsValues.getDouble(0, 1), 0.00001); + assertEquals(2.1, targetsValues.getDouble(1, 0), 0.00001); + assertEquals(2.0, targetsValues.getDouble(1, 1), 0.00001); } } diff --git a/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/learning/sync/qlearning/discrete/QLearningDiscreteTest.java b/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/learning/sync/qlearning/discrete/QLearningDiscreteTest.java index e19af338b..e1424c286 100644 --- a/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/learning/sync/qlearning/discrete/QLearningDiscreteTest.java +++ b/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/learning/sync/qlearning/discrete/QLearningDiscreteTest.java @@ -18,6 +18,7 @@ package org.deeplearning4j.rl4j.learning.sync.qlearning.discrete; import org.deeplearning4j.gym.StepReply; +import org.deeplearning4j.rl4j.agent.learning.ILearningBehavior; import org.deeplearning4j.rl4j.learning.IHistoryProcessor; import org.deeplearning4j.rl4j.learning.configuration.QLearningConfiguration; import org.deeplearning4j.rl4j.learning.sync.qlearning.QLearning; @@ -74,6 +75,9 @@ public class QLearningDiscreteTest { @Mock QLearningConfiguration mockQlearningConfiguration; + @Mock + ILearningBehavior learningBehavior; + // HWC int[] observationShape = new int[]{3, 10, 10}; int totalObservationSize = 1; @@ -92,18 +96,28 @@ public class QLearningDiscreteTest { } - private void mockTestContext(int maxSteps, int updateStart, int batchSize, double rewardFactor, int maxExperienceReplay) { + private void mockTestContext(int maxSteps, int updateStart, int batchSize, double rewardFactor, int maxExperienceReplay, ILearningBehavior learningBehavior) { when(mockQlearningConfiguration.getBatchSize()).thenReturn(batchSize); when(mockQlearningConfiguration.getRewardFactor()).thenReturn(rewardFactor); when(mockQlearningConfiguration.getExpRepMaxSize()).thenReturn(maxExperienceReplay); when(mockQlearningConfiguration.getSeed()).thenReturn(123L); - qLearningDiscrete = mock( - QLearningDiscrete.class, - Mockito.withSettings() - .useConstructor(mockMDP, mockDQN, mockQlearningConfiguration, 0) - .defaultAnswer(Mockito.CALLS_REAL_METHODS) - ); + if(learningBehavior != null) { + qLearningDiscrete = mock( + QLearningDiscrete.class, + Mockito.withSettings() + .useConstructor(mockMDP, mockDQN, mockQlearningConfiguration, 0, learningBehavior, Nd4j.getRandom()) + .defaultAnswer(Mockito.CALLS_REAL_METHODS) + ); + } + else { + qLearningDiscrete = mock( + QLearningDiscrete.class, + Mockito.withSettings() + .useConstructor(mockMDP, mockDQN, mockQlearningConfiguration, 0) + .defaultAnswer(Mockito.CALLS_REAL_METHODS) + ); + } } private void mockHistoryProcessor(int skipFrames) { @@ -136,7 +150,7 @@ public class QLearningDiscreteTest { public void when_singleTrainStep_expect_correctValues() { // Arrange - mockTestContext(100,0,2,1.0, 10); + mockTestContext(100,0,2,1.0, 10, null); // An example observation and 2 Q values output (2 actions) Observation observation = new Observation(Nd4j.zeros(observationShape)); @@ -162,7 +176,7 @@ public class QLearningDiscreteTest { @Test public void when_singleTrainStepSkippedFrames_expect_correctValues() { // Arrange - mockTestContext(100,0,2,1.0, 10); + mockTestContext(100,0,2,1.0, 10, learningBehavior); Observation skippedObservation = Observation.SkippedObservation; Observation nextObservation = new Observation(Nd4j.zeros(observationShape)); @@ -180,8 +194,8 @@ public class QLearningDiscreteTest { assertEquals(0, stepReply.getReward(), 1e-5); 
assertFalse(stepReply.isDone()); assertFalse(stepReply.getObservation().isSkipped()); - assertEquals(0, qLearningDiscrete.getExperienceHandler().getTrainingBatchSize()); + verify(learningBehavior, never()).handleNewExperience(any(Observation.class), any(Integer.class), any(Double.class), any(Boolean.class)); verify(mockDQN, never()).output(any(INDArray.class)); } From 2aed216c2a880174266ef52df9ff7edac5337886 Mon Sep 17 00:00:00 2001 From: shugeo Date: Wed, 27 May 2020 21:15:03 +0300 Subject: [PATCH 15/21] Eliminated error with resize implementation. (#418) * Eliminated error with resize implementation. Signed-off-by: shugeo * Refactored resize caller implementation. Signed-off-by: shugeo * Refactored image.resize op helper. Signed-off-by: shugeo * Added stub implementations for the missing resize methods. Signed-off-by: shugeo * Added resize_images op. Refactored image_resize op. Signed-off-by: shugeo * Refactored matrix_band_part op and test. Signed-off-by: shugeo * Refactored resize_images op to comply with preserve_aspect_ratio flag properly. Signed-off-by: shugeo * Refactored resize_images and tests for resizeArea method. Signed-off-by: shugeo * Refactored resize methods and test. Signed-off-by: shugeo * Added new methods for TF2 resize op. Signed-off-by: shugeo * Ported a portion of the resize algorithms from TF2. Signed-off-by: shugeo * Added routine to process resize with given algorithm. Signed-off-by: shugeo * Added new image resize via scale and translate process helper. Signed-off-by: shugeo * CPU implementation for V2 image resize operation helpers. Signed-off-by: shugeo * Added implementation for lanczos5 algorithm of resize and test. Signed-off-by: shugeo * Added prints for span computing. Signed-off-by: shugeo * The first working implementation and tests for lanczos5 resize. Signed-off-by: shugeo * Eliminated leftover debug prints. Signed-off-by: shugeo * Refactored image_resize op and tests. Signed-off-by: shugeo * Lanczos3 resize implementation and tests. Signed-off-by: shugeo * Implemented bicubic resize algorithm and tests. Signed-off-by: shugeo * Added a couple of tests and cosmetic changes with image resize helper. Signed-off-by: shugeo * Added bilinear implementation for image resize. Signed-off-by: shugeo * Refactored bicubic algorithm and also implemented area and neighbor algorithms for image resize on the CPU arch. Signed-off-by: shugeo * Added a couple of tests for nearest neighbor and area resize. Signed-off-by: shugeo * Cosmetic changes for the CPU implementation and added the CUDA implementation for resize methods. Signed-off-by: shugeo * Separated CUDA implementation of v2 image resize. Signed-off-by: shugeo * Added kernels for span calculation and span gathering with new image resize CUDA implementation. Signed-off-by: shugeo * Refactored CUDA implementation of image resize kernels. Signed-off-by: shugeo * Finished the first working implementation of image resize op and tests. Signed-off-by: shugeo * Fixed resize_images and image_resize ops. Signed-off-by: shugeo * Refactored shape construction and output validation. Signed-off-by: shugeo * Fixed test to be properly initialized with float. Signed-off-by: shugeo * Added 3D input support for resize ops. Signed-off-by: shugeo * Fixed test for resize_images op. Signed-off-by: shugeo * Fixed test and call for resize_images op. Signed-off-by: shugeo * Refactored image_resize op output data type handling for the nearest neighbors method and tests. Signed-off-by: shugeo * Fixed issue with wrong resize method.
Signed-off-by: shugeo * Added checkup for wrong resize methods for resize ops. Signed-off-by: shugeo * Refactored resize methods and test. Signed-off-by: shugeo * Added output data type validation for given resize method. Signed-off-by: shugeo * - ResizeMethod rearranged in order to match C++ side - minor test fix Signed-off-by: raver119@gmail.com * Refactored resize_images op. Signed-off-by: shugeo Co-authored-by: raver119@gmail.com --- .../generic/images/image_resize.cpp | 58 +- .../generic/images/resize_images.cpp | 135 +++++ .../generic/linalg/matrix_band_part.cpp | 26 +- .../include/ops/declarable/headers/images.h | 155 +++++- .../ops/declarable/headers/parity_ops.h | 124 ----- .../declarable/helpers/cpu/image_resize.cpp | 399 +++++++++++++- .../declarable/helpers/cuda/image_resize.cu | 25 +- .../helpers/cuda/image_resize_v2.cu | 497 ++++++++++++++++++ .../ops/declarable/helpers/image_resize.h | 17 +- .../layers_tests/DeclarableOpsTests10.cpp | 88 ++++ .../layers_tests/DeclarableOpsTests11.cpp | 34 +- .../layers_tests/DeclarableOpsTests12.cpp | 325 ++++++++++++ .../org/nd4j/enums/ImageResizeMethod.java | 15 +- .../java/org/nd4j/nativeblas/Nd4jCuda.java | 14 +- .../java/org/nd4j/nativeblas/Nd4jCpu.java | 266 ++-------- .../opvalidation/TransformOpValidation.java | 6 +- 16 files changed, 1753 insertions(+), 431 deletions(-) create mode 100644 libnd4j/include/ops/declarable/generic/images/resize_images.cpp create mode 100644 libnd4j/include/ops/declarable/helpers/cuda/image_resize_v2.cu diff --git a/libnd4j/include/ops/declarable/generic/images/image_resize.cpp b/libnd4j/include/ops/declarable/generic/images/image_resize.cpp index 3ceba93d8..4e680b337 100644 --- a/libnd4j/include/ops/declarable/generic/images/image_resize.cpp +++ b/libnd4j/include/ops/declarable/generic/images/image_resize.cpp @@ -1,5 +1,5 @@ /******************************************************************************* - * Copyright (c) 2019 Konduit K.K. + * Copyright (c) 2020 Konduit K.K. 
* * This program and the accompanying materials are made available under the * terms of the Apache License, Version 2.0 which is available at @@ -32,57 +32,65 @@ namespace sd { auto size = INPUT_VARIABLE(1); auto output = OUTPUT_VARIABLE(0); + int width; int height; - bool preserveAspectRatio = false; // - default value bool antialias = false; - REQUIRE_TRUE(size->lengthOf() == 2, 0, "resize_bilinear: Resize params is a pair of values, not %lld.", size->lengthOf()); - width = size->e(0); - height = size->e(1); - if (block.getBArguments()->size()) { - preserveAspectRatio = B_ARG(0); - if (block.getBArguments()->size() > 1) - antialias = B_ARG(1); + REQUIRE_TRUE(size->lengthOf() == 2, 0, "image_resize: Resize params is a pair of values, not %lld.", size->lengthOf()); + width = size->e(1); + height = size->e(0); + if (block.numB() == 2) { + antialias = B_ARG(1); } auto method = helpers::ImageResizeMethods::kResizeBilinear; if (block.numI() == 1) { method = (helpers::ImageResizeMethods)INT_ARG(0); } + REQUIRE_TRUE(method == helpers::ImageResizeMethods::kResizeNearest || output->dataType() == DataType::FLOAT32, 0, "image_resize: Output data type should be FLOAT32 for this method %i", (int)method ); + REQUIRE_TRUE(method >= helpers::ImageResizeMethods::kResizeFirst && method <= helpers::ImageResizeMethods::kResizeLast, 0, "image_resize: Resize method should be between %i and %i, but %i was given.", (int)helpers::ImageResizeMethods::kResizeFirst, (int)helpers::ImageResizeMethods::kResizeLast, (int)method); + auto inRank = image->rankOf(); + REQUIRE_TRUE(inRank >=3 && inRank <=4, 0, "image_resize: Input rank should be 4 or 3, but %i given.", image->rankOf()); + auto source = inRank == 4?image->reshape(image->ordering(), {image->sizeAt(0), image->sizeAt(1), image->sizeAt(2), image->sizeAt(3)}):image->reshape(image->ordering(), {1, image->sizeAt(0), image->sizeAt(1), image->sizeAt(2)}); + auto target = inRank == 4?output->reshape(output->ordering(), {output->sizeAt(0), output->sizeAt(1), output->sizeAt(2), output->sizeAt(3)}, false) : output->reshape(output->ordering(), {1, output->sizeAt(0), output->sizeAt(1), output->sizeAt(2)}, false); - return helpers::resizeFunctor(block.launchContext(), image, width, height, method, preserveAspectRatio, antialias, output); + return helpers::resizeFunctor(block.launchContext(), image, width, height, method, antialias, output); } DECLARE_SHAPE_FN(image_resize) { - auto shapeList = SHAPELIST(); auto in = inputShape->at(0); Nd4jLong* outputShape; + auto method = helpers::ImageResizeMethods::kResizeBilinear; + if (block.numI() == 1) { + method = (helpers::ImageResizeMethods)INT_ARG(0); + } int width; int height; + double ratio = shape::sizeAt(in, 1) / (0.0 + shape::sizeAt(in, 2)); auto newImageSize = INPUT_VARIABLE(1); REQUIRE_TRUE(newImageSize->lengthOf() == 2, 0, "resize_bilinear: Resize params is a pair of values, not %i.", newImageSize->lengthOf()); REQUIRE_TRUE(block.numI() <= 1, 0, "resize_bilinear: Resize params already given by the second param. 
Int params are expensive."); - width = newImageSize->e(0); - height = newImageSize->e(1); - - ALLOCATE(outputShape, block.getWorkspace(), shape::shapeInfoLength(4), Nd4jLong); - outputShape[0] = 4; - outputShape[1] = in[1]; - outputShape[2] = width; - outputShape[3] = height; - outputShape[4] = in[4]; - ShapeUtils::updateStridesAndType(outputShape, in, shape::order(in)); + width = newImageSize->e(1); + height = newImageSize->e(0); + if (block.numB() > 0) { + if (B_ARG(0)) { + width = math::nd4j_ceil(height / ratio); + } + } + auto dtype = DataType::FLOAT32; + if (method == helpers::ImageResizeMethods::kResizeNearest) + dtype = ArrayOptions::dataType(in); + auto shape = ConstantShapeHelper::getInstance()->createShapeInfo(dtype, 'c', shape::rank(in) == 4?std::vector{in[1], height, width, in[4]}:std::vector{ height, width, in[4]}); - shapeList->push_back(CONSTANT(outputShape)); - return shapeList; + return SHAPELIST(shape); } DECLARE_TYPES(image_resize) { getOpDescriptor() - ->setAllowedInputTypes(0, {ALL_FLOATS}) + ->setAllowedInputTypes(0, {ALL_INTS, ALL_FLOATS}) ->setAllowedInputTypes(1, {ALL_INTS}) - ->setAllowedOutputTypes({ALL_FLOATS}); + ->setAllowedOutputTypes({ALL_FLOATS, ALL_INTS}); } } diff --git a/libnd4j/include/ops/declarable/generic/images/resize_images.cpp b/libnd4j/include/ops/declarable/generic/images/resize_images.cpp new file mode 100644 index 000000000..c3f9ae8f1 --- /dev/null +++ b/libnd4j/include/ops/declarable/generic/images/resize_images.cpp @@ -0,0 +1,135 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author sgazeos@gmail.com +// + +#include +#if NOT_EXCLUDED(OP_resize_images) + +#include +#include + +namespace sd { + namespace ops { + CUSTOM_OP_IMPL(resize_images, 1, 1, false, 0, 0) { + + auto image = INPUT_VARIABLE(0); + + auto output = OUTPUT_VARIABLE(0); + int width = output->sizeAt(2); + int height = output->sizeAt(1); + int method = helpers::ImageResizeMethods::kResizeBilinear; + if (block.width() > 1) { + auto size = INPUT_VARIABLE(1); + REQUIRE_TRUE(size->lengthOf() == 2, 0, "resize_images: Resize params is a pair of values, not %lld.", size->lengthOf()); +// width = size->e(1); +// height = size->e(0); + if (block.width() > 2) { + auto methodT = INPUT_VARIABLE(2); + + REQUIRE_TRUE(methodT->isZ() && methodT->isScalar(), 0, "resize_images: Method tensor should be integer scalar, but rank of %i tensor given.", methodT->rankOf()); + method = methodT->e(0); + } + else if (block.numI() == 1) { + method = I_ARG(0); + } + } + else { + REQUIRE_TRUE(block.numI() > 1 && block.numI() < 4, 0, "resize_images: Method and size should be given properly."); + if(block.numI() == 3) { // full stack of args +// height = I_ARG(0); +// width = I_ARG(1); + method = I_ARG(2); + } + else if (block.numI() == 2) { +// height = I_ARG(0); +// width = I_ARG(1); + } + } + bool preserveAspectRatio = false; // - default value + bool alignCorners = false; + if (block.numB()) { + alignCorners = B_ARG(0); + if (block.numB() > 1) + preserveAspectRatio = B_ARG(1); + } + REQUIRE_TRUE(method >= helpers::ImageResizeMethods::kResizeFirst && method <= helpers::ImageResizeMethods::kResizeOldLast, 0, "resize_images: Resize method should be between %i and %i, but %i was given.", (int)helpers::ImageResizeMethods::kResizeFirst, (int)helpers::ImageResizeMethods::kResizeOldLast, (int)method); + REQUIRE_TRUE(method == helpers::ImageResizeMethods::kResizeNearest || output->dataType() == DataType::FLOAT32, 0, "image_resize: Output data type should be FLOAT32 for this method %i", (int)method ); + + auto inRank = image->rankOf(); + REQUIRE_TRUE(inRank >=3 && inRank <=4, 0, "image_resize: Input rank should be 4 or 3, but %i given.", inRank); + + auto source = inRank == 4?image->reshape(image->ordering(), {image->sizeAt(0), image->sizeAt(1), image->sizeAt(2), image->sizeAt(3)}):image->reshape(image->ordering(), {1, image->sizeAt(0), image->sizeAt(1), image->sizeAt(2)}); + auto target = inRank == 4?output->reshape(output->ordering(), {output->sizeAt(0), output->sizeAt(1), output->sizeAt(2), output->sizeAt(3)}, false) : output->reshape(output->ordering(), {1, output->sizeAt(0), output->sizeAt(1), output->sizeAt(2)}, false); + + return helpers::resizeImagesFunctor(block.launchContext(), &source, width, height, (helpers::ImageResizeMethods)method, alignCorners, &target); + } + + DECLARE_SHAPE_FN(resize_images) { + auto shapeList = SHAPELIST(); + auto in = inputShape->at(0); + + Nd4jLong* outputShape; + + int width; + int height; + if (block.width() > 1) { + auto size = INPUT_VARIABLE(1); + REQUIRE_TRUE(size->lengthOf() == 2, 0, "resize_images: Resize params is a pair of values, not %lld.", size->lengthOf()); + width = size->e(1); + height = size->e(0); + } + else { + REQUIRE_TRUE(block.numI() > 1 && block.numI() < 4, 0, "resize_images: Method and size should be given properly."); + if(block.numI() == 3) { // full stack of args + height = I_ARG(0); + width = I_ARG(1); + } + else if (block.numI() == 2) { + 
height = I_ARG(0); + width = I_ARG(1); + } + } + + double ratio = shape::sizeAt(in, 1) / (0.0 + shape::sizeAt(in, 2)); + if (block.numB() > 1) { + if (B_ARG(1)) { + width = math::nd4j_ceil(height / ratio); + } + } + + std::vector shape; + if (shape::rank(in) == 4) + shape = {in[1], height, width, in[4]}; + else if (shape::rank(in) == 3) + shape = {height, width, in[3]}; + + auto outShape = ConstantShapeHelper::getInstance()->createShapeInfo(DataType::FLOAT32, shape::order(in), shape); + return SHAPELIST(outShape); + } + DECLARE_TYPES(resize_images) { + getOpDescriptor() + ->setAllowedInputTypes(0, {ALL_FLOATS, ALL_INTS}) + ->setAllowedInputTypes(1, {ALL_INTS}) + ->setAllowedOutputTypes({DataType::FLOAT32}); + } + + } +} + +#endif \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/generic/linalg/matrix_band_part.cpp b/libnd4j/include/ops/declarable/generic/linalg/matrix_band_part.cpp index 08e059c37..51beff4c8 100644 --- a/libnd4j/include/ops/declarable/generic/linalg/matrix_band_part.cpp +++ b/libnd4j/include/ops/declarable/generic/linalg/matrix_band_part.cpp @@ -25,14 +25,27 @@ namespace sd { namespace ops { - CONFIGURABLE_OP_IMPL(matrix_band_part, 1, 1, true, 0, 2) { + CONFIGURABLE_OP_IMPL(matrix_band_part, 1, 1, true, 0, 0) { auto input = INPUT_VARIABLE(0); auto output = OUTPUT_VARIABLE(0); - Nd4jLong minLower = INT_ARG(0); - Nd4jLong maxUpper = INT_ARG(1); + Nd4jLong minLower(0LL); + Nd4jLong maxUpper(0LL); + if (block.width() == 1) { + REQUIRE_TRUE(block.numI() == 2, 0, "matrix_band_part: min and max band numbers should be given before."); + minLower = INT_ARG(0); + maxUpper = INT_ARG(1); + } + else { + REQUIRE_TRUE(block.width() == 3, 0, "matrix_band_part: min and max band numbers should be given as scalars before."); + auto minLowerT = INPUT_VARIABLE(1); + auto maxUpperT = INPUT_VARIABLE(2); + REQUIRE_TRUE(minLowerT->isScalar() && maxUpperT->isScalar(), 0, "matrix_band_part: min and max should be scalars, but %i and %i ranks given", minLowerT->rankOf(), maxUpperT->rankOf()); + minLower = minLowerT->e(0); + maxUpper = maxUpperT->e(0); + } REQUIRE_TRUE(input->rankOf() >= 2, 0, "matrix_band_part: Input rank should be 2 or greater."); Nd4jLong N = input->sizeAt(-2); Nd4jLong M = input->sizeAt(-1); @@ -49,9 +62,10 @@ namespace sd { DECLARE_TYPES(matrix_band_part) { getOpDescriptor() - ->setAllowedInputTypes({ALL_INTS, ALL_FLOATS}) - ->setAllowedInputTypes({ALL_INTS, ALL_FLOATS}) - ->setSameMode(true); + ->setAllowedInputTypes(0, {ALL_INTS, ALL_FLOATS}) + ->setAllowedInputTypes(1, {ALL_INTS}) + ->setAllowedInputTypes(2, {ALL_INTS}) + ->setAllowedInputTypes({ALL_INTS, ALL_FLOATS}); } } diff --git a/libnd4j/include/ops/declarable/headers/images.h b/libnd4j/include/ops/declarable/headers/images.h index 41974901a..aa2114540 100644 --- a/libnd4j/include/ops/declarable/headers/images.h +++ b/libnd4j/include/ops/declarable/headers/images.h @@ -85,6 +85,7 @@ namespace ops { */ #if NOT_EXCLUDED(OP_rgb_to_yuv) DECLARE_CONFIGURABLE_OP(yuv_to_rgb, 1, 1, true, 0, 0); +#endif /** * Rgb To Yiq @@ -108,8 +109,156 @@ namespace ops { DECLARE_CONFIGURABLE_OP(yiq_to_rgb, 1, 1, true, 0, 0); #endif -} -} +/** + * resize_images - resize image with given size and method + * there are 4 methods allowed: RESIZE_BILINEAR(0), RESIZE_NEIGHBOR(1), RESIZE_AREA(2) and RESIZE_BICUBIC(3) + * inputs: + * 0 - 4D tensor with shape {batch, height, width, channels} + * 1 - 1D integer tensor with {new_height, new_width} (optional) + * 2 - 0D integer tensor with method (0 to 3) (optional) + * + * int args: + 
* 0 - new_height + * 1 - new_width + * 2 - method + * + * bool args: + * 0 - align corners (default false) - optional + * 1 - preserve_aspect_ratio (default false) - optional + * + * CAUTION: size and method can be passed either as tensors or as int args, but only one of the two ways at a time + * + * output: + * 0 - 4D float32 tensor with shape {batch, new_height, new_width, channels} + * + */ +#if NOT_EXCLUDED(OP_resize_images) + DECLARE_CUSTOM_OP(resize_images, 1,1,false, 0, 0); +#endif -#endif + /** + * This op makes bilinear or nearest neighbor interpolated resize for given tensor + * + * input array: + * 0 - 4D-Tensor with shape (batch, sizeX, sizeY, channels) numeric type + * 1 - 2D-Tensor with shape (num_boxes, 4) float type + * 2 - 1D-Tensor with shape (num_boxes) int type + * 3 - 1D-Tensor with 2 values (newWidth, newHeight) (optional) int type + * + * float arguments (optional) + * 0 - extrapolation_value (optional) default 0.f + * + * int arguments: (optional) + * 0 - mode (default 0 - bilinear interpolation) + * + * output array: + * the 4D-Tensor with the given images resized to crop_size - float type + */ + #if NOT_EXCLUDED(OP_crop_and_resize) + DECLARE_CUSTOM_OP(crop_and_resize, 4, 1, false, -1, -1); + #endif + + /** + * This op makes bilinear interpolated resize for given tensor + * + * input array: + * 0 - 4D-Tensor with shape (batch, sizeX, sizeY, channels) + * 1 - 1D-Tensor with 2 values (newWidth, newHeight) (optional) + * + * int arguments: (optional) + * 0 - new width + * 1 - new height + * + * output array: + * the 4D-Tensor with resized image (shape is {batch, newWidth, newHeight, channels}) + * + * CAUTION: either size tensor or a pair of int params should be provided. + */ + + #if NOT_EXCLUDED(OP_resize_bilinear) + DECLARE_CUSTOM_OP(resize_bilinear, 1, 1, false, 0, -2); + #endif + + /** + * This op makes nearest neighbor interpolated resize for given tensor + * + * input array: + * 0 - 4D-Tensor with shape (batch, sizeX, sizeY, channels) + * 1 - 1D-Tensor with 2 values (newWidth, newHeight) (optional) + * + * int arguments: (optional) + * 0 - new width + * 1 - new height + * + * output array: + * the 4D-Tensor with resized image (shape is {batch, newWidth, newHeight, channels}) + * + * CAUTION: either size tensor or a pair of int params should be provided. + */ + + #if NOT_EXCLUDED(OP_resize_nearest_neighbor) + DECLARE_CUSTOM_OP(resize_nearest_neighbor, 1, 1, false, 0, -2); + #endif + + /** + * This op makes bicubic interpolated resize for given tensor + * + * input array: + * 0 - 4D-Tensor with shape (batch, sizeX, sizeY, channels) + * 1 - 1D-Tensor with 2 values (newWidth, newHeight) + * + * output array: + * the 4D-Tensor with resized image (shape is {batch, newWidth, newHeight, channels}) + * + */ + #if NOT_EXCLUDED(OP_resize_bicubic) + DECLARE_CUSTOM_OP(resize_bicubic, 1, 1, false, 0, -2); + #endif + + /** + * This op makes area interpolated resize (as OpenCV INTER_AREA algorithm) for given tensor + * + * input array: + * 0 - images - 4D-Tensor with shape (batch, sizeX, sizeY, channels) + * 1 - size - 1D-Tensor with 2 values (newWidth, newHeight) (if missing, a pair of integer args should be provided). + * + * int args: - provided only when size tensor is missing + * 0 - new height + * 1 - new width + * boolean args: + * 0 - align_corners - optional (default is false) + * + * output array: + * the 4D-Tensor with resized image (shape is {batch, newWidth, newHeight, channels}) + * + */ + #if NOT_EXCLUDED(OP_resize_area) + DECLARE_CUSTOM_OP(resize_area, 1, 1, false, 0, -2); + #endif + + /** + * This op makes interpolated resize for given tensor with given algorithm. + * Supported algorithms are bilinear, bicubic, nearest_neighbor, lanczos5, gaussian, area and mitchellcubic. + * + * input array: + * 0 - 4D-Tensor with shape (batch, sizeX, sizeY, channels) + * 1 - 1D-Tensor with 2 values (newWidth, newHeight) + * + * optional int args: + * 0 - algorithm - bilinear by default + * optional bool args: + * 0 - preserve_aspect_ratio - default False + * 1 - antialias - default False + * + * output array: + * the 4D-Tensor with the image resized by the given algorithm (shape is {batch, newWidth, newHeight, channels}) + * + */ + + #if NOT_EXCLUDED(OP_image_resize) + DECLARE_CUSTOM_OP(image_resize, 2, 1, false, 0, 0); + #endif + +} +} #endif diff --git a/libnd4j/include/ops/declarable/headers/parity_ops.h b/libnd4j/include/ops/declarable/headers/parity_ops.h index 74221133c..27c012214 100644 --- a/libnd4j/include/ops/declarable/headers/parity_ops.h +++ b/libnd4j/include/ops/declarable/headers/parity_ops.h @@ -1771,130 +1771,6 @@ namespace sd { DECLARE_CUSTOM_OP(reduce_logsumexp, 1, 1, false, 0, 0); #endif - /** - * This op make bilinear or nearest neighbor interpolated resize for given tensor - * - * input array: - * 0 - 4D-Tensor with shape (batch, sizeX, sizeY, channels) numeric type - * 1 - 2D-Tensor with shape (num_boxes, 4) float type - * 2 - 1D-Tensor with shape (num_boxes) int type - * 3 - 1D-Tensor with 2 values (newWidth, newHeight) (optional) int type - * - * float arguments (optional) - * 0 - exprapolation_value (optional) default 0.f - * - * int arguments: (optional) - * 0 - mode (default 0 - bilinear interpolation) - * - * output array: - * the 4D-Tensor with resized to crop_size images given - float type - */ - #if NOT_EXCLUDED(OP_crop_and_resize) - DECLARE_CUSTOM_OP(crop_and_resize, 4, 1, false, -1, -1); - #endif - - /** - * This op make bilinear interpolated resize for given tensor - * - * input array: - * 0 - 4D-Tensor with shape (batch, sizeX, sizeY, channels) - * 1 - 1D-Tensor with 2 values (newWidth, newHeight) (optional) - * - * int arguments: (optional) - * 0 - new width - * 1 - new height - * - * output array: - * the 4D-Tensor with calculated backproped dots - * - * CAUTION: either size tensor or a pair of int params should be provided.
- */ - - #if NOT_EXCLUDED(OP_resize_nearest_neighbor) - DECLARE_CUSTOM_OP(resize_nearest_neighbor, 1, 1, false, 0, -2); - #endif - - /** - * This op make bicubic interpolated resize for given tensor - * - * input array: - * 0 - 4D-Tensor with shape (batch, sizeX, sizeY, channels) - * 1 - 1D-Tensor with 2 values (newWidth, newHeight) - * - * output array: - * the 4D-Tensor with resized image (shape is {batch, newWidth, newHeight, channels}) - * - */ - #if NOT_EXCLUDED(OP_resize_bicubic) - DECLARE_CUSTOM_OP(resize_bicubic, 1, 1, false, 0, -2); - #endif - - /** - * This op make area interpolated resize (as OpenCV INTER_AREA algorithm) for given tensor - * - * input array: - * 0 - images - 4D-Tensor with shape (batch, sizeX, sizeY, channels) - * 1 - size - 1D-Tensor with 2 values (newWidth, newHeight) (if missing a pair of integer args should be provided). - * - * int args: - proveded only when size tensor is missing - * 0 - new height - * 1 - new width - * boolean args: - * 0 - align_corners - optional (default is false) - * - * output array: - * the 4D-Tensor with resized image (shape is {batch, newWidth, newHeight, channels}) - * - */ - #if NOT_EXCLUDED(OP_resize_area) - DECLARE_CUSTOM_OP(resize_area, 1, 1, false, 0, -2); - #endif - - /** - * This op make interpolated resize for given tensor with given algorithm. - * Supported algorithms are bilinear, bicubic, nearest_neighbor. - * Need to implement to full compatibility with TF: lanczos5, gaussian, area and mitchellcubic - * - * input array: - * 0 - 4D-Tensor with shape (batch, sizeX, sizeY, channels) - * 1 - 1D-Tensor with 2 values (newWidth, newHeight) - * - * optional int args: - * 0 - algorithm - bilinear by default - * optional bool args: - * 0 - preserve_aspect_ratio - default False - * 1 - antialias - default False - * - * output array: - * the 4D-Tensor with resized by given algorithm image (shape is {batch, newWidth, newHeight, channels}) - * - */ - - #if NOT_EXCLUDED(OP_image_resize) - DECLARE_CUSTOM_OP(image_resize, 2, 1, false, 0, 0); - #endif - /** * Copy a tensor setting everything outside a central band in each innermost matrix * diff --git a/libnd4j/include/ops/declarable/helpers/cpu/image_resize.cpp b/libnd4j/include/ops/declarable/helpers/cpu/image_resize.cpp index 68b2130ac..7206b03e5 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/image_resize.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/image_resize.cpp @@ -418,17 +418,17 @@ namespace helpers { // Allocate and initialize coefficients table using Bicubic // convolution algorithm. 
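// For reference: row i of this table caches the Keys cubic evaluated at the fractional offset x = i / kTableSize (near tap, coeffsTable[i * 2]) and at x + 1 (far tap, coeffsTable[i * 2 + 1]). A minimal sketch of how one side of the 4-tap bicubic stencil is then read back; the names t, i, wNear and wFar are illustrative assumptions, not identifiers from this file:
//   float t = sampleFloat - std::floor(sampleFloat);    // fractional offset in [0, 1)
//   int   i = static_cast<int>(t * kTableSize + 0.5f);  // nearest precomputed row
//   float wNear = coeffsTable[i * 2];                   // kernel value at distance t
//   float wFar  = coeffsTable[i * 2 + 1];               // kernel value at distance t + 1
// The mirrored pair is read the same way at offset 1 - t, so the cubic polynomial is never re-evaluated per output pixel; the table trades (kTableSize + 1) * 2 floats of memory for that.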
// https://en.wikipedia.org/wiki/Bicubic_interpolation - float* coeffs_table = new float[(kTableSize + 1) * 2]; + float* coeffsTable = new float[(kTableSize + 1) * 2]; auto func = PRAGMA_THREADS_FOR { for (auto i = start; i <= stop; ++i) { float x = i * 1.0 / kTableSize; - coeffs_table[i * 2] = ((a + 2) * x - (a + 3)) * x * x + 1; + coeffsTable[i * 2] = ((a + 2) * x - (a + 3)) * x * x + 1; x += 1.0; - coeffs_table[i * 2 + 1] = ((a * x - 5 * a) * x + 8 * a) * x - 4 * a; + coeffsTable[i * 2 + 1] = ((a * x - 5 * a) * x + 8 * a) * x - 4 * a; } }; samediff::Threads::parallel_for(func, 0, kTableSize); - return coeffs_table; + return coeffsTable; } const float* getCoeffsTable(const bool use_keys_cubic) { @@ -988,25 +988,392 @@ namespace helpers { return res; } - int resizeAreaFunctor(sd::LaunchContext * context, NDArray const* image, int const width, int const height, - bool const alignCorners, NDArray* output) { + int resizeAreaFunctor(sd::LaunchContext * context, NDArray const* image, int const width, int const height, bool const alignCorners, NDArray* output) { BUILD_SINGLE_SELECTOR(image->dataType(), return resizeAreaFunctor_, (context, image, width, height, alignCorners, output), NUMERIC_TYPES); } + /** + * resize as TF v.2.x implemented (with preserve aspect ratio and antialias flags routines + * */ + // An interface for integrated scale functors. + struct IKernelFunc { + virtual float operator()(float x) const = 0; + virtual float radius() const = 0; + }; + + struct LanczosKernelFunc : public IKernelFunc { + // Pass 1 for Lanczos1 kernel, 3 for Lanczos3 etc. + explicit LanczosKernelFunc(float const radius) : _radius(radius) {} + float operator()(float x) const { + float const kPI = 3.141592653589793f; + x = math::nd4j_abs(x); + if (x > _radius) return 0.f; + // Need to special case the limit case of sin(x) / x when x is zero. + if (x <= 1.e-3f) { + return 1.f; + } + return _radius * std::sin(kPI * x) * std::sin(kPI * x / _radius) / (kPI * kPI * x * x); + } + float radius() const { return _radius; } + const float _radius; + }; + + struct GaussianKernelFunc : public IKernelFunc { + static constexpr float kRadiusMultiplier = 3.0f; + // https://en.wikipedia.org/wiki/Gaussian_function + // We use sigma = 0.5, as suggested on p. 4 of Ken Turkowski's "Filters + // for Common Resampling Tasks" for kernels with a support of 3 pixels: + // www.realitypixels.com/turk/computergraphics/ResamplingFilters.pdf + // This implies a radius of 1.5, + explicit GaussianKernelFunc(float radius = 1.5f) + : _radius(radius), _sigma(radius / kRadiusMultiplier) {} + float operator()(float x) const { + x = math::nd4j_abs(x); + if (x >= _radius) return 0.0f; + return std::exp(-x * x / (2.0 * _sigma * _sigma)); + } + float radius() const { return _radius; } + const float _radius; + const float _sigma; // Gaussian standard deviation + }; + + struct BoxKernelFunc : public IKernelFunc { + float operator()(float x) const { + x = math::nd4j_abs(x); + return x < 0.5f ? 1.f : x == 0.5f ? 0.5f : 0.f; + } + float radius() const { return 1.f; } + }; + + struct TriangleKernelFunc : public IKernelFunc { + // https://en.wikipedia.org/wiki/Triangle_function + float operator()(float x) const { + x = math::nd4j_abs(x); + return x < 1.f ? 1.f - x : 0.f; + } + float radius() const { return 1.f; } + }; + + struct KeysCubicKernelFunc : public IKernelFunc { + // http://ieeexplore.ieee.org/document/1163711/ + // R. G. Keys. Cubic convolution interpolation for digital image + // processing. 
IEEE Transactions on Acoustics, Speech, and Signal + // Processing, 29(6):1153–1160, 1981. + float operator()(float x) const { + x = math::nd4j_abs(x); + if (x >= 2.0f) { + return 0.0f; + } else if (x >= 1.0f) { + return ((-0.5f * x + 2.5f) * x - 4.0f) * x + 2.0f; + } else { + return ((1.5f * x - 2.5f) * x) * x + 1.0f; + } + } + float radius() const { return 2.f; } + }; + + struct MitchellCubicKernelFunc : public IKernelFunc { + // https://doi.org/10.1145/378456.378514 + // D. P. Mitchell and A. N. Netravali. Reconstruction filters in computer + // graphics. Computer Graphics (Proceedings of ACM SIGGRAPH 1988), + // 22(4):221–228, 1988. + float operator()(float x) const { + x = math::nd4j_abs(x); + if (x >= 2.f) { + return 0.f; + } else if (x >= 1.f) { + return (((-7.f / 18.f) * x + 2.f) * x - 10.f / 3.f) * x + 16.f / 9.f; + } else { + return (((7.f / 6.f) * x - 2.f) * x) * x + 8.f / 9.f; + } + } + float radius() const { return 2.f; } + }; + + // A pre-computed span of pixels along a single dimension. + // The output pixel will be the weighted sum of pixels starting from start. + struct Spans { + // The maximum span size of any output pixel. + int _spanSize; + // int32 tensor with shape {outputSize}. + NDArray _starts; + + // float32 tensor of size {outputSize, spanSize}. + // The output pixel at x is computed as: + // dot_product(input[starts[x]:starts[x]+span_size], weights[x]). + NDArray _weights; + }; + + static int + computeSpans(IKernelFunc* kernel, Nd4jLong const outSize, Nd4jLong const inSize, float const scale, float const translate, bool const antialias, Spans& spans) { + // When sampling, we need the inverse scale and translation, to map from an + // output to an input pixel. + float const invScale = 1.f / scale; + float const invTranslate = -invScale * translate; + // When downsampling the kernel should be scaled since we want to low pass + // filter and interpolate, but when upsampling it should not be since we only + // want to interpolate. + float const kernelScale = antialias ? math::nd4j_max(invScale, 1.f) : 1.f; + spans._spanSize = math::nd4j_min(2 * static_cast(std::ceil(kernel->radius() * kernelScale)) + 1, static_cast(inSize)); + spans._starts = NDArrayFactory::create('c', {outSize}); + spans._weights = NDArrayFactory::create('c', {outSize, spans._spanSize}); + + auto startsVec = spans._starts.bufferAsT(); + auto weightsVector = spans._weights.bufferAsT(); + spans._weights.nullify(); + + const float invKernelScale = 1.f / kernelScale; + int maxSpanSize = 0; + std::vector tempWeights; + + // return value if within bounds or bounds otherwise + auto boundsAmp = [](Nd4jLong const low, Nd4jLong const high, Nd4jLong const value) { + if (high < value) return high; + if (value < low) return low; + return value; + }; + + for (auto x = 0LL; x < outSize; ++x) { + const float columnFloat = x + 0.5f; + const float sampleFloat = columnFloat * invScale + invTranslate; + + // Don't sample when the sampling location is outside the source image. + if (sampleFloat < 0 || sampleFloat > inSize) { + // Add an empty span. 
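+ // (When the sample falls outside the source image, the weight row simply stays zeroed and the start index is parked at 0 just below.)
+ // A worked example of the span arithmetic that follows, with assumed values rather than numbers from any test: for the triangle kernel (radius 1), a 2x downscale and antialias on, invScale = 2.0 and kernelScale = 2.0, so spanSize = 2 * ceil(1 * 2) + 1 = 5.
+ // For output column x = 3: sampleFloat = (3 + 0.5) * 2.0 = 7.0, spanStart = ceil(7.0 - 2.0 - 0.5) = 5, spanEnd = floor(7.0 + 2.0 - 0.5) + 1 = 9,
+ // so source pixels 5..8 contribute, with raw triangle weights 0.25, 0.75, 0.75, 0.25 normalized below to 0.125, 0.375, 0.375, 0.125.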
+ startsVec[x] = 0; + continue; + } + Nd4jLong spanStart = math::nd4j_ceil(sampleFloat - kernel->radius() * kernelScale - 0.5f); + Nd4jLong spanEnd = math::nd4j_floor(sampleFloat + kernel->radius() * kernelScale - 0.5f); + spanStart = boundsAmp(0LL, inSize - 1, spanStart); + spanEnd = boundsAmp(0LL, inSize - 1, spanEnd) + 1; + int const spanSize = spanEnd - spanStart; + if (spanSize > spans._spanSize) { + return Status::CODE(ND4J_STATUS_BAD_INPUT, "Span is too large: "); // + spanSize + " vs " + spans._spanSize);//, spanSize, spans._spanSize)); + } + float totalWeightSum = 0.f; + tempWeights.clear(); + for (int source = spanStart; source < spanEnd; ++source) { + float kernelPos = static_cast(source) + 0.5f - sampleFloat; + float weight = (*kernel)(kernelPos * invKernelScale); + totalWeightSum += weight; + tempWeights.push_back(weight); + } + maxSpanSize = std::max(maxSpanSize, spanSize); + if (math::nd4j_abs(totalWeightSum) >= 1000.f * DataTypeUtils::min()) { // + auto totalWeightSumInverted = 1.0f / totalWeightSum; + auto outIndex = spans._spanSize * x; + for (auto weight : tempWeights) { + weightsVector[outIndex] = weight * totalWeightSumInverted; + ++outIndex; + } + } + startsVec[x] = spanStart; + } + return Status::OK(); + } + + template + static void gatherRows(int const spanSize, int const* starts, Z const* weights, X const* imagePtr, Nd4jLong const inputHeight, Nd4jLong const inputWidth, Nd4jLong const outputHeight, + Nd4jLong const outputWidth, Nd4jLong const channels, Z* outputPtr) { + auto inRowSize = inputWidth * channels; + auto outRowSize = outputWidth * channels; + + auto addScaledVector = [](const X* inVector, int vectorLen, Z weight, Z* outVector) { + Z* outVecEnd = outVector + vectorLen; + for (; outVector != outVecEnd; ++outVector, ++inVector) { + *outVector += weight * static_cast(*inVector); + } + }; + + for (int y = 0; y < outputHeight; ++y) { + Z* outRowData = outputPtr + outRowSize * y; + memset(outRowData, '\0', outRowSize * sizeof(Z));// std::fill(outRowData, outRowData + outRowSize, 0.f); + int inRow = starts[y]; + auto inRowData = imagePtr + inRowSize * inRow; + auto weightsStart = weights + y * spanSize; + auto realSpanSize = math::nd4j_min(starts[y] + spanSize, static_cast(inputHeight)) - starts[y]; + auto weightsEnd = weightsStart + realSpanSize; + for (auto weightPtr = weightsStart; weightPtr != weightsEnd; ++weightPtr) { + addScaledVector(inRowData, inRowSize, *weightPtr, outRowData); + inRowData += inRowSize; + } + } + } + + template + static void gatherColumns(int const spanSize, int const* starts, Z const* weights, Z const* imagesPtr, Nd4jLong const inputHeight, Nd4jLong const inputWidth, Nd4jLong const outputHeight, Nd4jLong const outputWidth, Nd4jLong channels, Z* outputPtr) { + auto inRowSize = inputWidth * channels; + auto outRowSize = outputWidth * channels; + + for (auto y = 0LL; y < outputHeight; ++y) { + auto inputRowStart = imagesPtr + inRowSize * y; + auto outPixels = outputPtr + outRowSize * y; + for (auto x = 0LL; x < outputWidth; ++x, outPixels += channels) { + auto inPixels = inputRowStart + starts[x] * channels; + auto weightsStart = weights + x * spanSize; + auto realSpanSize = math::nd4j_min(starts[x] + spanSize, static_cast(inputWidth)) - starts[x]; + auto weightsEnd = weightsStart + realSpanSize; + for (int c = 0; c < channels; ++c) { + outPixels[c] = 0.0f; + } + for (auto weightPtr = weightsStart; weightPtr != weightsEnd; ++weightPtr) { + Z w = *weightPtr; + for (int c = 0; c < channels; ++c) { + outPixels[c] += w * 
static_cast(inPixels[c]); + } + inPixels += channels; + } + } + } + } + + template + static void gatherSpans(int const rowSpanSize, NDArray const& rowStarts, NDArray const& rowWeights, int const colSpanSize, NDArray const& columnStarts, NDArray const& columnWeights, NDArray const* images, NDArray& intermediate, NDArray* output) { + auto batchSize = images->sizeAt(0); + auto inputHeight = images->sizeAt(1); + auto inputWidth = images->sizeAt(2); + auto channels = images->sizeAt(3); + + auto outputHeight = output->sizeAt(1); + auto outputWidth = output->sizeAt(2); + + auto inputPixPerBatch = inputWidth * inputHeight * channels; + auto intermediatePixPerBatch = inputWidth * outputHeight * channels; + auto outputPixPerBatch = outputWidth * outputHeight * channels; + Z* intermediatePtr = intermediate.bufferAsT(); + + const X* imagePtr = images->bufferAsT(); + Z* outPtr = output->bufferAsT(); + for (int b = 0; b < batchSize; ++b, imagePtr += inputPixPerBatch, + intermediatePtr += intermediatePixPerBatch, + outPtr += outputPixPerBatch) { + gatherRows(rowSpanSize, rowStarts.bufferAsT(), rowWeights.bufferAsT(), + imagePtr, inputHeight, inputWidth, outputHeight, + inputWidth, channels, intermediatePtr); + gatherColumns(colSpanSize, columnStarts.bufferAsT(), columnWeights.bufferAsT(), + intermediatePtr, outputHeight, inputWidth, outputHeight, outputWidth, channels, outPtr); + } + } + + template + static int resizeKernel(IKernelFunc* transformationKernel, NDArray const* input, Nd4jLong outWidth, Nd4jLong outHeight, bool antialias, NDArray* output) { + Nd4jLong const batchSize = input->sizeAt(0); + Nd4jLong const inputHeight = input->sizeAt(1); + Nd4jLong const inputWidth = input->sizeAt(2); + Nd4jLong const channels = input->sizeAt(3); + + Z rowScale = Z(outHeight) / Z(inputHeight); + Z columnScale = Z(outWidth) / Z(inputWidth); + + // Return if the output is empty. 
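+ // A sketch of the flow below (the cost figures are the usual separable-resampling argument, not measurements from this patch): the 2-D resize is split into two 1-D passes. computeSpans() precomputes, per output row and per output column, which source indices contribute and with what normalized kernel weights; gatherSpans() then reduces rows into an intermediate of shape {batchSize, outHeight, inputWidth, channels} and reduces its columns into the final output. With k taps per span this costs about (outHeight * inputWidth + outHeight * outWidth) * k multiply-adds per image, versus outHeight * outWidth * k * k for a direct 2-D filter.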
+ if (output->lengthOf() == 0) return Status::OK(); + + Spans colSpans; + + auto res = computeSpans(transformationKernel, outWidth, inputWidth, columnScale, 0.f, antialias, colSpans); + if (res != Status::OK()) return res; + Spans rowSpans; + res = computeSpans(transformationKernel, outHeight, inputHeight, rowScale, 0.f, antialias, rowSpans); + + NDArray intermediate = NDArrayFactory::create('c', {batchSize, outHeight, inputWidth, channels}); + + //const functor::Spans& const_row_spans = row_spans; + //typename TTypes::ConstTensor row_starts( + //const_row_spans.starts.tensor()); + auto& rowStarts = rowSpans._starts; // shape {outHeight} + auto& rowWeights = rowSpans._weights; // shape {outHeight, spanSize} + auto& columnStarts = colSpans._starts; // shape {outWidth} + auto& columnWeights = colSpans._weights; // shape {outWidth, spanSize} + + gatherSpans(rowSpans._spanSize, rowStarts, rowWeights, colSpans._spanSize, columnStarts, columnWeights, input, intermediate, output); + return res; + } + + static int resizeBilinear(sd::LaunchContext * context, NDArray const* image, int const width, int const height, bool const antialias, NDArray* output) { + auto kernel = std::unique_ptr(new TriangleKernelFunc()); + BUILD_DOUBLE_SELECTOR(image->dataType(), output->dataType(), return resizeKernel, + (kernel.get(), image, (Nd4jLong) width, (Nd4jLong) height, antialias, output), + NUMERIC_TYPES, FLOAT_TYPES_1); + return Status::CODE(ND4J_STATUS_VALIDATION, "helpers::resizeBilinear: Unknown error occurred."); + } + + static int resizeBicubic(sd::LaunchContext * context, NDArray const* image, int const width, int const height, bool const antialias, NDArray* output) { + if (antialias) { + auto kernel = std::unique_ptr(new KeysCubicKernelFunc()); + BUILD_DOUBLE_SELECTOR(image->dataType(), output->dataType(), return resizeKernel, + (kernel.get(), image, (Nd4jLong) width, (Nd4jLong) height, antialias, output), + NUMERIC_TYPES, FLOAT_TYPES_1); + } + else { + return resizeBicubicFunctorA(context, image, width, height, false, true, output); + } + return Status::CODE(ND4J_STATUS_VALIDATION, "helpers::resizeBicubic: Unknown error occurred."); + } + + static int resizeNeighbor(sd::LaunchContext * context, NDArray const* image, int const width, int const height, bool const antialias, NDArray* output) { + return resizeNeighborFunctor(context, image, width, height, false, true, output); + } + + static int resizeArea(sd::LaunchContext * context, NDArray const* image, int const width, int const height, bool const antialias, NDArray* output) { + return resizeAreaFunctor(context, image, width, height, false, output); + } + + static int resizeLanczos3(sd::LaunchContext * context, NDArray const* image, int const width, int const height, bool const antialias, NDArray* output) { + auto kernel = std::unique_ptr(new LanczosKernelFunc(3.f)); + BUILD_DOUBLE_SELECTOR(image->dataType(), output->dataType(), return resizeKernel, (kernel.get(), image, (Nd4jLong)width, (Nd4jLong)height, antialias, output), NUMERIC_TYPES, FLOAT_TYPES_1); + return Status::CODE(ND4J_STATUS_VALIDATION, "helpers::resizeLanczos3: Unknown error occurred."); + } + + static int resizeLanczos5(sd::LaunchContext * context, NDArray const* image, int const width, int const height, bool const antialias, NDArray* output) { + auto kernel = std::unique_ptr(new LanczosKernelFunc(5.f)); + BUILD_DOUBLE_SELECTOR(image->dataType(), output->dataType(), return resizeKernel, (kernel.get(), image, (Nd4jLong)width, (Nd4jLong)height, antialias, output), NUMERIC_TYPES, FLOAT_TYPES_1); + return Status::CODE(ND4J_STATUS_VALIDATION, "helpers::resizeLanczos5: Unknown error occurred."); + } + + static int resizeGaussian(sd::LaunchContext * context, NDArray const* image, int const width, int const height, bool const antialias, NDArray* output) { + auto kernel = std::unique_ptr(new GaussianKernelFunc()); + BUILD_DOUBLE_SELECTOR(image->dataType(), output->dataType(), return resizeKernel, (kernel.get(), image, (Nd4jLong)width, (Nd4jLong)height, antialias, output), NUMERIC_TYPES, FLOAT_TYPES_1); + return Status::CODE(ND4J_STATUS_VALIDATION, "helpers::resizeGaussian: Unknown error occurred."); + } + + static int resizeMitchellcubic(sd::LaunchContext * context, NDArray const* image, int const width, int const height, bool const antialias, NDArray* output) { + auto kernel = std::unique_ptr(new MitchellCubicKernelFunc()); + BUILD_DOUBLE_SELECTOR(image->dataType(), output->dataType(), return resizeKernel, (kernel.get(), image, (Nd4jLong)width, (Nd4jLong)height, antialias, output), NUMERIC_TYPES, FLOAT_TYPES_1); + return Status::CODE(ND4J_STATUS_VALIDATION, "helpers::resizeMitchellcubic: Unknown error occurred."); + } + +// ------------------------------------------------------------------------------------------------------------------ // + int resizeImagesFunctor(sd::LaunchContext * context, NDArray const* image, int const width, int const height, + ImageResizeMethods method, bool alignCorners, NDArray* output) { + switch (method) { + case kResizeBilinear: + return resizeBilinearFunctor(context, image, width, height, alignCorners, false, output); + case kResizeNearest: + return resizeNeighborFunctor(context, image, width, height, alignCorners, false, output); + case kResizeBicubic: + return resizeBicubicFunctor(context, image, width, height, alignCorners, false, output); + case kResizeArea: + return resizeAreaFunctor(context, image, width, height, alignCorners, output); + } + nd4j_printf("helper::resizeImagesFunctor: Wrong resize method %i\n", (int)method); + return Status::CODE(ND4J_STATUS_BAD_INPUT, "helper::resizeImagesFunctor: Wrong resize method"); + } // ------------------------------------------------------------------------------------------------------------------ // int resizeFunctor(sd::LaunchContext * context, NDArray const* image, int const width, int const height, - ImageResizeMethods method, bool preserveAspectRatio, bool antialias, NDArray* output) { + ImageResizeMethods method, bool antialias, NDArray* output) { switch (method) { - case kResizeBilinear: return resizeBilinearFunctor(context, image, width, height, false, false, output); break; - case kResizeNearest: return resizeNeighborFunctor(context, image, width, height, false, false, output); break; - case kResizeBicubic: return resizeBicubicFunctor(context, image, width, height, preserveAspectRatio, antialias, output); break; - case kResizeArea: return resizeAreaFunctor(context, image, width, height, preserveAspectRatio, output); - case kResizeLanczos5: - case kResizeGaussian: - case kResizeMitchelcubic: - throw std::runtime_error("helper::resizeFunctor: Non implemented yet."); + case kResizeBilinear: return resizeBilinear(context, image, width, height, antialias, output); + case kResizeNearest: return resizeNeighbor(context, image, width, height, antialias, output); + case kResizeBicubic: return resizeBicubic(context, image, width, height, antialias, output); + case kResizeArea: return resizeArea(context, image, width, height, antialias, output); + case kResizeLanczos3: return resizeLanczos3(context,
image, width, height, antialias, output); + case kResizeLanczos5: return resizeLanczos5(context, image, width, height, antialias, output); + case kResizeGaussian: return resizeGaussian(context, image, width, height, antialias, output); + case kResizeMitchellcubic: return resizeMitchellcubic(context, image, width, height, antialias, output); } - return ND4J_STATUS_OK; + nd4j_printf("helper::resizeFunctor: Wrong resize method %i\n", (int)method); + return Status::CODE(ND4J_STATUS_BAD_INPUT, "helper::resizeFunctor: Wrong resize method"); } diff --git a/libnd4j/include/ops/declarable/helpers/cuda/image_resize.cu b/libnd4j/include/ops/declarable/helpers/cuda/image_resize.cu index 180c8ad0e..3365d5d62 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/image_resize.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/image_resize.cu @@ -35,6 +35,7 @@ limitations under the License. #include #include +#include namespace sd { namespace ops { @@ -1203,20 +1204,22 @@ namespace helpers { BUILD_SINGLE_TEMPLATE(template int resizeBicubicFunctorA_, (sd::LaunchContext * context, NDArray const* image, int width, int height, bool const alignCorners, bool const halfPixelCenters, NDArray* output), NUMERIC_TYPES); -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - int resizeFunctor(sd::LaunchContext * context, NDArray const* image, int width, int height, - ImageResizeMethods method, bool preserveAspectRatio, bool antialias, NDArray* output) { + +// ------------------------------------------------------------------------------------------------------------------ // + int resizeImagesFunctor(sd::LaunchContext * context, NDArray const* image, int const width, int const height, + ImageResizeMethods method, bool alignCorners, NDArray* output) { switch (method) { - case kResizeBilinear: return resizeBilinearFunctor(context, image, width, height, false, false, output); break; - case kResizeNearest: return resizeNeighborFunctor(context, image, width, height, false, false, output); break; - case kResizeBicubic: return resizeBicubicFunctor(context, image, width, height, preserveAspectRatio, antialias, output); break; - case kResizeLanczos5: - case kResizeGaussian: + case kResizeBilinear: + return resizeBilinearFunctor(context, image, width, height, alignCorners, false, output); + case kResizeNearest: + return resizeNeighborFunctor(context, image, width, height, alignCorners, false, output); + case kResizeBicubic: + return resizeBicubicFunctor(context, image, width, height, alignCorners, false, output); case kResizeArea: - case kResizeMitchelcubic: - throw std::runtime_error("helper::resizeFunctor: Non implemented yet."); + return resizeAreaFunctor(context, image, width, height, alignCorners, output); + default: + throw std::runtime_error("helper::resizeImagesFunctor: Wrong resize method."); } - return ND4J_STATUS_OK; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/libnd4j/include/ops/declarable/helpers/cuda/image_resize_v2.cu b/libnd4j/include/ops/declarable/helpers/cuda/image_resize_v2.cu new file mode 100644 index 000000000..b727822c9 --- /dev/null +++ b/libnd4j/include/ops/declarable/helpers/cuda/image_resize_v2.cu @@ -0,0 +1,497 @@ +#include +#include +#include +#include + +namespace sd { + namespace ops { + namespace helpers { +// -------------------------------------------------------------------------------------------------------------- // +// 
resize v2 implementation //
+// -------------------------------------------------------------------------------------------------------------- //
+// A functional interface for scale kernels.
+//struct IKernelFunc {
+// _CUDA_HD virtual float operator()(float x) const = 0;
+// _CUDA_HD virtual float radius() const = 0;
+// _CUDA_HD virtual size_t size() const = 0;
+//};
+
+struct LanczosKernelFunc /*: public IKernelFunc*/ {
+ // Pass 1 for Lanczos1 kernel, 3 for Lanczos3 etc.
+ explicit LanczosKernelFunc(float const radius) : _radius(radius) {}
+ _CUDA_HD float operator()(float x) const {
+   float const kPI = 3.141592653589793f;
+   x = math::nd4j_abs(x);
+   if (x > _radius) return 0.f;
+   // Need to special case the limit case of sin(x) / x when x is zero.
+   if (x <= 1.e-3f) {
+     return 1.f;
+   }
+   return _radius * std::sin(kPI * x) * std::sin(kPI * x / _radius) / (kPI * kPI * x * x);
+ }
+ _CUDA_HD float radius() const { return _radius; }
+ const float _radius;
+};
+
+struct GaussianKernelFunc /*: public IKernelFunc*/ {
+ static constexpr float kRadiusMultiplier = 3.0f;
+ // https://en.wikipedia.org/wiki/Gaussian_function
+ // We use sigma = 0.5, as suggested on p. 4 of Ken Turkowski's "Filters
+ // for Common Resampling Tasks" for kernels with a support of 3 pixels:
+ // www.realitypixels.com/turk/computergraphics/ResamplingFilters.pdf
+ // This implies a radius of 1.5.
+ explicit GaussianKernelFunc(float radius = 1.5f)
+         : _radius(radius), _sigma(radius / kRadiusMultiplier) {}
+ _CUDA_HD float operator()(float x) const {
+   x = math::nd4j_abs(x);
+   if (x >= _radius) return 0.0f;
+   return std::exp(-x * x / (2.0 * _sigma * _sigma));
+ }
+ _CUDA_HD float radius() const { return _radius; }
+ const float _radius;
+ const float _sigma; // Gaussian standard deviation
+};
+
+struct BoxKernelFunc /*: public IKernelFunc*/ {
+ _CUDA_HD float operator()(float x) const {
+   x = math::nd4j_abs(x);
+   return x < 0.5f ? 1.f : x == 0.5f ? 0.5f : 0.f;
+ }
+ _CUDA_HD float radius() const { return 1.f; }
+ _CUDA_HD size_t size() const { return sizeof(BoxKernelFunc); }
+};
+
+struct TriangleKernelFunc /*: public IKernelFunc*/ {
+ // https://en.wikipedia.org/wiki/Triangle_function
+ _CUDA_HD float operator()(float x) const {
+   x = math::nd4j_abs(x);
+   return x < 1.f ? 1.f - x : 0.f;
+ }
+ _CUDA_HD float radius() const { return 1.f; }
+};
+
+struct KeysCubicKernelFunc /*: public IKernelFunc*/ {
+ // http://ieeexplore.ieee.org/document/1163711/
+ // R. G. Keys. Cubic convolution interpolation for digital image
+ // processing. IEEE Transactions on Acoustics, Speech, and Signal
+ // Processing, 29(6):1153–1160, 1981.
+ _CUDA_HD float operator()(float x) const {
+   x = math::nd4j_abs(x);
+   if (x >= 2.0f) {
+     return 0.0f;
+   } else if (x >= 1.0f) {
+     return ((-0.5f * x + 2.5f) * x - 4.0f) * x + 2.0f;
+   } else {
+     return ((1.5f * x - 2.5f) * x) * x + 1.0f;
+   }
+ }
+ _CUDA_HD float radius() const { return 2.f; }
+};
+
+struct MitchellCubicKernelFunc/* : public IKernelFunc*/ {
+ // https://doi.org/10.1145/378456.378514
+ // D. P. Mitchell and A. N. Netravali. Reconstruction filters in computer
+ // graphics. Computer Graphics (Proceedings of ACM SIGGRAPH 1988),
+ // 22(4):221–228, 1988.
+ _CUDA_HD float operator()(float x) const {
+   x = math::nd4j_abs(x);
+   if (x >= 2.f) {
+     return 0.f;
+   } else if (x >= 1.f) {
+     return (((-7.f / 18.f) * x + 2.f) * x - 10.f / 3.f) * x + 16.f / 9.f;
+   } else {
+     return (((7.f / 6.f) * x - 2.f) * x) * x + 8.f / 9.f;
+   }
+ }
+ _CUDA_HD float radius() const { return 2.f; }
+};
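Every functor above follows the same contract: evaluate the kernel at signed distances from the back-projected sample position, then normalize the weights so they sum to one. A minimal standalone illustration of that contract for the triangle kernel (hypothetical sizes, not part of the patch and not called anywhere):

    // Upscale a 5-pixel row to 7 pixels; look at output column x = 2.
    // sample = 2.5 * 5/7 = 1.7857, so inputs {1, 2} contribute with
    // normalized weights {0.714286, 0.285714}.
    static void kernelWeightsExample() {
        TriangleKernelFunc kernel;
        const float invScale = 5.f / 7.f;            // inSize / outSize
        const float sample = (2 + 0.5f) * invScale;  // back-projected column center
        const long start = static_cast<long>(std::ceil(sample - kernel.radius() - 0.5f));
        const long end = static_cast<long>(std::floor(sample + kernel.radius() - 0.5f));
        float w[8], sum = 0.f;
        for (long s = start; s <= end; ++s) { w[s - start] = kernel(s + 0.5f - sample); sum += w[s - start]; }
        for (long s = start; s <= end; ++s)
            printf("input %ld -> weight %f\n", s, w[s - start] / sum);
    }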
+
+// A pre-computed span of pixels along a single dimension.
+// The output pixel will be the weighted sum of pixels starting from start.
+struct Spans {
+ // The maximum span size of any output pixel.
+ int _spanSize;
+ // int32 tensor with shape {outputSize}.
+ NDArray _starts;
+
+ // float32 tensor of size {outputSize, spanSize}.
+ // The output pixel at x is computed as:
+ //   dot_product(input[starts[x]:starts[x]+span_size], weights[x]).
+ NDArray _weights;
+};
+
+// return value if within bounds, or the nearest bound otherwise
+static inline _CUDA_HD Nd4jLong boundsAmp(Nd4jLong const low, Nd4jLong const high, Nd4jLong const value) {
+ if (high < value) return high;
+ if (value < low) return low;
+ return value;
+}
+
+template <typename TKernelFunc>
+static __global__ void computeSpansKernel(TKernelFunc* kernel, int* startsVec, float* weightsVector, Nd4jLong outSize, Nd4jLong inSize, float kernelScale, int spanSize, float const invScale, float const invTranslate, float invKernelScale, float* tempWeightsBuf) {
+
+ auto tid = threadIdx.x + blockIdx.x * blockDim.x;
+ auto step = blockDim.x * gridDim.x;
+ __shared__ int maxSpanSize;
+
+ if (threadIdx.x == 0 && blockIdx.x == 0) {
+   maxSpanSize = 0;
+ }
+ __syncthreads();
+
+ for (auto x = tid; x < outSize; x += step) {
+   const float columnFloat = x + 0.5f;
+   const float sampleFloat = columnFloat * invScale + invTranslate;
+
+   // Don't sample when the sampling location is outside the source image.
+   if (sampleFloat < 0 || sampleFloat > inSize) {
+     // Add an empty span.
+     startsVec[x] = 0;
+     continue;
+   }
+   Nd4jLong spanStart = math::nd4j_ceil(sampleFloat - kernel->radius() * kernelScale - 0.5f);
+   Nd4jLong spanEnd = math::nd4j_floor(sampleFloat + kernel->radius() * kernelScale - 0.5f);
+   spanStart = boundsAmp(0LL, inSize - 1, spanStart);
+   spanEnd = boundsAmp(0LL, inSize - 1, spanEnd) + 1;
+   int const thisSpanSize = spanEnd - spanStart;
+   if (thisSpanSize > spanSize) {
+     return; // span would overflow the preallocated {outSize, spanSize} weights buffer
+   }
+   float totalWeightSum = 0.f;
+   auto tempWeights = &tempWeightsBuf[x * spanSize];
+   auto actualWeights = 0;
+   for (int source = spanStart; source < spanEnd; ++source) {
+     float kernelPos = static_cast<float>(source) + 0.5f - sampleFloat;
+     float weight = (*kernel)(kernelPos * invKernelScale);
+     totalWeightSum += weight;
+     tempWeights[actualWeights++] = weight;
+   }
+   maxSpanSize = math::nd4j_max(maxSpanSize, thisSpanSize);
+   if (math::nd4j_abs(totalWeightSum) >= 1000.f * DataTypeUtils::min<float>()) {
+     auto totalWeightSumInverted = 1.0f / totalWeightSum;
+     auto outIndex = spanSize * x;
+     for (auto weightIndex = 0; weightIndex < actualWeights; ++weightIndex) {
+       weightsVector[outIndex] = tempWeights[weightIndex] * totalWeightSumInverted;
+       ++outIndex;
+     }
+   }
+   startsVec[x] = spanStart;
+ }
+
+}
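On the consuming side, each output pixel is a dot product over at most spanSize contiguous inputs, which is what gatherRows/gatherColumns below implement per row and per column. A single-channel sketch of that step (hypothetical buffers, not part of the patch):

    // output[x] = dot(input[starts[x] : starts[x]+spanSize], weights row x)
    static float resampleOne(const float* input, long inSize, const int* starts,
                             const float* weights, int spanSize, long x) {
        float acc = 0.f;
        const float* w = weights + x * spanSize;  // row x of the {outSize, spanSize} weight table
        for (int i = 0; i < spanSize && starts[x] + i < inSize; ++i)
            acc += w[i] * input[starts[x] + i];
        return acc;
    }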
+template <typename TKernelFunc>
+static int computeSpans(LaunchContext* context, TKernelFunc& kernel, Nd4jLong const outSize, Nd4jLong const inSize, float const scale, float const translate, bool const antialias, Spans& spans) {
+ // When sampling, we need the inverse scale and translation, to map from an
+ // output to an input pixel.
+ float const invScale = 1.f / scale;
+ float const invTranslate = -invScale * translate;
+ // When downsampling the kernel should be scaled since we want to low pass
+ // filter and interpolate, but when upsampling it should not be since we only
+ // want to interpolate.
+ float const kernelScale = antialias ? math::nd4j_max(invScale, 1.f) : 1.f;
+ spans._spanSize = math::nd4j_min(2 * static_cast<int>(std::ceil(kernel.radius() * kernelScale)) + 1, static_cast<int>(inSize));
+ spans._starts = NDArrayFactory::create<int>('c', {outSize}); spans._starts.syncToHost();
+ spans._weights = NDArrayFactory::create<float>('c', {outSize, spans._spanSize}); spans._weights.syncToHost();
+
+ auto startsVec = reinterpret_cast<int*>(spans._starts.buffer());
+ auto weightsVector = reinterpret_cast<float*>(spans._weights.buffer());
+ spans._weights.nullify();
+
+ const float invKernelScale = 1.f / kernelScale;
+// NDArray tempWeights = NDArrayFactory::create<float>('c', {outSize, spans._spanSize});
+// auto tempWeightsBuf = reinterpret_cast<float*>(tempWeights.specialBuffer());
+// PointersManager mg(context, "ops::helpers::computeSpans");
+// auto specialKernel = reinterpret_cast<TKernelFunc*>(mg.replicatePointer(&kernel, sizeof(TKernelFunc)));
+ auto stream = context->getCudaStream();
+ //computeSpansKernel<<<1, 1, 128, *stream>>>(specialKernel, startsVec, weightsVector, outSize, inSize, kernelScale, spans._spanSize, invScale, invTranslate, invKernelScale, tempWeightsBuf);
+ auto maxSpanSize = 0;
+ std::vector<float> tempWeights;
+ for (auto x = 0; x < outSize; x ++) {
+   const float columnFloat = x + 0.5f;
+   const float sampleFloat = columnFloat * invScale + invTranslate;
+
+   // Don't sample when the sampling location is outside the source image.
+   if (sampleFloat < 0 || sampleFloat > inSize) {
+     // Add an empty span.
+     startsVec[x] = 0;
+     continue;
+   }
+   Nd4jLong spanStart = math::nd4j_ceil(sampleFloat - kernel.radius() * kernelScale - 0.5f);
+   Nd4jLong spanEnd = math::nd4j_floor(sampleFloat + kernel.radius() * kernelScale - 0.5f);
+   spanStart = boundsAmp(0LL, inSize - 1, spanStart);
+   spanEnd = boundsAmp(0LL, inSize - 1, spanEnd) + 1;
+   int const spanSize = spanEnd - spanStart;
+   if (spanSize > spans._spanSize) {
+     return Status::CODE(ND4J_STATUS_BAD_INPUT, "Span is too large");
+   }
+   float totalWeightSum = 0.f;
+   tempWeights.clear();
+
+   for (int source = spanStart; source < spanEnd; ++source) {
+     float kernelPos = static_cast<float>(source) + 0.5f - sampleFloat;
+     float weight = kernel(kernelPos * invKernelScale);
+     totalWeightSum += weight;
+     tempWeights.push_back(weight);
+   }
+   maxSpanSize = math::nd4j_max(maxSpanSize, spanSize);
+   if (math::nd4j_abs(totalWeightSum) >= 1000.f * DataTypeUtils::min<float>()) {
+     auto totalWeightSumInverted = 1.0f / totalWeightSum;
+     auto outIndex = spans._spanSize * x;
+     for (size_t weightIndex = 0; weightIndex < tempWeights.size(); ++weightIndex) {
+       weightsVector[outIndex++] = tempWeights[weightIndex] * totalWeightSumInverted;
+     }
+   }
+   startsVec[x] = spanStart;
+ }
+ spans._starts.tickWriteHost(); spans._weights.tickWriteHost();
+ spans._starts.syncToDevice();
+ spans._weights.syncToDevice();
+// cudaStreamSynchronize(*stream);
+ return Status::OK();
+}
+
+//template int computeSpans(LaunchContext* context, TriangleKernelFunc& kernel, Nd4jLong const outSize, Nd4jLong const inSize, float const scale, float const translate, bool const antialias, Spans& spans);
+
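A usage sketch for computeSpans (hypothetical values, not part of the patch; the printed starts assume the triangle kernel, outSize 7, inSize 5, no antialiasing):

    static void computeSpansExample(sd::LaunchContext* ctx) {
        TriangleKernelFunc kernel;
        Spans spans;
        // scale is outSize/inSize, so invScale = inSize/outSize maps output -> input
        auto status = computeSpans(ctx, kernel, /*outSize=*/7, /*inSize=*/5,
                                   /*scale=*/7.f / 5.f, /*translate=*/0.f,
                                   /*antialias=*/false, spans);
        if (status == Status::OK()) {
            // _spanSize == min(2 * ceil(1) + 1, 5) == 3
            spans._starts.printIndexedBuffer("span starts"); // expected: 0, 0, 1, 1, 2, 3, 4
        }
    }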
+template <typename X, typename Z>
+static __device__ void gatherRows(int const spanSize, int const* starts, Z const* weights, X const* imagePtr, Nd4jLong const inputHeight, Nd4jLong const inputWidth, Nd4jLong const outputHeight,
+                                  Nd4jLong const outputWidth, Nd4jLong const channels, Z* outputPtr) {
+ auto inRowSize = inputWidth * channels;
+ auto outRowSize = outputWidth * channels;
+
+ auto addScaledVector = [](const X* inVector, int vectorLen, Z weight, Z* outVector) {
+   Z* outVecEnd = outVector + vectorLen;
+   for (; outVector != outVecEnd; ++outVector, ++inVector) {
+     *outVector += weight * static_cast<Z>(*inVector);
+   }
+ };
+
+ for (int y = 0; y < outputHeight; ++y) {
+   Z* outRowData = outputPtr + outRowSize * y;
+   memset(outRowData, '\0', outRowSize * sizeof(Z));// std::fill(outRowData, outRowData + outRowSize, 0.f);
+   int inRow = starts[y];
+   auto inRowData = imagePtr + inRowSize * inRow;
+   auto weightsStart = weights + y * spanSize;
+   auto realSpanSize = math::nd4j_min(starts[y] + spanSize, static_cast<int>(inputHeight)) - starts[y];
+   auto weightsEnd = weightsStart + realSpanSize;
+   for (auto weightPtr = weightsStart; weightPtr != weightsEnd; ++weightPtr) {
+     addScaledVector(inRowData, inRowSize, *weightPtr, outRowData);
+     inRowData += inRowSize;
+   }
+ }
+}
+
+template <typename Z>
+static __device__ void gatherColumns(int const spanSize, int const* starts, Z const* weights, Z const* imagesPtr, Nd4jLong const inputHeight, Nd4jLong const inputWidth, Nd4jLong const outputHeight, Nd4jLong const outputWidth, Nd4jLong channels, Z* outputPtr) {
+ auto inRowSize = inputWidth * channels;
+ auto outRowSize = outputWidth * channels;
+
+ for (auto y = 0LL; y < outputHeight; ++y) {
+   auto inputRowStart = imagesPtr + inRowSize * y;
+   auto outPixels = outputPtr + outRowSize * y;
+   for (auto x = 0LL; x < outputWidth; ++x, outPixels += channels) {
+     auto inPixels = inputRowStart + starts[x] * channels;
+     auto weightsStart = weights + x * spanSize;
+     auto realSpanSize = math::nd4j_min(starts[x] + spanSize, static_cast<int>(inputWidth)) - starts[x];
+     auto weightsEnd = weightsStart + realSpanSize;
+     for (int c = 0; c < channels; ++c) {
+       outPixels[c] = 0.0f;
+     }
+     for (auto weightPtr = weightsStart; weightPtr != weightsEnd; ++weightPtr) {
+       Z w = *weightPtr;
+       for (int c = 0; c < channels; ++c) {
+         outPixels[c] += w * static_cast<Z>(inPixels[c]);
+       }
+       inPixels += channels;
+     }
+   }
+ }
+}
+
+template <typename X, typename Z>
+static __global__ void batchedGatherSpan(Nd4jLong batchSize, Nd4jLong inputWidth, Nd4jLong inputHeight, Nd4jLong outputWidth, Nd4jLong outputHeight, Nd4jLong channels, int rowSpanSize, int const* rowStartsBuf, Z const* rowWeightBuf, int columnSpanSize, int const* columnStartsBuf, Z const* columnWeightBuf, X const* pImages, Z* pIntermediate, Z* pOutput,
+                                         Nd4jLong inputPixPerBatch, Nd4jLong intermediatePixPerBatch, Nd4jLong outputPixPerBatch) {
+
+ auto tid = threadIdx.x + blockIdx.x * blockDim.x;
+ auto step = blockDim.x * gridDim.x;
+
+ for (int b = tid; b < batchSize; b += step) {
+   auto imagePtr = pImages + b * inputPixPerBatch;
+   auto intermediatePtr = pIntermediate + b * intermediatePixPerBatch;
+   auto outputPtr = pOutput + b * outputPixPerBatch;
+   gatherRows(rowSpanSize, rowStartsBuf, rowWeightBuf,
+              imagePtr, inputHeight, inputWidth, outputHeight,
+              inputWidth, channels, intermediatePtr);
+   gatherColumns(columnSpanSize, columnStartsBuf, columnWeightBuf,
+                 intermediatePtr, outputHeight, inputWidth, outputHeight, outputWidth, channels, outputPtr);
+ }
+}
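One subtlety in batchedGatherSpan above: the row pass writes an intermediate that keeps the input width, so for a 4x5 RGB image resized to 7x9 the buffers flow as (sizes hypothetical)

    input        {1, 4, 5, 3}
    intermediate {1, 7, 5, 3}   // gatherRows: height 4 -> 7, width unchanged
    output       {1, 7, 9, 3}   // gatherColumns: width 5 -> 9

which is why the gatherRows call passes inputWidth as the row pass's output width.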
+template <typename X, typename Z>
+static void gatherSpans(LaunchContext* context, int const rowSpanSize, NDArray const& rowStarts, NDArray const& rowWeights, int const colSpanSize, NDArray const& columnStarts, NDArray const& columnWeights, NDArray const* images, NDArray& intermediate, NDArray* output) {
+ auto batchSize = images->sizeAt(0);
+ auto inputHeight = images->sizeAt(1);
+ auto inputWidth = images->sizeAt(2);
+ auto channels = images->sizeAt(3);
+
+ auto outputHeight = output->sizeAt(1);
+ auto outputWidth = output->sizeAt(2);
+
+ auto inputPixPerBatch = inputWidth * inputHeight * channels;
+ auto intermediatePixPerBatch = inputWidth * outputHeight * channels;
+ auto outputPixPerBatch = outputWidth * outputHeight * channels;
+ auto intermediatePtr = reinterpret_cast<Z*>(intermediate.specialBuffer());
+
+ auto imagePtr = reinterpret_cast<X const*>(images->specialBuffer());
+ auto outputPtr = reinterpret_cast<Z*>(output->specialBuffer());
+ auto stream = context->getCudaStream();
+ auto rowStartsBuf = reinterpret_cast<int const*>(rowStarts.specialBuffer());
+ auto rowWeightBuf = reinterpret_cast<Z const*>(rowWeights.specialBuffer());
+ auto columnStartsBuf = reinterpret_cast<int const*>(columnStarts.specialBuffer());
+ auto columnWeightBuf = reinterpret_cast<Z const*>(columnWeights.specialBuffer());
+ batchedGatherSpan<<<128, 128, 256, *stream>>>(batchSize, inputWidth, inputHeight, outputWidth, outputHeight, channels, rowSpanSize, rowStartsBuf, rowWeightBuf, colSpanSize, columnStartsBuf, columnWeightBuf, imagePtr, intermediatePtr, outputPtr, inputPixPerBatch, intermediatePixPerBatch, outputPixPerBatch);
+}
+
+template <typename X, typename Z>
+static int resizeKernel(LaunchContext* context, ImageResizeMethods method, NDArray const* input, Nd4jLong outWidth, Nd4jLong outHeight, bool antialias, NDArray* output) {
+ Nd4jLong const batchSize = input->sizeAt(0);
+ Nd4jLong const inputHeight = input->sizeAt(1);
+ Nd4jLong const inputWidth = input->sizeAt(2);
+ Nd4jLong const channels = input->sizeAt(3);
+ NDArray::prepareSpecialUse({output}, {input});
+ Z rowScale = Z(outHeight) / Z(inputHeight);
+ Z columnScale = Z(outWidth) / Z(inputWidth);
+
+ // Return if the output is empty.
+ if (output->lengthOf() == 0) return Status::OK();
+
+ Spans colSpans;
+ Spans rowSpans;
+ auto res = Status::OK();
+ switch(method) {
+   case kResizeBilinear: {
+     TriangleKernelFunc kernel;
+     res = computeSpans(context, kernel, outWidth, inputWidth, columnScale, 0.f, antialias, colSpans);
+     if (res != Status::OK()) return res;
+     res = computeSpans(context, kernel, outHeight, inputHeight, rowScale, 0.f, antialias, rowSpans);
+   }
+   break;
+   case kResizeBicubic: {
+     KeysCubicKernelFunc kernel;
+     res = computeSpans(context, kernel, outWidth, inputWidth, columnScale, 0.f, antialias, colSpans);
+     if (res != Status::OK()) return res;
+     res = computeSpans(context, kernel, outHeight, inputHeight, rowScale, 0.f, antialias, rowSpans);
+   } break;
+   case kResizeLanczos3:{
+     LanczosKernelFunc kernel(3.f);
+     res = computeSpans(context, kernel, outWidth, inputWidth, columnScale, 0.f, antialias, colSpans);
+     if (res != Status::OK()) return res;
+     res = computeSpans(context, kernel, outHeight, inputHeight, rowScale, 0.f, antialias, rowSpans);
+   } break;
+
+   case kResizeLanczos5: {
+     LanczosKernelFunc kernel(5.f);
+     res = computeSpans(context, kernel, outWidth, inputWidth, columnScale, 0.f, antialias, colSpans);
+     if (res != Status::OK()) return res;
+     res = computeSpans(context, kernel, outHeight, inputHeight, rowScale, 0.f, antialias, rowSpans);
+   } break;
+   case kResizeGaussian: {
+     GaussianKernelFunc kernel;
+     res = computeSpans(context, kernel, outWidth, inputWidth, columnScale, 0.f, antialias, colSpans);
+     if (res != Status::OK()) return res;
+     res = computeSpans(context, kernel, outHeight, inputHeight, rowScale, 0.f, antialias, rowSpans);
+   } break;
+   case kResizeMitchellcubic:{
+     MitchellCubicKernelFunc kernel;
+     res = computeSpans(context, kernel, outWidth, inputWidth, columnScale, 0.f, antialias, colSpans);
+     if (res != Status::OK()) return res;
+     res = computeSpans(context, kernel, outHeight, inputHeight, rowScale, 0.f, antialias, rowSpans);
+   } break;
+ };
+
+ NDArray intermediate = NDArrayFactory::create<Z>('c', {batchSize, outHeight, inputWidth, channels});
+
+ auto& rowStarts = rowSpans._starts;       // shape {outHeight}
+ auto& rowWeights = rowSpans._weights;     // shape {outHeight, spanSize}
+ auto& columnStarts = colSpans._starts;    // shape {outWidth}
+ auto& columnWeights = colSpans._weights;  // shape {outWidth, spanSize}
+
+ gatherSpans<X, Z>(context, rowSpans._spanSize, rowStarts, rowWeights, colSpans._spanSize, columnStarts, columnWeights, input, intermediate, output);
+
+ NDArray::registerSpecialUse({output}, {input});
+ return res;
+}
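The wrappers below lean on BUILD_DOUBLE_SELECTOR to pick a resizeKernel<X, Z> instantiation from the two runtime data types; conceptually each call expands to a nested switch along these lines (illustrative sketch only, not the generated code):

    // switch (image->dataType()) {
    //   case DataType::FLOAT32:
    //     switch (output->dataType()) {
    //       case DataType::FLOAT32:
    //         return resizeKernel<float, float>(context, kResizeBilinear, image, width, height, antialias, output);
    //       // ... one case per FLOAT_TYPES_1 entry
    //     }
    //   // ... one case per NUMERIC_TYPES entry
    // }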
+
+
+static int resizeTriangle(sd::LaunchContext * context, NDArray const* image, int const width, int const height, bool const antialias, NDArray* output) {
+// std::unique_ptr kernel(new TriangleKernelFunc);
+ BUILD_DOUBLE_SELECTOR(image->dataType(), output->dataType(), return resizeKernel,(context, kResizeBilinear, image, width, height, antialias, output), NUMERIC_TYPES, FLOAT_TYPES_1);
+ return Status::CODE(ND4J_STATUS_VALIDATION, "helpers::resizeTriangle: This resize method is available in future versions");
+}
+
+static int resizeLanczos3(sd::LaunchContext * context, NDArray const* image, int const width, int const height, bool const antialias, NDArray* output) {
+// std::unique_ptr kernel(new LanczosKernelFunc(3.f));
+ BUILD_DOUBLE_SELECTOR(image->dataType(), output->dataType(), return resizeKernel,(context, kResizeLanczos3, image, width, height, antialias, output), NUMERIC_TYPES, FLOAT_TYPES_1);
+ return Status::CODE(ND4J_STATUS_VALIDATION, "helpers::resizeLanczos3: This resize method is available in future versions");
+}
+
+static int resizeLanczos5(sd::LaunchContext * context, NDArray const* image, int const width, int const height, bool const antialias, NDArray* output) {
+// std::unique_ptr kernel(new LanczosKernelFunc(5.f));
+ BUILD_DOUBLE_SELECTOR(image->dataType(), output->dataType(), return resizeKernel,(context, kResizeLanczos5, image, width, height, antialias, output), NUMERIC_TYPES, FLOAT_TYPES_1);
+ return Status::CODE(ND4J_STATUS_VALIDATION, "helpers::resizeLanczos5: This resize method is available in future versions");
+}
+
+static int resizeGaussian(sd::LaunchContext * context, NDArray const* image, int const width, int const height, bool const antialias, NDArray* output) {
+ BUILD_DOUBLE_SELECTOR(image->dataType(), output->dataType(), return resizeKernel,(context, kResizeGaussian, image, width, height, antialias, output), NUMERIC_TYPES, FLOAT_TYPES_1);
+ return Status::CODE(ND4J_STATUS_VALIDATION, "helpers::resizeGaussian: This resize method is available in future versions");
+}
+static int resizeMitchellcubic(sd::LaunchContext * context, NDArray const* image, int const width, int const height, bool const antialias, NDArray* output) {
+ BUILD_DOUBLE_SELECTOR(image->dataType(), output->dataType(), return resizeKernel,(context, kResizeMitchellcubic, image, width, height, antialias, output), NUMERIC_TYPES, FLOAT_TYPES_1);
+ return Status::CODE(ND4J_STATUS_VALIDATION, "helpers::resizeMitchellcubic: This resize method is available in future versions");
+}
+static int resizeKeycubic(sd::LaunchContext * context, NDArray const* image, int const width, int const height, bool const antialias, NDArray* output) {
+ if (!antialias)
+   return resizeBicubicFunctorA(context, image, width, height, false, true, output);
+ BUILD_DOUBLE_SELECTOR(image->dataType(), output->dataType(), return resizeKernel,(context, kResizeBicubic, image, width, height, antialias, output), NUMERIC_TYPES, FLOAT_TYPES_1);
+ return Status::CODE(ND4J_STATUS_VALIDATION, "helpers::resizeKeycubic: This resize method is available in future versions");
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+int resizeFunctor(sd::LaunchContext * context, NDArray const* image, int width, int height,
+                  ImageResizeMethods method, bool antialias, NDArray* output) {
+ switch (method) {
+   case kResizeBilinear: return resizeTriangle(context, image, width, height, antialias, output);
+   case kResizeNearest: return resizeNeighborFunctor(context, image, width, height, false, true, output);
+   case kResizeBicubic: return resizeKeycubic(context, image, width, height, antialias, output);
+   case kResizeLanczos3: return resizeLanczos3(context, image, width, height, antialias, output);
+   case kResizeLanczos5: return resizeLanczos5(context, image, width, height, antialias, output);
+   case kResizeGaussian: return resizeGaussian(context, image, width, height, antialias, output);
+   case kResizeArea: return resizeAreaFunctor(context, image, width, height, false, output);
+   case kResizeMitchellcubic: return resizeMitchellcubic(context, image, width, height, antialias, output);
+   default:
+     nd4j_printf("helper::resizeFunctor: Wrong resize method %i\n", (int)method);
+     throw std::runtime_error("helper::resizeFunctor: Wrong resize method.");
+ }
+ return ND4J_STATUS_OK;
+}
+
+
+ }
+ }
+}
\ No newline at end of file
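The header change that follows renumbers ImageResizeMethods so the values line up with the ordinals of the re-ordered Java ImageResizeMethod enum later in this patch, since Java passes the method as a plain integer argument. An illustrative guard for that contract (not in the patch):

    static_assert(kResizeBilinear == 0 && kResizeNearest == 1 && kResizeBicubic == 2 &&
                  kResizeArea == 3 && kResizeGaussian == 4 && kResizeLanczos3 == 5 &&
                  kResizeLanczos5 == 6 && kResizeMitchellcubic == 7,
                  "ImageResizeMethods must stay in sync with org.nd4j.enums.ImageResizeMethod");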
diff --git a/libnd4j/include/ops/declarable/helpers/image_resize.h b/libnd4j/include/ops/declarable/helpers/image_resize.h index c11e94ed4..bd9e10b58 100644
--- a/libnd4j/include/ops/declarable/helpers/image_resize.h
+++ b/libnd4j/include/ops/declarable/helpers/image_resize.h
@@ -28,13 +28,17 @@ namespace ops {
 namespace helpers {
 enum ImageResizeMethods {
-   kResizeBilinear = 1,
-   kResizeBicubic,
+   kResizeBilinear = 0, // as Java requires
    kResizeNearest,
+   kResizeBicubic,
+   kResizeArea,
    kResizeGaussian,
+   kResizeLanczos3,
    kResizeLanczos5,
-   kResizeMitchelcubic,
-   kResizeArea
+   kResizeMitchellcubic,
+   kResizeFirst = kResizeBilinear,
+   kResizeLast = kResizeMitchellcubic,
+   kResizeOldLast = kResizeArea
 };
 int resizeBilinearFunctor(sd::LaunchContext * context, NDArray const* image, int const width, int const height,
@@ -49,7 +53,10 @@ namespace helpers {
 bool const alignCorners, NDArray* output);
 int resizeFunctor(sd::LaunchContext * context, NDArray const* image, int const width, int const height,
-                  ImageResizeMethods method, bool preserveAspectRatio, bool antialias, NDArray* output);
+                  ImageResizeMethods method, bool antialias, NDArray* output);
+
+ int resizeImagesFunctor(sd::LaunchContext * context, NDArray const* image, int const width, int const height,
+                  ImageResizeMethods method, bool alignCorners, NDArray* output);
 }
 }
 }
diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests10.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests10.cpp index 6d89bd182..2ffc2c22d 100644
--- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests10.cpp
+++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests10.cpp
@@ -396,6 +396,29 @@ TEST_F(DeclarableOpsTests10, TestMarixBandPart_Test_1) {
 ASSERT_TRUE(exp.equalsTo(results.at(0)));
 }
+///////////////////////////////////////////////////////////////////
+TEST_F(DeclarableOpsTests10, TestMarixBandPart_Test_2) {
+
+ auto x = NDArrayFactory::create('c', {2, 3, 3});
+ auto minD = NDArrayFactory::create(1);
+ auto maxD = NDArrayFactory::create(1);
+ auto exp = NDArrayFactory::create('c', {2, 3, 3});
+ x.linspace(1);
+ exp.linspace(1);
+ exp.p(0, 0, 2, 0.);
+ exp.p(1, 0, 2, 0.);
+ exp.p(0, 2, 0, 0.);
+ exp.p(1, 2, 0, 0.);
+
+ sd::ops::matrix_band_part op;
+ auto results = op.evaluate({&x, &minD, &maxD}, {}, {});
+
+ ASSERT_EQ(ND4J_STATUS_OK, results.status());
+ //results.at(0)->printIndexedBuffer("MBP Test1");
+ //exp.printIndexedBuffer("MBP Expec");
+ ASSERT_TRUE(exp.equalsTo(results.at(0)));
+}
+
 //////////////////////////////////////////////////////////////////////////////
 TEST_F(DeclarableOpsTests10, atan2_test1) {
@@ -1528,6 +1551,71 @@ TEST_F(DeclarableOpsTests10, ImageResizeBilinear_Test01) {
 }
+TEST_F(DeclarableOpsTests10, ResizeImages_Test1) {
+
+ NDArray input = NDArrayFactory::create('c', {2, 4, 5, 3});
+ input.linspace(1.);
+
+ auto expected = NDArrayFactory::create('c', {2, 7, 9, 3}, {
+ 1.f, 2.f, 3.f, 2.6666667f, 3.6666667f, 4.666667f, 4.3333335f, 5.3333335f, 6.3333335f, 6.f,
+ 7.f, 8.f, 7.666667f, 8.666667f, 9.666667f, 9.333334f, 10.333334f, 11.333334f, 11.f, 12.f,
+ 13.f, 12.666667f, 13.666667f, 14.666667f, 13.f, 14.f, 15.f, 9.571429f, 10.571429f, 11.571429f,
+ 11.238095f, 12.238095f, 13.238095f, 12.904762f, 13.904762f, 14.904762f, 14.571429f, 15.571429f, 16.57143f,
+ 16.238096f, 17.238096f, 18.238096f, 17.904762f, 18.904762f, 19.904762f, 19.57143f, 20.57143f, 21.57143f,
+ 21.238096f, 22.238096f, 23.238096f, 21.57143f, 22.57143f, 23.57143f, 18.142859f, 19.142859f, 20.142859f,
+
19.809525f, 20.809525f, 21.809525f, 21.476192f, 22.476192f, 23.476192f, 23.142859f, 24.142859f, 25.142859f, + 24.809526f, 25.809526f, 26.809526f, 26.476192f, 27.476192f, 28.476192f, 28.142859f, 29.142859f, 30.142859f, + 29.809526f, 30.809526f, 31.809526f, 30.142859f, 31.142859f, 32.142857f, 26.714287f, 27.714287f, 28.714287f, + 28.380955f, 29.380955f, 30.380955f, 30.04762f, 31.04762f, 32.047623f, 31.714287f, 32.714287f, 33.714287f, + 33.380955f, 34.380955f, 35.380955f, 35.047623f, 36.047623f, 37.047623f, 36.714287f, 37.714287f, 38.714287f, + 38.380955f, 39.380955f, 40.380955f, 38.714287f, 39.714287f, 40.714287f, 35.285717f, 36.285717f, 37.285717f, + 36.952385f, 37.952385f, 38.952385f, 38.61905f, 39.61905f, 40.61905f, 40.285717f, 41.285717f, 42.285717f, + 41.952385f, 42.952385f, 43.952385f, 43.61905f, 44.61905f, 45.61905f, 45.285717f, 46.285717f, 47.285717f, + 46.952385f, 47.952385f, 48.952385f, 47.285717f, 48.285717f, 49.285717f, 43.857143f, 44.857143f, 45.857143f, + 45.52381f, 46.52381f, 47.52381f, 47.190475f, 48.190475f, 49.190475f, 48.857143f, 49.857143f, 50.857143f, + 50.52381f, 51.52381f, 52.52381f, 52.190475f, 53.190475f, 54.190475f, 53.857143f, 54.857143f, 55.857143f, + 55.52381f, 56.52381f, 57.52381f, 55.857143f, 56.857143f, 57.857143f, 46.f, 47.f, 48.f, + 47.666668f, 48.666668f, 49.666668f, 49.333332f, 50.333332f, 51.333332f, 51.f, 52.f, 53.f, + 52.666668f, 53.666668f, 54.666668f, 54.333332f, 55.333332f, 56.333332f, 56.f, 57.f, 58.f, + 57.666668f, 58.666668f, 59.666668f, 58.f, 59.f, 60.f, 61.f, 62.f, 63.f, + 62.666668f, 63.666668f, 64.666664f, 64.333336f, 65.333336f, 66.333336f, 66.f, 67.f, 68.f, + 67.666664f, 68.666664f, 69.666664f, 69.333336f, 70.333336f, 71.333336f, 71.f, 72.f, 73.f, + 72.666664f, 73.666664f, 74.666664f, 73.f, 74.f, 75.f, 69.57143f, 70.57143f, 71.57143f, + 71.2381f, 72.2381f, 73.23809f, 72.90476f, 73.90476f, 74.90476f, 74.57143f, 75.57143f, 76.57143f, + 76.23809f, 77.23809f, 78.23809f, 77.90476f, 78.90476f, 79.90476f, 79.57143f, 80.57143f, 81.57143f, + 81.23809f, 82.23809f, 83.23809f, 81.57143f, 82.57143f, 83.57143f, 78.14286f, 79.14286f, 80.14286f, + 79.809525f, 80.809525f, 81.809525f, 81.4762f, 82.4762f, 83.4762f, 83.14286f, 84.14286f, 85.14286f, + 84.809525f, 85.809525f, 86.809525f, 86.4762f, 87.4762f, 88.4762f, 88.14286f, 89.14286f, 90.14286f, + 89.809525f, 90.809525f, 91.809525f, 90.14286f, 91.14286f, 92.14286f, 86.71429f, 87.71429f, 88.71429f, + 88.38095f, 89.38095f, 90.38095f, 90.04762f, 91.04762f, 92.04762f, 91.71429f, 92.71429f, 93.71429f, + 93.38095f, 94.38095f, 95.38095f, 95.04762f, 96.04762f, 97.04762f, 96.71429f, 97.71429f, 98.71429f, + 98.38095f, 99.38095f, 100.38095f, 98.71429f, 99.71429f, 100.71429f, 95.28571f, 96.28571f, 97.28571f, + 96.95238f, 97.95238f, 98.95238f, 98.61905f, 99.61905f, 100.61905f, 100.28571f, 101.28571f, 102.28571f, + 101.95238f, 102.95238f, 103.95238f, 103.61905f, 104.61905f, 105.61905f, 105.28571f, 106.28571f, 107.28571f, + 106.95238f, 107.95238f, 108.95238f, 107.28571f, 108.28571f, 109.28571f, 103.85715f, 104.85715f, 105.85715f, + 105.5238f, 106.5238f, 107.5238f,107.190475f,108.190475f,109.190475f, 108.85715f, 109.85715f, 110.85715f, + 110.5238f, 111.5238f, 112.5238f,112.190475f,113.190475f,114.190475f, 113.85715f, 114.85715f, 115.85715f, + 115.5238f, 116.5238f, 117.5238f, 115.85715f, 116.85715f, 117.85715f, 106.f, 107.f, 108.f, + 107.666664f,108.666664f,109.666664f,109.333336f,110.333336f,111.333336f, 111.f, 112.f, 113.f, + 112.666664f,113.666664f,114.666664f,114.333336f,115.333336f,116.333336f, 116.f, 117.f, 118.f, + 
117.666664f,118.666664f,119.666664f, 118.f, 119.f, 120.f + }); + + auto size = NDArrayFactory::create({7, 11}); + sd::ops::resize_images op; + auto results = op.evaluate({&input, &size}, {}, {0}, {false, true}); // resize with bilinear method + + ASSERT_EQ(ND4J_STATUS_OK, results.status()); + + NDArray *result = results.at(0); + +// result->printBuffer("Resized to 7x9"); +// expected.printBuffer("Expect for 7x9"); +// result.printShapeInfo("Output shape"); +// expected.printShapeInfo("Expect shape"); + ASSERT_TRUE(expected.isSameShape(result)); + ASSERT_TRUE(expected.equalsTo(result)); +} TEST_F(DeclarableOpsTests10, ImageResizeBilinear_Test02) { NDArray input = NDArrayFactory::create('c', {2, 5,5,3}, { diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests11.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests11.cpp index 23c40ebae..97dcf7574 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests11.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests11.cpp @@ -25,6 +25,7 @@ #include #include #include +#include using namespace sd; @@ -1346,6 +1347,34 @@ TEST_F(DeclarableOpsTests11, ImageResizeArea_Test8) { ASSERT_TRUE(expected.equalsTo(result)); } +TEST_F(DeclarableOpsTests11, ResizeImages_Test8) { + + NDArray input = NDArrayFactory::create('c', {1, 3, 3, 1}, { + 1, 2, 3, 4, 5, 6, 7, 8, 9 + }); + + NDArray expected = NDArrayFactory::create('c', {1, 6, 6, 1}, { +// 1.f, 1.f, 2.f, 2.f, 3.f, 3.f, 1.f, 1.f, 2.f, 2.f, 3.f, 3.f, 4.f, 4.f, 5.f, 5.f, 6.f, 6.f, 4.f, 4.f, 5.f, 5.f, +// 6.f, 6.f, 7.f, 7.f, 8.f, 8.f, 9.f, 9.f, 7.f, 7.f, 8.f, 8.f, 9.f, 9.f + 1.f , 1.f , 1.5f, 2.f , 2.f, 3.f, 1.f , 1.f , 1.5f, 2.f , 2.f, 3.f, + 2.5f, 2.5f, 3.f, 3.5f, 3.5f, 4.5f, 4.f , 4.f , 4.5f , 5.f, 5.f, 6.f , + 4.f, 4.f, 4.5f , 5.f, 5.f, 6.f, 7.f , 7.f , 7.5f , 8.f , 8.f , 9.f + }); + //input.linspace(1); +// auto size = NDArrayFactory::create({6, 6}); + sd::ops::resize_images op; + auto results = op.evaluate({&input}, {}, {6, 8, ops::helpers::kResizeArea}, {true, true}); // resize_area to 6x8 with align corners and preserve aspect ratio of input image + + ASSERT_EQ(ND4J_STATUS_OK, results.status()); + + NDArray* result = results.at(0); + +// result->printBuffer("Area Resized to 6x6"); +// expected.printBuffer("Area Expect for 6x6"); + ASSERT_TRUE(expected.isSameShape(result)); + ASSERT_TRUE(expected.equalsTo(result)); +} + /////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests11, ImageResizeArea_Test9) { @@ -1354,7 +1383,10 @@ TEST_F(DeclarableOpsTests11, ImageResizeArea_Test9) { }); NDArray expected = NDArrayFactory::create('c', {1, 10, 10, 4}, { - 1.000000f, 2.000000f, 3.000000f, 4.000000f, 1.000000f, 2.000000f, 3.000000f, 4.000000f, 1.000000f, 2.000000f, 3.000000f, 4.000000f, 3.666667f, 4.666667f, 5.666667f, 6.666667f, 5.000000f, 6.000000f, 7.000000f, 8.000000f, 5.000000f, 6.000000f, 7.000000f, 8.000000f, 6.333336f, 7.333336f, 8.333336f, 9.333337f, 9.000000f, 10.000000f, 11.000000f, 12.000000f, 9.000000f, 10.000000f, 11.000000f, 12.000000f, 8.999998f, 9.999998f, 10.999998f, 11.999998f, 1.000000f, 2.000000f, 3.000000f, 4.000000f, 1.000000f, 2.000000f, 3.000000f, 4.000000f, 1.000000f, 2.000000f, 3.000000f, 4.000000f, 3.666667f, 4.666667f, 5.666667f, 6.666667f, 5.000000f, 6.000000f, 7.000000f, 8.000000f, 5.000000f, 6.000000f, 7.000000f, 8.000000f, 6.333336f, 7.333336f, 8.333336f, 9.333337f, 9.000000f, 10.000000f, 11.000000f, 12.000000f, 9.000000f, 10.000000f, 11.000000f, 12.000000f, 8.999998f, 9.999998f, 10.999998f, 11.999998f, 1.000000f, 2.000000f, 
3.000000f, 4.000000f, 1.000000f, 2.000000f, 3.000000f, 4.000000f, 1.000000f, 2.000000f, 3.000000f, 4.000000f, 3.666667f, 4.666667f, 5.666667f, 6.666667f, 5.000000f, 6.000000f, 7.000000f, 8.000000f, 5.000000f, 6.000000f, 7.000000f, 8.000000f, 6.333336f, 7.333336f, 8.333336f, 9.333337f, 9.000000f, 10.000000f, 11.000000f, 12.000000f, 9.000000f, 10.000000f, 11.000000f, 12.000000f, 8.999998f, 9.999998f, 10.999998f, 11.999998f, 1.000000f, 2.000000f, 3.000000f, 4.000000f, 1.000000f, 2.000000f, 3.000000f, 4.000000f, 1.000000f, 2.000000f, 3.000000f, 4.000000f, 3.666667f, 4.666667f, 5.666667f, 6.666667f, 5.000000f, 6.000000f, 7.000000f, 8.000000f, 5.000000f, 6.000000f, 7.000000f, 8.000000f, 6.333336f, 7.333336f, 8.333336f, 9.333337f, 9.000000f, 10.000000f, 11.000000f, 12.000000f, 9.000000f, 10.000000f, 11.000000f, 12.000000f, 8.999998f, 9.999998f, 10.999998f, 11.999998f, 1.000000f, 2.000000f, 3.000000f, 4.000000f, 1.000000f, 2.000000f, 3.000000f, 4.000000f, 1.000000f, 2.000000f, 3.000000f, 4.000000f, 3.666667f, 4.666667f, 5.666667f, 6.666667f, 5.000000f, 6.000000f, 7.000000f, 8.000000f, 5.000000f, 6.000000f, 7.000000f, 8.000000f, 6.333336f, 7.333336f, 8.333336f, 9.333336f, 8.999999f, 9.999999f, 11.000000f, 11.999999f, 8.999999f, 9.999999f, 11.000000f, 11.999999f, 8.999998f, 9.999997f, 10.999997f, 11.999997f, 13.000003f, 14.000004f, 15.000003f, 16.000004f, 13.000003f, 14.000004f, 15.000003f, 16.000004f, 13.000003f, 14.000004f, 15.000003f, 16.000004f, 15.666671f, 16.666672f, 17.666672f, 18.666672f, 17.000006f, 18.000004f, 19.000006f, 20.000004f, 17.000006f, 18.000004f, 19.000006f, 20.000004f, 18.333344f, 19.333344f, 20.333345f, 21.333344f, 21.000006f, 22.000006f, 23.000006f, 24.000006f, 21.000006f, 22.000006f, 23.000006f, 24.000006f, 21.000002f, 22.000000f, 23.000002f, 24.000000f, 13.000000f, 14.000001f, 15.000000f, 16.000000f, 13.000000f, 14.000001f, 15.000000f, 16.000000f, 13.000000f, 14.000001f, 15.000000f, 16.000000f, 15.666667f, 16.666668f, 17.666668f, 18.666668f, 17.000002f, 18.000000f, 19.000002f, 20.000000f, 17.000002f, 18.000000f, 19.000002f, 20.000000f, 18.333340f, 19.333340f, 20.333342f, 21.333340f, 21.000002f, 22.000000f, 22.999998f, 24.000000f, 21.000002f, 22.000000f, 22.999998f, 24.000000f, 20.999996f, 21.999996f, 22.999994f, 23.999996f, 13.000000f, 14.000001f, 15.000000f, 16.000000f, 13.000000f, 14.000001f, 15.000000f, 16.000000f, 13.000000f, 14.000001f, 15.000000f, 16.000000f, 15.666667f, 16.666668f, 17.666668f, 18.666668f, 17.000002f, 18.000000f, 19.000002f, 20.000000f, 17.000002f, 18.000000f, 19.000002f, 20.000000f, 18.333340f, 19.333340f, 20.333342f, 21.333340f, 21.000002f, 22.000000f, 22.999998f, 24.000000f, 21.000002f, 22.000000f, 22.999998f, 24.000000f, 20.999996f, 21.999996f, 22.999994f, 23.999996f, 13.000000f, 14.000001f, 15.000000f, 16.000000f, 13.000000f, 14.000001f, 15.000000f, 16.000000f, 13.000000f, 14.000001f, 15.000000f, 16.000000f, 15.666667f, 16.666668f, 17.666668f, 18.666668f, 17.000002f, 18.000000f, 19.000002f, 20.000000f, 17.000002f, 18.000000f, 19.000002f, 20.000000f, 18.333340f, 19.333340f, 20.333342f, 21.333340f, 21.000002f, 22.000000f, 22.999998f, 24.000000f, 21.000002f, 22.000000f, 22.999998f, 24.000000f, 20.999996f, 21.999996f, 22.999994f, 23.999996f, 12.999995f, 13.999995f, 14.999994f, 15.999994f, 12.999995f, 13.999995f, 14.999994f, 15.999994f, 12.999995f, 13.999995f, 14.999994f, 15.999994f, 15.666661f, 16.666662f, 17.666660f, 18.666660f, 16.999994f, 17.999994f, 18.999992f, 19.999992f, 16.999994f, 17.999994f, 18.999992f, 19.999992f, 18.333334f, 19.333332f, 
20.333334f, 21.333332f, 20.999992f, 21.999992f, 22.999990f, 23.999992f, 20.999992f, 21.999992f, 22.999990f, 23.999992f, 20.999989f, 21.999989f, 22.999987f, 23.999987f + 1.000000f, 2.000000f, 3.000000f, 4.000000f, 1.000000f, 2.000000f, 3.000000f, 4.000000f, 1.000000f, 2.000000f, + 3.000000f, 4.000000f, 3.666667f, 4.666667f, 5.666667f, 6.666667f, 5.000000f, 6.000000f, 7.000000f, 8.000000f, + 5.000000f, 6.000000f, 7.000000f, 8.000000f, 6.333336f, 7.333336f, 8.333336f, 9.333337f, 9.000000f, 10.000000f, + 11.000000f, 12.000000f, 9.000000f, 10.000000f, 11.000000f, 12.000000f, 8.999998f, 9.999998f, 10.999998f, 11.999998f, 1.000000f, 2.000000f, 3.000000f, 4.000000f, 1.000000f, 2.000000f, 3.000000f, 4.000000f, 1.000000f, 2.000000f, 3.000000f, 4.000000f, 3.666667f, 4.666667f, 5.666667f, 6.666667f, 5.000000f, 6.000000f, 7.000000f, 8.000000f, 5.000000f, 6.000000f, 7.000000f, 8.000000f, 6.333336f, 7.333336f, 8.333336f, 9.333337f, 9.000000f, 10.000000f, 11.000000f, 12.000000f, 9.000000f, 10.000000f, 11.000000f, 12.000000f, 8.999998f, 9.999998f, 10.999998f, 11.999998f, 1.000000f, 2.000000f, 3.000000f, 4.000000f, 1.000000f, 2.000000f, 3.000000f, 4.000000f, 1.000000f, 2.000000f, 3.000000f, 4.000000f, 3.666667f, 4.666667f, 5.666667f, 6.666667f, 5.000000f, 6.000000f, 7.000000f, 8.000000f, 5.000000f, 6.000000f, 7.000000f, 8.000000f, 6.333336f, 7.333336f, 8.333336f, 9.333337f, 9.000000f, 10.000000f, 11.000000f, 12.000000f, 9.000000f, 10.000000f, 11.000000f, 12.000000f, 8.999998f, 9.999998f, 10.999998f, 11.999998f, 1.000000f, 2.000000f, 3.000000f, 4.000000f, 1.000000f, 2.000000f, 3.000000f, 4.000000f, 1.000000f, 2.000000f, 3.000000f, 4.000000f, 3.666667f, 4.666667f, 5.666667f, 6.666667f, 5.000000f, 6.000000f, 7.000000f, 8.000000f, 5.000000f, 6.000000f, 7.000000f, 8.000000f, 6.333336f, 7.333336f, 8.333336f, 9.333337f, 9.000000f, 10.000000f, 11.000000f, 12.000000f, 9.000000f, 10.000000f, 11.000000f, 12.000000f, 8.999998f, 9.999998f, 10.999998f, 11.999998f, 1.000000f, 2.000000f, 3.000000f, 4.000000f, 1.000000f, 2.000000f, 3.000000f, 4.000000f, 1.000000f, 2.000000f, 3.000000f, 4.000000f, 3.666667f, 4.666667f, 5.666667f, 6.666667f, 5.000000f, 6.000000f, 7.000000f, 8.000000f, 5.000000f, 6.000000f, 7.000000f, 8.000000f, 6.333336f, 7.333336f, 8.333336f, 9.333336f, 8.999999f, 9.999999f, 11.000000f, 11.999999f, 8.999999f, 9.999999f, 11.000000f, 11.999999f, 8.999998f, 9.999997f, 10.999997f, 11.999997f, 13.000003f, 14.000004f, 15.000003f, 16.000004f, 13.000003f, 14.000004f, 15.000003f, 16.000004f, 13.000003f, 14.000004f, 15.000003f, 16.000004f, 15.666671f, 16.666672f, 17.666672f, 18.666672f, 17.000006f, 18.000004f, 19.000006f, 20.000004f, 17.000006f, 18.000004f, 19.000006f, 20.000004f, 18.333344f, 19.333344f, 20.333345f, 21.333344f, 21.000006f, 22.000006f, 23.000006f, 24.000006f, 21.000006f, 22.000006f, 23.000006f, 24.000006f, 21.000002f, 22.000000f, 23.000002f, 24.000000f, 13.000000f, 14.000001f, 15.000000f, 16.000000f, 13.000000f, 14.000001f, 15.000000f, 16.000000f, 13.000000f, 14.000001f, 15.000000f, 16.000000f, 15.666667f, 16.666668f, 17.666668f, 18.666668f, 17.000002f, 18.000000f, 19.000002f, 20.000000f, 17.000002f, 18.000000f, 19.000002f, 20.000000f, 18.333340f, 19.333340f, 20.333342f, 21.333340f, 21.000002f, 22.000000f, 22.999998f, 24.000000f, 21.000002f, 22.000000f, 22.999998f, 24.000000f, 20.999996f, 21.999996f, 22.999994f, 23.999996f, 13.000000f, 14.000001f, 15.000000f, 16.000000f, 13.000000f, 14.000001f, 15.000000f, 16.000000f, 13.000000f, 14.000001f, 15.000000f, 16.000000f, 15.666667f, 16.666668f, 17.666668f, 
18.666668f, 17.000002f, 18.000000f, 19.000002f, 20.000000f, 17.000002f, 18.000000f, 19.000002f, 20.000000f, 18.333340f, 19.333340f, 20.333342f, 21.333340f, 21.000002f, 22.000000f, 22.999998f, 24.000000f, 21.000002f, 22.000000f, 22.999998f, 24.000000f, 20.999996f, 21.999996f, 22.999994f, 23.999996f, 13.000000f, 14.000001f, 15.000000f, 16.000000f, 13.000000f, 14.000001f, 15.000000f, 16.000000f, 13.000000f, 14.000001f, 15.000000f, 16.000000f, 15.666667f, 16.666668f, 17.666668f, 18.666668f, 17.000002f, 18.000000f, 19.000002f, 20.000000f, 17.000002f, 18.000000f, 19.000002f, 20.000000f, 18.333340f, 19.333340f, 20.333342f, 21.333340f, 21.000002f, 22.000000f, 22.999998f, 24.000000f, 21.000002f, 22.000000f, 22.999998f, 24.000000f, 20.999996f, 21.999996f, 22.999994f, 23.999996f, 12.999995f, 13.999995f, 14.999994f, 15.999994f, 12.999995f, 13.999995f, 14.999994f, 15.999994f, 12.999995f, 13.999995f, 14.999994f, 15.999994f, 15.666661f, 16.666662f, 17.666660f, 18.666660f, 16.999994f, 17.999994f, 18.999992f, 19.999992f, 16.999994f, 17.999994f, 18.999992f, 19.999992f, 18.333334f, 19.333332f, 20.333334f, 21.333332f, 20.999992f, 21.999992f, 22.999990f, 23.999992f, 20.999992f, 21.999992f, 22.999990f, 23.999992f, 20.999989f, 21.999989f, 22.999987f, 23.999987f });
+ //input.linspace(1);
diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests12.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests12.cpp index 2bca43ae9..66762f79d 100644
--- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests12.cpp
+++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests12.cpp
@@ -27,6 +27,7 @@
 #include
 #include
 #include
+#include
 using namespace sd;
@@ -2821,6 +2822,330 @@ TEST_F(DeclarableOpsTests12, QR_Test_2) {
 }
+
+TEST_F(DeclarableOpsTests12, ImageResize_Test1) {
+
+ NDArray input = NDArrayFactory::create('c', {1, 5, 5, 1}, {
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25
+ });
+ auto size = NDArrayFactory::create({7, 8});
+ NDArray expected = NDArrayFactory::create('c', {1, 7, 8, 1}, {
+ 0.628328f, 0.97913796f, 1.8058043f, 2.563919f, 2.844548f,
+ 3.6026628f, 4.4293294f, 4.7801394f, 2.9474494f, 3.2982588f,
+ 4.1249247f, 4.8830395f, 5.1636696f, 5.9217834f, 6.7484493f,
+ 7.09926f, 8.165832f, 8.516642f, 9.3433075f, 10.101422f,
+ 10.382052f, 11.140167f, 11.966835f, 12.317646f, 10.924093f,
+ 11.274903f, 12.10157f, 12.859686f, 13.140315f, 13.898429f,
+ 14.725095f, 15.075906f, 13.682358f, 14.033167f, 14.859833f,
+ 15.617949f, 15.898578f, 16.656693f, 17.48336f, 17.834171f,
+ 18.900742f, 19.251549f, 20.078213f, 20.83633f, 21.11696f,
+ 21.875074f, 22.701742f, 23.052553f, 21.219858f, 21.57067f,
+ 22.397337f, 23.155449f, 23.436079f, 24.194195f, 25.020863f,
+ 25.371672f
+ });
+
+ sd::ops::image_resize op;
+ // resize with lanczos5 without antialiasing and aspect ratio preserving
+ auto results = op.evaluate({&input, &size}, {}, {ops::helpers::kResizeLanczos5}, {false, false});
+
+ ASSERT_EQ(ND4J_STATUS_OK, results.status());
+
+ auto result = results[0];///.at(0);
+// result->printBuffer("Lanczos5 Resized to 7x8");
+// expected.printBuffer("Lanczos5 Expect for 7x8");
+ ASSERT_TRUE(expected.isSameShape(result));
+ ASSERT_TRUE(expected.equalsTo(result));
+}
+
+TEST_F(DeclarableOpsTests12, ImageResize_Test2) {
+
+ NDArray input = NDArrayFactory::create('c', {1, 5, 5, 1}, {
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25
+ });
+ auto size = NDArrayFactory::create({7, 8});
+ NDArray expected = NDArrayFactory::create('c', {1, 7, 8, 1}, {
+ 0.628328f, 0.97913796f, 1.8058043f, 2.563919f, 2.844548f,
+ 3.6026628f, 4.4293294f, 4.7801394f, 2.9474494f, 3.2982588f,
+ 4.1249247f, 4.8830395f, 5.1636696f, 5.9217834f, 6.7484493f,
+ 7.09926f, 8.165832f, 8.516642f, 9.3433075f, 10.101422f,
+ 10.382052f, 11.140167f, 11.966835f, 12.317646f, 10.924093f,
+ 11.274903f, 12.10157f, 12.859686f, 13.140315f, 13.898429f,
+ 14.725095f, 15.075906f, 13.682358f, 14.033167f, 14.859833f,
+ 15.617949f, 15.898578f, 16.656693f, 17.48336f, 17.834171f,
+ 18.900742f, 19.251549f, 20.078213f, 20.83633f, 21.11696f,
+ 21.875074f, 22.701742f, 23.052553f, 21.219858f, 21.57067f,
+ 22.397337f, 23.155449f, 23.436079f, 24.194195f, 25.020863f,
+ 25.371672f
+ });
+
+ sd::ops::image_resize op;
+ // resize with lanczos5 without antialiasing and aspect ratio preserving
+ auto results = op.evaluate({&input, &size}, {}, {ops::helpers::kResizeLanczos5}, {false, false});
+
+ ASSERT_EQ(ND4J_STATUS_OK, results.status());
+
+ auto result = results[0];///.at(0);
+// result.printBuffer("Lanczos5 Resized to 7x8");
+// expected.printBuffer("Lanczos5 Expect for 7x8");
+ ASSERT_TRUE(expected.isSameShape(result));
+ ASSERT_TRUE(expected.equalsTo(result));
+}
+
+TEST_F(DeclarableOpsTests12, ImageResize_Test3) {
+
+ NDArray input = NDArrayFactory::create('c', {1, 5, 5, 1}, {
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25
+ });
+ auto size = NDArrayFactory::create({7, 8});
+ NDArray expected = NDArrayFactory::create('c', {1, 7, 8, 1}, {
+ 0.6537938f, 1.0309073f, 1.8018917f, 2.4606667f, 2.9888396f, 3.6476145f, 4.418599f,
+ 4.7957115f, 3.1913466f, 3.5684595f, 4.3394437f, 4.998219f, 5.526393f, 6.185168f,
+ 6.956152f, 7.3332644f, 7.626866f, 8.00398f, 8.774965f, 9.433739f, 9.961912f,
+ 10.620688f, 11.391673f, 11.7687845f, 10.929041f, 11.306154f, 12.077138f, 12.735914f,
+ 13.264087f, 13.922862f, 14.693848f, 15.07096f, 14.231217f, 14.60833f, 15.379314f,
+ 16.038086f, 16.56626f, 17.225037f, 17.996023f, 18.373135f, 18.666735f, 19.043848f,
+ 19.814833f, 20.473606f, 21.00178f, 21.660557f, 22.431541f, 22.808653f, 21.204287f,
+ 21.581398f, 22.352386f, 23.01116f, 23.539333f, 24.19811f, 24.969095f, 25.346205f
+ });
+
+ sd::ops::image_resize op;
+ // resize with lanczos3 without antialiasing and aspect ratio preserving
+ auto results = op.evaluate({&input, &size}, {}, {ops::helpers::kResizeLanczos3}, {false, false});
+
+ ASSERT_EQ(ND4J_STATUS_OK, results.status());
+
+ auto result = results[0];///.at(0);
+// result.printBuffer("Lanczos3 Resized to 7x8");
+// expected.printBuffer("Lanczos3 Expect for 7x8");
+ ASSERT_TRUE(expected.isSameShape(result));
+ ASSERT_TRUE(expected.equalsTo(result));
+}
+
+TEST_F(DeclarableOpsTests12, ImageResize_Test4) {
+
+ NDArray input = NDArrayFactory::create('c', {1, 5, 5, 1}, {
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25
+ });
+ auto size = NDArrayFactory::create({7, 8});
+ NDArray expected = NDArrayFactory::create('c', {1, 7, 8, 1}, {
+ 1.4150869f, 1.7928237f, 2.4084527f, 3.0680697f, 3.6419308f, 4.301548f, 4.9171767f,
+ 5.294914f, 4.012885f, 4.390622f, 5.0062513f, 5.6658688f, 6.23973f, 6.899347f,
+ 7.514975f, 7.8927126f, 7.358912f, 7.736648f, 8.352278f, 9.011895f, 9.585756f,
+ 10.245375f, 10.861001f, 11.238739f, 11.060086f, 11.437822f, 12.0534525f, 12.713069f,
+ 13.28693f, 13.946548f, 14.562176f, 14.939912f, 14.761261f, 15.138998f, 15.754629f,
+ 16.414246f, 16.988108f, 17.647724f, 18.263351f, 18.641088f, 18.107288f, 18.485023f,
+ 19.100655f, 19.760273f, 20.334133f, 20.993752f, 21.609377f,
21.987114f, 20.705086f,
+ 21.082823f, 21.698452f, 22.35807f, 22.93193f, 23.591549f, 24.207174f, 24.584913f
+ });
+
+ sd::ops::image_resize op;
+ // resize with gaussian without antialiasing and aspect ratio preserving
+ auto results = op.evaluate({&input, &size}, {}, {ops::helpers::kResizeGaussian}, {false, false});
+
+ ASSERT_EQ(ND4J_STATUS_OK, results.status());
+
+ auto result = results[0];///.at(0);
+// result.printBuffer("Gaussian Resized to 7x8");
+// expected.printBuffer("Gaussian Expect for 7x8");
+ ASSERT_TRUE(expected.isSameShape(result));
+ ASSERT_TRUE(expected.equalsTo(result));
+}
+
+TEST_F(DeclarableOpsTests12, ImageResize_Test5) {
+
+ NDArray input = NDArrayFactory::create('c', {1, 5, 5, 1}, {
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25
+ });
+ auto size = NDArrayFactory::create({7, 8});
+ NDArray expected = NDArrayFactory::create('c', {1, 7, 8, 1}, {
+ 0.6372399f, 1.0536414f, 1.7716959f, 2.3966959f, 3.0216959f, 3.6466963f, 4.3647504f, 4.781152f,
+ 3.3926036f, 3.8090053f, 4.5270596f, 5.1520596f, 5.7770596f, 6.4020596f, 7.1201134f, 7.5365143f,
+ 7.358708f, 7.7751093f, 8.493164f, 9.118163f, 9.743165f, 10.368165f, 11.086218f, 11.502619f,
+ 10.928043f, 11.344445f, 12.0625f, 12.6875f, 13.3125f, 13.9375f, 14.655554f, 15.071955f,
+ 14.49738f, 14.913782f, 15.631836f, 16.256836f, 16.881836f, 17.506836f, 18.22489f, 18.64129f,
+ 18.463486f, 18.879889f, 19.597942f, 20.222942f, 20.847942f, 21.472942f, 22.190996f, 22.607397f,
+ 21.218851f, 21.635252f, 22.353308f, 22.978308f, 23.603308f, 24.228308f, 24.946362f, 25.362762f
+ });
+
+ sd::ops::image_resize op;
+ // resize with bicubic without antialiasing and aspect ratio preserving
+ auto results = op.evaluate({&input, &size}, {}, {ops::helpers::kResizeBicubic}, {false, false});
+
+ ASSERT_EQ(ND4J_STATUS_OK, results.status());
+
+ auto result = results[0];///.at(0);
+// result->printBuffer("Bicubic Resized to 7x8");
+// expected.printBuffer("Bicubic Expect for 7x8");
+ ASSERT_TRUE(expected.isSameShape(result));
+ ASSERT_TRUE(expected.equalsTo(result));
+}
+
+TEST_F(DeclarableOpsTests12, ImageResize_Test6) {
+
+ NDArray input = NDArrayFactory::create('c', {1, 5, 5, 1}, {
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25
+ });
+ auto size = NDArrayFactory::create({7, 8});
+ NDArray expected = NDArrayFactory::create('c', {1, 7, 8, 1}, {
+ 0.63678247f, 1.0531839f, 1.7712381f, 2.396238f, 3.021238f, 3.646238f, 4.364292f, 4.780694f,
+ 3.3934183f, 3.8098197f, 4.5278745f, 5.1528745f, 5.7778745f, 6.402874f, 7.1209283f, 7.5373297f,
+ 7.3566165f, 7.7730184f, 8.491073f, 9.116073f, 9.741073f, 10.366074f, 11.084127f, 11.500528f,
+ 10.928043f, 11.344445f, 12.0625f, 12.6875f, 13.3125f, 13.9375f, 14.655554f, 15.071955f,
+ 14.499474f, 14.915876f, 15.633932f, 16.25893f, 16.883932f, 17.508932f, 18.226984f, 18.643385f,
+ 18.46267f, 18.87907f, 19.597128f, 20.222126f, 20.847128f, 21.472126f, 22.190182f, 22.606583f,
+ 21.219305f, 21.635706f, 22.353762f, 22.978762f, 23.603762f, 24.228764f, 24.946815f, 25.363216f
+ });
+
+ sd::ops::image_resize op;
+ // resize with bicubic with antialiasing and without aspect ratio preserving
+ auto results = op.evaluate({&input, &size}, {}, {ops::helpers::kResizeBicubic}, {false, true});
+
+ ASSERT_EQ(ND4J_STATUS_OK, results.status());
+
+ auto result = results[0];///.at(0);
+// result->printBuffer("Bicubic Resized to 7x8");
+// expected.printBuffer("Bicubic Expect for 7x8");
+ ASSERT_TRUE(expected.isSameShape(result));
+ ASSERT_TRUE(expected.equalsTo(result));
+}
+
+TEST_F(DeclarableOpsTests12, ImageResize_Test7) {
+
+ NDArray input = NDArrayFactory::create('c', {1, 5, 5, 1}, {
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25
+ });
+ auto size = NDArrayFactory::create({7, 8});
+ NDArray expected = NDArrayFactory::create('c', {1, 7, 8, 1}, {
+ 0.98593485f, 1.3872082f, 2.0625007f, 2.6875007f, 3.3125012f, 3.937501f, 4.612794f, 5.014066f,
+ 3.6096964f, 4.01097f, 4.6862626f, 5.311262f, 5.936263f, 6.561262f, 7.2365556f, 7.637828f,
+ 7.4145045f, 7.8157787f, 8.491071f, 9.116072f, 9.741073f, 10.366072f, 11.041365f, 11.4426365f,
+ 10.985933f, 11.387209f, 12.062499f, 12.687501f, 13.312502f, 13.9375f, 14.612794f, 15.014066f,
+ 14.557361f, 14.958637f, 15.633926f, 16.25893f, 16.88393f, 17.508926f, 18.18422f, 18.585491f,
+ 18.36217f, 18.763443f, 19.438736f, 20.063736f, 20.688738f, 21.313736f, 21.98903f, 22.3903f,
+ 20.985931f, 21.387209f, 22.0625f, 22.6875f, 23.3125f, 23.937498f, 24.612793f, 25.014061f
+ });
+
+ sd::ops::image_resize op;
+ // resize with Mitchell cubic with antialiasing and without aspect ratio preserving
+ auto results = op.evaluate({&input, &size}, {}, {ops::helpers::kResizeMitchellcubic}, {false, true});
+
+ ASSERT_EQ(ND4J_STATUS_OK, results.status());
+
+ auto result = results[0];///.at(0);
+// result->printBuffer("Mitchell cubic Resized to 7x8");
+// expected.printBuffer("Mitchell cubic Expect for 7x8");
+ ASSERT_TRUE(expected.isSameShape(result));
+ ASSERT_TRUE(expected.equalsTo(result));
+}
+
+TEST_F(DeclarableOpsTests12, ImageResize_Test8) {
+
+ NDArray input = NDArrayFactory::create('c', {1, 5, 5, 1}, {
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25
+ });
+ auto size = NDArrayFactory::create({7, 8});
+ NDArray expected = NDArrayFactory::create('c', {1, 7, 8, 1}, {
+ 1.f, 1.4375f, 2.0625f, 2.6875f, 3.3125f, 3.9375f, 4.5625f, 5.f,
+ 3.8571427f, 4.2946424f, 4.9196424f, 5.5446424f, 6.1696424f, 6.7946424f, 7.4196424f, 7.8571424f,
+ 7.4285717f, 7.8660717f, 8.491072f, 9.116072f, 9.741072f, 10.366072f, 10.991072f, 11.428572f,
+ 11.f, 11.4375f, 12.0625f, 12.6875f, 13.3125f, 13.9375f, 14.5625f, 15.f,
+ 14.571429f, 15.008929f, 15.633929f, 16.25893f, 16.88393f, 17.50893f, 18.13393f, 18.57143f,
+ 18.142857f, 18.580357f, 19.205357f, 19.830357f, 20.455357f, 21.080357f, 21.705357f, 22.142857f,
+ 21.f, 21.4375f, 22.0625f, 22.6875f, 23.3125f, 23.9375f, 24.5625f, 25.f
+ });
+
+ sd::ops::image_resize op;
+ // resize with bilinear without antialiasing and aspect ratio preserving
+ auto results = op.evaluate({&input, &size}, {}, {ops::helpers::kResizeBilinear}, {false, false});
+
+ ASSERT_EQ(ND4J_STATUS_OK, results.status());
+
+ auto result = results[0];///.at(0);
+// result->printBuffer("Bilinear Resized to 7x8");
+// expected.printBuffer("Bilinear Expect for 7x8");
+ ASSERT_TRUE(expected.isSameShape(result));
+ ASSERT_TRUE(expected.equalsTo(result));
+}
+
+TEST_F(DeclarableOpsTests12, ImageResize_Test9) {
+
+ NDArray input = NDArrayFactory::create('c', {1, 5, 5, 1}, {
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25
+ });
+ auto size = NDArrayFactory::create({7, 8});
+ NDArray expected = NDArrayFactory::create('c', {1, 7, 8, 1}, {
+ 1.f, 1.4f, 2.f, 2.8f, 3.2f, 4.f, 4.6f, 5.f,
+ 4.f, 4.4f, 5.f, 5.8f, 6.2f, 7.f, 7.6f, 8.f,
+ 6.999998f, 7.399998f, 7.999998f, 8.799997f, 9.199997f, 9.999997f, 10.599997f, 10.999996f,
+ 11.f, 11.399999f,
12.f, 12.799999f, 13.199999f, 13.999998f, 14.599998f, 14.999999f, + 15.f, 15.4f, 16.f, 16.8f, 17.2f, 18.f, 18.6f, 19.f, 17.999989f, + 18.399990f, 18.999989f, 19.799988f, 20.199987f, 20.999989f, 21.599989f, 21.999989f, 21.f, + 21.4f, 22.f, 22.8f, 23.2f, 24.f, 24.6f, 25.f + }); + + sd::ops::image_resize op; + // resize with area without antialising and aspect ratio preserving + auto results = op.evaluate({&input, &size}, {}, {ops::helpers::kResizeArea}, {false, false}); + + ASSERT_EQ(ND4J_STATUS_OK, results.status()); + + auto result = results[0];///.at(0); +// result->printBuffer("Area Resized to 7x8"); +// expected.printBuffer("Area Expect for 7x8"); + ASSERT_TRUE(expected.isSameShape(result)); + ASSERT_TRUE(expected.equalsTo(result)); +} + +TEST_F(DeclarableOpsTests12, ImageResize_Test10) { + + NDArray input = NDArrayFactory::create('c', {1, 5, 5, 1}, { + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 + }); + auto size = NDArrayFactory::create({7, 8}); + NDArray expected = NDArrayFactory::create('c', {1, 7, 8, 1}, { + 1, 1, 2, 3, 3, 4, 5, 5, 6, 6, 7, 8, 8, 9, 10, 10, 6, + 6, 7, 8, 8, 9, 10, 10, 11, 11, 12, 13, 13, 14, 15, 15, 16, 16, + 17, 18, 18, 19, 20, 20, 16, 16, 17, 18, 18, 19, 20, 20, 21, 21, 22, + 23, 23, 24, 25, 25 + }); + + sd::ops::image_resize op; + // resize with nearest neigbors without antialising and aspect ratio preserving + auto results = op.evaluate({&input, &size}, {}, {ops::helpers::kResizeNearest}, {false, false}); + + ASSERT_EQ(ND4J_STATUS_OK, results.status()); + + auto result = results[0];///.at(0); +// result->printBuffer("Nearest neighbor Resized to 7x8"); +// expected.printBuffer("Nearest neighbor Expect for 7x8"); + ASSERT_TRUE(expected.isSameShape(result)); + ASSERT_TRUE(expected.equalsTo(result)); +} + +TEST_F(DeclarableOpsTests12, ImageResize_Test11) { + + NDArray input = NDArrayFactory::create('c', {1, 5, 5, 1}, { + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 + }); + auto size = NDArrayFactory::create({7, 8}); + NDArray expected = NDArrayFactory::create('c', {1, 7, 8, 1}, { + 1, 1, 2, 3, 3, 4, 5, 5, 6, 6, 7, 8, 8, 9, 10, 10, 6, + 6, 7, 8, 8, 9, 10, 10, 11, 11, 12, 13, 13, 14, 15, 15, 16, 16, + 17, 18, 18, 19, 20, 20, 16, 16, 17, 18, 18, 19, 20, 20, 21, 21, 22, + 23, 23, 24, 25, 25 + }); + + sd::ops::image_resize op; + // resize with nearest neigbors without antialising and aspect ratio preserving + auto results = op.evaluate({&input, &size}, {}, {ops::helpers::kResizeNearest}, {false, false}); + + ASSERT_EQ(ND4J_STATUS_OK, results.status()); + + auto result = results[0];///.at(0); +// result->printBuffer("Nearest neighbor Resized to 7x8"); +// expected.printBuffer("Nearest neighbor Expect for 7x8"); + ASSERT_TRUE(expected.isSameShape(result)); + ASSERT_TRUE(expected.equalsTo(result)); +} + //////////////////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests12, TriangularSolve_Test_1) { diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/enums/ImageResizeMethod.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/enums/ImageResizeMethod.java index 42043dad7..951e87fdc 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/enums/ImageResizeMethod.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/enums/ImageResizeMethod.java @@ -27,17 +27,12 @@ package org.nd4j.enums; * ResizeArea: Anti-aliased resampling with area interpolation. 
'antialias' has no effect when used with area interpolation; it always anti-aliases. * ResizeMitchelcubic: Mitchell-Netravali Cubic non-interpolating filter. For synthetic images (especially those lacking proper prefiltering), less ringing than Keys cubic kernel but less sharp. */ public enum ImageResizeMethod { - ResizeBilinear, - - ResizeBicubic, - + ResizeBilinear, // as Java requires ResizeNearest, - + ResizeBicubic, + ResizeArea, ResizeGaussian, - + ResizeLanczos3, ResizeLanczos5, - - ResizeMitchelcubic, - - ResizeArea + ResizeMitchellcubic; } diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCuda.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCuda.java index 1307ab0ae..59496d780 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCuda.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCuda.java @@ -4417,7 +4417,7 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); /** * fill target matrix with given value in one or two directions from main diagonal: - * - down from main diagonal starting at subdiagonal number "lower" if direction = 'd' (down) or 'b' (both) + * - down from main diagonal starting at subdiagonal number "lower" if direction = 'l' (down) or 'b' (both) * - up from main diagonal starting at superdiagonal number "upper"if direction = 'u' (up) or 'b' (both) * direction - in what direction to fill matrix. There are 3 possible directions: * 'u' - fill up, mathematically this corresponds to lower triangular matrix, subdiagonal "lower" unaffected @@ -4830,9 +4830,11 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); //////////////////////////////////////////////////////////////////////// - +//////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////// - // #ifndef __JAVACPP_HACK__ // #endif @@ -7349,9 +7351,9 @@ public static final int PREALLOC_SIZE = 33554432; * Returns the element wise stride for this information * buffer */ - @Namespace("shape") public static native @Cast("Nd4jLong") long elementWiseStride(@Cast("const Nd4jLong*") LongPointer buffer); - @Namespace("shape") public static native @Cast("Nd4jLong") long elementWiseStride(@Cast("const Nd4jLong*") LongBuffer buffer); - @Namespace("shape") public static native @Cast("Nd4jLong") long elementWiseStride(@Cast("const Nd4jLong*") long[] buffer); + @Namespace("shape") public static native @Cast("Nd4jLong") long elementWiseStride(@Cast("const Nd4jLong*") LongPointer shapeInfo); + @Namespace("shape") public static native @Cast("Nd4jLong") long elementWiseStride(@Cast("const Nd4jLong*") LongBuffer shapeInfo); + @Namespace("shape") public static native @Cast("Nd4jLong") long elementWiseStride(@Cast("const Nd4jLong*") long[] shapeInfo); /** diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java index b4ef3cb05..b9e4adb5a 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java @@ -4421,7 +4421,7 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); /** * fill target matrix with given value in one or two
directions from main diagonal: - * - down from main diagonal starting at subdiagonal number "lower" if direction = 'd' (down) or 'b' (both) + * - down from main diagonal starting at subdiagonal number "lower" if direction = 'l' (down) or 'b' (both) * - up from main diagonal starting at superdiagonal number "upper"if direction = 'u' (up) or 'b' (both) * direction - in what direction to fill matrix. There are 3 possible directions: * 'u' - fill up, mathematically this corresponds to lower triangular matrix, subdiagonal "lower" unaffected @@ -4834,9 +4834,11 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); //////////////////////////////////////////////////////////////////////// - +//////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////// - // #ifndef __JAVACPP_HACK__ // #endif @@ -7353,9 +7355,9 @@ public static final int PREALLOC_SIZE = 33554432; * Returns the element wise stride for this information * buffer */ - @Namespace("shape") public static native @Cast("Nd4jLong") long elementWiseStride(@Cast("const Nd4jLong*") LongPointer buffer); - @Namespace("shape") public static native @Cast("Nd4jLong") long elementWiseStride(@Cast("const Nd4jLong*") LongBuffer buffer); - @Namespace("shape") public static native @Cast("Nd4jLong") long elementWiseStride(@Cast("const Nd4jLong*") long[] buffer); + @Namespace("shape") public static native @Cast("Nd4jLong") long elementWiseStride(@Cast("const Nd4jLong*") LongPointer shapeInfo); + @Namespace("shape") public static native @Cast("Nd4jLong") long elementWiseStride(@Cast("const Nd4jLong*") LongBuffer shapeInfo); + @Namespace("shape") public static native @Cast("Nd4jLong") long elementWiseStride(@Cast("const Nd4jLong*") long[] shapeInfo); /** @@ -21173,214 +21175,6 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD(); } // #endif - /** - * This op make bilinear or nearest neighbor interpolated resize for given tensor - * - * input array: - * 0 - 4D-Tensor with shape (batch, sizeX, sizeY, channels) numeric type - * 1 - 2D-Tensor with shape (num_boxes, 4) float type - * 2 - 1D-Tensor with shape (num_boxes) int type - * 3 - 1D-Tensor with 2 values (newWidth, newHeight) (optional) int type - * - * float arguments (optional) - * 0 - exprapolation_value (optional) default 0.f - * - * int arguments: (optional) - * 0 - mode (default 0 - bilinear interpolation) - * - * output array: - * the 4D-Tensor with resized to crop_size images given - float type - */ -// #if NOT_EXCLUDED(OP_crop_and_resize) - @Namespace("sd::ops") public static class crop_and_resize extends DeclarableCustomOp { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public crop_and_resize(Pointer p) { super(p); } - /** Native array allocator. Access with {@link Pointer#position(long)}. 
*/ - public crop_and_resize(long size) { super((Pointer)null); allocateArray(size); } - private native void allocateArray(long size); - @Override public crop_and_resize position(long position) { - return (crop_and_resize)super.position(position); - } - - public crop_and_resize() { super((Pointer)null); allocate(); } - private native void allocate(); - public native ShapeList calculateOutputShape(ShapeList inputShape, @ByRef Context block); - } -// #endif - - /** - * This op make bilinear interpolated resize for given tensor - * - * input array: - * 0 - 4D-Tensor with shape (batch, sizeX, sizeY, channels) - * 1 - 1D-Tensor with 2 values (newWidth, newHeight) (optional) - * - * int arguments: (optional) - * 0 - new width - * 1 - new height - * - * output array: - * the 4D-Tensor with calculated backproped dots - * - * CAUTION: either size tensor or a pair of int params should be provided. - */ - -// #if NOT_EXCLUDED(OP_resize_bilinear) - @Namespace("sd::ops") public static class resize_bilinear extends DeclarableCustomOp { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public resize_bilinear(Pointer p) { super(p); } - /** Native array allocator. Access with {@link Pointer#position(long)}. */ - public resize_bilinear(long size) { super((Pointer)null); allocateArray(size); } - private native void allocateArray(long size); - @Override public resize_bilinear position(long position) { - return (resize_bilinear)super.position(position); - } - - public resize_bilinear() { super((Pointer)null); allocate(); } - private native void allocate(); - public native ShapeList calculateOutputShape(ShapeList inputShape, @ByRef Context block); - } -// #endif - - /** - * This op make nearest neighbor interpolated resize for given tensor - * - * input array: - * 0 - 4D-Tensor with shape (batch, sizeX, sizeY, channels) - * 1 - 1D-Tensor with 2 values (newWidth, newHeight) (optional) - * - * int arguments: (optional) - * 0 - new width - * 1 - new height - * - * output array: - * the 4D-Tensor with resized image (shape is {batch, newWidth, newHeight, channels}) - * - * CAUTION: either size tensor or a pair of int params should be provided. - */ - -// #if NOT_EXCLUDED(OP_resize_nearest_neighbor) - @Namespace("sd::ops") public static class resize_nearest_neighbor extends DeclarableCustomOp { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public resize_nearest_neighbor(Pointer p) { super(p); } - /** Native array allocator. Access with {@link Pointer#position(long)}. 
*/ - public resize_nearest_neighbor(long size) { super((Pointer)null); allocateArray(size); } - private native void allocateArray(long size); - @Override public resize_nearest_neighbor position(long position) { - return (resize_nearest_neighbor)super.position(position); - } - - public resize_nearest_neighbor() { super((Pointer)null); allocate(); } - private native void allocate(); - public native ShapeList calculateOutputShape(ShapeList inputShape, @ByRef Context block); - } -// #endif - - /** - * This op make bicubic interpolated resize for given tensor - * - * input array: - * 0 - 4D-Tensor with shape (batch, sizeX, sizeY, channels) - * 1 - 1D-Tensor with 2 values (newWidth, newHeight) - * - * output array: - * the 4D-Tensor with resized image (shape is {batch, newWidth, newHeight, channels}) - * - */ -// #if NOT_EXCLUDED(OP_resize_bicubic) - @Namespace("sd::ops") public static class resize_bicubic extends DeclarableCustomOp { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public resize_bicubic(Pointer p) { super(p); } - /** Native array allocator. Access with {@link Pointer#position(long)}. */ - public resize_bicubic(long size) { super((Pointer)null); allocateArray(size); } - private native void allocateArray(long size); - @Override public resize_bicubic position(long position) { - return (resize_bicubic)super.position(position); - } - - public resize_bicubic() { super((Pointer)null); allocate(); } - private native void allocate(); - public native ShapeList calculateOutputShape(ShapeList inputShape, @ByRef Context block); - } -// #endif - - /** - * This op make area interpolated resize (as OpenCV INTER_AREA algorithm) for given tensor - * - * input array: - * 0 - images - 4D-Tensor with shape (batch, sizeX, sizeY, channels) - * 1 - size - 1D-Tensor with 2 values (newWidth, newHeight) (if missing a pair of integer args should be provided). - * - * int args: - proveded only when size tensor is missing - * 0 - new height - * 1 - new width - * boolean args: - * 0 - align_corners - optional (default is false) - * - * output array: - * the 4D-Tensor with resized image (shape is {batch, newWidth, newHeight, channels}) - * - */ -// #if NOT_EXCLUDED(OP_resize_area) - @Namespace("sd::ops") public static class resize_area extends DeclarableCustomOp { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public resize_area(Pointer p) { super(p); } - /** Native array allocator. Access with {@link Pointer#position(long)}. */ - public resize_area(long size) { super((Pointer)null); allocateArray(size); } - private native void allocateArray(long size); - @Override public resize_area position(long position) { - return (resize_area)super.position(position); - } - - public resize_area() { super((Pointer)null); allocate(); } - private native void allocate(); - public native ShapeList calculateOutputShape(ShapeList inputShape, @ByRef Context block); - } -// #endif - - /** - * This op make interpolated resize for given tensor with given algorithm. - * Supported algorithms are bilinear, bicubic, nearest_neighbor. 
- * Need to implement to full compatibility with TF: lanczos5, gaussian, area and mitchellcubic - * - * input array: - * 0 - 4D-Tensor with shape (batch, sizeX, sizeY, channels) - * 1 - 1D-Tensor with 2 values (newWidth, newHeight) - * - * optional int args: - * 0 - algorithm - bilinear by default - * optional bool args: - * 0 - preserve_aspect_ratio - default False - * 1 - antialias - default False - * - * output array: - * the 4D-Tensor with resized by given algorithm image (shape is {batch, newWidth, newHeight, channels}) - * - */ - -// #if NOT_EXCLUDED(OP_image_resize) - @Namespace("sd::ops") public static class image_resize extends DeclarableCustomOp { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public image_resize(Pointer p) { super(p); } - /** Native array allocator. Access with {@link Pointer#position(long)}. */ - public image_resize(long size) { super((Pointer)null); allocateArray(size); } - private native void allocateArray(long size); - @Override public image_resize position(long position) { - return (image_resize)super.position(position); - } - - public image_resize() { super((Pointer)null); allocate(); } - private native void allocate(); - public native ShapeList calculateOutputShape(ShapeList inputShape, @ByRef Context block); - } -// #endif - /** * Copy a tensor setting everything outside a central band in each innermost matrix * @@ -22783,7 +22577,7 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD(); // #define LIBND4J_HEADERS_BLAS_H // #include - + /** * This op is general matmum implementation. Depending on inputs dimensionality output result might be different. * matrix x matrix = BLAS gemm @@ -22904,11 +22698,11 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD(); * alpha: vector of T * beta: vector of T * ...: A, B matrices sequentially. i.e: AAAAABBBBB - * + * * Integer arguments: * transA, transB, M, N, K, ldA, ldB, ldC - usual BLAS gemm arguments * batchCount - number of operations in this batch - * + * * PLEASE NOTE: M, N, K, ldA, ldB, ldC should be equal for all matrices within batch. */ // #if NOT_EXCLUDED(OP_batched_gemm) @@ -22931,22 +22725,22 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD(); /** * performs singular value decomposition (SVD) of one or more matrices, evaluates the SVD of each inner-most 2D matrix in input array: - * x[..., :, :] = u[..., :, :] * s[...,:] * transpose(v[..., :, :]) + * x[..., :, :] = u[..., :, :] * s[...,:] * transpose(v[..., :, :]) * * Input array: * x[..., Rows, Cols], the necessary condition is: rank of x >= 2 - * + * * Outputs arrays: * s[..., diagSize] - array with singular values which are stored in decreasing order, diagSize is smaller among Rows and Cols * u[..., Rows, Rows] if IArgs[1] is true, else u[..., Rows, diagSize] - array with right singular vectors * v[..., Cols, Cols] if IArgs[1] is true, else v[..., Cols, diagSize] - array with left singular vectors - * + * * Integer arguments: * IArgs[0] - bool, whether to calculate u and v, s is calculated in any case * IArgs[1] - bool, whether to calculate full-sized u and v * IArgs[2] - the number of cols or rows which determines what algorithm to use. More precisely: * if diagSize < IArgs[2] then Jacobi algorithm is used, in opposite case the Divide-And-Conquer is applied - * Recommended value is 16. + * Recommended value is 16. 
*/ // #if NOT_EXCLUDED(OP_svd) @Namespace("sd::ops") public static class svd extends DeclarableCustomOp { @@ -22963,7 +22757,35 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD(); public svd() { super((Pointer)null); allocate(); } private native void allocate(); public native ShapeList calculateOutputShape(ShapeList inputShape, @ByRef Context block); - } + } +// #endif + + /** + * calculates square root of matrix such that + * x[..., M, M] = z[..., M, M] x z[..., M, M] + * + * Input array: + * x[..., M, M], the necessary condition is: rank of x >= 2 and equality of last two dimensions + * + * Outputs arrays: + * z - same shape as x + */ +// #if NOT_EXCLUDED(OP_sqrtm) + @Namespace("sd::ops") public static class sqrtm extends DeclarableOp { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public sqrtm(Pointer p) { super(p); } + /** Native array allocator. Access with {@link Pointer#position(long)}. */ + public sqrtm(long size) { super((Pointer)null); allocateArray(size); } + private native void allocateArray(long size); + @Override public sqrtm position(long position) { + return (sqrtm)super.position(position); + } + + public sqrtm() { super((Pointer)null); allocate(); } + private native void allocate(); + public native ShapeList calculateOutputShape(ShapeList inputShape, @ByRef Context block); + } // #endif diff --git a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/opvalidation/TransformOpValidation.java b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/opvalidation/TransformOpValidation.java index c42b7f7a5..54f78dbf9 100644 --- a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/opvalidation/TransformOpValidation.java +++ b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/opvalidation/TransformOpValidation.java @@ -2107,14 +2107,16 @@ public class TransformOpValidation extends BaseOpValidation { //TODO: Methods failed ResizeLanczos5, ResizeMitchelcubic, ResizeArea for (ImageResizeMethod method : ImageResizeMethod.values()) { - if (method==ImageResizeMethod.ResizeLanczos5 || method==ImageResizeMethod.ResizeArea || method==ImageResizeMethod.ResizeMitchelcubic) + if (method==ImageResizeMethod.ResizeLanczos5 || method==ImageResizeMethod.ResizeArea || method==ImageResizeMethod.ResizeMitchellcubic) {continue;} + log.info("Trying {}", method); + Nd4j.getRandom().setSeed(12345); SameDiff sd = SameDiff.create(); boolean preserveAspectRatio = true; boolean antialias = true; - SDVariable inputImage = sd.var(Nd4j.rand(1, 5, 5, 3)); + SDVariable inputImage = sd.var(Nd4j.rand(DataType.FLOAT, 1, 5, 5, 3)); // NHWC format long[] expectedShape = new long[]{1, 3, 3, 3}; SDVariable requestedSize = sd.constant(Nd4j.createFromArray( new long[]{3, 3})); From 1233acf2ab53b273e6054f6cc2863c57342c43b8 Mon Sep 17 00:00:00 2001 From: Serhii Shepel <9946053+sshepel@users.noreply.github.com> Date: Fri, 29 May 2020 11:01:02 +0300 Subject: [PATCH 16/21] Fix formatting, remove obsolete files (#439) * Update/remove obsolete files * Fix nd4j-parameter-server-parent folder and module name * Fix formatting for libnd4j pom * Remove LICENSE file check for libnd4j build * Temp revert removing encoding and version for nd4j-parameter-server-model, nd4j-parameter-server-node, nd4j-parameter-server-client --- arbiter/.travis.yml | 24 -- arbiter/ci/build-linux-x86_64.sh | 28 -- arbiter/ci/settings.xml | 28 -- {arbiter/contrib => contrib}/formatter.xml | 0 datavec/.travis.yml | 28 -- datavec/LICENSE | 201 
---------- datavec/ci/build-linux-x86_64.sh | 29 -- datavec/ci/settings.xml | 28 -- datavec/contrib/formatter.xml | 353 ------------------ deeplearning4j/.travis.yml | 32 -- deeplearning4j/LICENSE.txt | 202 ---------- deeplearning4j/ci/build-linux-x86_64.sh | 30 -- deeplearning4j/ci/settings.xml | 28 -- deeplearning4j/contrib/formatter.xml | 353 ------------------ jumpy/.travis.yml | 25 -- jumpy/LICENSE | 201 ---------- libnd4j/CMakeLists.txt | 1 - libnd4j/LICENSE | 202 ---------- libnd4j/pom.xml | 35 +- nd4j/.appveyor.yml | 45 --- nd4j/.codeclimate.yml | 32 -- nd4j/.travis.yml | 96 ----- nd4j/LICENSE | 201 ---------- nd4j/VERSION | 1 - nd4j/buildAllversions.sh | 23 -- nd4j/ci/build-android.sh | 47 --- nd4j/ci/build-ios.sh | 49 --- nd4j/ci/build-linux-x86_64.sh | 77 ---- nd4j/ci/build-macosx-x86_64.sh | 71 ---- nd4j/ci/build-windows-x86_64.cmd | 49 --- nd4j/ci/settings.xml | 28 -- nd4j/contrib/formatter.xml | 353 ------------------ .../nd4j-parameter-server-client/pom.xml | 28 +- .../pom.xml | 21 +- .../model/MasterConnectionInfo.java | 0 .../parameterserver/model/MasterStatus.java | 0 .../parameterserver/model/ServerState.java | 0 .../parameterserver/model/ServerType.java | 0 .../parameterserver/model/ServerTypeJson.java | 0 .../model/SlaveConnectionInfo.java | 0 .../parameterserver/model/SlaveStatus.java | 0 .../model/SubscriberState.java | 0 .../nd4j-parameter-server-node/pom.xml | 40 +- .../pom.xml | 26 +- .../nd4j-parameter-server-status/pom.xml | 28 +- .../nd4j-parameter-server/pom.xml | 35 +- nd4j/nd4j-parameter-server-parent/pom.xml | 65 ++-- nd4j/nd4j-remote/README.md | 0 nd4j/nd4j-serde/nd4j-aeron/LICENSE | 201 ---------- nd4j/nd4j-serde/nd4j-aeron/README.md | 1 - nd4s/.travis.yml | 17 - nd4s/LICENSE | 202 ---------- perform-release.sh | 6 +- pydatavec/.travis.yml | 25 -- pydatavec/LICENSE | 201 ---------- pydatavec/README.md | 1 - pydl4j/LICENSE | 201 ---------- pydl4j/README.md | 1 - rl4j/LICENSE.txt | 201 ---------- rl4j/README.md | 8 +- rl4j/contrib/formatter.xml | 353 ------------------ rl4j/{ => docs/images}/cartpole.gif | Bin rl4j/{ => docs/images}/doom.gif | Bin rl4j/{ => docs/images}/malmo.gif | Bin rl4j/scoregraph.png | Bin 90128 -> 0 bytes scalnet/.travis.yml | 11 - 66 files changed, 125 insertions(+), 4447 deletions(-) delete mode 100644 arbiter/.travis.yml delete mode 100755 arbiter/ci/build-linux-x86_64.sh delete mode 100644 arbiter/ci/settings.xml rename {arbiter/contrib => contrib}/formatter.xml (100%) delete mode 100644 datavec/.travis.yml delete mode 100644 datavec/LICENSE delete mode 100755 datavec/ci/build-linux-x86_64.sh delete mode 100644 datavec/ci/settings.xml delete mode 100644 datavec/contrib/formatter.xml delete mode 100644 deeplearning4j/.travis.yml delete mode 100755 deeplearning4j/LICENSE.txt delete mode 100755 deeplearning4j/ci/build-linux-x86_64.sh delete mode 100644 deeplearning4j/ci/settings.xml delete mode 100644 deeplearning4j/contrib/formatter.xml delete mode 100644 jumpy/.travis.yml delete mode 100644 jumpy/LICENSE delete mode 100755 libnd4j/LICENSE delete mode 100644 nd4j/.appveyor.yml delete mode 100644 nd4j/.codeclimate.yml delete mode 100644 nd4j/.travis.yml delete mode 100644 nd4j/LICENSE delete mode 100644 nd4j/VERSION delete mode 100755 nd4j/buildAllversions.sh delete mode 100755 nd4j/ci/build-android.sh delete mode 100755 nd4j/ci/build-ios.sh delete mode 100755 nd4j/ci/build-linux-x86_64.sh delete mode 100755 nd4j/ci/build-macosx-x86_64.sh delete mode 100644 nd4j/ci/build-windows-x86_64.cmd delete mode 100644 nd4j/ci/settings.xml 
delete mode 100644 nd4j/contrib/formatter.xml rename nd4j/nd4j-parameter-server-parent/{nd4j-parameterserver-model => nd4j-parameter-server-model}/pom.xml (78%) rename nd4j/nd4j-parameter-server-parent/{nd4j-parameterserver-model => nd4j-parameter-server-model}/src/main/java/org/nd4j/parameterserver/model/MasterConnectionInfo.java (100%) rename nd4j/nd4j-parameter-server-parent/{nd4j-parameterserver-model => nd4j-parameter-server-model}/src/main/java/org/nd4j/parameterserver/model/MasterStatus.java (100%) rename nd4j/nd4j-parameter-server-parent/{nd4j-parameterserver-model => nd4j-parameter-server-model}/src/main/java/org/nd4j/parameterserver/model/ServerState.java (100%) rename nd4j/nd4j-parameter-server-parent/{nd4j-parameterserver-model => nd4j-parameter-server-model}/src/main/java/org/nd4j/parameterserver/model/ServerType.java (100%) rename nd4j/nd4j-parameter-server-parent/{nd4j-parameterserver-model => nd4j-parameter-server-model}/src/main/java/org/nd4j/parameterserver/model/ServerTypeJson.java (100%) rename nd4j/nd4j-parameter-server-parent/{nd4j-parameterserver-model => nd4j-parameter-server-model}/src/main/java/org/nd4j/parameterserver/model/SlaveConnectionInfo.java (100%) rename nd4j/nd4j-parameter-server-parent/{nd4j-parameterserver-model => nd4j-parameter-server-model}/src/main/java/org/nd4j/parameterserver/model/SlaveStatus.java (100%) rename nd4j/nd4j-parameter-server-parent/{nd4j-parameterserver-model => nd4j-parameter-server-model}/src/main/java/org/nd4j/parameterserver/model/SubscriberState.java (100%) delete mode 100644 nd4j/nd4j-remote/README.md delete mode 100644 nd4j/nd4j-serde/nd4j-aeron/LICENSE delete mode 100644 nd4j/nd4j-serde/nd4j-aeron/README.md delete mode 100644 nd4s/.travis.yml delete mode 100644 nd4s/LICENSE delete mode 100644 pydatavec/.travis.yml delete mode 100644 pydatavec/LICENSE delete mode 100644 pydl4j/LICENSE delete mode 100644 rl4j/LICENSE.txt delete mode 100644 rl4j/contrib/formatter.xml rename rl4j/{ => docs/images}/cartpole.gif (100%) rename rl4j/{ => docs/images}/doom.gif (100%) rename rl4j/{ => docs/images}/malmo.gif (100%) delete mode 100644 rl4j/scoregraph.png delete mode 100644 scalnet/.travis.yml diff --git a/arbiter/.travis.yml b/arbiter/.travis.yml deleted file mode 100644 index 30638a6a9..000000000 --- a/arbiter/.travis.yml +++ /dev/null @@ -1,24 +0,0 @@ -branches: - only: - - master -notifications: - email: false -dist: trusty -sudo: false -cache: - directories: - - $HOME/.m2 -language: java -jdk: - - openjdk8 -matrix: - include: - - os: linux - env: OS=linux-x86_64 SCALA=2.10 - install: true - script: bash ./ci/build-linux-x86_64.sh - - os: linux - env: OS=linux-x86_64 SCALA=2.11 - install: true - script: bash ./ci/build-linux-x86_64.sh - diff --git a/arbiter/ci/build-linux-x86_64.sh b/arbiter/ci/build-linux-x86_64.sh deleted file mode 100755 index 7b230df6f..000000000 --- a/arbiter/ci/build-linux-x86_64.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/bash -################################################################################ -# Copyright (c) 2015-2018 Skymind, Inc. -# -# This program and the accompanying materials are made available under the -# terms of the Apache License, Version 2.0 which is available at -# https://www.apache.org/licenses/LICENSE-2.0. -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the -# License for the specific language governing permissions and limitations -# under the License. -# -# SPDX-License-Identifier: Apache-2.0 -################################################################################ - -set -evx - -if [[ $TRAVIS_PULL_REQUEST == "false" ]]; then - MAVEN_PHASE="deploy" -else - MAVEN_PHASE="install" -fi - -bash change-scala-versions.sh $SCALA -mvn clean $MAVEN_PHASE -B -U --settings ./ci/settings.xml -Dmaven.test.skip=true -Dlocal.software.repository=sonatype - diff --git a/arbiter/ci/settings.xml b/arbiter/ci/settings.xml deleted file mode 100644 index e6fd58339..000000000 --- a/arbiter/ci/settings.xml +++ /dev/null @@ -1,28 +0,0 @@ -[28 lines of Maven settings.xml deleted; the XML markup was lost in extraction. The file defined a single server entry: id sonatype-nexus-snapshots, username ${env.SONATYPE_USERNAME}, password ${env.SONATYPE_PASSWORD}] diff --git a/arbiter/contrib/formatter.xml b/contrib/formatter.xml similarity index 100% rename from arbiter/contrib/formatter.xml rename to contrib/formatter.xml diff --git a/datavec/.travis.yml b/datavec/.travis.yml deleted file mode 100644 index 96c7989e8..000000000 --- a/datavec/.travis.yml +++ /dev/null @@ -1,28 +0,0 @@ -branches: - only: - - master -notifications: - email: false -dist: trusty -sudo: false -cache: - directories: - - $HOME/.m2 -language: java -jdk: - - openjdk8 -matrix: - include: - - os: linux - env: OS=linux-x86_64 SCALA=2.10 SPARK=1 - install: true - script: bash ./ci/build-linux-x86_64.sh - - os: linux - env: OS=linux-x86_64 SCALA=2.11 SPARK=1 - install: true - script: bash ./ci/build-linux-x86_64.sh - - os: linux - env: OS=linux-x86_64 SCALA=2.11 SPARK=2 - install: true - script: bash ./ci/build-linux-x86_64.sh - diff --git a/datavec/LICENSE b/datavec/LICENSE deleted file mode 100644 index ad410e113..000000000 --- a/datavec/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ -Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below).
- - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "{}" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright {yyyy} {name of copyright owner} - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. \ No newline at end of file diff --git a/datavec/ci/build-linux-x86_64.sh b/datavec/ci/build-linux-x86_64.sh deleted file mode 100755 index d28bc32ad..000000000 --- a/datavec/ci/build-linux-x86_64.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/bash -################################################################################ -# Copyright (c) 2015-2018 Skymind, Inc. -# -# This program and the accompanying materials are made available under the -# terms of the Apache License, Version 2.0 which is available at -# https://www.apache.org/licenses/LICENSE-2.0. -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-# -# SPDX-License-Identifier: Apache-2.0 -################################################################################ - -set -evx - -if [[ $TRAVIS_PULL_REQUEST == "false" ]]; then - MAVEN_PHASE="deploy" -else - MAVEN_PHASE="install" -fi - -bash change-scala-versions.sh $SCALA -bash change-spark-versions.sh $SPARK -mvn clean $MAVEN_PHASE -B -U --settings ./ci/settings.xml -Dmaven.test.skip=true -Dlocal.software.repository=sonatype - diff --git a/datavec/ci/settings.xml b/datavec/ci/settings.xml deleted file mode 100644 index e6fd58339..000000000 --- a/datavec/ci/settings.xml +++ /dev/null @@ -1,28 +0,0 @@ -[28 lines of Maven settings.xml deleted; XML markup lost in extraction. Same sonatype-nexus-snapshots server entry as arbiter/ci/settings.xml above] diff --git a/datavec/contrib/formatter.xml b/datavec/contrib/formatter.xml deleted file mode 100644 index d6cc96bf6..000000000 --- a/datavec/contrib/formatter.xml +++ /dev/null @@ -1,353 +0,0 @@ -[353 lines of Eclipse code-formatter XML deleted; the markup was lost in extraction, leaving no recoverable content] diff --git a/deeplearning4j/.travis.yml b/deeplearning4j/.travis.yml deleted file mode 100644 index 1df95a97f..000000000 --- a/deeplearning4j/.travis.yml +++ /dev/null @@ -1,32 +0,0 @@ -branches: - only: - - master -notifications: - email: false -dist: trusty -sudo: false -cache: - directories: - - $HOME/.m2 -language: java -jdk: - - openjdk8 -matrix: - include: - - os: linux - env: OS=linux-x86_64 CUDA=8.0 CUDNN=6 SCALA=2.10 SPARK=1 - install: true - script: bash ./ci/build-linux-x86_64.sh - - os: linux - env: OS=linux-x86_64 CUDA=9.0 CUDNN=7 SCALA=2.11 SPARK=1 - install: true - script: bash ./ci/build-linux-x86_64.sh - - os: linux - env: OS=linux-x86_64 CUDA=9.0 CUDNN=7 SCALA=2.11 SPARK=2 - install: true - script: bash ./ci/build-linux-x86_64.sh - - os: linux - env: OS=linux-x86_64 CUDA=9.1 CUDNN=7 SCALA=2.11 SPARK=2 - install: true - script: bash ./ci/build-linux-x86_64.sh - diff --git a/deeplearning4j/LICENSE.txt b/deeplearning4j/LICENSE.txt deleted file mode 100755 index f0d9c68a3..000000000 --- a/deeplearning4j/LICENSE.txt +++ /dev/null @@ -1,202 +0,0 @@ -[202 lines deleted: the Apache License, Version 2.0 text, matching the datavec/LICENSE text above]
- diff --git a/deeplearning4j/ci/build-linux-x86_64.sh b/deeplearning4j/ci/build-linux-x86_64.sh deleted file mode 100755 index b8474a793..000000000 --- a/deeplearning4j/ci/build-linux-x86_64.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/bash -################################################################################ -# Copyright (c) 2015-2018 Skymind, Inc. -# -# This program and the accompanying materials are made available under the -# terms of the Apache License, Version 2.0 which is available at -# https://www.apache.org/licenses/LICENSE-2.0. -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# -# SPDX-License-Identifier: Apache-2.0 -################################################################################ - -set -evx - -if [[ $TRAVIS_PULL_REQUEST == "false" ]]; then - MAVEN_PHASE="deploy" -else - MAVEN_PHASE="install" -fi - -bash change-cuda-versions.sh $CUDA -bash change-scala-versions.sh $SCALA -bash change-spark-versions.sh $SPARK -mvn clean $MAVEN_PHASE -B -U --settings ./ci/settings.xml -Dmaven.test.skip=true -Dlocal.software.repository=sonatype - diff --git a/deeplearning4j/ci/settings.xml b/deeplearning4j/ci/settings.xml deleted file mode 100644 index e6fd58339..000000000 --- a/deeplearning4j/ci/settings.xml +++ /dev/null @@ -1,28 +0,0 @@ -[28 lines of Maven settings.xml deleted; XML markup lost in extraction. Same sonatype-nexus-snapshots server entry as arbiter/ci/settings.xml above] diff --git a/deeplearning4j/contrib/formatter.xml b/deeplearning4j/contrib/formatter.xml deleted file mode 100644 index d6cc96bf6..000000000 --- a/deeplearning4j/contrib/formatter.xml +++ /dev/null @@ -1,353 +0,0 @@ -[353 lines of Eclipse code-formatter XML deleted; the markup was lost in extraction, leaving no recoverable content] diff --git a/jumpy/.travis.yml b/jumpy/.travis.yml deleted file mode 100644 index 42fb54b1a..000000000 --- a/jumpy/.travis.yml +++ /dev/null @@ -1,25 +0,0 @@ -sudo: required -dist: trusty -language: python -python: - - "2.7" - - "3.6" -before_install: - - sudo apt-get install -y python-dev python-pip python-virtualenv pkg-config -install: - - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then - wget https://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh; - else - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh; - fi - - bash miniconda.sh -b -p $HOME/miniconda - - export PATH="$HOME/miniconda/bin:$PATH" - - hash -r - - conda config --set always_yes yes --set changeps1 no - - conda update -q conda - - conda info -a - - travis_retry conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION nose - - source activate test-environment - - pip install Cython --install-option="--no-cython-compile" - - pip install -e .[tests]
-script: py.test --pep8 -m pep8 \ No newline at end of file diff --git a/jumpy/LICENSE b/jumpy/LICENSE deleted file mode 100644 index 8dada3eda..000000000 --- a/jumpy/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. 
Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "{}" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright {yyyy} {name of copyright owner} - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/libnd4j/CMakeLists.txt b/libnd4j/CMakeLists.txt index 9610d2890..3376bd6b6 100755 --- a/libnd4j/CMakeLists.txt +++ b/libnd4j/CMakeLists.txt @@ -251,7 +251,6 @@ set(CPACK_PACKAGE_VERSION_MINOR "8") set(CPACK_PACKAGE_VERSION_PATCH "0") set(CPACK_PACKAGE_VERSION "${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}") set(CPACK_PACKAGE_INSTALL_DIRECTORY "libnd4j") -set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE") set(CPACK_RESOURCE_FILE_README "${CMAKE_CURRENT_SOURCE_DIR}/README.md") # Determine distribution and release — may require redhat-lsb-core installed on CentOS / RH diff --git a/libnd4j/LICENSE b/libnd4j/LICENSE deleted file mode 100755 index 8f71f43fe..000000000 --- a/libnd4j/LICENSE +++ /dev/null @@ -1,202 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "{}" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright {yyyy} {name of copyright owner} - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
- diff --git a/libnd4j/pom.xml b/libnd4j/pom.xml index d682da24c..d1d9944fa 100644 --- a/libnd4j/pom.xml +++ b/libnd4j/pom.xml @@ -1,5 +1,4 @@ -<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> +<project xmlns="http://maven.apache.org/POM/4.0.0" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + + <modelVersion>4.0.0</modelVersion> <parent> <groupId>org.deeplearning4j</groupId> @@ -26,8 +27,6 @@ <version>1.0.0-SNAPSHOT</version> </parent> - <modelVersion>4.0.0</modelVersion> - <groupId>org.nd4j</groupId> <artifactId>libnd4j</artifactId> <packaging>pom</packaging> @@ -38,14 +37,6 @@ <url>http://nd4j.org/</url> - - <licenses> - <license> - <name>Apache License, Version 2.0</name> - <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url> - <distribution>repo</distribution> - </license> - </licenses> - <developer> <id>agibsonccc</id> @@ -105,7 +96,6 @@ - <groupId>org.bytedeco</groupId> <artifactId>javacpp</artifactId> @@ -124,8 +114,10 @@ /org/bytedeco/openblas/${libnd4j.platform}/ - /${javacpp.platform.library.path}/include/ - /org/bytedeco/openblas/${libnd4j.platform}/include/ + /${javacpp.platform.library.path}/include/ + + /org/bytedeco/openblas/${libnd4j.platform}/include/ + /${javacpp.platform.library.path}/ @@ -234,7 +226,6 @@ - <id>build-windows</id> @@ -247,7 +238,6 @@ sh - <id>build-unix</id> @@ -258,8 +248,6 @@ bash - - <id>libnd4j-single-thread</id> @@ -292,7 +280,8 @@ - ${libnd4j.platform}-${libnd4j.chip}-${cuda.version} + ${libnd4j.platform}-${libnd4j.chip}-${cuda.version} + @@ -395,7 +384,6 @@ <groupId>org.codehaus.mojo</groupId> <artifactId>exec-maven-plugin</artifactId> <version>1.6.0</version> - <id>libnd4j-test-clean</id> @@ -416,9 +404,6 @@ - - - <id>libnd4j-helper-avx2</id>
diff --git a/nd4j/.appveyor.yml b/nd4j/.appveyor.yml deleted file mode 100644 index 28eb2bd76..000000000 --- a/nd4j/.appveyor.yml +++ /dev/null @@ -1,45 +0,0 @@ -branches: - only: - - master -version: '{build}' -image: Visual Studio 2015 -cache: - - '%USERPROFILE%\.m2' -clone_depth: 50 -environment: - matrix: - - EXT: - SCALA: 2.10 - - EXT: avx2 - SCALA: 2.11 -# - EXT: avx512 -# SCALA: 2.11 - - CUDA: 8.0 - SCALA: 2.10 - - CUDA: 9.0 - SCALA: 2.11 - - CUDA: 9.1 - SCALA: 2.11 - -init: - - wmic computersystem set AutomaticManagedPagefile=False - - wmic pagefile list /format:list -# - wmic pagefileset create name="C:\pagefile.sys" - - wmic pagefileset where name="C:\\pagefile.sys" set InitialSize=8192,MaximumSize=8192 - - wmic pagefileset list /format:list - - ps: write-host "Restarting..." - - ps: sleep 5 - - ps: restart-computer -f - - ps: sleep 5 - - ps: write-host "Restarted." - - wmic pagefile list /format:list - -install: - -build_script: - - '%APPVEYOR_BUILD_FOLDER%\ci\build-windows-x86_64.cmd' - -test_script: - -on_finish: -
diff --git a/nd4j/.codeclimate.yml b/nd4j/.codeclimate.yml deleted file mode 100644 index 5ce6e83a5..000000000 --- a/nd4j/.codeclimate.yml +++ /dev/null @@ -1,32 +0,0 @@ -version: "2" # required to adjust maintainability checks -checks: - argument-count: - config: - threshold: 4 - complex-logic: - config: - threshold: 4 - file-lines: - config: - threshold: 5000 - method-complexity: - config: - threshold: 10 - method-count: - config: - threshold: 500 - method-lines: - config: - threshold: 50 - nested-control-flow: - config: - threshold: 4 - return-statements: - config: - threshold: 20 - similar-code: - config: - threshold: # language-specific defaults. an override will affect all languages. - identical-code: - config: - threshold: # language-specific defaults. an override will affect all languages.
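Every Travis build script deleted in this patch gates its Maven phase on the same check: pull-request builds stop at install, while pushes to master deploy snapshots to Sonatype using the credentials from ci/settings.xml. A minimal standalone sketch of that shared convention (condensed from the deleted scripts above; not itself part of the patch, and the TRAVIS_* and SCALA variables are assumed to come from the CI environment):

#!/bin/bash
# Sketch of the deploy-vs-install convention used by the deleted ci/build-*.sh scripts.
set -evx

if [[ "${TRAVIS_PULL_REQUEST:-false}" == "false" ]]; then
  BRANCH="${TRAVIS_BRANCH:-master}"          # push/merge build; nd4j scripts use BRANCH to pick the libnd4j branch
  MAVEN_PHASE="deploy"                       # publish snapshots to Sonatype
else
  BRANCH="${TRAVIS_PULL_REQUEST_BRANCH:-}"   # pull-request build
  MAVEN_PHASE="install"                      # build and verify only
fi

bash change-scala-versions.sh "$SCALA"
mvn clean "$MAVEN_PHASE" -B -U --settings ./ci/settings.xml \
    -Dmaven.test.skip=true -Dlocal.software.repository=sonatype

The AppVeyor script below (nd4j/ci/build-windows-x86_64.cmd) applies the same split, keyed on %APPVEYOR_PULL_REQUEST_NUMBER% instead.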
diff --git a/nd4j/.travis.yml b/nd4j/.travis.yml deleted file mode 100644 index f74ab9ba0..000000000 --- a/nd4j/.travis.yml +++ /dev/null @@ -1,96 +0,0 @@ -branches: - only: - - master -notifications: - email: false -dist: trusty -sudo: required -cache: - directories: - - $HOME/.m2 -language: java -services: - - docker -matrix: - include: - - os: linux - env: OS=android-arm SCALA=2.10 - install: true - script: bash ./ci/build-android.sh - - os: linux - env: OS=android-arm64 SCALA=2.11 - install: true - script: bash ./ci/build-android.sh - - os: linux - env: OS=android-x86 SCALA=2.10 - install: true - script: bash ./ci/build-android.sh - - os: linux - env: OS=android-x86_64 SCALA=2.11 - install: true - script: bash ./ci/build-android.sh - - os: osx - osx_image: xcode7.3 - env: OS=ios-arm64 SCALA=2.10 - install: true - script: bash ./ci/build-ios.sh - - os: osx - osx_image: xcode7.3 - env: OS=ios-x86_64 SCALA=2.11 - install: true - script: bash ./ci/build-ios.sh - - os: linux - env: OS=linux-x86_64 SCALA=2.10 - install: true - script: bash ./ci/build-linux-x86_64.sh - - os: linux - env: OS=linux-x86_64 EXT=avx2 SCALA=2.11 - install: true - script: bash ./ci/build-linux-x86_64.sh - - os: linux - env: OS=linux-x86_64 EXT=avx512 SCALA=2.11 - install: true - script: bash ./ci/build-linux-x86_64.sh - - os: linux - env: OS=linux-x86_64 CUDA=8.0 SCALA=2.10 - install: true - script: bash ./ci/build-linux-x86_64.sh - - os: linux - env: OS=linux-x86_64 CUDA=9.0 SCALA=2.11 - install: true - script: bash ./ci/build-linux-x86_64.sh - - os: linux - env: OS=linux-x86_64 CUDA=9.1 SCALA=2.11 - install: true - script: bash ./ci/build-linux-x86_64.sh - - os: osx - osx_image: xcode7.3 - env: OS=macosx-x86_64 SCALA=2.10 - install: true - script: bash ./ci/build-macosx-x86_64.sh - - os: osx - osx_image: xcode7.3 - env: OS=macosx-x86_64 EXT=avx2 SCALA=2.11 - install: true - script: bash ./ci/build-macosx-x86_64.sh - - os: osx - osx_image: xcode9.2 - env: OS=macosx-x86_64 EXT=avx512 SCALA=2.11 - install: true - script: bash ./ci/build-macosx-x86_64.sh - - os: osx - osx_image: xcode7.3 - env: OS=macosx-x86_64 CUDA=8.0 SCALA=2.10 - install: true - script: bash ./ci/build-macosx-x86_64.sh - - os: osx - osx_image: xcode8.3 - env: OS=macosx-x86_64 CUDA=9.0 SCALA=2.11 - install: true - script: bash ./ci/build-macosx-x86_64.sh - - os: osx - osx_image: xcode8.3 - env: OS=macosx-x86_64 CUDA=9.1 SCALA=2.11 - install: true - script: bash ./ci/build-macosx-x86_64.sh - diff --git a/nd4j/LICENSE b/nd4j/LICENSE deleted file mode 100644 index 5c304d1a4..000000000 --- a/nd4j/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ -Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. 
- - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. 
Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "{}" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright {yyyy} {name of copyright owner} - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/nd4j/VERSION b/nd4j/VERSION deleted file mode 100644 index a1a513826..000000000 --- a/nd4j/VERSION +++ /dev/null @@ -1 +0,0 @@ -0.4-rc3.9-SNAPSHOT diff --git a/nd4j/buildAllversions.sh b/nd4j/buildAllversions.sh deleted file mode 100755 index 4a99b8f74..000000000 --- a/nd4j/buildAllversions.sh +++ /dev/null @@ -1,23 +0,0 @@ -#! 
/bin/bash -################################################################################ -# Copyright (c) 2015-2018 Skymind, Inc. -# -# This program and the accompanying materials are made available under the -# terms of the Apache License, Version 2.0 which is available at -# https://www.apache.org/licenses/LICENSE-2.0. -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# -# SPDX-License-Identifier: Apache-2.0 -################################################################################ - -set -eu -./change-cuda-versions.sh 9.0 # should be idempotent, this is the default -./buildmultiplescalaversions.sh "$@" -./change-cuda-versions.sh 8.0 -./buildmultiplescalaversions.sh "$@" -./change-cuda-versions.sh 9.0 #back to default diff --git a/nd4j/ci/build-android.sh b/nd4j/ci/build-android.sh deleted file mode 100755 index e31c4f4bb..000000000 --- a/nd4j/ci/build-android.sh +++ /dev/null @@ -1,47 +0,0 @@ -#!/bin/bash -################################################################################ -# Copyright (c) 2015-2018 Skymind, Inc. -# -# This program and the accompanying materials are made available under the -# terms of the Apache License, Version 2.0 which is available at -# https://www.apache.org/licenses/LICENSE-2.0. -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# -# SPDX-License-Identifier: Apache-2.0 -################################################################################ - -set -evx - -while true; do echo .; sleep 60; done & - -if [[ $TRAVIS_PULL_REQUEST == "false" ]]; then - BRANCH=$TRAVIS_BRANCH - MAVEN_PHASE="deploy" -else - BRANCH=$TRAVIS_PULL_REQUEST_BRANCH - MAVEN_PHASE="install" -fi - -if ! git -C $TRAVIS_BUILD_DIR/.. clone https://github.com/deeplearning4j/libnd4j/ --depth=50 --branch=$BRANCH; then - git -C $TRAVIS_BUILD_DIR/.. clone https://github.com/deeplearning4j/libnd4j/ --depth=50 -fi - -mkdir $HOME/Android/ -curl --retry 10 -L https://dl.google.com/android/repository/android-ndk-r16b-linux-x86_64.zip -o $HOME/Android/android-ndk.zip -unzip -qq $HOME/Android/android-ndk.zip -d $HOME/Android/ -ln -s $HOME/Android/android-ndk-r16b $HOME/Android/android-ndk -export ANDROID_NDK=$HOME/Android/android-ndk - -cd $TRAVIS_BUILD_DIR/../libnd4j/ -sed -i /cmake_minimum_required/d CMakeLists.txt -MAKEJ=2 bash buildnativeoperations.sh -platform $OS -cd $TRAVIS_BUILD_DIR/ -bash change-scala-versions.sh $SCALA -mvn clean $MAVEN_PHASE -B -U --settings ./ci/settings.xml -Dmaven.javadoc.skip=true -Dmaven.test.skip=true -Dlocal.software.repository=sonatype \ - -Djavacpp.platform=$OS -pl '!nd4j-uberjar,!nd4j-backends/nd4j-backend-impls/nd4j-cuda,!nd4j-backends/nd4j-backend-impls/nd4j-cuda-platform,!nd4j-backends/nd4j-tests' - diff --git a/nd4j/ci/build-ios.sh b/nd4j/ci/build-ios.sh deleted file mode 100755 index aff29566c..000000000 --- a/nd4j/ci/build-ios.sh +++ /dev/null @@ -1,49 +0,0 @@ -#!/bin/bash -################################################################################ -# Copyright (c) 2015-2018 Skymind, Inc. 
-# -# This program and the accompanying materials are made available under the -# terms of the Apache License, Version 2.0 which is available at -# https://www.apache.org/licenses/LICENSE-2.0. -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# -# SPDX-License-Identifier: Apache-2.0 -################################################################################ - -set -evx - -while true; do echo .; sleep 60; done & - -if [[ $TRAVIS_PULL_REQUEST == "false" ]]; then - BRANCH=$TRAVIS_BRANCH - MAVEN_PHASE="deploy" -else - BRANCH=$TRAVIS_PULL_REQUEST_BRANCH - MAVEN_PHASE="install" -fi - -if ! git -C $TRAVIS_BUILD_DIR/.. clone https://github.com/deeplearning4j/libnd4j/ --depth=50 --branch=$BRANCH; then - git -C $TRAVIS_BUILD_DIR/.. clone https://github.com/deeplearning4j/libnd4j/ --depth=50 -fi - -brew update -brew upgrade maven || true -brew install gcc || true -brew link --overwrite gcc - -/usr/local/bin/gcc-? --version -mvn -version - -cd $TRAVIS_BUILD_DIR/../libnd4j/ -sed -i="" /cmake_minimum_required/d CMakeLists.txt -MAKEJ=2 bash buildnativeoperations.sh -platform $OS -cd $TRAVIS_BUILD_DIR/ -bash change-scala-versions.sh $SCALA -mvn clean $MAVEN_PHASE -B -U --settings ./ci/settings.xml -Dmaven.javadoc.skip=true -Dmaven.test.skip=true -Dlocal.software.repository=sonatype \ - -Djavacpp.platform=$OS -Djavacpp.platform.compiler=clang++ -pl '!nd4j-uberjar,!nd4j-backends/nd4j-backend-impls/nd4j-cuda,!nd4j-backends/nd4j-backend-impls/nd4j-cuda-platform,!nd4j-backends/nd4j-backend-impls/nd4j-native-platform,!nd4j-backends/nd4j-tests' - diff --git a/nd4j/ci/build-linux-x86_64.sh b/nd4j/ci/build-linux-x86_64.sh deleted file mode 100755 index 8fb6c7ae4..000000000 --- a/nd4j/ci/build-linux-x86_64.sh +++ /dev/null @@ -1,77 +0,0 @@ -#!/bin/bash -################################################################################ -# Copyright (c) 2015-2018 Skymind, Inc. -# -# This program and the accompanying materials are made available under the -# terms of the Apache License, Version 2.0 which is available at -# https://www.apache.org/licenses/LICENSE-2.0. -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# -# SPDX-License-Identifier: Apache-2.0 -################################################################################ - -set -evx - -while true; do echo .; sleep 60; done & - -sudo fallocate -l 4GB /swapfile -sudo chmod 600 /swapfile -sudo mkswap /swapfile -sudo swapon /swapfile - -if [[ $TRAVIS_PULL_REQUEST == "false" ]]; then - BRANCH=$TRAVIS_BRANCH - MAVEN_PHASE="deploy" -else - BRANCH=$TRAVIS_PULL_REQUEST_BRANCH - MAVEN_PHASE="install" -fi - -if ! git -C $TRAVIS_BUILD_DIR/.. clone https://github.com/deeplearning4j/libnd4j/ --depth=50 --branch=$BRANCH; then - git -C $TRAVIS_BUILD_DIR/.. clone https://github.com/deeplearning4j/libnd4j/ --depth=50 -fi - -PROTOBUF=3.5.1 -curl --retry 10 -L https://github.com/google/protobuf/releases/download/v$PROTOBUF/protobuf-cpp-$PROTOBUF.tar.gz -o $HOME/protobuf-$PROTOBUF.tar.gz -tar -C $TRAVIS_BUILD_DIR/.. 
--totals -xf $HOME/protobuf-$PROTOBUF.tar.gz - -if [[ -n "${EXT:-}" ]]; then - DEVTOOLSET=6 -else - DEVTOOLSET=4 -fi - -if [[ -n "${CUDA:-}" ]]; then - DOCKER_IMAGE=nvidia/cuda:$CUDA-devel-centos6 -else - DOCKER_IMAGE=centos:6 -fi - -docker run -ti -e SONATYPE_USERNAME -e SONATYPE_PASSWORD -v $HOME/.m2:/root/.m2 -v $TRAVIS_BUILD_DIR/..:/build $DOCKER_IMAGE /bin/bash -evxc "\ - yum -y install centos-release-scl-rh epel-release; \ - yum -y install devtoolset-$DEVTOOLSET-toolchain rh-maven33 cmake3 git java-1.8.0-openjdk-devel; \ - source scl_source enable devtoolset-$DEVTOOLSET rh-maven33 || true; \ - cd /build/protobuf-$PROTOBUF/; \ - ./configure; \ - make -j2; \ - cd /build/libnd4j/; \ - sed -i /cmake_minimum_required/d CMakeLists.txt; \ - if [[ -n \"${CUDA:-}\" ]]; then \ - MAKEJ=1 bash buildnativeoperations.sh -c cuda -v $CUDA -cc 30; \ - cd /build/nd4j/; \ - bash change-cuda-versions.sh $CUDA; \ - EXTRA_OPTIONS='-pl !nd4j-uberjar,!nd4j-backends/nd4j-backend-impls/nd4j-native,!nd4j-backends/nd4j-backend-impls/nd4j-native-platform,!nd4j-backends/nd4j-tests'; \ - else \ - MAKEJ=2 bash buildnativeoperations.sh -c cpu -e ${EXT:-}; \ - cd /build/nd4j/; \ - EXTRA_OPTIONS='-pl !nd4j-uberjar,!nd4j-backends/nd4j-backend-impls/nd4j-cuda,!nd4j-backends/nd4j-backend-impls/nd4j-cuda-platform,!nd4j-backends/nd4j-tests'; \ - fi; \ - bash change-scala-versions.sh $SCALA; \ - mvn clean $MAVEN_PHASE -B -U --settings ./ci/settings.xml -Dmaven.test.skip=true -Dlocal.software.repository=sonatype \ - -Dlibnd4j.extension=${EXT:-} \$EXTRA_OPTIONS -DprotocCommand=/build/protobuf-$PROTOBUF/src/protoc;" - diff --git a/nd4j/ci/build-macosx-x86_64.sh b/nd4j/ci/build-macosx-x86_64.sh deleted file mode 100755 index 98abb17c4..000000000 --- a/nd4j/ci/build-macosx-x86_64.sh +++ /dev/null @@ -1,71 +0,0 @@ -#!/bin/bash -################################################################################ -# Copyright (c) 2015-2018 Skymind, Inc. -# -# This program and the accompanying materials are made available under the -# terms of the Apache License, Version 2.0 which is available at -# https://www.apache.org/licenses/LICENSE-2.0. -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# -# SPDX-License-Identifier: Apache-2.0 -################################################################################ - -set -evx - -while true; do echo .; sleep 60; done & - -if [[ $TRAVIS_PULL_REQUEST == "false" ]]; then - BRANCH=$TRAVIS_BRANCH - MAVEN_PHASE="deploy" -else - BRANCH=$TRAVIS_PULL_REQUEST_BRANCH - MAVEN_PHASE="install" -fi - -if ! git -C $TRAVIS_BUILD_DIR/.. clone https://github.com/deeplearning4j/libnd4j/ --depth=50 --branch=$BRANCH; then - git -C $TRAVIS_BUILD_DIR/.. clone https://github.com/deeplearning4j/libnd4j/ --depth=50 -fi - -brew update -brew upgrade maven || true -brew install gcc || true -brew link --overwrite gcc - -/usr/local/bin/gcc-? 
--version -mvn -version - -if [[ "${CUDA:-}" == "8.0" ]]; then - CUDA_URL=https://developer.nvidia.com/compute/cuda/8.0/Prod2/local_installers/cuda_8.0.61_mac-dmg -elif [[ "${CUDA:-}" == "9.0" ]]; then - CUDA_URL=https://developer.nvidia.com/compute/cuda/9.0/Prod/local_installers/cuda_9.0.176_mac-dmg -elif [[ "${CUDA:-}" == "9.1" ]]; then - CUDA_URL=https://developer.nvidia.com/compute/cuda/9.1/Prod/local_installers/cuda_9.1.85_mac -fi -if [[ -n ${CUDA_URL:-} ]]; then - curl --retry 10 -L -o $HOME/cuda.dmg $CUDA_URL - hdiutil mount $HOME/cuda.dmg - sleep 5 - sudo /Volumes/CUDAMacOSXInstaller/CUDAMacOSXInstaller.app/Contents/MacOS/CUDAMacOSXInstaller --accept-eula --no-window -fi - -cd $TRAVIS_BUILD_DIR/../libnd4j/ -sed -i="" /cmake_minimum_required/d CMakeLists.txt -if [[ -n "${CUDA:-}" ]]; then - MAKEJ=1 bash buildnativeoperations.sh -c cuda -v $CUDA -cc 30 - cd $TRAVIS_BUILD_DIR/ - bash change-cuda-versions.sh $CUDA - EXTRA_OPTIONS='-pl !nd4j-uberjar,!nd4j-backends/nd4j-backend-impls/nd4j-native,!nd4j-backends/nd4j-backend-impls/nd4j-native-platform,!nd4j-backends/nd4j-tests' -else - MAKEJ=2 bash buildnativeoperations.sh -c cpu -e ${EXT:-} - cd $TRAVIS_BUILD_DIR/ - EXTRA_OPTIONS='-pl !nd4j-uberjar,!nd4j-backends/nd4j-backend-impls/nd4j-cuda,!nd4j-backends/nd4j-backend-impls/nd4j-cuda-platform,!nd4j-backends/nd4j-tests' -fi -bash change-scala-versions.sh $SCALA -mvn clean $MAVEN_PHASE -B -U --settings ./ci/settings.xml -Dmaven.javadoc.skip=true -Dmaven.test.skip=true -Dlocal.software.repository=sonatype \ - -Dlibnd4j.extension=${EXT:-} $EXTRA_OPTIONS - diff --git a/nd4j/ci/build-windows-x86_64.cmd b/nd4j/ci/build-windows-x86_64.cmd deleted file mode 100644 index 5f7af7bd8..000000000 --- a/nd4j/ci/build-windows-x86_64.cmd +++ /dev/null @@ -1,49 +0,0 @@ -call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64 -echo on - -if "%APPVEYOR_PULL_REQUEST_NUMBER%" == "" ( - set BRANCH=%APPVEYOR_REPO_BRANCH% - set MAVEN_PHASE=deploy -) else ( - set BRANCH=%APPVEYOR_PULL_REQUEST_HEAD_REPO_BRANCH% - set MAVEN_PHASE=install -) - -git -C "%APPVEYOR_BUILD_FOLDER%\.." clone https://github.com/deeplearning4j/libnd4j/ --depth=50 --branch=%BRANCH% -if %ERRORLEVEL% neq 0 ( - git -C "%APPVEYOR_BUILD_FOLDER%\.." 
clone https://github.com/deeplearning4j/libnd4j/ --depth=50 -) - -if "%CUDA%" == "8.0" ( - set "CUDA_URL=https://developer.nvidia.com/compute/cuda/8.0/Prod2/local_installers/cuda_8.0.61_windows-exe" -) -if "%CUDA%" == "9.0" ( - set "CUDA_URL=https://developer.nvidia.com/compute/cuda/9.0/Prod/local_installers/cuda_9.0.176_windows-exe" -) -if "%CUDA%" == "9.1" ( - set "CUDA_URL=https://developer.nvidia.com/compute/cuda/9.1/Prod/local_installers/cuda_9.1.85_windows" -) -if not "%CUDA_URL%" == "" ( - curl --retry 10 -L -o cuda.exe %CUDA_URL% - cuda.exe -s - set "CUDA_PATH=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA%" - set "CUDA_PATH_V%CUDA:.=_%=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA%" - set "PATH=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA%\bin;%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA%\libnvvp;%PATH%" -) - -set "PATH=C:\msys64\usr\bin\core_perl;C:\msys64\mingw64\bin;C:\msys64\usr\bin;%PATH%" -bash -lc "pacman -Syu --noconfirm" -bash -lc "pacman -Su --noconfirm" -bash -lc "pacman -S --needed --noconfirm base-devel make mingw-w64-x86_64-cmake mingw-w64-x86_64-gcc" - -if not "%CUDA%" == "" ( - bash -c "cd ../libnd4j/; MAKEJ=1 bash buildnativeoperations.sh -c cuda -v $CUDA -cc 30" - bash -c "bash change-cuda-versions.sh $CUDA" - set "EXTRA_OPTIONS=-pl !nd4j-uberjar,!nd4j-backends/nd4j-backend-impls/nd4j-native,!nd4j-backends/nd4j-backend-impls/nd4j-native-platform,!nd4j-backends/nd4j-tests" -) else ( - bash -c "cd ../libnd4j/; MAKEJ=2 bash buildnativeoperations.sh -c cpu -e $EXT" - set "EXTRA_OPTIONS=-pl !nd4j-uberjar,!nd4j-backends/nd4j-backend-impls/nd4j-cuda,!nd4j-backends/nd4j-backend-impls/nd4j-cuda-platform,!nd4j-backends/nd4j-tests" -) -bash -c "bash change-scala-versions.sh $SCALA" -call mvn clean %MAVEN_PHASE% -B -U --settings .\ci\settings.xml -Dmaven.test.skip=true -Dlocal.software.repository=sonatype ^ - -Dlibnd4j.extension=%EXT% %EXTRA_OPTIONS%
diff --git a/nd4j/ci/settings.xml b/nd4j/ci/settings.xml deleted file mode 100644 index e6fd58339..000000000 --- a/nd4j/ci/settings.xml +++ /dev/null @@ -1,28 +0,0 @@ -<settings> - <servers> - <server> - <id>sonatype-nexus-snapshots</id> - <username>${env.SONATYPE_USERNAME}</username> - <password>${env.SONATYPE_PASSWORD}</password> - </server> - </servers> -</settings>
diff --git a/nd4j/contrib/formatter.xml b/nd4j/contrib/formatter.xml deleted file mode 100644 index d6cc96bf6..000000000 --- a/nd4j/contrib/formatter.xml +++ /dev/null @@ -1,353 +0,0 @@ [353 deleted lines of code-formatter profile XML; element markup not recoverable]
diff --git a/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-client/pom.xml b/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-client/pom.xml index 5b237f65c..75024af0f 100644 --- a/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-client/pom.xml +++ b/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-client/pom.xml @@ -1,3 +1,4 @@ +<?xml version="1.0" encoding="UTF-8"?> - <parent> - <artifactId>nd4j-parameter-server-parent</artifactId> - <groupId>org.nd4j</groupId> - <version>1.0.0-SNAPSHOT</version> - </parent> + <modelVersion>4.0.0</modelVersion> + <parent> + <groupId>org.nd4j</groupId> + <artifactId>nd4j-parameter-server-parent</artifactId> +
<version>1.0.0-SNAPSHOT</version> + </parent> + <artifactId>nd4j-parameter-server-client</artifactId> - <packaging>jar</packaging> <name>nd4j-parameter-server-client</name> @@ -34,7 +37,6 @@ <artifactId>unirest-java</artifactId> <version>${unirest.version}</version> - <groupId>org.nd4j</groupId> <artifactId>nd4j-parameter-server-model</artifactId> @@ -58,22 +60,17 @@ <groupId>org.nd4j</groupId> <artifactId>nd4j-parameter-server</artifactId> - <version>${project.version}</version> <scope>test</scope> - <groupId>ch.qos.logback</groupId> <artifactId>logback-classic</artifactId> <version>${logback.version}</version> <scope>test</scope> - <groupId>org.nd4j</groupId> <artifactId>nd4j-common-tests</artifactId> - <version>${project.version}</version> - <scope>test</scope> @@ -81,11 +78,9 @@ <id>testresources</id> - <id>nd4j-testresources</id> - <id>nd4j-tests-cpu</id> @@ -116,7 +111,6 @@ - <id>nd4j-tests-cuda</id>
diff --git a/nd4j/nd4j-parameter-server-parent/nd4j-parameterserver-model/pom.xml b/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-model/pom.xml similarity index 78% rename from nd4j/nd4j-parameter-server-parent/nd4j-parameterserver-model/pom.xml rename to nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-model/pom.xml index 350f53806..07d99e966 100644 --- a/nd4j/nd4j-parameter-server-parent/nd4j-parameterserver-model/pom.xml +++ b/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-model/pom.xml @@ -1,3 +1,4 @@ +<?xml version="1.0" encoding="UTF-8"?> - <parent> - <artifactId>nd4j-parameter-server-parent</artifactId> - <groupId>org.nd4j</groupId> - <version>1.0.0-SNAPSHOT</version> - </parent> + <modelVersion>4.0.0</modelVersion> + <parent> + <groupId>org.nd4j</groupId> + <artifactId>nd4j-parameter-server-parent</artifactId> + <version>1.0.0-SNAPSHOT</version> + </parent> + <artifactId>nd4j-parameter-server-model</artifactId> - <packaging>jar</packaging> <name>nd4j-parameter-server-model</name> <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> - <maven.compiler.source>1.8</maven.compiler.source> - <maven.compiler.target>1.8</maven.compiler.target>
diff --git a/nd4j/nd4j-parameter-server-parent/nd4j-parameterserver-model/src/main/java/org/nd4j/parameterserver/model/MasterConnectionInfo.java b/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-model/src/main/java/org/nd4j/parameterserver/model/MasterConnectionInfo.java similarity index 100% rename from nd4j/nd4j-parameter-server-parent/nd4j-parameterserver-model/src/main/java/org/nd4j/parameterserver/model/MasterConnectionInfo.java rename to nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-model/src/main/java/org/nd4j/parameterserver/model/MasterConnectionInfo.java
diff --git a/nd4j/nd4j-parameter-server-parent/nd4j-parameterserver-model/src/main/java/org/nd4j/parameterserver/model/MasterStatus.java b/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-model/src/main/java/org/nd4j/parameterserver/model/MasterStatus.java similarity index 100% rename from nd4j/nd4j-parameter-server-parent/nd4j-parameterserver-model/src/main/java/org/nd4j/parameterserver/model/MasterStatus.java rename to nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-model/src/main/java/org/nd4j/parameterserver/model/MasterStatus.java
diff --git a/nd4j/nd4j-parameter-server-parent/nd4j-parameterserver-model/src/main/java/org/nd4j/parameterserver/model/ServerState.java b/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-model/src/main/java/org/nd4j/parameterserver/model/ServerState.java similarity index 100% rename from nd4j/nd4j-parameter-server-parent/nd4j-parameterserver-model/src/main/java/org/nd4j/parameterserver/model/ServerState.java rename to nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-model/src/main/java/org/nd4j/parameterserver/model/ServerState.java
diff --git a/nd4j/nd4j-parameter-server-parent/nd4j-parameterserver-model/src/main/java/org/nd4j/parameterserver/model/ServerType.java b/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-model/src/main/java/org/nd4j/parameterserver/model/ServerType.java similarity index 100% rename from nd4j/nd4j-parameter-server-parent/nd4j-parameterserver-model/src/main/java/org/nd4j/parameterserver/model/ServerType.java rename to nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-model/src/main/java/org/nd4j/parameterserver/model/ServerType.java
diff --git
a/nd4j/nd4j-parameter-server-parent/nd4j-parameterserver-model/src/main/java/org/nd4j/parameterserver/model/ServerTypeJson.java b/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-model/src/main/java/org/nd4j/parameterserver/model/ServerTypeJson.java
similarity index 100%
rename from nd4j/nd4j-parameter-server-parent/nd4j-parameterserver-model/src/main/java/org/nd4j/parameterserver/model/ServerTypeJson.java
rename to nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-model/src/main/java/org/nd4j/parameterserver/model/ServerTypeJson.java
diff --git a/nd4j/nd4j-parameter-server-parent/nd4j-parameterserver-model/src/main/java/org/nd4j/parameterserver/model/SlaveConnectionInfo.java b/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-model/src/main/java/org/nd4j/parameterserver/model/SlaveConnectionInfo.java
similarity index 100%
rename from nd4j/nd4j-parameter-server-parent/nd4j-parameterserver-model/src/main/java/org/nd4j/parameterserver/model/SlaveConnectionInfo.java
rename to nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-model/src/main/java/org/nd4j/parameterserver/model/SlaveConnectionInfo.java
diff --git a/nd4j/nd4j-parameter-server-parent/nd4j-parameterserver-model/src/main/java/org/nd4j/parameterserver/model/SlaveStatus.java b/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-model/src/main/java/org/nd4j/parameterserver/model/SlaveStatus.java
similarity index 100%
rename from nd4j/nd4j-parameter-server-parent/nd4j-parameterserver-model/src/main/java/org/nd4j/parameterserver/model/SlaveStatus.java
rename to nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-model/src/main/java/org/nd4j/parameterserver/model/SlaveStatus.java
diff --git a/nd4j/nd4j-parameter-server-parent/nd4j-parameterserver-model/src/main/java/org/nd4j/parameterserver/model/SubscriberState.java b/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-model/src/main/java/org/nd4j/parameterserver/model/SubscriberState.java
similarity index 100%
rename from nd4j/nd4j-parameter-server-parent/nd4j-parameterserver-model/src/main/java/org/nd4j/parameterserver/model/SubscriberState.java
rename to nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-model/src/main/java/org/nd4j/parameterserver/model/SubscriberState.java
diff --git a/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-node/pom.xml b/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-node/pom.xml
index f1c168c33..07a04f80d 100644
--- a/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-node/pom.xml
+++ b/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-node/pom.xml
@@ -1,3 +1,4 @@ + - - - nd4j-parameter-server-parent - org.nd4j - 1.0.0-SNAPSHOT - + + 4.0.0 + + org.nd4j + nd4j-parameter-server-parent + 1.0.0-SNAPSHOT + + nd4j-parameter-server-node_2.11 - jar nd4j-parameter-server-node - - - - org.apache.maven.plugins - maven-compiler-plugin - - 8 - 8 - - - -
@@ -64,27 +55,21 @@ org.nd4j nd4j-parameter-server - ${project.version} - ch.qos.logback logback-classic ${logback.version} test - io.reactivex.rxjava2 rxjava 2.2.0 - org.nd4j nd4j-common-tests - ${project.version} - test
@@ -92,11 +77,9 @@ testresources - nd4j-testresources - nd4j-tests-cpu
@@ -127,7 +110,6 @@ - nd4j-tests-cuda
diff --git a/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-rocksdb-storage/pom.xml b/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-rocksdb-storage/pom.xml
index 3ba5a156a..733d1ae1b 100644
--- a/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-rocksdb-storage/pom.xml
+++
b/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-rocksdb-storage/pom.xml
@@ -1,3 +1,4 @@ + - - - nd4j-parameter-server-parent - org.nd4j - 1.0.0-SNAPSHOT - + + 4.0.0 + + org.nd4j + nd4j-parameter-server-parent + 1.0.0-SNAPSHOT + + nd4j-parameter-server-rocksdb-storage - jar nd4j-parameter-server-rocksdb-storage - org.rocksdb
@@ -38,20 +40,14 @@ org.nd4j nd4j-parameter-server - ${project.version} - junit junit - test - org.nd4j nd4j-common-tests - ${project.version} - test
diff --git a/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-status/pom.xml b/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-status/pom.xml
index 7c2783904..f24a1de89 100644
--- a/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-status/pom.xml
+++ b/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-status/pom.xml
@@ -1,3 +1,4 @@ + - - - nd4j-parameter-server-parent - org.nd4j - 1.0.0-SNAPSHOT - + + 4.0.0 + + org.nd4j + nd4j-parameter-server-parent + 1.0.0-SNAPSHOT + + nd4j-parameter-server-status_2.11 - jar nd4j-parameter-server-status
@@ -41,25 +44,19 @@ mapdb ${mapdb.version} - org.nd4j nd4j-parameter-server - ${project.version} - junit junit - test - com.typesafe.play play-netty-server_2.11 ${playframework.version} - com.typesafe.play play-java_2.11
@@ -95,12 +92,9 @@ - org.nd4j nd4j-common-tests - ${project.version} - test
diff --git a/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server/pom.xml b/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server/pom.xml
index d96eb02f4..0325f2d52 100644
--- a/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server/pom.xml
+++ b/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server/pom.xml
@@ -1,3 +1,4 @@ + - - - nd4j-parameter-server-parent - org.nd4j - 1.0.0-SNAPSHOT - + + 4.0.0 + + org.nd4j + nd4j-parameter-server-parent + 1.0.0-SNAPSHOT + + nd4j-parameter-server - jar nd4j-parameter-server
@@ -34,6 +37,11 @@ nd4j-parameter-server-model ${project.version} + + org.nd4j + nd4j-aeron + ${project.version} + org.slf4j slf4j-log4j12
@@ -43,11 +51,6 @@ junit junit - - org.nd4j - nd4j-aeron - ${project.version} - com.beust jcommander
@@ -58,12 +61,9 @@ unirest-java ${unirest.version} - org.nd4j nd4j-common-tests - ${project.version} - test
@@ -71,11 +71,9 @@ testresources - nd4j-testresources - nd4j-tests-cpu
@@ -106,7 +104,6 @@ - nd4j-tests-cuda
diff --git a/nd4j/nd4j-parameter-server-parent/pom.xml b/nd4j/nd4j-parameter-server-parent/pom.xml
index 4d77e3e96..3a160d2e3 100644
--- a/nd4j/nd4j-parameter-server-parent/pom.xml
+++ b/nd4j/nd4j-parameter-server-parent/pom.xml
@@ -1,3 +1,4 @@ + - + + + 4.0.0 + - nd4j org.nd4j + nd4j 1.0.0-SNAPSHOT - 4.0.0 nd4j-parameter-server-parent pom nd4j-parameter-server-parent + + nd4j-parameter-server + nd4j-parameter-server-client + nd4j-parameter-server-model + nd4j-parameter-server-status + nd4j-parameter-server-rocksdb-storage + nd4j-parameter-server-node + + + + 1.8 + 1.8 + + + + + + org.nd4j + nd4j-common-tests + ${project.version} + test + + + org.nd4j + nd4j-parameter-server + ${project.version} + + +
@@ -52,12 +85,10 @@ - com.google.code.play2-maven-plugin play2-maven-plugin ${maven-play2-plugin.version} -
[remaining deleted XML plugin configuration; markup not recoverable]
diff --git a/rl4j/cartpole.gif b/rl4j/docs/images/cartpole.gif
similarity index 100%
rename from rl4j/cartpole.gif
rename to rl4j/docs/images/cartpole.gif
diff --git a/rl4j/doom.gif b/rl4j/docs/images/doom.gif
similarity index 100%
rename from rl4j/doom.gif
rename to rl4j/docs/images/doom.gif
diff --git a/rl4j/malmo.gif b/rl4j/docs/images/malmo.gif
similarity index 100%
rename from rl4j/malmo.gif
rename to rl4j/docs/images/malmo.gif
diff --git a/rl4j/scoregraph.png b/rl4j/scoregraph.png
deleted file mode 100644
index dd317734e30c3098de168d3184b1f5edd04aa49b..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001
literal 90128 [base85-encoded binary payload of the deleted scoregraph.png omitted]
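A note on the pom.xml hunks above: the child modules (nd4j-parameter-server-client, -node, -rocksdb-storage, -status, and nd4j-parameter-server itself) repeatedly delete "- ${project.version}" and "- test" from their nd4j-parameter-server and nd4j-common-tests dependencies, while the nd4j-parameter-server-parent hunk adds those same coordinates once, under a dependencyManagement section. The XML markup was stripped from the hunks during extraction, so the following parent-pom sketch reconstructs the element names; treat the exact tags as an assumption rather than the literal patch content.

<!-- Sketch of the dependencyManagement block implied by the added (+) lines
     in the nd4j-parameter-server-parent/pom.xml hunk above; tag names are
     reconstructed, not taken verbatim from the patch. -->
<dependencyManagement>
  <dependencies>
    <!-- Version and test scope are declared once here and inherited. -->
    <dependency>
      <groupId>org.nd4j</groupId>
      <artifactId>nd4j-common-tests</artifactId>
      <version>${project.version}</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.nd4j</groupId>
      <artifactId>nd4j-parameter-server</artifactId>
      <version>${project.version}</version>
    </dependency>
  </dependencies>
</dependencyManagement>

With that in place a child pom only needs the groupId/artifactId pair, which is exactly why the child hunks shrink to bare coordinates.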
zz_h7&BbqVf%;T}}w?%&O5o8J8Z~a5<-gz&7c`jY$bUxdp)~o=(S0!Msv{S^USQT*AZ9GXKpnrO07( zdD$#`@#IL(O$B@-mxG_f8*peKK#efN=~G{VunM3rHB=>wHP^5Dg`D#0<@s-gF=KO_jn#p) zr*8v~JFl?r5L)cO8UHS?i5in50pE=r8pu{10bkGsxqtE)_Ao8g(XA%HzCU4kvXB%A zGGRsY)l}&GFpc6wMa;S}1Uv6>Ri4>aZ_-1dOaZMFi*4v2$9q3(hy|5T=?D~1m}dJu zw-1Ir)j7sfDf}P`Tv)r`jA1mdDqYqKS*?_wk&dudCd4K%hE-qq9W)WNo_(%|M75I@ z^WN`paQ8M$0Hks3w!Ky4A6&jpDoENzTI~U1eZO0NUfmqQIR>DJKTSXr+T|R zU6oz$yzUIS>lY`Ih`A3`wD+o2(OCk%56^O*4dWu5;QGNIGOmHtP0Hca4w=ucqEuW} zSFK#$2Q}?YWxS9)(H1L;-CNGdDN5(&a&_I^sk9^9Oduz)q5OaZC?o$PkUR%iYO<$2 z_InGQ5y{{p594i|Kb86%_il=(*-9}|GDMiY0kg`JdiTVm7K*1>rA^qv@20;Sf_cqE zTh3mNeYsD=q+V+}efe=GJAStjl&ntMLJl08I|N8+DY1}(>6yB=kytk(K}{i)V8e0z zvd+)$19RY5y_<9k*_AuctVOZ=LNNC;=dh{gAVyPRb)KDSlx<`4wtrBnRqDiY>W4?R z3Mv49YU=0gOhA|AGBW#|L$$;GU5+2bWKo;5HpoGro4udS&2vh&mjekSOT z^BqbR#Fxb2lS8_zY+T!h13;J;W{u$mCJIa%t`uZSbZ($9aFzFxPBl*fD@Lw`6z@%3 znJhy`Hof$Q@q)P!m}%~f!|c{9!G?Rv!o|{Me#C)OgfmSbvV3N_c5#p>8CGCqO_h#B z9(cHF1L^d-oY$1^93ew%0>UX6Aa%dpBYo@HM0uFbRpJgL<0Fa;R=samgHfd3X=-_q z5Ih&n?k+s{pog{RUF?l4D!>)ah>zP+N<$77fyrg3KTQh&bl1tLITN zdF*uJ_ola=fv?Rzn-zvpG>Ow@!%wKXv*pi)xeuYI|TUE^2z zj~*EY;}FPSUIfR6QCXw2kosLXbl#5_w2QOXb^73+T*ngYm^d0n+ek*khh4;Q47j=; zZ}fY5A-cW4jnz|V7XEt{a?*-i0=bpEAtd(o)Wyh+dWN9AhV6XfW1$j?<+Pv7j5jEp z#dmxZrRQT|+$cu5FI{qd5Zw%QbEspFr^nlo0)+{_jn|IxIQTx;LZ9Kyn(I0rTD0w% z?J6*QUmx31X4Isko9B3Hi6@_R(nu_qO?a{F9(MIb6oU?77O_pcWcd-f*KZMGc$ColcCyzD0R`b|JDv|>g zncQ^mo`b(6p|oPwtOP?#H+DAmy~8M4))<{T8~f1e%Wc;AMVw&i!@HE5Cw%a_x;L~K zm$P&`cGAAYZpz-e=;QIy%N@KijQj2#nUb`auv%PcZa#!TOdm(A^mQe+i=uSAbHQgH zFH487?LPI&cS$mike*wCer>KAxou2uo^<6N^bx1(V4KA&@}C+5Gy{Wy@ivuE5axP0 z*%Dij1jm->uiren;0rEJ7At5r+}W&2p8q@pcuA3544iSj`VOxvF2+xc`CbGHv+eY^ z>_e&@Caz>BQ00&tNdg8XWFqMfxCvy&yG1L@{ZC6b3rG7&I0P=fM_j#&!xW5O-(E-0>G~cKq+U67uC*dOTkz7KqBmAGa*&mu^go4e`x}IC6CfjYls)LClBh6EE#bl{GgHdge8f_ zK;iC}51w=c=Lp_LU97~{C0C+y+(TkLNJ1}*q7a0hEE)uSL&3+rc`iSXo3LJzV6$nP zsGQDoEBL&KF0~D&!0-jXwtI&T10M9V_szd3I8Eb9umF#<%Zgc2zHT>!NLQ&=7QfpA@+gm@`qm z4wt@V&5L!8n3mEZ+zBE=1N4V%W*tsZ9u~}1uhV}xUk31Ib-u$-p;nX)P1}9YR$cn$ z2h{scciXVkTP^#*538KP4X08bz-qdAjRL3Mx3pv70i#U%Q!8!!+MV-Dfo(uZ#BS(A zA~?LRJFaAzD6l}+z`}*RXSZlSBzv6FigR%OC*?Q>;E0MXF`}+Nn8)z3OZbCTeX`}_Z4&$u z*N7;LGDgXqE@$*!t5a$5knaz-aIG@Zw(U0`vZL!@jK2+?zlfw8{7gD>ug-dR?0|FW ziLKG-F=(P#=EpkD_o;pF)W^Z1*z|Tm4o-LNRikS&h{!(EU5C$k&B8TTOQ%9G_v#A6 z4^af!c##03>$mQ_k`+_5mCN>Ca;E01^=4IToP*!fYaE7e@X%y-9*NO;g}HS1hv$e* z*3!|OMH2zXhsV?#1El~2D}-(8?a#xXDZD5pp0rLaxZnG4aJ*!{P0a&=kQI}DlY~^l z-p`%m(Lbw@*HgmqMkDVCPvzpt4h*x~g`w22Y7xP*7v3fuiI zjP@KsgfD>Y#C{twJDy%x5iyMQx+NPsf9UuMfg4tUF@#2%NjmPHzdZi*$G5Lp;cX28 zm=1H*06pmS4Pmsdh(3Sos-qc7ZPvqZkB6h^K}R%AZ($NlWEppEnt#ZOB`HLT~hNu`2|?JA_;Ip%82mg#bt=e+P6~XP?Lk(pGp)&< z-Dz=5gkWD$VY2!*KgC0IfMf+7atZQ;XP)FydeE9Ww3n<$VrR4TPd`ibq0A8m?=tut zAv8~I|4+6rt*&(7COL8aiqC1d29X!62Qya>0Imu`@aL>l{9*iT%bkxa zj93%ZVf$YhdT-ih*O5Pt?mBI5pe0ey?ZmL#^C<%^oQc;$G`7EjsVb?$Ll4e*9K~ue zF^@ZS01p=H%U5`|ecTqFm-5P}|IvwADQNno)U$4}>h;&Hnw|0zsSx9H_2kkKGCWQB z-)Tn=*e)$_Ex+al5oGzZIiH4`Lh-&~iDmg%8|2|6dS+mv+)+Eq*1UCgFA=xcoz(y6 zL-Qw}b~VLEvC1|cCPFGlF7`ye{5p-lVE&_EYZj%%cwfyLn;n^N+Zp-4v03;`fmdP~%a^^(kvqFNu+_brRCs3-6es zlXOWWz-}VYrTXsEaAto}>_?d^6@|b5CFE&+~ZmU0|`)(_Q zsLv6v$jA*68Iz#XSY($W-k@$V6&y>Fq11$2iQ0pg0!lMw2T`ha-VJPTr_KJDN(tf{ zQ7L7D_8R45bPrT&1U7WaJaQ(xwRkQp6z-t#`fX7;VzFiHb9gC>z}5ZtnxbjQCaeT_ z-Q{!Tgj(!g<)7#Dr~RTxwV$VyUK9G3f4eLQzj6XIH|1eRi+YcR;_cc?9na2-wYXdn zz@(SHWx~+l4%&<*_yA4b!r)B;B?>YZNy2``qTYMQ5*j{sV(DPe)?)?QE1VIhE4@#` zd_8?i*#zP(Z^d%BgSp;11zP=y15jaYJwR1s@}qWc9HOb@6YRG%3D7+`M2e%Tpx}@$ z*mldw34VaUyS}f_=Nn;bdH669SXi_<^}nd5VCdn@BFHDL0_)F2#ed=z`UCod21Rr) 
zZ)s?qUEgM2X>_-X=v7+2$No%ALme`C29b)J67D>Ux2LVft|DQF$nOlNCpl?j14p)J z293xFF;{)>2rdZHI9FlZGuY6>VE1b z__#0F#-OMNM^Dnkfimp>k%quO(ulG%upm>M+TRNu2NDCxVWI2~xhV$Qr2)bRSsH4Y+`XXZad%Z2e`aCm&WRJq!qc}F#i%zLPL0RXNGORVZlJi1LP zaeu4#(#hA(t2De=o~cIoqS^^~XyfiH8&fgfF_{^VGP3`;AA=nueeiyz3MN~(sHBMx zy~y=0uq`Aj=Mj_kNKR6YfRj?hPwQ@1n=c$!+k8gw2%(Y&C(a%MLutXfb1A@Z{S zA;~NWNH@1u=k5O1Ce`Nra(xUG6Vd1VR9Vi5j(Wq+oqb3+|mhU3PpI!nT@U1zcQvVxON(*%>A4Yt-Aj zQPUB{%e{h_9!pILVtsEEt|0ph0aU@P!8EcsZqVD##c%UC@EC)7iBD|F&&Fu6u-t2N zMe~3}6MnH1?!3%rM_5J($Z#`ES3+9H*!j9P+iD@RhufT1crj~;4^K9vAtZFxxuVu2 zY;((hPvM7$)3;Y^pNPToRdkJ;L+YP*m>hOk0qQLrD>jBl2L&YFlLV#FPg!s;gl^kZ zbBiFkRbgtrx1^_IE+wR+9FnxeKM+6R7Ny?A`(l7br5aWRCm}Dz|9$jG*A?_-uyL<1 zI-(*+gIM_36P70%?v5eR}Ngav*lJcVDvtpk=WLPbp z^Yk{PW8}O`tM)-#BfB)QzR2=Gju4KfBqkqAimzxEf^#>L$Nuqt85c9*PK!dBx;+); zKj{$EU}v$w{*o4uaH-GEusNJ5DzWJDxg+!+Rl~R7-%j{_0F^*~JT0Js7MT|{i zzLz+Sw~6$30!Wi2WEEBN6*3soL-^2Jg#BOj|3~uw97Z2%G!$9=jOybsSQ8&d>scU8h0){d;Xtu{QDdQe3<2S0ll5F|5r}bG%$&A(k-^% zEdL+jL1kN_F2Fms%NwKdXZm17K6X~zko)OIn<4UB4WV_^AG?ELd!`71zi&}!d(&KI zK-UBjRqSlb=S%I8`AB2c#qFK9ej^6f8EaRMr6a}){m={TKas!U8b0YA9(bXz{Jxen zDEo*f_p^J8{$r@p=(P&C26orOtHkLJa<3_iAr)(tZB zCiCxYPJ$%xfjYd8-AV1L{Pw~zMKy7Xr_+j)4gv-j_F3(!a$55D%9c zE|+ET<#irUYOES6v}DLW4 zpe>ZJtDjZ^B!DbQideBCYgik0s#vuiZ8N=@IvcnTYLjPeO~_PU*G-j+~+y>%$b=p^P4ks?rEX+9XV?if)>S( zh0OB#bz$sdq4(3%dq}d5IdUUav819bP+HI>ABnj)^{X-e8aqoJOifY|#4OQBl$+eB zmdmTWi9~LOw76iEbGqS>_R=IXpan^pW1lnR(J>ll==qkvsou(7hMqlm`tl z3-7EB7n#=+z#^gh;6ag_eGdi4c_+Ky4k^q{q6$LZiuxZF22lN(0H0A0lU`4F;PjU5 zKy4tD_*z*i0&y-RyAX(x%4tkn zp$=OZ*LvW$ZHHD_bMqE4Q?%R^SJLBHhnBmSc1}zO#xoEpO&uaJeypz= zolpg}NtA6)2_VHPR{B2hyG=Q18a)**tk<{FZoW-_N;SvSLZYYwS#Z=L3)?ZstEbqm zQ5M-uj76?=|B*QqTWy96pQ;-%{)jU^f8NMvpKo3D5JODpl5lH0W#3?DqGX%WPk&A313f4 z@ey0a-tB$`$m$08a}pB#jTY>RSmL2wR-%93*;FqL)AW*&dNZofG|vzbtTbf;o`M(h zeWbw2Z5kr~=o%!Eo0ASO30PY1RCk2f2B$Q`7GCThVbhfxhg6BZq3I=~vGd>QvO(-`4$_?e+=Rt$FXuk!JBmUpEx&*~7iL z0ZT+5Q2XxaN#S7m-=JtMie;#w$aZc{+1%H_COWpm9QSO2&lCNb<#gjH`y}Z3#|%_=uOa&6?3u=}(sB2XPm4}3 zyd;0hQpBh3%v)JpUb;Q8VKRAzR=_ZO6IY_-LaYRxkXEW(Z<;YXDwvOHzVxY(ca_Oue z0BO|hz6$)CEKU={O)#KKI;-S-9voCR(#6R>;TYc$XDKtc*=?#3k(HTvwfi$?9Q=ac z|0e7~;j3JNbc}g@6~q`0x+0iB&-K~1t3sQN|1+m@&q1kL$7=+5RcnY zyo(-nN+4s2i{-je1xm;oqiX@Wf*m@bsI`p}8YKy|BtpkyETL=jjPvu@W)l|(rOP=>i8R9P6cLu}Db?;XDa#Q#hf$Rz_w8|hqgTIsJ}j3oU0V#3 ztl&&YL5*(f&`A@t{mPOCTEXQr127IB*U;c^&`X4r{(?rHE*Qk0uEVCZ$l@SmqQNqK zLWh==%EL+VVd^8Lu#k1}Z_@dnC$pP2^Jmnp^1Ms_5Pg{?QSc<%zeMwUqKLUGsELhk}xM3N$MgC5UWt|V+WJa zhEawjh~)_GAFeD~RNq7ZVTyGTG`cGRC}o9~e2t7DAEsnJ&I8QhyP++q0}T8db`~Sw zzhR^h=o~YUcvUL8lU$i)*i;aVd6hlN;K6~Is(=XP7d$Q6P#N4cLzQilL1mXbj6}7& zU|Lmv^VQ78KB5o+JKUz|fsI^6Z3gSw&==|j91UBu6ms0)H<=IKeVG?gFh-$b4ShOb zY2*}fckqC+RuC+LhEpxZz7K6RFY_$*Cu5LvI7NinRGq9}}pyp8>DVQjK~vNQKO< z-!GkK|9bBdFgrAiJ29tQIpSXryg{DIRx+qkddB2|AKc<>Db#;^#*1wh$tV;ppb-E{ zbw+UUv}2rJ0g_!3B2X3?A#sG{vOdk8TE6f=s&2tzsTX@JQB$jGm}WXGhZQ)Bh)J6*v2*|`~=UUed=9Yus4tM^oR@Xj*tgN8n*+Mw}<&EF~d}VqaK9=rEN$1N6 z!mEAW!2W2m;jdVOvc88VKg9|ih=UX|-TX2*@IaEd4Yd!k<<(9^Wj~2X=6DUsYbt#! 
diff --git a/scalnet/.travis.yml b/scalnet/.travis.yml
deleted file mode 100644
index b6879f83d..000000000
--- a/scalnet/.travis.yml
+++ /dev/null
@@ -1,11 +0,0 @@
-language: scala
-scala: dummy
-jdk: oraclejdk8
-
-script:
-  - sbt ++2.11.12 clean compile test
-  - sbt ++2.10.7 clean compile test
-  - ./change-scala-versions.sh "2.11"
-  - mvn -q clean package
-  - ./change-scala-versions.sh "2.10"
-  - mvn -q clean package

From 966642c1c9b0fd60ef98b8d7a0f6804ca45d7794 Mon Sep 17 00:00:00 2001
From: raver119
Date: Sat, 30 May 2020 21:13:33 +0300
Subject: [PATCH 17/21] Rng tweaks (#479)

* initial commit

Signed-off-by: raver119@gmail.com

* Java Random.getFloat()/getDouble() methods mapped to C++

Signed-off-by: raver119@gmail.com

* Refactored relativeT for float and double data types.

Signed-off-by: shugeo

* Refactored float relativeT method.

Signed-off-by: shugeo

* Refactored relativeT

Signed-off-by: shugeo

* - additional rng tests
- float/double uniform generation methods slightly changed

Signed-off-by: raver119@gmail.com

* use bitset instead of manual conversion

Signed-off-by: raver119@gmail.com

* rollback valueBits changes

Signed-off-by: raver119@gmail.com

* remove unused shapelist

Signed-off-by: raver119@gmail.com

* update KMeans ground truth test

Signed-off-by: raver119@gmail.com

* dedicated union to make MSVC happy

Signed-off-by: raver119

* minor tweaks

Signed-off-by: raver119

* .seh_savexmm workaround?

Signed-off-by: raver119

* don't use march=native in tests on windows

Signed-off-by: raver119

Co-authored-by: shugeo
---
 .../clustering/kmeans/KMeansTest.java         |   8 +-
 libnd4j/include/array/NDArray.hXX             |   6 +-
 libnd4j/include/graph/RandomGenerator.h       | 103 ++++++++++--------
 libnd4j/include/helpers/StringUtils.h         |  11 ++
 libnd4j/include/helpers/impl/BitwiseUtils.cpp |  22 ++--
 libnd4j/include/helpers/impl/StringUtils.cpp  |  26 +++++
 libnd4j/include/legacy/NativeOps.h            |   2 +
 libnd4j/include/legacy/cpu/NativeOps.cpp      |   8 ++
 libnd4j/include/legacy/cuda/NativeOps.cu      |   8 ++
 .../generic/images/resize_images.cpp          |   1 -
 libnd4j/include/types/u32.h                   |  40 +++++++
 libnd4j/tests_cpu/layers_tests/CMakeLists.txt |   4 +-
 libnd4j/tests_cpu/layers_tests/RNGTests.cpp   | 103 +++++++++++++++++-
 .../tests_cpu/layers_tests/StringTests.cpp    |  12 ++
 .../java/org/nd4j/nativeblas/NativeOps.java   |   2 +
 .../main/java/org/nd4j/rng/NativeRandom.java  |   8 +-
 .../linalg/jcublas/rng/CudaNativeRandom.java  |  10 ++
 .../java/org/nd4j/nativeblas/Nd4jCuda.java    |  13 ++-
 .../cpu/nativecpu/rng/CpuNativeRandom.java    |  10 ++
 .../java/org/nd4j/nativeblas/Nd4jCpu.java     |  13 ++-
 .../java/org/nd4j/linalg/rng/RandomTests.java |  22 ++
 21 files changed, 350 insertions(+), 82 deletions(-)
 create mode 100644 libnd4j/include/types/u32.h
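The central change in this patch is how uniform floats are produced: rather than dividing an integer draw by its maximum value, RandomGenerator now assembles an IEEE-754 number directly from random bits, type-punned through the u32/u64 unions (a dedicated union rather than a cast, "to make MSVC happy" per the log above). Setting the exponent field to 127 (bit pattern 0x3f800000) and filling the 23 mantissa bits with random data yields a value in [1.0, 2.0); subtracting 1.0f maps it onto [0.0, 1.0), so the result can never reach 1.0. A minimal standalone sketch of the idea (names are illustrative, not from this patch):

    #include <cstdint>
    #include <cstdio>

    union F32Bits {              // stand-in for the sd::u32 union added below
        uint32_t _u32;
        float    _f32;
    };

    // Map 32 random bits onto a uniform float in [0.0f, 1.0f).
    static float bitsToUniform(uint32_t randomBits) {
        F32Bits u;
        u._u32 = 0x3f800000 | (randomBits >> 9);  // exponent 127 + 23 random mantissa bits -> [1, 2)
        return u._f32 - 1.0f;                     // shift down to [0, 1)
    }

    int main() {
        printf("%.8f\n", bitsToUniform(0u));           // 0.00000000
        printf("%.8f\n", bitsToUniform(0xffffffffu));  // 0.99999988, strictly below 1.0
    }

Reading the other union member is the same type-punning the patch itself relies on; the upper bound being strictly below 1.0 is what the new RNGTests below assert.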
diff --git a/deeplearning4j/deeplearning4j-nearestneighbors-parent/nearestneighbor-core/src/test/java/org/deeplearning4j/clustering/kmeans/KMeansTest.java b/deeplearning4j/deeplearning4j-nearestneighbors-parent/nearestneighbor-core/src/test/java/org/deeplearning4j/clustering/kmeans/KMeansTest.java
index abbfa04bc..e01274a71 100644
--- a/deeplearning4j/deeplearning4j-nearestneighbors-parent/nearestneighbor-core/src/test/java/org/deeplearning4j/clustering/kmeans/KMeansTest.java
+++ b/deeplearning4j/deeplearning4j-nearestneighbors-parent/nearestneighbor-core/src/test/java/org/deeplearning4j/clustering/kmeans/KMeansTest.java
@@ -273,10 +273,10 @@ public class KMeansTest extends BaseDL4JTest {
         ClusterSet clusterSet = kMeansClustering.applyTo(points);
 
         double[] centroid1 = {2.44e8, 2.71e8, 2.98e8, 3.25e8};
-        double[] centroid2 = {5.14e8, 5.41e8, 5.68e8, 5.95e8};
-        double[] centroid3 = {1000000.0, 2.8E7, 5.5E7, 8.2E7};
-        double[] centroid4 = {7.03E8, 7.3E8, 7.57E8, 7.84E8};
-        double[] centroid5 = {3.79E8, 4.06E8, 4.33E8, 4.6E8};
+        double[] centroid2 = {1000000.0, 2.8E7, 5.5E7, 8.2E7};
+        double[] centroid3 = {5.95E8, 6.22e8, 6.49e8, 6.76e8};
+        double[] centroid4 = {3.79E8, 4.06E8, 4.33E8, 4.6E8};
+        double[] centroid5 = {5.5E7, 8.2E7, 1.09E8, 1.36E8};
 
         assertArrayEquals(centroid1, clusterSet.getClusters().get(0).getCenter().getArray().toDoubleVector(), 1e-4);
         assertArrayEquals(centroid2, clusterSet.getClusters().get(1).getCenter().getArray().toDoubleVector(), 1e-4);

diff --git a/libnd4j/include/array/NDArray.hXX b/libnd4j/include/array/NDArray.hXX
index 773d845ab..9e48b05de 100644
--- a/libnd4j/include/array/NDArray.hXX
+++ b/libnd4j/include/array/NDArray.hXX
@@ -1671,11 +1671,11 @@ void NDArray::printLinearBuffer() const {
     }
     else if(this->dataType() == sd::DataType::FLOAT32) {
         for(Nd4jLong e = 0; e < len; e++)
-            printf("%.3f, ", this->bufferAsT<float>()[e * ews]);
+            printf("%.8f, ", this->bufferAsT<float>()[e * ews]);
     }
     else if(this->dataType() == sd::DataType::DOUBLE) {
         for(Nd4jLong e = 0; e < len; e++)
-            printf("%.3f, ", this->bufferAsT<double>()[e * ews]);
+            printf("%.8f, ", this->bufferAsT<double>()[e * ews]);
     }
     else
         throw std::invalid_argument("NDArray::printLinearBuffer: not implemented yet for this data type !");
@@ -1773,7 +1773,7 @@ void NDArray::printIndexedBuffer(const char* msg, Nd4jLong limit) const {
         if (this->isZ())
             printf("%lld\n", this->e<Nd4jLong>(0));
         else if (this->isR())
-            printf("%f\n", this->e<float>(0));
+            printf("%.8f\n", this->e<float>(0));
         else if (this->isB()) {
             printf("%s\n", this->e<bool>(0)?"true":"false");
         }
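The print-precision bump above is not cosmetic: at three decimals, two single-precision values that differ only in their low mantissa bits print identically, which hides RNG regressions in test output. A tiny illustration (the values are hypothetical, not from the patch):

    #include <cstdio>

    int main() {
        float a = 0.12345678f;   // hypothetical RNG output
        float b = 0.12349999f;   // a nearby but different output
        printf("%.3f vs %.3f\n", a, b);  // prints "0.123 vs 0.123" - indistinguishable
        printf("%.8f vs %.8f\n", a, b);  // the difference shows from the 5th decimal on
    }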
diff --git a/libnd4j/include/graph/RandomGenerator.h b/libnd4j/include/graph/RandomGenerator.h
index ef06c345d..407993a09 100644
--- a/libnd4j/include/graph/RandomGenerator.h
+++ b/libnd4j/include/graph/RandomGenerator.h
@@ -22,6 +22,7 @@
 #define LIBND4J_GRAPH_RNG_H
 
 #include
+#include
 #include
 #include
 #include
@@ -29,6 +30,7 @@
 #include
 #include
 #include
+#include <types/u32.h>
 
 #ifdef __CUDACC__
 #include
@@ -79,9 +81,9 @@ namespace sd {
          */
         static FORCEINLINE Nd4jLong currentMilliseconds();
 
-
-        FORCEINLINE _CUDA_HD uint32_t xoroshiro32(Nd4jLong index);
-        FORCEINLINE _CUDA_HD uint64_t xoroshiro64(Nd4jLong index);
+    public:
+        FORCEINLINE _CUDA_HD uint32_t xoroshiro32(uint64_t index);
+        FORCEINLINE _CUDA_HD uint64_t xoroshiro64(uint64_t index);
 
         /**
          * This method returns integer value between 0 and MAX_UINT
          */
@@ -119,7 +121,7 @@ namespace sd {
         FORCEINLINE _CUDA_HD int relativeInt(Nd4jLong index);
         FORCEINLINE _CUDA_HD Nd4jLong relativeLong(Nd4jLong index);
 
-        FORCEINLINE _CUDA_HD void rewindH(Nd4jLong steps);
+        FORCEINLINE _CUDA_HD void rewindH(uint64_t steps);
 
         /**
          * These methods set up only node states, with non-changed root ones
@@ -172,6 +174,24 @@ namespace sd {
         return v;
     }
 
+    template <>
+    _CUDA_HD FORCEINLINE float RandomGenerator::relativeT<float>(Nd4jLong index) {
+        u32 u;
+        u._u32 = (0x3f800000 | (this->xoroshiro32(index) >> 9));
+        return u._f32 - 1.0f;
+    }
+
+    template <>
+    _CUDA_HD FORCEINLINE double RandomGenerator::relativeT<double>(Nd4jLong index) {
+#ifdef __DOUBLE_RNG__
+        u64 u;
+        u._ulong = ((UINT64_C(0x3FF) << 52) | (this->xoroshiro64(index) >> 12));
+        return u._double - 1.0;
+#else
+        return (double) relativeT<float>(index);
+#endif
+    }
+
     template <>
     _CUDA_HD FORCEINLINE uint64_t RandomGenerator::relativeT<uint64_t>(Nd4jLong index) {
         return this->xoroshiro64(index);
@@ -184,16 +204,14 @@ namespace sd {
 
     template <>
     _CUDA_HD FORCEINLINE int RandomGenerator::relativeT<int>(Nd4jLong index) {
-        auto x = this->relativeT<uint32_t>(index);
-        auto r = static_cast<int>(x % DataTypeUtils::max<int>());
-        return r;
+        auto r = relativeT<uint32_t>(index);
+        return r <= DataTypeUtils::max<int>() ? r : r % DataTypeUtils::max<int>();
     }
 
     template <>
     _CUDA_HD FORCEINLINE Nd4jLong RandomGenerator::relativeT<Nd4jLong>(Nd4jLong index) {
-        auto x = this->relativeT<uint64_t>(index);
-        auto r = static_cast<Nd4jLong>(x % DataTypeUtils::max<Nd4jLong>());
-        return r;
+        auto r = relativeT<uint64_t>(index);
+        return r <= DataTypeUtils::max<Nd4jLong>() ? r : r % DataTypeUtils::max<Nd4jLong>();
     }
 
     template <typename T>
@@ -220,24 +238,18 @@ namespace sd {
 
     template <typename T>
     _CUDA_HD FORCEINLINE T RandomGenerator::relativeT(Nd4jLong index) {
         // This is default implementation for floating point types
-#ifdef __DOUBLE_RNG__
-        auto i = static_cast<double>(this->relativeT<uint64_t>(index));
-        auto r = i / static_cast<double>(DataTypeUtils::max<uint64_t>());
-        return static_cast<T>(r);
-#else
-        auto i = static_cast<float>(this->relativeT<uint32_t>(index));
-        auto r = i / static_cast<float>(DataTypeUtils::max<uint32_t>());
-        return static_cast<T>(r);
-#endif
+        return static_cast<T>(relativeT<float>(index));
     }
 
     _CUDA_HD FORCEINLINE int RandomGenerator::relativeInt(Nd4jLong index) {
-        return relativeT<int>(index);
+        auto r = relativeT<uint32_t>(index);
+        return r <= DataTypeUtils::max<int>() ? r : r % DataTypeUtils::max<int>();
     }
 
     _CUDA_HD FORCEINLINE Nd4jLong RandomGenerator::relativeLong(Nd4jLong index) {
-        return relativeT<Nd4jLong>(index);
+        auto r = relativeT<uint64_t>(index);
+        return r <= DataTypeUtils::max<Nd4jLong>() ? r : r % DataTypeUtils::max<Nd4jLong>();
     }
 
     //////
@@ -249,23 +261,12 @@ namespace sd {
         return (x << k) | (x >> (64 - k));
     }
 
-    _CUDA_HD FORCEINLINE uint32_t RandomGenerator::xoroshiro32(Nd4jLong index) {
-
-        auto s0 = _rootState._ulong;
-        auto s1 = _nodeState._ulong;
-
-        // xor by idx
-        s0 |= ((index + 2) * (s1 + 24243287));
-        s1 ^= ((index + 2) * (s0 + 723829));
-
-        unsigned long val = 0;
-        val = s1 ^ s0;
-        int* pHalf = reinterpret_cast<int*>(&val);
-
-        return rotl(*pHalf * 0x9E3779BB, 5) * 5;
+    static FORCEINLINE _CUDA_HD uint32_t next(uint32_t s0, uint32_t s1, uint32_t s2, uint32_t s3) {
+        const uint32_t result = rotl(s0 + s3, 7) + s0;
+        return result;
     }
 
-    _CUDA_HD FORCEINLINE uint64_t RandomGenerator::xoroshiro64(Nd4jLong index) {
+    _CUDA_HD FORCEINLINE uint32_t RandomGenerator::xoroshiro32(uint64_t index) {
         auto s0 = _rootState._ulong;
         auto s1 = _nodeState._ulong;
@@ -273,23 +274,29 @@
         s0 |= ((index + 2) * (s1 + 24243287));
         s1 ^= ((index + 2) * (s0 + 723829));
 
-        // since we're not modifying state - do rotl step right here
-        s1 ^= s0;
-        s0 = rotl(s0, 55) ^ s1 ^ (s1 << 14);
-        s1 = rotl(s1, 36);
+        unsigned long val = 0;
+        val = s1 ^ s0;
+        int* pHalf = reinterpret_cast<int*>(&val);
 
-        return s0 + s1;
+        return rotl(*pHalf * 0x9E3779BB, 5) * 5;
     }
 
-    _CUDA_HD FORCEINLINE void RandomGenerator::rewindH(Nd4jLong steps) {
-        auto s0 = _nodeState._du32._v0;
-        auto s1 = _nodeState._du32._v1;
+    _CUDA_HD FORCEINLINE uint64_t RandomGenerator::xoroshiro64(uint64_t index) {
+        uint64_t upper = ((uint64_t) xoroshiro32(index)) << 32;
+        uint32_t lower = xoroshiro32(sd::math::nd4j_rotl<uint64_t>(index, 32));
+        return upper + lower;
+    }
 
-        s1 ^= s0;
-        _nodeState._du32._v0 = rotl(s0, 26) ^ s1 ^ (s1 << 9); // a, b
-        _nodeState._du32._v1 = rotl(s1, 13); // c
+    _CUDA_HD FORCEINLINE void RandomGenerator::rewindH(uint64_t steps) {
+        // we only update node state, if any
+        auto s0 = _nodeState._du32._v0;
+        auto s1 = _nodeState._du32._v1;
 
-        _nodeState._long ^= (steps ^ 0xdeadbeef);
+        s1 ^= s0;
+        _nodeState._du32._v0 = rotl(s0, 26) ^ s1 ^ (s1 << 9); // a, b
+        _nodeState._du32._v1 = rotl(s1, 13); // c
+
+        _nodeState._long ^= (steps ^ 0xdeadbeef);
     }
 }
}
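Note the recurring clamp in the integer paths above: a raw 32-bit draw, viewed as unsigned, can exceed the signed maximum, so values above the limit are folded back with a modulo while smaller values pass through untouched (replacing the unconditional modulo of the old code). Restated standalone, with std::numeric_limits playing the role of DataTypeUtils::max (an assumed equivalence for illustration):

    #include <cstdint>
    #include <limits>

    // Fold an unsigned 32-bit draw into the non-negative int range,
    // mirroring the "r <= max ? r : r % max" pattern in the diff above.
    static int foldToInt(uint32_t r) {
        constexpr uint32_t kMax = static_cast<uint32_t>(std::numeric_limits<int>::max());
        return static_cast<int>(r <= kMax ? r : r % kMax);
    }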
diff --git a/libnd4j/include/helpers/StringUtils.h b/libnd4j/include/helpers/StringUtils.h
index ef9586637..e5f9f2990 100644
--- a/libnd4j/include/helpers/StringUtils.h
+++ b/libnd4j/include/helpers/StringUtils.h
@@ -44,6 +44,14 @@ namespace sd {
             return os.str();
         }
 
+        /**
+         * These methods convert integer values to string with 0s and 1s
+         * @param value
+         * @return
+         */
+        template <typename T>
+        static std::string bitsToString(T value);
+
         /**
          * This method just concatenates error message with a given graphId
          * @param message
@@ -137,6 +145,9 @@ namespace sd {
          * @return boolean status
          */
         static bool u32StringToU8String(const std::u32string& u32, std::string& u8);
+
+        template <typename T>
+        static std::string vectorToString(const std::vector<T>& vec);
    };
}

diff --git a/libnd4j/include/helpers/impl/BitwiseUtils.cpp b/libnd4j/include/helpers/impl/BitwiseUtils.cpp
index e3f4ce92a..9bd3fa8cf 100644
--- a/libnd4j/include/helpers/impl/BitwiseUtils.cpp
+++ b/libnd4j/include/helpers/impl/BitwiseUtils.cpp
@@ -49,31 +49,29 @@ namespace sd {
             return -1;
     }
 
-
     std::vector<int> BitwiseUtils::valueBits(int holder) {
         std::vector<int> bits;
 
         if (holder == 0) {
-           for (int e = 0; e < 32; e++)
-               bits.emplace_back(0);
+            for (int e = 0; e < 32; e++)
+                bits.emplace_back(0);
 
-           return bits;
+            return bits;
         }
 
-
 #ifdef REVERSE_BITS
         for (int e = 32; e >= 0; e--) {
 #else
         for (int e = 0; e < 32; e++) {
 #endif
-           bool isOne = (holder & 1 << e) != 0;
+            bool isOne = (holder & 1 << e) != 0;
 
-           if (isOne)
-               bits.emplace_back(1);
-           else
-               bits.emplace_back(0);
-       }
+            if (isOne)
+                bits.emplace_back(1);
+            else
+                bits.emplace_back(0);
+        }
 
-       return bits;
+        return bits;
     }
 
     sd::ByteOrder BitwiseUtils::asByteOrder() {

diff --git a/libnd4j/include/helpers/impl/StringUtils.cpp b/libnd4j/include/helpers/impl/StringUtils.cpp
index 5ac2fd8cc..757def763 100644
--- a/libnd4j/include/helpers/impl/StringUtils.cpp
+++ b/libnd4j/include/helpers/impl/StringUtils.cpp
@@ -21,7 +21,9 @@
 //
 
 #include
+#include <bitset>
 #include
+#include
 
 namespace sd {
     static FORCEINLINE bool match(const uint8_t *haystack, const uint8_t *needle, uint64_t length) {

         return true;
     }
 
+    template <typename T>
+    std::string StringUtils::bitsToString(T value) {
+        return std::bitset<sizeof(T) * 8>(value).to_string();
+    }
+
+template std::string StringUtils::bitsToString(int value);
+template std::string StringUtils::bitsToString(uint32_t value);
+template std::string StringUtils::bitsToString(Nd4jLong value);
+template std::string StringUtils::bitsToString(uint64_t value);
+
     uint64_t StringUtils::countSubarrays(const void *vhaystack, uint64_t haystackLength, const void *vneedle, uint64_t needleLength) {
         auto haystack = reinterpret_cast<const uint8_t*>(vhaystack);
         auto needle = reinterpret_cast<const uint8_t*>(vneedle);
@@ -155,4 +168,17 @@
         return true;
     }
 
+    template <typename T>
+    std::string StringUtils::vectorToString(const std::vector<T>& vec) {
+        std::string result;
+        for (auto v:vec)
+            result += valueToString(v);
+
+        return result;
+    }
+
+    template std::string StringUtils::vectorToString(const std::vector<int>& vec);
+    template std::string StringUtils::vectorToString(const std::vector<Nd4jLong>& vec);
+    template std::string StringUtils::vectorToString(const std::vector<uint32_t>& vec);
+    template std::string StringUtils::vectorToString(const std::vector<uint64_t>& vec);
}
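The new StringUtils::bitsToString helper is exercised by test_bit_string_1 in StringTests further down; std::bitset does the heavy lifting. A self-contained sketch of the same pattern (the bit width sizeof(T) * 8 mirrors the reconstruction above and matches the 32-character expectation in that test):

    #include <bitset>
    #include <cstdint>
    #include <iostream>
    #include <string>

    template <typename T>
    std::string bitsToString(T value) {
        // render the integer MSB-first as a string of '0'/'1' characters
        return std::bitset<sizeof(T) * 8>(value).to_string();
    }

    int main() {
        std::cout << bitsToString<uint32_t>(1u) << "\n";  // 31 zeros followed by a single 1
    }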
diff --git a/libnd4j/include/legacy/NativeOps.h b/libnd4j/include/legacy/NativeOps.h
index 17affd1c3..c72b0d535 100755
--- a/libnd4j/include/legacy/NativeOps.h
+++ b/libnd4j/include/legacy/NativeOps.h
@@ -1606,6 +1606,8 @@ ND4J_EXPORT OpaqueRandomGenerator* createRandomGenerator(Nd4jLong rootSeed = 0,
 ND4J_EXPORT Nd4jLong getRandomGeneratorRootState(OpaqueRandomGenerator* ptr);
 ND4J_EXPORT Nd4jLong getRandomGeneratorNodeState(OpaqueRandomGenerator* ptr);
 ND4J_EXPORT void setRandomGeneratorStates(OpaqueRandomGenerator* ptr, Nd4jLong rootSeed = 0, Nd4jLong nodeSeed = 0);
+ND4J_EXPORT float getRandomGeneratorRelativeFloat(OpaqueRandomGenerator* ptr, Nd4jLong index);
+ND4J_EXPORT double getRandomGeneratorRelativeDouble(OpaqueRandomGenerator* ptr, Nd4jLong index);
 ND4J_EXPORT int getRandomGeneratorRelativeInt(OpaqueRandomGenerator* ptr, Nd4jLong index);
 ND4J_EXPORT Nd4jLong getRandomGeneratorRelativeLong(OpaqueRandomGenerator* ptr, Nd4jLong index);
 ND4J_EXPORT void deleteRandomGenerator(OpaqueRandomGenerator* ptr);

diff --git a/libnd4j/include/legacy/cpu/NativeOps.cpp b/libnd4j/include/legacy/cpu/NativeOps.cpp
index 799351ccc..ae8a22a6a 100644
--- a/libnd4j/include/legacy/cpu/NativeOps.cpp
+++ b/libnd4j/include/legacy/cpu/NativeOps.cpp
@@ -2832,6 +2832,14 @@ void setRandomGeneratorStates(sd::graph::RandomGenerator* ptr, Nd4jLong rootSeed
     ptr->setStates(rootSeed, nodeSeed);
 }
 
+float getRandomGeneratorRelativeFloat(sd::graph::RandomGenerator* ptr, Nd4jLong index) {
+    return ptr->relativeT<float>(index);
+}
+
+double getRandomGeneratorRelativeDouble(sd::graph::RandomGenerator* ptr, Nd4jLong index) {
+    return ptr->relativeT<double>(index);
+}
+
 int getRandomGeneratorRelativeInt(sd::graph::RandomGenerator* ptr, Nd4jLong index) {
     return ptr->relativeInt(index);
 }

diff --git a/libnd4j/include/legacy/cuda/NativeOps.cu b/libnd4j/include/legacy/cuda/NativeOps.cu
index 8be9b3bfd..465029207 100755
--- a/libnd4j/include/legacy/cuda/NativeOps.cu
+++ b/libnd4j/include/legacy/cuda/NativeOps.cu
@@ -3515,6 +3515,14 @@ void setRandomGeneratorStates(sd::graph::RandomGenerator* ptr, Nd4jLong rootSeed
     ptr->setStates(rootSeed, nodeSeed);
 }
 
+float getRandomGeneratorRelativeFloat(sd::graph::RandomGenerator* ptr, Nd4jLong index) {
+    return ptr->relativeT<float>(index);
+}
+
+double getRandomGeneratorRelativeDouble(sd::graph::RandomGenerator* ptr, Nd4jLong index) {
+    return ptr->relativeT<double>(index);
+}
+
 int getRandomGeneratorRelativeInt(sd::graph::RandomGenerator* ptr, Nd4jLong index) {
     return ptr->relativeInt(index);
 }

diff --git a/libnd4j/include/ops/declarable/generic/images/resize_images.cpp b/libnd4j/include/ops/declarable/generic/images/resize_images.cpp
index c3f9ae8f1..18d048450 100644
--- a/libnd4j/include/ops/declarable/generic/images/resize_images.cpp
+++ b/libnd4j/include/ops/declarable/generic/images/resize_images.cpp
@@ -81,7 +81,6 @@ namespace sd {
     }
 
     DECLARE_SHAPE_FN(resize_images) {
-        auto shapeList = SHAPELIST();
         auto in = inputShape->at(0);
 
         Nd4jLong* outputShape;

diff --git a/libnd4j/include/types/u32.h b/libnd4j/include/types/u32.h
new file mode 100644
index 000000000..115b207cb
--- /dev/null
+++ b/libnd4j/include/types/u32.h
@@ -0,0 +1,40 @@
+/*******************************************************************************
+ * Copyright (c) 2020 Konduit K.K.
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Apache License, Version 2.0 which is available at
+ * https://www.apache.org/licenses/LICENSE-2.0.
+ * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@protonmail.com +// +#ifndef SD_U32_H +#define SD_U32_H + +#include +#include + + +namespace sd { + union u32 { + bool _bool; + int8_t _s8; + uint8_t _u8; + int16_t _s16; + uint16_t _u16; + int32_t _s32; + uint32_t _u32; + float _f32; + }; +} + +#endif \ No newline at end of file diff --git a/libnd4j/tests_cpu/layers_tests/CMakeLists.txt b/libnd4j/tests_cpu/layers_tests/CMakeLists.txt index 5ae202542..563bf58f6 100644 --- a/libnd4j/tests_cpu/layers_tests/CMakeLists.txt +++ b/libnd4j/tests_cpu/layers_tests/CMakeLists.txt @@ -45,7 +45,7 @@ if (APPLE) set(CMAKE_CXX_FLAGS " -fPIC -D__APPLE_OS__=true") elseif(WIN32) if (SD_CPU) - set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -fPIC -march=native -mtune=native -O3") + set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -fPIC -mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -msse4 -mavx -mavx2 -O3") endif() if (SD_CPU AND LINUX) @@ -61,7 +61,7 @@ else() endif() if (SD_CPU AND SD_SANITIZE) - set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS} -fsanitize=address") + set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fsanitize=address") else() # CUDA? endif() diff --git a/libnd4j/tests_cpu/layers_tests/RNGTests.cpp b/libnd4j/tests_cpu/layers_tests/RNGTests.cpp index 37facc43c..469cc77be 100644 --- a/libnd4j/tests_cpu/layers_tests/RNGTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/RNGTests.cpp @@ -49,8 +49,8 @@ public: //_bufferB = new Nd4jLong[100000]; //_rngA = (sd::random::RandomBuffer *) initRandom(nullptr, _seed, 100000, (Nd4jPointer) _bufferA); //_rngB = (sd::random::RandomBuffer *) initRandom(nullptr, _seed, 100000, (Nd4jPointer) _bufferB); - _rngA.setStates(_seed, _seed); - _rngB.setStates(_seed, _seed); + _rngA.setStates(_seed * 0xDEADBEEF * 13, _seed * 0xDEADBEEF * 7); + _rngB.setStates(_seed * 0xDEADBEEF * 13, _seed * 0xDEADBEEF * 7); nexp0->assign(-1.0f); nexp1->assign(-2.0f); nexp2->assign(-3.0f); @@ -204,6 +204,9 @@ TEST_F(RNGTests, Test_Uniform_1) { RandomLauncher::fillUniform(LaunchContext::defaultContext(), _rngA, &x0, 1.0f, 2.0f); RandomLauncher::fillUniform(LaunchContext::defaultContext(), _rngB, &x1, 1.0f, 2.0f); + x0.printLinearBuffer(); + x1.printLinearBuffer(); + ASSERT_TRUE(x0.equalsTo(&x1)); ASSERT_FALSE(x0.equalsTo(nexp0)); @@ -212,10 +215,82 @@ TEST_F(RNGTests, Test_Uniform_1) { for (int e = 0; e < x0.lengthOf(); e++) { float v = x0.e(e); + nd4j_printf("%f\n", v); ASSERT_TRUE(v >= 1.0f && v <= 2.0f); } } +TEST_F(RNGTests, Test_Uniform_10) { + auto x = NDArrayFactory::create('c', {10000, 10000}); + auto z = NDArrayFactory::create(0.0f); + + RandomLauncher::fillUniform(LaunchContext::defaultContext(), _rngA, &x, 0.0f, 1.0f); + + sd::ops::reduce_max op; + auto status = op.execute({&x}, {&z}); + ASSERT_EQ(Status::OK(), status); + + ASSERT_LT(z.t(0), 1.0f); +} + +TEST_F(RNGTests, Test_Uniform_10_double) { + auto x = NDArrayFactory::create('c', {10000, 10000}); + auto z = NDArrayFactory::create(0.0f); + + RandomLauncher::fillUniform(LaunchContext::defaultContext(), _rngA, &x, 0.0f, 1.0f); + + sd::ops::reduce_max op; + auto status = 
op.execute({&x}, {&z}); + ASSERT_EQ(Status::OK(), status); + + ASSERT_LT(z.t(0), 1.0); +} + +TEST_F(RNGTests, Test_Uniform_11) { + uint32_t max = 0; + for (int e = 0; e < 100000000; e++) { + auto v = _rngA.xoroshiro32(e) >> 8; + if (v > max) + max = v; + } + + nd4j_printf("Max value: %i\n", (int) max); +} + +TEST_F(RNGTests, Test_Uniform_12) { + float max = -std::numeric_limits::infinity(); + float min = std::numeric_limits::infinity(); + for (int e = 0; e < 100000000; e++) { + auto v = _rngA.relativeT(e); + if (v > max) + max = v; + + if (v < min) + min = v; + } + + nd4j_printf("Max value: %.8f; Min value: %.8f\n", (float) max, (float) min); + ASSERT_LT(max, 1.0f); + ASSERT_GE(min, 0.0); +} + +TEST_F(RNGTests, Test_Uniform_13) { + double max = -std::numeric_limits::infinity(); + double min = std::numeric_limits::infinity(); + for (int e = 0; e < 100000000; e++) { + auto v = _rngA.relativeT(e); + if (v > max) + max = v; + + if (v < min) + min = v; + } + + nd4j_printf("Max value: %.8f; Min value: %.8f\n", (float) max, (float) min); + ASSERT_LT(max, 1.0); + ASSERT_GE(min, 0.0); +} + TEST_F(RNGTests, Test_Uniform_3) { auto x0 = NDArrayFactory::create('c', {1000000}); @@ -258,8 +333,8 @@ TEST_F(RNGTests, Test_Gaussian_1) { } TEST_F(RNGTests, Test_Gaussian_21) { - auto x0 = NDArrayFactory::create('c', {10, 10}); - auto x1 = NDArrayFactory::create('c', {10, 10}); + auto x0 = NDArrayFactory::create('c', {1000, 1000}); + auto x1 = NDArrayFactory::create('c', {1000, 1000}); RandomLauncher::fillGaussian(LaunchContext::defaultContext(), _rngA, &x0, 0.0f, 1.0f); RandomLauncher::fillGaussian(LaunchContext::defaultContext(), _rngB, &x1, 0.0f, 1.0f); @@ -983,6 +1058,26 @@ TEST_F(RNGTests, Test_UniformDistribution_04) { } +TEST_F(RNGTests, Test_UniformDistribution_05) { + auto x = NDArrayFactory::create('c', {2}, {10000, 10000}); + auto al = NDArrayFactory::create(0.f); + auto be = NDArrayFactory::create(1.f); + auto exp0 = NDArrayFactory::create('c', {10000, 10000}); + + + sd::ops::randomuniform op; + auto result = op.evaluate({&x, &al, &be}, {}, {},{}, {DataType::FLOAT32}); + ASSERT_EQ(Status::OK(), result.status()); + + auto z = result.at(0); + ASSERT_TRUE(exp0.isSameShape(z)); + ASSERT_FALSE(exp0.equalsTo(z)); + + sd::ops::reduce_max checkOp; + auto checkResult = checkOp.evaluate({z}); + checkResult[0]->printIndexedBuffer("Max on uniform with 0 to 1 on 100M cases is"); +} + namespace sd { namespace tests { static void fillList(Nd4jLong seed, int numberOfArrays, std::vector &shape, std::vector &list, sd::graph::RandomGenerator *rng) { diff --git a/libnd4j/tests_cpu/layers_tests/StringTests.cpp b/libnd4j/tests_cpu/layers_tests/StringTests.cpp index 272c410c7..41352246e 100644 --- a/libnd4j/tests_cpu/layers_tests/StringTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/StringTests.cpp @@ -25,6 +25,8 @@ #include #include "testlayers.h" #include +#include +#include using namespace sd; @@ -863,3 +865,13 @@ TEST_F(StringTests, Basic_cast_UTF8toUTF32) { ASSERT_EQ(u8, z0); ASSERT_EQ(u32, z1); } + +TEST_F(StringTests, test_bit_string_1) { + // check bits -> vector conversion first + auto vec = BitwiseUtils::valueBits(1); + + // check bits -> string conversion next; + auto str = StringUtils::bitsToString(1); + ASSERT_EQ(32, str.length()); + ASSERT_EQ(std::string("00000000000000000000000000000001"), str); +} \ No newline at end of file diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-native-api/src/main/java/org/nd4j/nativeblas/NativeOps.java 
b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-native-api/src/main/java/org/nd4j/nativeblas/NativeOps.java index c7789d7dc..ae9ff1e94 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-native-api/src/main/java/org/nd4j/nativeblas/NativeOps.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-native-api/src/main/java/org/nd4j/nativeblas/NativeOps.java @@ -1154,6 +1154,8 @@ public interface NativeOps { long getRandomGeneratorRootState(OpaqueRandomGenerator ptr); long getRandomGeneratorNodeState(OpaqueRandomGenerator ptr); void setRandomGeneratorStates(OpaqueRandomGenerator ptr, @Cast("Nd4jLong") long rootSeed/*=0*/, @Cast("Nd4jLong") long nodeSeed/*=0*/); + float getRandomGeneratorRelativeFloat(OpaqueRandomGenerator ptr, @Cast("Nd4jLong") long index); + double getRandomGeneratorRelativeDouble(OpaqueRandomGenerator ptr, @Cast("Nd4jLong") long index); int getRandomGeneratorRelativeInt(OpaqueRandomGenerator ptr, @Cast("Nd4jLong") long index); long getRandomGeneratorRelativeLong(OpaqueRandomGenerator ptr, @Cast("Nd4jLong") long index); void deleteRandomGenerator(OpaqueRandomGenerator ptr); diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-native-api/src/main/java/org/nd4j/rng/NativeRandom.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-native-api/src/main/java/org/nd4j/rng/NativeRandom.java index 563fe2e45..04f9c7499 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-native-api/src/main/java/org/nd4j/rng/NativeRandom.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-native-api/src/main/java/org/nd4j/rng/NativeRandom.java @@ -107,14 +107,10 @@ public abstract class NativeRandom implements Random { } @Override - public float nextFloat() { - return (float) nextInt() / (float) Integer.MAX_VALUE; - } + public abstract float nextFloat(); @Override - public double nextDouble() { - return (double) nextInt() / (double) Integer.MAX_VALUE; - } + public abstract double nextDouble(); @Override public double nextGaussian() { diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/rng/CudaNativeRandom.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/rng/CudaNativeRandom.java index edb5d291a..e5067c9c9 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/rng/CudaNativeRandom.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/rng/CudaNativeRandom.java @@ -81,6 +81,16 @@ public class CudaNativeRandom extends NativeRandom { return seed; } + @Override + public float nextFloat() { + return nativeOps.getRandomGeneratorRelativeFloat((OpaqueRandomGenerator)statePointer, currentPosition.getAndIncrement()); + } + + @Override + public double nextDouble() { + return nativeOps.getRandomGeneratorRelativeDouble((OpaqueRandomGenerator)statePointer, currentPosition.getAndIncrement()); + } + @Override public int nextInt() { return nativeOps.getRandomGeneratorRelativeInt((OpaqueRandomGenerator)statePointer, currentPosition.getAndIncrement()); diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCuda.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCuda.java index 59496d780..ad9503849 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCuda.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCuda.java @@ -3098,6 +3098,8 @@ public native @Cast("Nd4jLong") long 
getRandomGeneratorRootState(OpaqueRandomGen public native @Cast("Nd4jLong") long getRandomGeneratorNodeState(OpaqueRandomGenerator ptr); public native void setRandomGeneratorStates(OpaqueRandomGenerator ptr, @Cast("Nd4jLong") long rootSeed/*=0*/, @Cast("Nd4jLong") long nodeSeed/*=0*/); public native void setRandomGeneratorStates(OpaqueRandomGenerator ptr); +public native float getRandomGeneratorRelativeFloat(OpaqueRandomGenerator ptr, @Cast("Nd4jLong") long index); +public native double getRandomGeneratorRelativeDouble(OpaqueRandomGenerator ptr, @Cast("Nd4jLong") long index); public native int getRandomGeneratorRelativeInt(OpaqueRandomGenerator ptr, @Cast("Nd4jLong") long index); public native @Cast("Nd4jLong") long getRandomGeneratorRelativeLong(OpaqueRandomGenerator ptr, @Cast("Nd4jLong") long index); public native void deleteRandomGenerator(OpaqueRandomGenerator ptr); @@ -5048,6 +5050,7 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); // #include // #include // #include +// #include // #ifdef __CUDACC__ // #endif @@ -5064,6 +5067,8 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); return (RandomGenerator)super.position(position); } + public native @Cast("uint32_t") int xoroshiro32(@Cast("uint64_t") long index); + public native @Cast("uint64_t") long xoroshiro64(@Cast("uint64_t") long index); public RandomGenerator(@Cast("Nd4jLong") long rootSeed/*=0*/, @Cast("Nd4jLong") long nodeSeed/*=0*/) { super((Pointer)null); allocate(rootSeed, nodeSeed); } private native void allocate(@Cast("Nd4jLong") long rootSeed/*=0*/, @Cast("Nd4jLong") long nodeSeed/*=0*/); public RandomGenerator() { super((Pointer)null); allocate(); } @@ -5094,7 +5099,7 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); public native int relativeInt(@Cast("Nd4jLong") long index); public native @Cast("Nd4jLong") long relativeLong(@Cast("Nd4jLong") long index); - public native void rewindH(@Cast("Nd4jLong") long steps); + public native void rewindH(@Cast("uint64_t") long steps); /** * These methods set up only node states, with non-changed root ones @@ -5126,6 +5131,10 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); + + + + @@ -5141,6 +5150,8 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); @Namespace("sd::graph") public static native @Cast("uint64_t") long rotl(@Cast("const uint64_t") long x, int k); + @Namespace("sd::graph") public static native @Cast("uint32_t") int next(@Cast("uint32_t") int s0, @Cast("uint32_t") int s1, @Cast("uint32_t") int s2, @Cast("uint32_t") int s3); + diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/linalg/cpu/nativecpu/rng/CpuNativeRandom.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/linalg/cpu/nativecpu/rng/CpuNativeRandom.java index 2a2cff200..96219e8ff 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/linalg/cpu/nativecpu/rng/CpuNativeRandom.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/linalg/cpu/nativecpu/rng/CpuNativeRandom.java @@ -75,6 +75,16 @@ public class CpuNativeRandom extends NativeRandom { return nativeOps.getRandomGeneratorRelativeInt((OpaqueRandomGenerator)statePointer, currentPosition.getAndIncrement()); } + @Override + public float nextFloat() { + return nativeOps.getRandomGeneratorRelativeFloat((OpaqueRandomGenerator)statePointer, currentPosition.getAndIncrement()); + } + + @Override + public double nextDouble() { + return 
nativeOps.getRandomGeneratorRelativeDouble((OpaqueRandomGenerator)statePointer, currentPosition.getAndIncrement()); + } + @Override public long nextLong() { return nativeOps.getRandomGeneratorRelativeLong((OpaqueRandomGenerator)statePointer, currentPosition.getAndIncrement()); diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java index b9e4adb5a..402b096c6 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java @@ -3102,6 +3102,8 @@ public native @Cast("Nd4jLong") long getRandomGeneratorRootState(OpaqueRandomGen public native @Cast("Nd4jLong") long getRandomGeneratorNodeState(OpaqueRandomGenerator ptr); public native void setRandomGeneratorStates(OpaqueRandomGenerator ptr, @Cast("Nd4jLong") long rootSeed/*=0*/, @Cast("Nd4jLong") long nodeSeed/*=0*/); public native void setRandomGeneratorStates(OpaqueRandomGenerator ptr); +public native float getRandomGeneratorRelativeFloat(OpaqueRandomGenerator ptr, @Cast("Nd4jLong") long index); +public native double getRandomGeneratorRelativeDouble(OpaqueRandomGenerator ptr, @Cast("Nd4jLong") long index); public native int getRandomGeneratorRelativeInt(OpaqueRandomGenerator ptr, @Cast("Nd4jLong") long index); public native @Cast("Nd4jLong") long getRandomGeneratorRelativeLong(OpaqueRandomGenerator ptr, @Cast("Nd4jLong") long index); public native void deleteRandomGenerator(OpaqueRandomGenerator ptr); @@ -5052,6 +5054,7 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); // #include // #include // #include +// #include // #ifdef __CUDACC__ // #endif @@ -5068,6 +5071,8 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); return (RandomGenerator)super.position(position); } + public native @Cast("uint32_t") int xoroshiro32(@Cast("uint64_t") long index); + public native @Cast("uint64_t") long xoroshiro64(@Cast("uint64_t") long index); public RandomGenerator(@Cast("Nd4jLong") long rootSeed/*=0*/, @Cast("Nd4jLong") long nodeSeed/*=0*/) { super((Pointer)null); allocate(rootSeed, nodeSeed); } private native void allocate(@Cast("Nd4jLong") long rootSeed/*=0*/, @Cast("Nd4jLong") long nodeSeed/*=0*/); public RandomGenerator() { super((Pointer)null); allocate(); } @@ -5098,7 +5103,7 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); public native int relativeInt(@Cast("Nd4jLong") long index); public native @Cast("Nd4jLong") long relativeLong(@Cast("Nd4jLong") long index); - public native void rewindH(@Cast("Nd4jLong") long steps); + public native void rewindH(@Cast("uint64_t") long steps); /** * These methods set up only node states, with non-changed root ones @@ -5130,6 +5135,10 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); + + + + @@ -5145,6 +5154,8 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); @Namespace("sd::graph") public static native @Cast("uint64_t") long rotl(@Cast("const uint64_t") long x, int k); + @Namespace("sd::graph") public static native @Cast("uint32_t") int next(@Cast("uint32_t") int s0, @Cast("uint32_t") int s1, @Cast("uint32_t") int s2, @Cast("uint32_t") int s3); + diff --git a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/rng/RandomTests.java b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/rng/RandomTests.java index 
d784fb390..4e885db96 100644 --- a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/rng/RandomTests.java +++ b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/rng/RandomTests.java @@ -1514,6 +1514,28 @@ public class RandomTests extends BaseNd4jTest { assertEquals(res[0], res1[0]); } + + @Test + public void testRandom() { + val r1 = new java.util.Random(119); + val r2 = Nd4j.getRandom(); + r2.setSeed(119); + float jmax = 0.0f; + float nmax = 0.0f; + for (int e = 0; e < 100_000_000; e++) { + val f = r1.nextFloat(); + val n = r2.nextFloat(); + if (f > jmax) + jmax = f; + + if (n > nmax) + nmax = n; + } + + assertTrue(jmax < 1.0); + assertTrue(nmax < 1.0); + } + @Override public char ordering() { return 'c'; From c783a5938a57a45bc12743af0fad45e51e040ff0 Mon Sep 17 00:00:00 2001 From: Abdelrauf Date: Mon, 1 Jun 2020 12:47:21 +0400 Subject: [PATCH 18/21] Cmake compilation helper module (#478) * Cmake: generate compilation units Corrections. Added loops/cpu compilation units Config files Signed-off-by: Abdelrauf * Cmake compilation helper module: fix cmake variable scope and some typos Signed-off-by: Abdelrauf * Cmake compilation helper: added other generations. should fix cuda compilation Signed-off-by: Abdelrauf * crop_and_resize adopted new compilation units setup Signed-off-by: raver119@gmail.com * Cmake Compilation Helper: Added comments and some configurations file Signed-off-by: Abdelrauf * minor fix Signed-off-by: Abdelrauf Co-authored-by: raver119@gmail.com --- libnd4j/CMakeLists.txt | 4 + libnd4j/blas/CMakeLists.txt | 27 +++-- libnd4j/cmake/GenCompilation.cmake | 103 ++++++++++++++++++ ...0.cpp => IndexReductionLoops_int32.cpp.in} | 7 +- .../cpu/loops/IndexReductionLoops_int32_2.cpp | 24 ---- .../cpu/loops/IndexReductionLoops_int32_3.cpp | 24 ---- .../cpu/loops/IndexReductionLoops_int32_4.cpp | 24 ---- .../cpu/loops/IndexReductionLoops_int32_5.cpp | 24 ---- .../cpu/loops/IndexReductionLoops_int32_6.cpp | 24 ---- .../cpu/loops/IndexReductionLoops_int32_7.cpp | 24 ---- .../cpu/loops/IndexReductionLoops_int32_8.cpp | 24 ---- .../cpu/loops/IndexReductionLoops_int32_9.cpp | 24 ---- ...1.cpp => IndexReductionLoops_int64.cpp.in} | 6 +- .../cpu/loops/IndexReductionLoops_int64_0.cpp | 24 ---- .../cpu/loops/IndexReductionLoops_int64_1.cpp | 24 ---- .../cpu/loops/IndexReductionLoops_int64_2.cpp | 24 ---- .../cpu/loops/IndexReductionLoops_int64_3.cpp | 24 ---- .../cpu/loops/IndexReductionLoops_int64_4.cpp | 24 ---- .../cpu/loops/IndexReductionLoops_int64_5.cpp | 24 ---- .../cpu/loops/IndexReductionLoops_int64_6.cpp | 24 ---- .../cpu/loops/IndexReductionLoops_int64_7.cpp | 24 ---- .../cpu/loops/IndexReductionLoops_int64_8.cpp | 24 ---- .../cpu/loops/IndexReductionLoops_int64_9.cpp | 24 ---- .../cpu/loops/Reduction3Loops.cpp.in} | 9 +- ...uction3Loops_0.cpp => Reduction3Loops.hpp} | 3 +- .../helpers/cpu/loops/Reduction3Loops_1.cpp | 60 ---------- .../helpers/cpu/loops/Reduction3Loops_2.cpp | 60 ---------- .../helpers/cpu/loops/Reduction3Loops_3.cpp | 60 ---------- .../cpu/loops/ReductionLoops_float.cpp.in} | 12 +- ...s_float_0.cpp => ReductionLoops_float.hpp} | 3 +- .../cpu/loops/ReductionLoops_float_1.cpp | 52 --------- .../cpu/loops/ReductionLoops_float_2.cpp | 49 --------- .../cpu/loops/ReductionLoops_float_3.cpp | 49 --------- ...st_bool_p0.cpp => broadcast_bool_p.cpp.in} | 5 +- .../compilation_units/broadcast_bool_p1.cpp | 27 ----- .../compilation_units/broadcast_bool_p2.cpp | 27 ----- .../compilation_units/broadcast_bool_p3.cpp | 27 ----- 
.../compilation_units/broadcast_bool_p4.cpp | 27 ----- .../compilation_units/broadcast_bool_p5.cpp | 27 ----- .../compilation_units/broadcast_bool_p6.cpp | 27 ----- .../compilation_units/broadcast_bool_p7.cpp | 27 ----- .../compilation_units/broadcast_bool_p8.cpp | 27 ----- .../compilation_units/broadcast_bool_p9.cpp | 27 ----- ...cast_int_p0.cpp => broadcast_int_p.cpp.in} | 5 +- .../compilation_units/broadcast_int_p1.cpp | 27 ----- .../compilation_units/broadcast_int_p2.cpp | 27 ----- .../compilation_units/broadcast_int_p3.cpp | 27 ----- .../compilation_units/broadcast_int_p4.cpp | 27 ----- .../compilation_units/broadcast_int_p5.cpp | 27 ----- .../compilation_units/broadcast_int_p6.cpp | 27 ----- .../compilation_units/broadcast_int_p7.cpp | 27 ----- .../{broadcast_p0.cpp => broadcast_p.cpp.in} | 6 +- .../cpu/compilation_units/broadcast_p1.cpp | 27 ----- .../cpu/compilation_units/broadcast_p10.cpp | 27 ----- .../cpu/compilation_units/broadcast_p11.cpp | 27 ----- .../cpu/compilation_units/broadcast_p12.cpp | 27 ----- .../cpu/compilation_units/broadcast_p2.cpp | 27 ----- .../cpu/compilation_units/broadcast_p3.cpp | 27 ----- .../cpu/compilation_units/broadcast_p4.cpp | 27 ----- .../cpu/compilation_units/broadcast_p5.cpp | 27 ----- .../cpu/compilation_units/broadcast_p6.cpp | 27 ----- .../cpu/compilation_units/broadcast_p7.cpp | 27 ----- .../cpu/compilation_units/broadcast_p8.cpp | 27 ----- .../cpu/compilation_units/broadcast_p9.cpp | 27 ----- ...e_int32_0.cpp => indexreduce_int32.cpp.in} | 6 +- .../compilation_units/indexreduce_int32_2.cpp | 28 ----- .../compilation_units/indexreduce_int32_3.cpp | 28 ----- .../compilation_units/indexreduce_int32_4.cpp | 28 ----- .../compilation_units/indexreduce_int32_5.cpp | 28 ----- .../compilation_units/indexreduce_int32_6.cpp | 28 ----- .../compilation_units/indexreduce_int32_7.cpp | 28 ----- .../compilation_units/indexreduce_int32_8.cpp | 28 ----- .../compilation_units/indexreduce_int32_9.cpp | 28 ----- ...e_int32_1.cpp => indexreduce_int64.cpp.in} | 6 +- .../compilation_units/indexreduce_int64_0.cpp | 28 ----- .../compilation_units/indexreduce_int64_1.cpp | 28 ----- .../compilation_units/indexreduce_int64_2.cpp | 28 ----- .../compilation_units/indexreduce_int64_3.cpp | 28 ----- .../compilation_units/indexreduce_int64_4.cpp | 28 ----- .../compilation_units/indexreduce_int64_5.cpp | 28 ----- .../compilation_units/indexreduce_int64_6.cpp | 28 ----- .../compilation_units/indexreduce_int64_7.cpp | 28 ----- .../compilation_units/indexreduce_int64_8.cpp | 28 ----- .../compilation_units/indexreduce_int64_9.cpp | 28 ----- .../{pairwise_p0.cpp => pairwise_p.cpp.in} | 6 +- .../cpu/compilation_units/pairwise_p1.cpp | 28 ----- .../cpu/compilation_units/pairwise_p10.cpp | 27 ----- .../cpu/compilation_units/pairwise_p11.cpp | 27 ----- .../cpu/compilation_units/pairwise_p12.cpp | 27 ----- .../cpu/compilation_units/pairwise_p2.cpp | 27 ----- .../cpu/compilation_units/pairwise_p3.cpp | 27 ----- .../cpu/compilation_units/pairwise_p4.cpp | 27 ----- .../cpu/compilation_units/pairwise_p5.cpp | 27 ----- .../cpu/compilation_units/pairwise_p6.cpp | 27 ----- .../cpu/compilation_units/pairwise_p7.cpp | 27 ----- .../cpu/compilation_units/pairwise_p8.cpp | 27 ----- .../cpu/compilation_units/pairwise_p9.cpp | 27 ----- .../{random_0.cpp => random.cpp.in} | 6 +- .../loops/cpu/compilation_units/random_1.cpp | 27 ----- .../loops/cpu/compilation_units/random_2.cpp | 27 ----- .../loops/cpu/compilation_units/random_3.cpp | 27 ----- ...bfloat16_0.cpp => reduce3_bfloat16.cpp.in} | 6 +- 
.../compilation_units/reduce3_bfloat16_4.cpp | 28 ----- .../compilation_units/reduce3_bfloat16_5.cpp | 28 ----- .../compilation_units/reduce3_bfloat16_6.cpp | 28 ----- .../compilation_units/reduce3_bfloat16_7.cpp | 28 ----- .../compilation_units/reduce3_bfloat16_8.cpp | 28 ----- .../compilation_units/reduce3_bfloat16_9.cpp | 28 ----- ...3_bfloat16_1.cpp => reduce3_double.cpp.in} | 6 +- .../compilation_units/reduce3_double_0.cpp | 28 ----- .../compilation_units/reduce3_double_1.cpp | 28 ----- .../compilation_units/reduce3_double_2.cpp | 28 ----- .../compilation_units/reduce3_double_3.cpp | 28 ----- .../compilation_units/reduce3_double_4.cpp | 28 ----- .../compilation_units/reduce3_double_5.cpp | 28 ----- .../compilation_units/reduce3_double_6.cpp | 28 ----- .../compilation_units/reduce3_double_7.cpp | 28 ----- .../compilation_units/reduce3_double_8.cpp | 28 ----- .../compilation_units/reduce3_double_9.cpp | 28 ----- ...e3_bfloat16_2.cpp => reduce3_float.cpp.in} | 6 +- ..._bfloat16_3.cpp => reduce3_float16.cpp.in} | 6 +- .../compilation_units/reduce3_float16_0.cpp | 28 ----- .../compilation_units/reduce3_float16_1.cpp | 28 ----- .../compilation_units/reduce3_float16_2.cpp | 28 ----- .../compilation_units/reduce3_float16_3.cpp | 28 ----- .../compilation_units/reduce3_float16_4.cpp | 28 ----- .../compilation_units/reduce3_float16_5.cpp | 28 ----- .../compilation_units/reduce3_float16_6.cpp | 28 ----- .../compilation_units/reduce3_float16_7.cpp | 28 ----- .../compilation_units/reduce3_float16_8.cpp | 28 ----- .../compilation_units/reduce3_float16_9.cpp | 28 ----- .../cpu/compilation_units/reduce3_float_0.cpp | 28 ----- .../cpu/compilation_units/reduce3_float_1.cpp | 28 ----- .../cpu/compilation_units/reduce3_float_2.cpp | 28 ----- .../cpu/compilation_units/reduce3_float_3.cpp | 28 ----- .../cpu/compilation_units/reduce3_float_4.cpp | 28 ----- .../cpu/compilation_units/reduce3_float_5.cpp | 28 ----- .../cpu/compilation_units/reduce3_float_6.cpp | 28 ----- .../cpu/compilation_units/reduce3_float_7.cpp | 28 ----- .../cpu/compilation_units/reduce3_float_8.cpp | 28 ----- .../cpu/compilation_units/reduce3_float_9.cpp | 28 ----- ...reduce_float_0.cpp => reduce_float.cpp.in} | 6 +- .../cpu/compilation_units/reduce_float_2.cpp | 28 ----- .../cpu/compilation_units/reduce_float_3.cpp | 28 ----- .../{scalar_p0.cpp => scalar_p.cpp.in} | 6 +- .../loops/cpu/compilation_units/scalar_p1.cpp | 27 ----- .../cpu/compilation_units/scalar_p10.cpp | 27 ----- .../cpu/compilation_units/scalar_p11.cpp | 27 ----- .../cpu/compilation_units/scalar_p12.cpp | 27 ----- .../loops/cpu/compilation_units/scalar_p2.cpp | 27 ----- .../loops/cpu/compilation_units/scalar_p3.cpp | 27 ----- .../loops/cpu/compilation_units/scalar_p4.cpp | 27 ----- .../loops/cpu/compilation_units/scalar_p5.cpp | 27 ----- .../loops/cpu/compilation_units/scalar_p6.cpp | 27 ----- .../loops/cpu/compilation_units/scalar_p7.cpp | 27 ----- .../loops/cpu/compilation_units/scalar_p8.cpp | 27 ----- .../loops/cpu/compilation_units/scalar_p9.cpp | 27 ----- .../broadcasting_0.cu => broadcasting.cu.in} | 6 +- .../broadcasting/broadcasting_1.cu | 27 ----- .../broadcasting/broadcasting_10.cu | 27 ----- .../broadcasting/broadcasting_11.cu | 27 ----- .../broadcasting/broadcasting_12.cu | 27 ----- .../broadcasting/broadcasting_2.cu | 27 ----- .../broadcasting/broadcasting_3.cu | 27 ----- .../broadcasting/broadcasting_4.cu | 27 ----- .../broadcasting/broadcasting_5.cu | 27 ----- .../broadcasting/broadcasting_6.cu | 27 ----- .../broadcasting/broadcasting_7.cu | 27 ----- 
.../broadcasting/broadcasting_8.cu | 27 ----- .../broadcasting/broadcasting_9.cu | 27 ----- .../pairwise_0.cu => pairwise.cu.in} | 6 +- .../compilation_units/pairwise/pairwise_1.cu | 27 ----- .../compilation_units/pairwise/pairwise_10.cu | 27 ----- .../compilation_units/pairwise/pairwise_11.cu | 27 ----- .../compilation_units/pairwise/pairwise_12.cu | 27 ----- .../compilation_units/pairwise/pairwise_2.cu | 27 ----- .../compilation_units/pairwise/pairwise_3.cu | 27 ----- .../compilation_units/pairwise/pairwise_4.cu | 27 ----- .../compilation_units/pairwise/pairwise_5.cu | 27 ----- .../compilation_units/pairwise/pairwise_6.cu | 27 ----- .../compilation_units/pairwise/pairwise_7.cu | 27 ----- .../compilation_units/pairwise/pairwise_8.cu | 27 ----- .../compilation_units/pairwise/pairwise_9.cu | 27 ----- .../cuda/compilation_units/reduce3.cu.in | 27 +++++ .../compilation_units/reduce3/reduce3_0.cu | 27 ----- .../compilation_units/reduce3/reduce3_1.cu | 27 ----- .../compilation_units/reduce3/reduce3_2.cu | 27 ----- .../compilation_units/reduce3/reduce3_3.cu | 27 ----- .../compilation_units/reduce_float.cu.in} | 9 +- .../reduce_float/reduce_float_0.cu | 27 ----- .../reduce_float/reduce_float_1.cu | 27 ----- .../reduce_float/reduce_float_2.cu | 27 ----- .../reduce_float/reduce_float_3.cu | 27 ----- .../{scalar/scalar_0.cu => scalar.cu.in} | 6 +- .../cuda/compilation_units/scalar/scalar_1.cu | 27 ----- .../compilation_units/scalar/scalar_10.cu | 27 ----- .../compilation_units/scalar/scalar_11.cu | 27 ----- .../compilation_units/scalar/scalar_12.cu | 27 ----- .../cuda/compilation_units/scalar/scalar_2.cu | 27 ----- .../cuda/compilation_units/scalar/scalar_3.cu | 27 ----- .../cuda/compilation_units/scalar/scalar_4.cu | 27 ----- .../cuda/compilation_units/scalar/scalar_5.cu | 27 ----- .../cuda/compilation_units/scalar/scalar_6.cu | 27 ----- .../cuda/compilation_units/scalar/scalar_7.cu | 27 ----- .../cuda/compilation_units/scalar/scalar_8.cu | 27 ----- .../cuda/compilation_units/scalar/scalar_9.cu | 27 ----- .../cpu/compilation_units/argamax.cpp.in | 10 +- .../cpu/compilation_units/argamin.cpp.in | 1 + .../cpu/compilation_units/argmax.cpp.in | 1 + .../cpu/compilation_units/argmin.cpp.in | 2 + ...nd_resize_3.cpp => crop_and_resize.cpp.in} | 7 +- .../crop_and_resize/crop_and_resize_0.cpp | 30 ----- .../crop_and_resize/crop_and_resize_1.cpp | 30 ----- .../crop_and_resize/crop_and_resize_2.cpp | 30 ----- .../crop_and_resize/crop_and_resize_4.cpp | 30 ----- .../crop_and_resize/crop_and_resize_5.cpp | 30 ----- .../crop_and_resize/crop_and_resize_6.cpp | 30 ----- .../crop_and_resize/crop_and_resize_7.cpp | 30 ----- .../crop_and_resize/crop_and_resize_8.cpp | 30 ----- .../crop_and_resize/crop_and_resize_9.cpp | 30 ----- ...ls_double_0.cpp => specials_double.cpp.in} | 8 +- .../compilation_units/specials_double_3.cpp | 26 ----- .../compilation_units/specials_double_4.cpp | 26 ----- .../compilation_units/specials_double_5.cpp | 26 ----- .../compilation_units/specials_double_6.cpp | 26 ----- .../compilation_units/specials_double_7.cpp | 26 ----- .../compilation_units/specials_double_8.cpp | 26 ----- .../compilation_units/specials_double_9.cpp | 26 ----- ...ls_single_0.cpp => specials_single.cpp.in} | 6 +- .../compilation_units/specials_single_1.cpp | 26 ----- .../compilation_units/specials_single_2.cpp | 26 ----- .../compilation_units/specials_single_3.cpp | 26 ----- .../compilation_units/specials_single_4.cpp | 26 ----- .../compilation_units/specials_single_5.cpp | 26 ----- 
.../compilation_units/specials_single_6.cpp | 26 ----- .../compilation_units/specials_single_7.cpp | 26 ----- .../compilation_units/specials_single_8.cpp | 26 ----- .../compilation_units/specials_single_9.cpp | 26 ----- .../tests_cpu/libnd4j_tests/CMakeLists.txt | 15 +-- 239 files changed, 253 insertions(+), 5791 deletions(-) create mode 100644 libnd4j/cmake/GenCompilation.cmake rename libnd4j/include/helpers/cpu/loops/{IndexReductionLoops_int32_0.cpp => IndexReductionLoops_int32.cpp.in} (88%) delete mode 100644 libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_2.cpp delete mode 100644 libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_3.cpp delete mode 100644 libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_4.cpp delete mode 100644 libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_5.cpp delete mode 100644 libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_6.cpp delete mode 100644 libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_7.cpp delete mode 100644 libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_8.cpp delete mode 100644 libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_9.cpp rename libnd4j/include/helpers/cpu/loops/{IndexReductionLoops_int32_1.cpp => IndexReductionLoops_int64.cpp.in} (88%) delete mode 100644 libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_0.cpp delete mode 100644 libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_1.cpp delete mode 100644 libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_2.cpp delete mode 100644 libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_3.cpp delete mode 100644 libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_4.cpp delete mode 100644 libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_5.cpp delete mode 100644 libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_6.cpp delete mode 100644 libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_7.cpp delete mode 100644 libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_8.cpp delete mode 100644 libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_9.cpp rename libnd4j/include/{ops/impl/compilation_units/specials_double_1.cpp => helpers/cpu/loops/Reduction3Loops.cpp.in} (80%) rename libnd4j/include/helpers/cpu/loops/{Reduction3Loops_0.cpp => Reduction3Loops.hpp} (96%) delete mode 100644 libnd4j/include/helpers/cpu/loops/Reduction3Loops_1.cpp delete mode 100644 libnd4j/include/helpers/cpu/loops/Reduction3Loops_2.cpp delete mode 100644 libnd4j/include/helpers/cpu/loops/Reduction3Loops_3.cpp rename libnd4j/include/{ops/impl/compilation_units/specials_double_2.cpp => helpers/cpu/loops/ReductionLoops_float.cpp.in} (79%) rename libnd4j/include/helpers/cpu/loops/{ReductionLoops_float_0.cpp => ReductionLoops_float.hpp} (95%) delete mode 100644 libnd4j/include/helpers/cpu/loops/ReductionLoops_float_1.cpp delete mode 100644 libnd4j/include/helpers/cpu/loops/ReductionLoops_float_2.cpp delete mode 100644 libnd4j/include/helpers/cpu/loops/ReductionLoops_float_3.cpp rename libnd4j/include/loops/cpu/compilation_units/{broadcast_bool_p0.cpp => broadcast_bool_p.cpp.in} (87%) delete mode 100644 libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p1.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p2.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p3.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p4.cpp delete mode 100644 
libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p5.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p6.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p7.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p8.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p9.cpp rename libnd4j/include/loops/cpu/compilation_units/{broadcast_int_p0.cpp => broadcast_int_p.cpp.in} (89%) delete mode 100644 libnd4j/include/loops/cpu/compilation_units/broadcast_int_p1.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/broadcast_int_p2.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/broadcast_int_p3.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/broadcast_int_p4.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/broadcast_int_p5.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/broadcast_int_p6.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/broadcast_int_p7.cpp rename libnd4j/include/loops/cpu/compilation_units/{broadcast_p0.cpp => broadcast_p.cpp.in} (89%) delete mode 100644 libnd4j/include/loops/cpu/compilation_units/broadcast_p1.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/broadcast_p10.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/broadcast_p11.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/broadcast_p12.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/broadcast_p2.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/broadcast_p3.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/broadcast_p4.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/broadcast_p5.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/broadcast_p6.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/broadcast_p7.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/broadcast_p8.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/broadcast_p9.cpp rename libnd4j/include/loops/cpu/compilation_units/{indexreduce_int32_0.cpp => indexreduce_int32.cpp.in} (87%) delete mode 100644 libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_2.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_3.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_4.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_5.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_6.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_7.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_8.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_9.cpp rename libnd4j/include/loops/cpu/compilation_units/{indexreduce_int32_1.cpp => indexreduce_int64.cpp.in} (87%) delete mode 100644 libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_0.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_1.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_2.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_3.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_4.cpp delete mode 100644 
libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_5.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_6.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_7.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_8.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_9.cpp rename libnd4j/include/loops/cpu/compilation_units/{pairwise_p0.cpp => pairwise_p.cpp.in} (89%) delete mode 100644 libnd4j/include/loops/cpu/compilation_units/pairwise_p1.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/pairwise_p10.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/pairwise_p11.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/pairwise_p12.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/pairwise_p2.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/pairwise_p3.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/pairwise_p4.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/pairwise_p5.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/pairwise_p6.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/pairwise_p7.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/pairwise_p8.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/pairwise_p9.cpp rename libnd4j/include/loops/cpu/compilation_units/{random_0.cpp => random.cpp.in} (89%) delete mode 100644 libnd4j/include/loops/cpu/compilation_units/random_1.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/random_2.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/random_3.cpp rename libnd4j/include/loops/cpu/compilation_units/{reduce3_bfloat16_0.cpp => reduce3_bfloat16.cpp.in} (89%) delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_4.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_5.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_6.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_7.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_8.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_9.cpp rename libnd4j/include/loops/cpu/compilation_units/{reduce3_bfloat16_1.cpp => reduce3_double.cpp.in} (89%) delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_double_0.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_double_1.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_double_2.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_double_3.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_double_4.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_double_5.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_double_6.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_double_7.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_double_8.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_double_9.cpp rename libnd4j/include/loops/cpu/compilation_units/{reduce3_bfloat16_2.cpp => reduce3_float.cpp.in} (89%) rename libnd4j/include/loops/cpu/compilation_units/{reduce3_bfloat16_3.cpp => 
reduce3_float16.cpp.in} (89%) delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float16_0.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float16_1.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float16_2.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float16_3.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float16_4.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float16_5.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float16_6.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float16_7.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float16_8.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float16_9.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float_0.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float_1.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float_2.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float_3.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float_4.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float_5.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float_6.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float_7.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float_8.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce3_float_9.cpp rename libnd4j/include/loops/cpu/compilation_units/{reduce_float_0.cpp => reduce_float.cpp.in} (87%) delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce_float_2.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/reduce_float_3.cpp rename libnd4j/include/loops/cpu/compilation_units/{scalar_p0.cpp => scalar_p.cpp.in} (89%) delete mode 100644 libnd4j/include/loops/cpu/compilation_units/scalar_p1.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/scalar_p10.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/scalar_p11.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/scalar_p12.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/scalar_p2.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/scalar_p3.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/scalar_p4.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/scalar_p5.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/scalar_p6.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/scalar_p7.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/scalar_p8.cpp delete mode 100644 libnd4j/include/loops/cpu/compilation_units/scalar_p9.cpp rename libnd4j/include/loops/cuda/compilation_units/{broadcasting/broadcasting_0.cu => broadcasting.cu.in} (89%) delete mode 100644 libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_1.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_10.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_11.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_12.cu delete mode 100644 
libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_2.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_3.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_4.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_5.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_6.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_7.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_8.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_9.cu rename libnd4j/include/loops/cuda/compilation_units/{pairwise/pairwise_0.cu => pairwise.cu.in} (88%) delete mode 100644 libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_1.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_10.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_11.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_12.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_2.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_3.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_4.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_5.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_6.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_7.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_8.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_9.cu create mode 100644 libnd4j/include/loops/cuda/compilation_units/reduce3.cu.in delete mode 100644 libnd4j/include/loops/cuda/compilation_units/reduce3/reduce3_0.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/reduce3/reduce3_1.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/reduce3/reduce3_2.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/reduce3/reduce3_3.cu rename libnd4j/include/loops/{cpu/compilation_units/reduce_float_1.cpp => cuda/compilation_units/reduce_float.cu.in} (86%) delete mode 100644 libnd4j/include/loops/cuda/compilation_units/reduce_float/reduce_float_0.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/reduce_float/reduce_float_1.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/reduce_float/reduce_float_2.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/reduce_float/reduce_float_3.cu rename libnd4j/include/loops/cuda/compilation_units/{scalar/scalar_0.cu => scalar.cu.in} (89%) delete mode 100644 libnd4j/include/loops/cuda/compilation_units/scalar/scalar_1.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/scalar/scalar_10.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/scalar/scalar_11.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/scalar/scalar_12.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/scalar/scalar_2.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/scalar/scalar_3.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/scalar/scalar_4.cu delete mode 100644 
libnd4j/include/loops/cuda/compilation_units/scalar/scalar_5.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/scalar/scalar_6.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/scalar/scalar_7.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/scalar/scalar_8.cu delete mode 100644 libnd4j/include/loops/cuda/compilation_units/scalar/scalar_9.cu rename libnd4j/include/ops/declarable/helpers/cpu/compilation_units/{crop_and_resize/crop_and_resize_3.cpp => crop_and_resize.cpp.in} (84%) delete mode 100644 libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_0.cpp delete mode 100644 libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_1.cpp delete mode 100644 libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_2.cpp delete mode 100644 libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_4.cpp delete mode 100644 libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_5.cpp delete mode 100644 libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_6.cpp delete mode 100644 libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_7.cpp delete mode 100644 libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_8.cpp delete mode 100644 libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_9.cpp rename libnd4j/include/ops/impl/compilation_units/{specials_double_0.cpp => specials_double.cpp.in} (86%) delete mode 100644 libnd4j/include/ops/impl/compilation_units/specials_double_3.cpp delete mode 100644 libnd4j/include/ops/impl/compilation_units/specials_double_4.cpp delete mode 100644 libnd4j/include/ops/impl/compilation_units/specials_double_5.cpp delete mode 100644 libnd4j/include/ops/impl/compilation_units/specials_double_6.cpp delete mode 100644 libnd4j/include/ops/impl/compilation_units/specials_double_7.cpp delete mode 100644 libnd4j/include/ops/impl/compilation_units/specials_double_8.cpp delete mode 100644 libnd4j/include/ops/impl/compilation_units/specials_double_9.cpp rename libnd4j/include/ops/impl/compilation_units/{specials_single_0.cpp => specials_single.cpp.in} (90%) delete mode 100644 libnd4j/include/ops/impl/compilation_units/specials_single_1.cpp delete mode 100644 libnd4j/include/ops/impl/compilation_units/specials_single_2.cpp delete mode 100644 libnd4j/include/ops/impl/compilation_units/specials_single_3.cpp delete mode 100644 libnd4j/include/ops/impl/compilation_units/specials_single_4.cpp delete mode 100644 libnd4j/include/ops/impl/compilation_units/specials_single_5.cpp delete mode 100644 libnd4j/include/ops/impl/compilation_units/specials_single_6.cpp delete mode 100644 libnd4j/include/ops/impl/compilation_units/specials_single_7.cpp delete mode 100644 libnd4j/include/ops/impl/compilation_units/specials_single_8.cpp delete mode 100644 libnd4j/include/ops/impl/compilation_units/specials_single_9.cpp diff --git a/libnd4j/CMakeLists.txt b/libnd4j/CMakeLists.txt index 3376bd6b6..0c78b3409 100755 --- a/libnd4j/CMakeLists.txt +++ b/libnd4j/CMakeLists.txt @@ -17,6 +17,10 @@ option(FLATBUFFERS_BUILD_FLATC "Enable the build of the flatbuffers compiler" OF set(FLATBUFFERS_BUILD_FLATC "OFF" CACHE STRING "Hack to disable flatc build" FORCE) set(CMAKE_CXX_STANDARD 11) + + +include(GenCompilation) + if 
(SD_CUDA) enable_language(CUDA) set(CMAKE_CUDA_STANDARD 11) diff --git a/libnd4j/blas/CMakeLists.txt b/libnd4j/blas/CMakeLists.txt index 8419cdd4c..fb1dc066e 100755 --- a/libnd4j/blas/CMakeLists.txt +++ b/libnd4j/blas/CMakeLists.txt @@ -226,6 +226,14 @@ if(SD_CUDA) file(GLOB_RECURSE LEGACY_SOURCES false ../include/legacy/impl/*.cpp ../include/legacy/*.cu ../include/legacy/*.h) file(GLOB_RECURSE LOOPS_SOURCES_CUDA false ../include/loops/*.cu) + + file(GLOB_RECURSE COMPILATION_UNITS false ../include/loops/cuda/compilation_units/*.cu.in + ../include/ops/impl/compilation_units/*.cpp.in) + + foreach(FL_ITEM ${COMPILATION_UNITS}) + genCompilation(FL_ITEM) + endforeach() + if (HAVE_CUDNN) message("cuDNN included") file(GLOB_RECURSE CUSTOMOPS_CUDNN_SOURCES false ../include/ops/declarable/platform/cudnn/*.cu) @@ -234,7 +242,9 @@ if(SD_CUDA) add_library(samediff_obj OBJECT ${LOOPS_SOURCES_CUDA} ${LEGACY_SOURCES} ${CUSTOMOPS_HELPERS_SOURCES} ${HELPERS_SOURCES} ${EXEC_SOURCES} ${LOOPS_SOURCES} ${ARRAY_SOURCES} ${TYPES_SOURCES} - ${MEMORY_SOURCES} ${GRAPH_SOURCES} ${CUSTOMOPS_SOURCES} ${INDEXING_SOURCES} ${EXCEPTIONS_SOURCES} ${OPS_SOURCES} ${PERF_SOURCES} ${CUSTOMOPS_CUDNN_SOURCES} ${CUSTOMOPS_MKLDNN_SOURCES}) + ${MEMORY_SOURCES} ${GRAPH_SOURCES} ${CUSTOMOPS_SOURCES} ${INDEXING_SOURCES} ${EXCEPTIONS_SOURCES} ${OPS_SOURCES} ${PERF_SOURCES} ${CUSTOMOPS_CUDNN_SOURCES} ${CUSTOMOPS_MKLDNN_SOURCES} + ${CUSTOMOPS_GENERIC_SOURCES} + ) if (WIN32) message("MSVC runtime for library: ${MSVC_RT_LIB}") @@ -295,15 +305,12 @@ elseif(SD_CPU) file(GLOB_RECURSE LOOPS_SOURCES false ../include/loops/*.cpp ../include/loops/*.h) - file(GLOB_RECURSE COMPILATION_UNITS false ../include/ops/declarable/helpers/cpu/compilation_units/*.cpp.in) - foreach(FL_ITEM ${COMPILATION_UNITS}) - string(REGEX MATCH "^(.*)\\.cpp\.in$" dummy ${FL_ITEM}) - set(FL_ITEM_WLE ${CMAKE_MATCH_1}) - foreach(FL_TYPE_INDEX RANGE 0 9) - #message( "${FL_ITEM_WLE}_${FL_TYPE_INDEX}.cpp") - configure_file( "${FL_ITEM}" "${FL_ITEM_WLE}_${FL_TYPE_INDEX}.cpp" @ONLY) - LIST(APPEND CUSTOMOPS_GENERIC_SOURCES ${FL_ITEM_WLE}_${FL_TYPE_INDEX}.cpp ) - endforeach() + file(GLOB_RECURSE COMPILATION_UNITS false ../include/ops/declarable/helpers/cpu/compilation_units/*.cpp.in + ../include/loops/cpu/compilation_units/*.cpp.in ../include/helpers/cpu/loops/*.cpp.in + ../include/ops/impl/compilation_units/*.cpp.in) + + foreach(FL_ITEM ${COMPILATION_UNITS}) + genCompilation(FL_ITEM) endforeach() if (SD_X86_BUILD) diff --git a/libnd4j/cmake/GenCompilation.cmake b/libnd4j/cmake/GenCompilation.cmake new file mode 100644 index 000000000..9f977633d --- /dev/null +++ b/libnd4j/cmake/GenCompilation.cmake @@ -0,0 +1,103 @@ +################################################################################ +# Copyright (c) 2020 Konduit K.K. +# +# This program and the accompanying materials are made available under the +# terms of the Apache License, Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0. +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+# +# SPDX-License-Identifier: Apache-2.0 +################################################################################ + +#/////////////////////////////////////////////////////////////////////////////// +# genCompilation: Generates cpp, cu files +# INPUT: +# $FILE_ITEM template-configuration that utilizes libnd4j type, macros helpers +# defined inside { include/types/types.h, include/system/type_boilerplate.h} +# OUTPUT: +# $CUSTOMOPS_GENERIC_SOURCES generated files will be added into this List +#//////////////////////////////////////////////////////////////////////////////// +# A simple template-configuration file example: +# // hints and defines what types will be generated +# #cmakedefine LIBND4J_TYPE_GEN +# #cmakedefine FLOAT_TYPE_GEN +# // below if defines blocks are needed for correctly handling multiple types +# #if defined(LIBND4J_TYPE_GEN) +# BUILD_DOUBLE_TEMPLATE(template void someFunc, (arg_list,..), +# LIBND4J_TYPES_@FL_TYPE_INDEX@, INDEXING_TYPES); +# #endif +# #if defined(FLOAT_TYPE_GEN) +# BUILD_SINGLE_TEMPLATE(template class SomeClass,, FLOAT_TYPES_@FL_TYPE_INDEX@); +# #endif +#//////////////////////////////////////////////////////////////////////////////// + +function(genCompilation FILE_ITEM) + get_filename_component(FILE_ITEM_WE ${FL_ITEM} NAME_WE) + + set(EXTENSION "cpp") + + if(FL_ITEM MATCHES "cu.in$") + set(EXTENSION "cu") + endif() + + file(READ ${FL_ITEM} CONTENT_FL) + #check content for types + + #set all to false + set (FLOAT_TYPE_GEN 0) + set (INT_TYPE_GEN 0) + set (LIBND4J_TYPE_GEN 0) + set (PAIRWISE_TYPE_GEN 0) + set (RANGE_STOP -1) + + string(REGEX MATCHALL "#cmakedefine[ \t]+[^_]+_TYPE_GEN" TYPE_MATCHES ${CONTENT_FL}) + + foreach(TYPEX ${TYPE_MATCHES}) + set(STOP -1) + if(TYPEX MATCHES "INT_TYPE_GEN$") + set (INT_TYPE_GEN 1) + set(STOP 7) + endif() + if(TYPEX MATCHES "LIBND4J_TYPE_GEN$") + set (LIBND4J_TYPE_GEN 1) + set(STOP 9) + endif() + if(TYPEX MATCHES "FLOAT_TYPE_GEN$") + set (FLOAT_TYPE_GEN 1) + set(STOP 3) + endif() + if(TYPEX MATCHES "PAIRWISE_TYPE_GEN$") + set (PAIRWISE_TYPE_GEN 1) + set(STOP 12) + endif() + if(STOP GREATER RANGE_STOP) + set(RANGE_STOP ${STOP}) + endif() + + endforeach() + + if(RANGE_STOP GREATER -1) + foreach(FL_TYPE_INDEX RANGE 0 ${RANGE_STOP}) + # set OFF if the index is above + if(FL_TYPE_INDEX GREATER 3) + set (FLOAT_TYPE_GEN 0) + endif() + if(FL_TYPE_INDEX GREATER 7) + set (INT_TYPE_GEN 0) + endif() + if(FL_TYPE_INDEX GREATER 9) + set (LIBND4J_TYPE_GEN 0) + endif() + set(GENERATED_SOURCE "${CMAKE_BINARY_DIR}/compilation_units/${FILE_ITEM_WE}_${FL_TYPE_INDEX}.${EXTENSION}") + configure_file( "${FL_ITEM}" "${GENERATED_SOURCE}" @ONLY) + LIST(APPEND CUSTOMOPS_GENERIC_SOURCES ${GENERATED_SOURCE} ) + endforeach() + endif() + + set(CUSTOMOPS_GENERIC_SOURCES ${CUSTOMOPS_GENERIC_SOURCES} PARENT_SCOPE) +endfunction() \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_0.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32.cpp.in similarity index 88% rename from libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_0.cpp rename to libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32.cpp.in index 97318dae8..2030c8017 100644 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_0.cpp +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32.cpp.in @@ -18,7 +18,6 @@ // // @author Yurii Shyrma (iuriish@yahoo.com) // - -#include "./IndexReductionLoops.hpp" - -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const 
int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_0, (sd::DataType::INT32, int32_t)); \ No newline at end of file +#include <helpers/cpu/loops/IndexReductionLoops.hpp> +#cmakedefine LIBND4J_TYPE_GEN +BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_@FL_TYPE_INDEX@, (sd::DataType::INT32, int32_t)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_2.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_2.cpp deleted file mode 100644 index e22635b85..000000000 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_2.cpp +++ /dev/null @@ -1,24 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author Yurii Shyrma (iuriish@yahoo.com) -// - -#include "./IndexReductionLoops.hpp" - -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_2, (sd::DataType::INT32, int32_t)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_3.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_3.cpp deleted file mode 100644 index f85096f0a..000000000 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_3.cpp +++ /dev/null @@ -1,24 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License.
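For context on how the generation above plays out: genCompilation() scans each template for #cmakedefine *_TYPE_GEN hints, takes the largest matching partition range (FLOAT_TYPES spans indexes 0-3, INT_TYPES 0-7, LIBND4J_TYPES 0-9, PAIRWISE_TYPES 0-12), and calls configure_file() once per FL_TYPE_INDEX, writing the results to ${CMAKE_BINARY_DIR}/compilation_units. Because @ONLY is used, only @VAR@ references are substituted, and each #cmakedefine line becomes a #define when its variable is set, or a commented #undef when it is not; indexes past a shorter list's range switch that list's flag off, which is what the GREATER checks in the loop do. (One CMake subtlety: the function declares its parameter as FILE_ITEM but the body reads ${FL_ITEM}; this still resolves, since CMake functions can read variables from the calling scope, where the foreach() loop variable is named FL_ITEM.) As an illustrative sketch, not actual build output, the IndexReductionLoops_int32 template above expands for FL_TYPE_INDEX = 0 to roughly:

    // compilation_units/IndexReductionLoops_int32_0.cpp as produced by
    // configure_file() (illustrative)
    #include <helpers/cpu/loops/IndexReductionLoops.hpp>
    #define LIBND4J_TYPE_GEN
    BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops,
                          ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo,
                                            void* z, const Nd4jLong* zShapeInfo,
                                            const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets,
                                            void* vextraParams),
                          LIBND4J_TYPES_0, (sd::DataType::INT32, int32_t));

which matches the hand-written IndexReductionLoops_int32_0.cpp that this patch deletes.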
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author Yurii Shyrma (iuriish@yahoo.com) -// - -#include "./IndexReductionLoops.hpp" - -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_3, (sd::DataType::INT32, int32_t)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_4.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_4.cpp deleted file mode 100644 index 5272eba7e..000000000 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_4.cpp +++ /dev/null @@ -1,24 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author Yurii Shyrma (iuriish@yahoo.com) -// - -#include "./IndexReductionLoops.hpp" - -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_4, (sd::DataType::INT32, int32_t)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_5.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_5.cpp deleted file mode 100644 index 683d6d0c0..000000000 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_5.cpp +++ /dev/null @@ -1,24 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author Yurii Shyrma (iuriish@yahoo.com) -// - -#include "./IndexReductionLoops.hpp" - -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_5, (sd::DataType::INT32, int32_t)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_6.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_6.cpp deleted file mode 100644 index 0ff70b7b5..000000000 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_6.cpp +++ /dev/null @@ -1,24 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author Yurii Shyrma (iuriish@yahoo.com) -// - -#include "./IndexReductionLoops.hpp" - -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_6, (sd::DataType::INT32, int32_t)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_7.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_7.cpp deleted file mode 100644 index 64d93c5e3..000000000 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_7.cpp +++ /dev/null @@ -1,24 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author Yurii Shyrma (iuriish@yahoo.com) -// - -#include "./IndexReductionLoops.hpp" - -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_7, (sd::DataType::INT32, int32_t)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_8.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_8.cpp deleted file mode 100644 index dd586ab26..000000000 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_8.cpp +++ /dev/null @@ -1,24 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author Yurii Shyrma (iuriish@yahoo.com) -// - -#include "./IndexReductionLoops.hpp" - -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_8, (sd::DataType::INT32, int32_t)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_9.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_9.cpp deleted file mode 100644 index bb7ef80f7..000000000 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_9.cpp +++ /dev/null @@ -1,24 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author Yurii Shyrma (iuriish@yahoo.com) -// - -#include "./IndexReductionLoops.hpp" - -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_9, (sd::DataType::INT32, int32_t)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_1.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64.cpp.in similarity index 88% rename from libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_1.cpp rename to libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64.cpp.in index 680bf7a64..0647ce17d 100644 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_1.cpp +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64.cpp.in @@ -19,6 +19,6 @@ // @author Yurii Shyrma (iuriish@yahoo.com) // -#include "./IndexReductionLoops.hpp" - -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_1, (sd::DataType::INT32, int32_t)); \ No newline at end of file +#include <helpers/cpu/loops/IndexReductionLoops.hpp> +#cmakedefine LIBND4J_TYPE_GEN +BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_@FL_TYPE_INDEX@, (sd::DataType::INT64, Nd4jLong)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_0.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_0.cpp deleted file mode 100644 index 8d0c55ce1..000000000 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_0.cpp +++ /dev/null @@ -1,24 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License.
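The int64 template above is the same generator with the output pair pinned to (sd::DataType::INT64, Nd4jLong), so index-reduction results can be written as 64-bit values. Loosely, and assuming the usual BUILD_DOUBLE_TEMPLATE semantics of crossing every type in the first list with every (dtype, ctype) pair in the second, each generated unit reduces to explicit instantiations of the form:

    // Hypothetical expansion for a single input type (float); the macro emits
    // one such instantiation per entry in LIBND4J_TYPES_N
    template void sd::IndexReductionLoops<float, Nd4jLong>::wrapIndexReduce(
            const int opNum, const void* vx, const Nd4jLong* xShapeInfo,
            void* z, const Nd4jLong* zShapeInfo,
            const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets,
            void* vextraParams);

Splitting LIBND4J_TYPES into ten partitions keeps each translation unit small, which bounds compiler memory use and lets the units build in parallel.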
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author Yurii Shyrma (iuriish@yahoo.com) -// - -#include "./IndexReductionLoops.hpp" - -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_0, (sd::DataType::INT64, Nd4jLong)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_1.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_1.cpp deleted file mode 100644 index 7c5824559..000000000 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_1.cpp +++ /dev/null @@ -1,24 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author Yurii Shyrma (iuriish@yahoo.com) -// - -#include "./IndexReductionLoops.hpp" - -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_1, (sd::DataType::INT64, Nd4jLong)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_2.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_2.cpp deleted file mode 100644 index 3bb6e6b7c..000000000 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_2.cpp +++ /dev/null @@ -1,24 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author Yurii Shyrma (iuriish@yahoo.com) -// - -#include "./IndexReductionLoops.hpp" - -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_2, (sd::DataType::INT64, Nd4jLong)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_3.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_3.cpp deleted file mode 100644 index 49f977901..000000000 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_3.cpp +++ /dev/null @@ -1,24 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author Yurii Shyrma (iuriish@yahoo.com) -// - -#include "./IndexReductionLoops.hpp" - -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_3, (sd::DataType::INT64, Nd4jLong)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_4.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_4.cpp deleted file mode 100644 index 73f0e9872..000000000 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_4.cpp +++ /dev/null @@ -1,24 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author Yurii Shyrma (iuriish@yahoo.com)
-//
-
-#include "./IndexReductionLoops.hpp"
-
-BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_4, (sd::DataType::INT64, Nd4jLong));
\ No newline at end of file
diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_5.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_5.cpp
deleted file mode 100644
index b27aaf341..000000000
--- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_5.cpp
+++ /dev/null
@@ -1,24 +0,0 @@
-BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_5, (sd::DataType::INT64, Nd4jLong));
\ No newline at end of file
diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_6.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_6.cpp
deleted file mode 100644
index 452184acd..000000000
--- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_6.cpp
+++ /dev/null
@@ -1,24 +0,0 @@
-BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_6, (sd::DataType::INT64, Nd4jLong));
\ No newline at end of file
diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_7.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_7.cpp
deleted file mode 100644
index 59cbc51cf..000000000
--- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_7.cpp
+++ /dev/null
@@ -1,24 +0,0 @@
-BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_7, (sd::DataType::INT64, Nd4jLong));
\ No newline at end of file
diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_8.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_8.cpp
deleted file mode 100644
index 51fc49cea..000000000
--- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_8.cpp
+++ /dev/null
@@ -1,24 +0,0 @@
-BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_8, (sd::DataType::INT64, Nd4jLong));
\ No newline at end of file
diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_9.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_9.cpp
deleted file mode 100644
index b774dde52..000000000
--- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_9.cpp
+++ /dev/null
@@ -1,24 +0,0 @@
-BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_9, (sd::DataType::INT64, Nd4jLong));
\ No newline at end of file
diff --git a/libnd4j/include/ops/impl/compilation_units/specials_double_1.cpp b/libnd4j/include/helpers/cpu/loops/Reduction3Loops.cpp.in
similarity index 80%
rename from libnd4j/include/ops/impl/compilation_units/specials_double_1.cpp
rename to libnd4j/include/helpers/cpu/loops/Reduction3Loops.cpp.in
index a61a98870..4f38b4d8f 100644
--- a/libnd4j/include/ops/impl/compilation_units/specials_double_1.cpp
+++ b/libnd4j/include/helpers/cpu/loops/Reduction3Loops.cpp.in
@@ -1,6 +1,5 @@
 /*******************************************************************************
  * Copyright (c) 2015-2018 Skymind, Inc.
- * Copyright (c) 2019-2020 Konduit K.K.
  *
  * This program and the accompanying materials are made available under the
  * terms of the Apache License, Version 2.0 which is available at
@@ -19,8 +18,10 @@
 // @author raver119@gmail.com
 //
 
-#include "../specials_double.hpp"
+#include
+#cmakedefine FLOAT_TYPE_GEN
 
 namespace sd {
-    BUILD_DOUBLE_TEMPLATE(template class DoubleMethods, , LIBND4J_TYPES, LIBND4J_TYPES_1);
-}
\ No newline at end of file
+
+    BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduction3Loops, , LIBND4J_TYPES, FLOAT_TYPES_@FL_TYPE_INDEX@);
+}
diff --git a/libnd4j/include/helpers/cpu/loops/Reduction3Loops_0.cpp b/libnd4j/include/helpers/cpu/loops/Reduction3Loops.hpp
similarity index 96%
rename from libnd4j/include/helpers/cpu/loops/Reduction3Loops_0.cpp
rename to libnd4j/include/helpers/cpu/loops/Reduction3Loops.hpp
index 00b15673b..241dc7e8c 100644
--- a/libnd4j/include/helpers/cpu/loops/Reduction3Loops_0.cpp
+++ b/libnd4j/include/helpers/cpu/loops/Reduction3Loops.hpp
@@ -55,6 +55,5 @@ namespace sd {
             DISPATCH_BY_OPNUM_TT(innerloopReduce3All, PARAMS(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, xTadShapeInfo, xTadOffsets, yTadShapeInfo, yTadOffsets, extraParams, start, stop), REDUCE3_OPS);
 #endif
     }
-
-    BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduction3Loops, , LIBND4J_TYPES, FLOAT_TYPES_0);
+
 }
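The two renames above replace the hand-numbered Reduction3Loops_N.cpp units with a single Reduction3Loops.cpp.in template plus a shared Reduction3Loops.hpp; CMake's configure_file() then stamps out one numbered unit per FLOAT_TYPES_N partition, substituting @FL_TYPE_INDEX@. The CMakeLists.txt side of this change is not part of this section, so the loop below is only a minimal sketch under assumed names: the output directory, the ALL_SOURCES list, and the 0..3 range (inferred from the FLOAT_TYPES_0..3 units this patch renames or deletes) are illustrative, not the real build code.

    # Sketch of the generation loop; FL_TYPE_INDEX is the placeholder the
    # .cpp.in templates reference, everything else here is hypothetical.
    set(LOOPS_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include/helpers/cpu/loops)
    foreach(FL_TYPE_INDEX RANGE 0 3)
        configure_file(${LOOPS_DIR}/Reduction3Loops.cpp.in
                       ${CMAKE_BINARY_DIR}/compilation_units/Reduction3Loops_${FL_TYPE_INDEX}.cpp
                       @ONLY)
        # Compile the generated unit along with the rest of the backend.
        list(APPEND ALL_SOURCES
             ${CMAKE_BINARY_DIR}/compilation_units/Reduction3Loops_${FL_TYPE_INDEX}.cpp)
    endforeach()

With @ONLY, configure_file() substitutes only @VAR@ references and leaves ${...} occurrences in the template untouched, which keeps any literal ${} in the C++ source safe.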
diff --git a/libnd4j/include/helpers/cpu/loops/Reduction3Loops_1.cpp b/libnd4j/include/helpers/cpu/loops/Reduction3Loops_1.cpp
deleted file mode 100644
index da8d3db7e..000000000
--- a/libnd4j/include/helpers/cpu/loops/Reduction3Loops_1.cpp
+++ /dev/null
@@ -1,60 +0,0 @@
-    BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduction3Loops, , LIBND4J_TYPES, FLOAT_TYPES_1);
-}
diff --git a/libnd4j/include/helpers/cpu/loops/Reduction3Loops_2.cpp b/libnd4j/include/helpers/cpu/loops/Reduction3Loops_2.cpp
deleted file mode 100644
index 06588a2fb..000000000
--- a/libnd4j/include/helpers/cpu/loops/Reduction3Loops_2.cpp
+++ /dev/null
@@ -1,60 +0,0 @@
-    BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduction3Loops, , LIBND4J_TYPES, FLOAT_TYPES_2);
-}
diff --git a/libnd4j/include/helpers/cpu/loops/Reduction3Loops_3.cpp b/libnd4j/include/helpers/cpu/loops/Reduction3Loops_3.cpp
deleted file mode 100644
index 405b0275b..000000000
--- a/libnd4j/include/helpers/cpu/loops/Reduction3Loops_3.cpp
+++ /dev/null
@@ -1,60 +0,0 @@
-    BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduction3Loops, , LIBND4J_TYPES, FLOAT_TYPES_3);
-}
diff --git a/libnd4j/include/ops/impl/compilation_units/specials_double_2.cpp b/libnd4j/include/helpers/cpu/loops/ReductionLoops_float.cpp.in
similarity index 79%
rename from libnd4j/include/ops/impl/compilation_units/specials_double_2.cpp
rename to libnd4j/include/helpers/cpu/loops/ReductionLoops_float.cpp.in
index 89deb3d9c..5c1bb227d 100644
--- a/libnd4j/include/ops/impl/compilation_units/specials_double_2.cpp
+++ b/libnd4j/include/helpers/cpu/loops/ReductionLoops_float.cpp.in
@@ -1,6 +1,5 @@
 /*******************************************************************************
  * Copyright (c) 2015-2018 Skymind, Inc.
- * Copyright (c) 2019-2020 Konduit K.K.
  *
  * This program and the accompanying materials are made available under the
  * terms of the Apache License, Version 2.0 which is available at
@@ -19,8 +18,11 @@
 // @author raver119@gmail.com
 //
 
-#include "../specials_double.hpp"
-
+#include
+#cmakedefine FLOAT_TYPE_GEN
 namespace sd {
-    BUILD_DOUBLE_TEMPLATE(template class DoubleMethods, , LIBND4J_TYPES, LIBND4J_TYPES_2);
-}
\ No newline at end of file
+
+    BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT ReductionFloatLoops, , LIBND4J_TYPES, FLOAT_TYPES_@FL_TYPE_INDEX@);
+}
+
+
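The #cmakedefine FLOAT_TYPE_GEN line is also resolved by configure_file(): it becomes "#define FLOAT_TYPE_GEN" when the matching CMake variable is set at configure time, and "/* #undef FLOAT_TYPE_GEN */" otherwise. Note that the "+#include" lines in these hunks lost their angle-bracketed argument in this listing; the header produced by the adjacent rename (here ReductionLoops_float.hpp) is the natural candidate, but that is an inference, not something the hunk shows. Under those assumptions, the generated unit for index 2 would look roughly like:

    // Hypothetical ReductionLoops_float_2.cpp as emitted by configure_file()
    // with FL_TYPE_INDEX=2 and FLOAT_TYPE_GEN enabled. The include target is
    // assumed (see above); the BUILD line is taken verbatim from the diff.
    #include <helpers/cpu/loops/ReductionLoops_float.hpp>
    #define FLOAT_TYPE_GEN

    namespace sd {
        BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT ReductionFloatLoops, , LIBND4J_TYPES, FLOAT_TYPES_2);
    }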
diff --git a/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_0.cpp b/libnd4j/include/helpers/cpu/loops/ReductionLoops_float.hpp
similarity index 95%
rename from libnd4j/include/helpers/cpu/loops/ReductionLoops_float_0.cpp
rename to libnd4j/include/helpers/cpu/loops/ReductionLoops_float.hpp
index a3879bee3..c7ed544b2 100644
--- a/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_0.cpp
+++ b/libnd4j/include/helpers/cpu/loops/ReductionLoops_float.hpp
@@ -44,8 +44,7 @@ namespace sd {
             DISPATCH_BY_OPNUM_TT(innerloopReduce, PARAMS(x, xShapeInfo, z, zShapeInfo, tadShapeInfo, tadOffsets, extraParams, start, stop), REDUCE_FLOAT_OPS);
 #endif
     }
-
-    BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT ReductionFloatLoops, , LIBND4J_TYPES, FLOAT_TYPES_0);
+
 }
diff --git a/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_1.cpp b/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_1.cpp
deleted file mode 100644
index 6dd555037..000000000
--- a/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_1.cpp
+++ /dev/null
@@ -1,52 +0,0 @@
-#include "ReductionLoops.hpp"
-    BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT ReductionFloatLoops, , LIBND4J_TYPES, FLOAT_TYPES_1);
-}
diff --git a/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_2.cpp b/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_2.cpp
deleted file mode 100644
index ce1042b88..000000000
--- a/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_2.cpp
+++ /dev/null
@@ -1,49 +0,0 @@
-#include "ReductionLoops.hpp"
-    BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT ReductionFloatLoops, , LIBND4J_TYPES, FLOAT_TYPES_2);
-}
diff --git a/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_3.cpp b/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_3.cpp
deleted file mode 100644
index 6cfac93bc..000000000
--- a/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_3.cpp
+++ /dev/null
@@ -1,49 +0,0 @@
-#include "ReductionLoops.hpp"
-    BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT ReductionFloatLoops, , LIBND4J_TYPES, FLOAT_TYPES_3);
-}
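Each BUILD_DOUBLE_TEMPLATE(..., LIBND4J_TYPES, FLOAT_TYPES_N) line pins the explicit instantiations for one slice of the (input, output) type product, which is what lets these heavy loop templates build as independent, parallel compilation units. Hand-expanded, one such line amounts to a short list of ordinary explicit instantiations; the pairs below are assumptions, since the real partitions are defined in the type-list headers rather than in this diff:

    // Illustrative expansion of one BUILD_DOUBLE_TEMPLATE line for an assumed
    // FLOAT_TYPES_1 slice; the actual (X, Z) pairs are defined elsewhere.
    template class ND4J_EXPORT sd::ReductionFloatLoops<float, float>;
    template class ND4J_EXPORT sd::ReductionFloatLoops<double, double>;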
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p0.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p.cpp.in
similarity index 87%
rename from libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p0.cpp
rename to libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p.cpp.in
index 08ebd92f7..b3c60462b 100644
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p0.cpp
+++ b/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p.cpp.in
@@ -18,10 +18,11 @@
 // Created by raver119 on 23/09/18.
 //
 
-#include "../broadcasting_bool.hpp"
+#include
+#cmakedefine LIBND4J_TYPE_GEN
 
 namespace functions {
     namespace broadcast {
-        BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT BroadcastBool, , LIBND4J_TYPES_0, BOOL_TYPES);
+        BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT BroadcastBool, , LIBND4J_TYPES_@FL_TYPE_INDEX@, BOOL_TYPES);
     }
 }
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p1.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p1.cpp
deleted file mode 100644
index 16e4c817a..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p1.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-        BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT BroadcastBool, , LIBND4J_TYPES_1, BOOL_TYPES);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p2.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p2.cpp
deleted file mode 100644
index 10b32ca41..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p2.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-        BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT BroadcastBool, , LIBND4J_TYPES_2, BOOL_TYPES);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p3.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p3.cpp
deleted file mode 100644
index 547ddd371..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p3.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-        BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT BroadcastBool, , LIBND4J_TYPES_3, BOOL_TYPES);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p4.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p4.cpp
deleted file mode 100644
index 3c7dee0a0..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p4.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-        BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT BroadcastBool, , LIBND4J_TYPES_4, BOOL_TYPES);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p5.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p5.cpp
deleted file mode 100644
index b71925dab..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p5.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-        BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT BroadcastBool, , LIBND4J_TYPES_5, BOOL_TYPES);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p6.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p6.cpp
deleted file mode 100644
index 23eedd289..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p6.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-        BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT BroadcastBool, , LIBND4J_TYPES_6, BOOL_TYPES);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p7.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p7.cpp
deleted file mode 100644
index c18e7641e..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p7.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-        BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT BroadcastBool, , LIBND4J_TYPES_7, BOOL_TYPES);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p8.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p8.cpp
deleted file mode 100644
index efee34519..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p8.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-        BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT BroadcastBool, , LIBND4J_TYPES_8, BOOL_TYPES);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p9.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p9.cpp
deleted file mode 100644
index 2ab193285..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_bool_p9.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-        BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT BroadcastBool, , LIBND4J_TYPES_9, BOOL_TYPES);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_int_p0.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_int_p.cpp.in
similarity index 89%
rename from libnd4j/include/loops/cpu/compilation_units/broadcast_int_p0.cpp
rename to libnd4j/include/loops/cpu/compilation_units/broadcast_int_p.cpp.in
index d3f5ada43..a36c1a0b2 100644
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_int_p0.cpp
+++ b/libnd4j/include/loops/cpu/compilation_units/broadcast_int_p.cpp.in
@@ -18,10 +18,11 @@
 // Created by raver119 on 23/09/18.
 //
 
-#include "../broadcasting_int.hpp"
+#include
+#cmakedefine INT_TYPE_GEN
 
 namespace functions {
     namespace broadcast {
-        BUILD_SINGLE_TEMPLATE(template class ND4J_EXPORT BroadcastInt, , INTEGER_TYPES_0);
+        BUILD_SINGLE_TEMPLATE(template class ND4J_EXPORT BroadcastInt, , INTEGER_TYPES_@FL_TYPE_INDEX@);
     }
 }
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_int_p1.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_int_p1.cpp
deleted file mode 100644
index 82969bdb0..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_int_p1.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-        BUILD_SINGLE_TEMPLATE(template class ND4J_EXPORT BroadcastInt, , INTEGER_TYPES_1);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_int_p2.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_int_p2.cpp
deleted file mode 100644
index 53d928111..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_int_p2.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-        BUILD_SINGLE_TEMPLATE(template class ND4J_EXPORT BroadcastInt, , INTEGER_TYPES_2);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_int_p3.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_int_p3.cpp
deleted file mode 100644
index eba7b78d1..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_int_p3.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-        BUILD_SINGLE_TEMPLATE(template class ND4J_EXPORT BroadcastInt, , INTEGER_TYPES_3);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_int_p4.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_int_p4.cpp
deleted file mode 100644
index 47b7350f2..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_int_p4.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-        BUILD_SINGLE_TEMPLATE(template class ND4J_EXPORT BroadcastInt, , INTEGER_TYPES_4);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_int_p5.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_int_p5.cpp
deleted file mode 100644
index 3afad08f6..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_int_p5.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-        BUILD_SINGLE_TEMPLATE(template class ND4J_EXPORT BroadcastInt, , INTEGER_TYPES_5);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_int_p6.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_int_p6.cpp
deleted file mode 100644
index 286c2680f..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_int_p6.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-        BUILD_SINGLE_TEMPLATE(template class ND4J_EXPORT BroadcastInt, , INTEGER_TYPES_6);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_int_p7.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_int_p7.cpp
deleted file mode 100644
index 242441561..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_int_p7.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-        BUILD_SINGLE_TEMPLATE(template class ND4J_EXPORT BroadcastInt, , INTEGER_TYPES_7);
-    }
-}
\ No newline at end of file
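BroadcastInt is instantiated over a single integer-type list, so its template uses BUILD_SINGLE_TEMPLATE and its own guard symbol, INT_TYPE_GEN. The guard's consumer is not visible in this section, but what configure_file() does to a #cmakedefine line is fixed; both possible renderings are shown below:

    // How configure_file() rewrites "#cmakedefine INT_TYPE_GEN":
    #define INT_TYPE_GEN        // when INT_TYPE_GEN is set at configure time
    /* #undef INT_TYPE_GEN */   // when it is not set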
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_p0.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_p.cpp.in
similarity index 89%
rename from libnd4j/include/loops/cpu/compilation_units/broadcast_p0.cpp
rename to libnd4j/include/loops/cpu/compilation_units/broadcast_p.cpp.in
index 943186a8a..1dbb4aac4 100644
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_p0.cpp
+++ b/libnd4j/include/loops/cpu/compilation_units/broadcast_p.cpp.in
@@ -18,10 +18,10 @@
 // Created by raver119 on 23/09/18.
 //
 
-#include "../broadcasting.hpp"
-
+#include
+#cmakedefine PAIRWISE_TYPE_GEN
 namespace functions {
     namespace broadcast {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_0);
+        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_@FL_TYPE_INDEX@);
     }
 }
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_p1.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_p1.cpp
deleted file mode 100644
index b38a1c801..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_p1.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_1);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_p10.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_p10.cpp
deleted file mode 100644
index 983305007..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_p10.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_10);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_p11.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_p11.cpp
deleted file mode 100644
index 206b14763..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_p11.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_11);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_p12.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_p12.cpp
deleted file mode 100644
index 825c07adf..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_p12.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_12);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_p2.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_p2.cpp
deleted file mode 100644
index 341f1afb4..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_p2.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_2);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_p3.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_p3.cpp
deleted file mode 100644
index 9aa4c227b..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_p3.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_3);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_p4.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_p4.cpp
deleted file mode 100644
index 7f68bb1f8..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_p4.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_4);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_p5.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_p5.cpp
deleted file mode 100644
index d2e586bf8..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_p5.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_5);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_p6.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_p6.cpp
deleted file mode 100644
index a9db2f7f8..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_p6.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_6);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_p7.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_p7.cpp
deleted file mode 100644
index 9a2111ee5..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_p7.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_7);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_p8.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_p8.cpp
deleted file mode 100644
index 4bbd88ba6..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_p8.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_8);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/broadcast_p9.cpp b/libnd4j/include/loops/cpu/compilation_units/broadcast_p9.cpp
deleted file mode 100644
index 406a8f8e2..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/broadcast_p9.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_9);
-    }
-}
\ No newline at end of file
- * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// Created by raver119 on 23/09/18. -// - -#include "../broadcasting.hpp" - -namespace functions { - namespace broadcast { - BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_9); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_0.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32.cpp.in similarity index 87% rename from libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_0.cpp rename to libnd4j/include/loops/cpu/compilation_units/indexreduce_int32.cpp.in index 89b85485a..97402d38e 100644 --- a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_0.cpp +++ b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32.cpp.in @@ -19,10 +19,10 @@ // @author raver119@gmail.com // -#include "../indexreduce.hpp" - +#include <loops/cpu/indexreduce.hpp> +#cmakedefine LIBND4J_TYPE_GEN namespace functions { namespace indexreduce { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_0, (sd::DataType::INT32, int32_t)); + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_@FL_TYPE_INDEX@, (sd::DataType::INT32, int32_t)); } } \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_2.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_2.cpp deleted file mode 100644 index 47dce2d5a..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_2.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../indexreduce.hpp" - -namespace functions { - namespace indexreduce { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_2, (sd::DataType::INT32, int32_t)); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_3.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_3.cpp deleted file mode 100644 index c3d33e7f1..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_3.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K.
- * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../indexreduce.hpp" - -namespace functions { - namespace indexreduce { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_3, (sd::DataType::INT32, int32_t)); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_4.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_4.cpp deleted file mode 100644 index 37a81e441..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_4.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../indexreduce.hpp" - -namespace functions { - namespace indexreduce { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_4, (sd::DataType::INT32, int32_t)); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_5.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_5.cpp deleted file mode 100644 index 1d6555ddf..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_5.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../indexreduce.hpp" - -namespace functions { - namespace indexreduce { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_5, (sd::DataType::INT32, int32_t)); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_6.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_6.cpp deleted file mode 100644 index 0bb8aef4d..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_6.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../indexreduce.hpp" - -namespace functions { - namespace indexreduce { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_6, (sd::DataType::INT32, int32_t)); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_7.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_7.cpp deleted file mode 100644 index a7d3c733f..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_7.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../indexreduce.hpp" - -namespace functions { - namespace indexreduce { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_7, (sd::DataType::INT32, int32_t)); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_8.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_8.cpp deleted file mode 100644 index 8c5de9653..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_8.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../indexreduce.hpp" - -namespace functions { - namespace indexreduce { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_8, (sd::DataType::INT32, int32_t)); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_9.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_9.cpp deleted file mode 100644 index f61d604e2..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_9.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../indexreduce.hpp" - -namespace functions { - namespace indexreduce { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_9, (sd::DataType::INT32, int32_t)); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_1.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64.cpp.in similarity index 87% rename from libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_1.cpp rename to libnd4j/include/loops/cpu/compilation_units/indexreduce_int64.cpp.in index ada7844cb..30fa30749 100644 --- a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int32_1.cpp +++ b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64.cpp.in @@ -19,10 +19,10 @@ // @author raver119@gmail.com // -#include "../indexreduce.hpp" - +#include <loops/cpu/indexreduce.hpp> +#cmakedefine LIBND4J_TYPE_GEN namespace functions { namespace indexreduce { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_1, (sd::DataType::INT32, int32_t)); + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_@FL_TYPE_INDEX@, (sd::DataType::INT64, Nd4jLong)); } } \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_0.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_0.cpp deleted file mode 100644 index d399f5e0e..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_0.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../indexreduce.hpp" - -namespace functions { - namespace indexreduce { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_0, (sd::DataType::INT64, Nd4jLong)); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_1.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_1.cpp deleted file mode 100644 index c4df4d2e4..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_1.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0.
- * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../indexreduce.hpp" - -namespace functions { - namespace indexreduce { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_1, (sd::DataType::INT64, Nd4jLong)); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_2.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_2.cpp deleted file mode 100644 index 538e369eb..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_2.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../indexreduce.hpp" - -namespace functions { - namespace indexreduce { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_2, (sd::DataType::INT64, Nd4jLong)); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_3.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_3.cpp deleted file mode 100644 index b0d082bce..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_3.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../indexreduce.hpp" - -namespace functions { - namespace indexreduce { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_3, (sd::DataType::INT64, Nd4jLong)); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_4.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_4.cpp deleted file mode 100644 index 98e13bb63..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_4.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../indexreduce.hpp" - -namespace functions { - namespace indexreduce { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_4, (sd::DataType::INT64, Nd4jLong)); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_5.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_5.cpp deleted file mode 100644 index 4b7f599d9..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_5.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../indexreduce.hpp" - -namespace functions { - namespace indexreduce { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_5, (sd::DataType::INT64, Nd4jLong)); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_6.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_6.cpp deleted file mode 100644 index 8d7de9822..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_6.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../indexreduce.hpp" - -namespace functions { - namespace indexreduce { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_6, (sd::DataType::INT64, Nd4jLong)); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_7.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_7.cpp deleted file mode 100644 index 8f9befddb..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_7.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../indexreduce.hpp" - -namespace functions { - namespace indexreduce { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_7, (sd::DataType::INT64, Nd4jLong)); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_8.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_8.cpp deleted file mode 100644 index b38112631..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_8.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../indexreduce.hpp" - -namespace functions { - namespace indexreduce { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_8, (sd::DataType::INT64, Nd4jLong)); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_9.cpp b/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_9.cpp deleted file mode 100644 index baacdc432..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/indexreduce_int64_9.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../indexreduce.hpp" - -namespace functions { - namespace indexreduce { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES_9, (sd::DataType::INT64, Nd4jLong)); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/pairwise_p0.cpp b/libnd4j/include/loops/cpu/compilation_units/pairwise_p.cpp.in similarity index 89% rename from libnd4j/include/loops/cpu/compilation_units/pairwise_p0.cpp rename to libnd4j/include/loops/cpu/compilation_units/pairwise_p.cpp.in index d498a4400..bbf809de8 100644 --- a/libnd4j/include/loops/cpu/compilation_units/pairwise_p0.cpp +++ b/libnd4j/include/loops/cpu/compilation_units/pairwise_p.cpp.in @@ -18,10 +18,10 @@ // Created by raver119 on 23/09/18. // -#include "loops/cpu/pairwise.hpp" - +#include <loops/cpu/pairwise.hpp> +#cmakedefine PAIRWISE_TYPE_GEN namespace functions { namespace pairwise_transforms { - BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_0); + BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_@FL_TYPE_INDEX@); } } \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/pairwise_p1.cpp b/libnd4j/include/loops/cpu/compilation_units/pairwise_p1.cpp deleted file mode 100644 index 2a665d9d2..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/pairwise_p1.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// Created by raver119 on 23/09/18. -// - -#include "loops/cpu/pairwise.hpp" - -namespace functions { - namespace pairwise_transforms { - BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_1); - } - -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/pairwise_p10.cpp b/libnd4j/include/loops/cpu/compilation_units/pairwise_p10.cpp deleted file mode 100644 index 4a8aaf94a..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/pairwise_p10.cpp +++ /dev/null @@ -1,27 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// Created by raver119 on 23/09/18. -// - -#include "loops/cpu/pairwise.hpp" - -namespace functions { - namespace pairwise_transforms { - BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_10); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/pairwise_p11.cpp b/libnd4j/include/loops/cpu/compilation_units/pairwise_p11.cpp deleted file mode 100644 index 1f4eb1389..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/pairwise_p11.cpp +++ /dev/null @@ -1,27 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// Created by raver119 on 23/09/18. -// - -#include "loops/cpu/pairwise.hpp" - -namespace functions { - namespace pairwise_transforms { - BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_11); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/pairwise_p12.cpp b/libnd4j/include/loops/cpu/compilation_units/pairwise_p12.cpp deleted file mode 100644 index 3c0984db9..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/pairwise_p12.cpp +++ /dev/null @@ -1,27 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// Created by raver119 on 23/09/18. 
-// - -#include "loops/cpu/pairwise.hpp" - -namespace functions { - namespace pairwise_transforms { - BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_12); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/pairwise_p2.cpp b/libnd4j/include/loops/cpu/compilation_units/pairwise_p2.cpp deleted file mode 100644 index 0725ae862..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/pairwise_p2.cpp +++ /dev/null @@ -1,27 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// Created by raver119 on 23/09/18. -// - -#include "loops/cpu/pairwise.hpp" - -namespace functions { - namespace pairwise_transforms { - BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_2); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/pairwise_p3.cpp b/libnd4j/include/loops/cpu/compilation_units/pairwise_p3.cpp deleted file mode 100644 index f9dcf3519..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/pairwise_p3.cpp +++ /dev/null @@ -1,27 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// Created by raver119 on 23/09/18. -// - -#include "loops/cpu/pairwise.hpp" - -namespace functions { - namespace pairwise_transforms { - BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_3); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/pairwise_p4.cpp b/libnd4j/include/loops/cpu/compilation_units/pairwise_p4.cpp deleted file mode 100644 index a7b63427d..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/pairwise_p4.cpp +++ /dev/null @@ -1,27 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. 
- * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// Created by raver119 on 23/09/18. -// - -#include "loops/cpu/pairwise.hpp" - -namespace functions { - namespace pairwise_transforms { - BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_4); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/pairwise_p5.cpp b/libnd4j/include/loops/cpu/compilation_units/pairwise_p5.cpp deleted file mode 100644 index 3f8557ea9..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/pairwise_p5.cpp +++ /dev/null @@ -1,27 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// Created by raver119 on 23/09/18. -// - -#include "loops/cpu/pairwise.hpp" - -namespace functions { - namespace pairwise_transforms { - BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_5); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/pairwise_p6.cpp b/libnd4j/include/loops/cpu/compilation_units/pairwise_p6.cpp deleted file mode 100644 index 2b5dc9ed4..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/pairwise_p6.cpp +++ /dev/null @@ -1,27 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// Created by raver119 on 23/09/18. 
-// - -#include "loops/cpu/pairwise.hpp" - -namespace functions { - namespace pairwise_transforms { - BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_6); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/pairwise_p7.cpp b/libnd4j/include/loops/cpu/compilation_units/pairwise_p7.cpp deleted file mode 100644 index f5deef719..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/pairwise_p7.cpp +++ /dev/null @@ -1,27 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// Created by raver119 on 23/09/18. -// - -#include "loops/cpu/pairwise.hpp" - -namespace functions { - namespace pairwise_transforms { - BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_7); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/pairwise_p8.cpp b/libnd4j/include/loops/cpu/compilation_units/pairwise_p8.cpp deleted file mode 100644 index e2fa75bbb..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/pairwise_p8.cpp +++ /dev/null @@ -1,27 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// Created by raver119 on 23/09/18. -// - -#include "loops/cpu/pairwise.hpp" - -namespace functions { - namespace pairwise_transforms { - BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_8); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/pairwise_p9.cpp b/libnd4j/include/loops/cpu/compilation_units/pairwise_p9.cpp deleted file mode 100644 index eb3da276e..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/pairwise_p9.cpp +++ /dev/null @@ -1,27 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. 
- * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// Created by raver119 on 23/09/18. -// - -#include "loops/cpu/pairwise.hpp" - -namespace functions { - namespace pairwise_transforms { - BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_9); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/random_0.cpp b/libnd4j/include/loops/cpu/compilation_units/random.cpp.in similarity index 89% rename from libnd4j/include/loops/cpu/compilation_units/random_0.cpp rename to libnd4j/include/loops/cpu/compilation_units/random.cpp.in index 6424ccb6e..921532ac8 100644 --- a/libnd4j/include/loops/cpu/compilation_units/random_0.cpp +++ b/libnd4j/include/loops/cpu/compilation_units/random.cpp.in @@ -18,10 +18,10 @@ // @author raver119@gmail.com // -#include "../random.hpp" - +#include <loops/cpu/random.hpp> +#cmakedefine FLOAT_TYPE_GEN namespace functions { namespace random { - BUILD_SINGLE_TEMPLATE(template class ND4J_EXPORT RandomFunction, , FLOAT_TYPES_0); + BUILD_SINGLE_TEMPLATE(template class ND4J_EXPORT RandomFunction, , FLOAT_TYPES_@FL_TYPE_INDEX@); } } \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/random_1.cpp b/libnd4j/include/loops/cpu/compilation_units/random_1.cpp deleted file mode 100644 index 316d55bf6..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/random_1.cpp +++ /dev/null @@ -1,27 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../random.hpp" - -namespace functions { - namespace random { - BUILD_SINGLE_TEMPLATE(template class ND4J_EXPORT RandomFunction, , FLOAT_TYPES_1); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/random_2.cpp b/libnd4j/include/loops/cpu/compilation_units/random_2.cpp deleted file mode 100644 index 90d080b63..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/random_2.cpp +++ /dev/null @@ -1,27 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0.
- * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../random.hpp" - -namespace functions { - namespace random { - BUILD_SINGLE_TEMPLATE(template class ND4J_EXPORT RandomFunction, , FLOAT_TYPES_2); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/random_3.cpp b/libnd4j/include/loops/cpu/compilation_units/random_3.cpp deleted file mode 100644 index 97e5211e8..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/random_3.cpp +++ /dev/null @@ -1,27 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../random.hpp" - -namespace functions { - namespace random { - BUILD_SINGLE_TEMPLATE(template class ND4J_EXPORT RandomFunction, , FLOAT_TYPES_3); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_0.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16.cpp.in similarity index 89% rename from libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_0.cpp rename to libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16.cpp.in index 19483c1df..68616c3f9 100644 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_0.cpp +++ b/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16.cpp.in @@ -19,10 +19,10 @@ // @author raver119@gmail.com // -#include "../reduce3.hpp" - +#include <loops/cpu/reduce3.hpp> +#cmakedefine LIBND4J_TYPE_GEN namespace functions { namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_0, FLOAT_TYPES_3); + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_@FL_TYPE_INDEX@, FLOAT_TYPES_3); } } \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_4.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_4.cpp deleted file mode 100644 index 0802e11f4..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_4.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0.
- * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_4, FLOAT_TYPES_3); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_5.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_5.cpp deleted file mode 100644 index 87ec2d3f8..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_5.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_5, FLOAT_TYPES_3); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_6.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_6.cpp deleted file mode 100644 index 10dc7d69b..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_6.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_6, FLOAT_TYPES_3); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_7.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_7.cpp deleted file mode 100644 index 28ba56376..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_7.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_7, FLOAT_TYPES_3); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_8.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_8.cpp deleted file mode 100644 index 8087f6a07..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_8.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_8, FLOAT_TYPES_3); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_9.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_9.cpp deleted file mode 100644 index 4a5186cf0..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_9.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. 
- * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_9, FLOAT_TYPES_3); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_1.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_double.cpp.in similarity index 89% rename from libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_1.cpp rename to libnd4j/include/loops/cpu/compilation_units/reduce3_double.cpp.in index 88225bd85..5c722838d 100644 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_1.cpp +++ b/libnd4j/include/loops/cpu/compilation_units/reduce3_double.cpp.in @@ -19,10 +19,10 @@ // @author raver119@gmail.com // -#include "../reduce3.hpp" - +#include +#cmakedefine LIBND4J_TYPE_GEN namespace functions { namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_1, FLOAT_TYPES_3); + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_@FL_TYPE_INDEX@, FLOAT_TYPES_2); } } \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_double_0.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_double_0.cpp deleted file mode 100644 index 34172b4b3..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_double_0.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_0, FLOAT_TYPES_2); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_double_1.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_double_1.cpp deleted file mode 100644 index c2f7c7e9c..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_double_1.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_1, FLOAT_TYPES_2); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_double_2.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_double_2.cpp deleted file mode 100644 index 41c1dd679..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_double_2.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_2, FLOAT_TYPES_2); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_double_3.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_double_3.cpp deleted file mode 100644 index a44085232..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_double_3.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. 
- * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_3, FLOAT_TYPES_2); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_double_4.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_double_4.cpp deleted file mode 100644 index d346d175b..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_double_4.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_4, FLOAT_TYPES_2); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_double_5.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_double_5.cpp deleted file mode 100644 index 86cf48ff7..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_double_5.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_5, FLOAT_TYPES_2); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_double_6.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_double_6.cpp deleted file mode 100644 index 92f7ac39e..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_double_6.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_6, FLOAT_TYPES_2); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_double_7.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_double_7.cpp deleted file mode 100644 index eb216f89f..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_double_7.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_7, FLOAT_TYPES_2); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_double_8.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_double_8.cpp deleted file mode 100644 index d1e9f8c96..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_double_8.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. 
- * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_8, FLOAT_TYPES_2); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_double_9.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_double_9.cpp deleted file mode 100644 index fa00bde19..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_double_9.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_9, FLOAT_TYPES_2); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_2.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float.cpp.in similarity index 89% rename from libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_2.cpp rename to libnd4j/include/loops/cpu/compilation_units/reduce3_float.cpp.in index 7bed85c5d..ee127c2d9 100644 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_2.cpp +++ b/libnd4j/include/loops/cpu/compilation_units/reduce3_float.cpp.in @@ -19,10 +19,10 @@ // @author raver119@gmail.com // -#include "../reduce3.hpp" - +#include +#cmakedefine LIBND4J_TYPE_GEN namespace functions { namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_2, FLOAT_TYPES_3); + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_@FL_TYPE_INDEX@, FLOAT_TYPES_1); } } \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_3.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float16.cpp.in similarity index 89% rename from libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_3.cpp rename to libnd4j/include/loops/cpu/compilation_units/reduce3_float16.cpp.in index 87042d342..65c2b563a 100644 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_bfloat16_3.cpp +++ b/libnd4j/include/loops/cpu/compilation_units/reduce3_float16.cpp.in @@ -19,10 +19,10 @@ // @author raver119@gmail.com // -#include "../reduce3.hpp" - +#include +#cmakedefine LIBND4J_TYPE_GEN namespace functions { namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_3, FLOAT_TYPES_3); + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_@FL_TYPE_INDEX@, FLOAT_TYPES_0); } } \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_0.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_0.cpp deleted file mode 100644 index cb212b06b..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_0.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
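Within each template, two substitutions do the work: @FL_TYPE_INDEX@ is replaced by the loop index, and #cmakedefine LIBND4J_TYPE_GEN becomes #define LIBND4J_TYPE_GEN when the variable is set (or a commented-out #undef when it is not), which marks the unit as configure-time generated. A one-unit illustration, with values chosen only for the example:

    # Illustration: configure a single unit by hand. Afterwards the
    # template line 'LIBND4J_TYPES_@FL_TYPE_INDEX@, FLOAT_TYPES_0' reads
    # 'LIBND4J_TYPES_3, FLOAT_TYPES_0' in the generated .cpp file.
    set(FL_TYPE_INDEX 3)
    set(LIBND4J_TYPE_GEN ON)
    configure_file(reduce3_float16.cpp.in reduce3_float16_3.cpp @ONLY)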
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_0, FLOAT_TYPES_0); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_1.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_1.cpp deleted file mode 100644 index 4a7fdee8a..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_1.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_1, FLOAT_TYPES_0); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_2.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_2.cpp deleted file mode 100644 index aaafe1bae..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_2.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_2, FLOAT_TYPES_0); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_3.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_3.cpp deleted file mode 100644 index 9b8cf0c6a..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_3.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. 
- * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_3, FLOAT_TYPES_0); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_4.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_4.cpp deleted file mode 100644 index 4d02ffe53..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_4.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_4, FLOAT_TYPES_0); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_5.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_5.cpp deleted file mode 100644 index 88ce3e5e2..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_5.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_5, FLOAT_TYPES_0); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_6.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_6.cpp deleted file mode 100644 index 26d4df1dd..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_6.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_6, FLOAT_TYPES_0); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_7.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_7.cpp deleted file mode 100644 index 3b04f47aa..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_7.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_7, FLOAT_TYPES_0); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_8.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_8.cpp deleted file mode 100644 index c87090229..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_8.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. 
- * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_8, FLOAT_TYPES_0); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_9.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_9.cpp deleted file mode 100644 index d5acb3935..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_float16_9.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_9, FLOAT_TYPES_0); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float_0.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float_0.cpp deleted file mode 100644 index e7e1fab61..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_float_0.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_0, FLOAT_TYPES_1); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float_1.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float_1.cpp deleted file mode 100644 index 98ccf8b35..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_float_1.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_1, FLOAT_TYPES_1); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float_2.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float_2.cpp deleted file mode 100644 index 6782d74ed..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_float_2.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_2, FLOAT_TYPES_1); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float_3.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float_3.cpp deleted file mode 100644 index 915b0ac0e..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_float_3.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. 
- * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_3, FLOAT_TYPES_1); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float_4.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float_4.cpp deleted file mode 100644 index d34e61181..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_float_4.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_4, FLOAT_TYPES_1); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float_5.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float_5.cpp deleted file mode 100644 index 89a8f164f..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_float_5.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_5, FLOAT_TYPES_1); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float_6.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float_6.cpp deleted file mode 100644 index 70e482b8b..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_float_6.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_6, FLOAT_TYPES_1); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float_7.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float_7.cpp deleted file mode 100644 index 88663cd7d..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_float_7.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_7, FLOAT_TYPES_1); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float_8.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float_8.cpp deleted file mode 100644 index d5399a4d8..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_float_8.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. 
- * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_8, FLOAT_TYPES_1); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce3_float_9.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce3_float_9.cpp deleted file mode 100644 index e27e7ab12..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce3_float_9.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce3.hpp" - -namespace functions { - namespace reduce3 { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES_9, FLOAT_TYPES_1); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce_float_0.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce_float.cpp.in similarity index 87% rename from libnd4j/include/loops/cpu/compilation_units/reduce_float_0.cpp rename to libnd4j/include/loops/cpu/compilation_units/reduce_float.cpp.in index de4619f29..3837c7810 100644 --- a/libnd4j/include/loops/cpu/compilation_units/reduce_float_0.cpp +++ b/libnd4j/include/loops/cpu/compilation_units/reduce_float.cpp.in @@ -19,10 +19,10 @@ // @author raver119@gmail.com // -#include "../reduce/reduce_float.hpp" - +#include +#cmakedefine FLOAT_TYPE_GEN namespace functions { namespace reduce { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT ReduceFloatFunction, , LIBND4J_TYPES, FLOAT_TYPES_0); + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT ReduceFloatFunction, , LIBND4J_TYPES, FLOAT_TYPES_@FL_TYPE_INDEX@); } } diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce_float_2.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce_float_2.cpp deleted file mode 100644 index 8cc2795a4..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce_float_2.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce/reduce_float.hpp" - -namespace functions { - namespace reduce { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT ReduceFloatFunction, , LIBND4J_TYPES, FLOAT_TYPES_2); - } -} diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce_float_3.cpp b/libnd4j/include/loops/cpu/compilation_units/reduce_float_3.cpp deleted file mode 100644 index 0b94831c3..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/reduce_float_3.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../reduce/reduce_float.hpp" - -namespace functions { - namespace reduce { - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT ReduceFloatFunction, , LIBND4J_TYPES, FLOAT_TYPES_3); - } -} diff --git a/libnd4j/include/loops/cpu/compilation_units/scalar_p0.cpp b/libnd4j/include/loops/cpu/compilation_units/scalar_p.cpp.in similarity index 89% rename from libnd4j/include/loops/cpu/compilation_units/scalar_p0.cpp rename to libnd4j/include/loops/cpu/compilation_units/scalar_p.cpp.in index 32f670f46..dc024170d 100644 --- a/libnd4j/include/loops/cpu/compilation_units/scalar_p0.cpp +++ b/libnd4j/include/loops/cpu/compilation_units/scalar_p.cpp.in @@ -18,10 +18,10 @@ // Created by raver on 9/28/2018. // -#include "../scalar.hpp" - +#include +#cmakedefine PAIRWISE_TYPE_GEN namespace functions { namespace scalar { - BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_0); + BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_@FL_TYPE_INDEX@); } } \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/scalar_p1.cpp b/libnd4j/include/loops/cpu/compilation_units/scalar_p1.cpp deleted file mode 100644 index 5146d70bd..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/scalar_p1.cpp +++ /dev/null @@ -1,27 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// Created by raver on 9/28/2018. -// - -#include "../scalar.hpp" - -namespace functions { - namespace scalar { - BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_1); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/compilation_units/scalar_p10.cpp b/libnd4j/include/loops/cpu/compilation_units/scalar_p10.cpp deleted file mode 100644 index 7175a8603..000000000 --- a/libnd4j/include/loops/cpu/compilation_units/scalar_p10.cpp +++ /dev/null @@ -1,27 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
diff --git a/libnd4j/include/loops/cpu/compilation_units/scalar_p10.cpp b/libnd4j/include/loops/cpu/compilation_units/scalar_p10.cpp
deleted file mode 100644
index 7175a8603..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/scalar_p10.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// Created by raver on 9/28/2018.
-//
-
-#include "../scalar.hpp"
-
-namespace functions {
-    namespace scalar {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_10);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/scalar_p11.cpp b/libnd4j/include/loops/cpu/compilation_units/scalar_p11.cpp
deleted file mode 100644
index a6b7bafac..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/scalar_p11.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// Created by raver on 9/28/2018.
-//
-
-#include "../scalar.hpp"
-
-namespace functions {
-    namespace scalar {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_11);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/scalar_p12.cpp b/libnd4j/include/loops/cpu/compilation_units/scalar_p12.cpp
deleted file mode 100644
index 69cbeb7ff..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/scalar_p12.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// Created by raver on 9/28/2018.
-//
-
-#include "../scalar.hpp"
-
-namespace functions {
-    namespace scalar {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_12);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/scalar_p2.cpp b/libnd4j/include/loops/cpu/compilation_units/scalar_p2.cpp
deleted file mode 100644
index 1e0f25909..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/scalar_p2.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// Created by raver on 9/28/2018.
-//
-
-#include "../scalar.hpp"
-
-namespace functions {
-    namespace scalar {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_2);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/scalar_p3.cpp b/libnd4j/include/loops/cpu/compilation_units/scalar_p3.cpp
deleted file mode 100644
index e4f2c6457..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/scalar_p3.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// Created by raver on 9/28/2018.
-//
-
-#include "../scalar.hpp"
-
-namespace functions {
-    namespace scalar {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_3);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/scalar_p4.cpp b/libnd4j/include/loops/cpu/compilation_units/scalar_p4.cpp
deleted file mode 100644
index daabf9325..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/scalar_p4.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// Created by raver on 9/28/2018.
-//
-
-#include "../scalar.hpp"
-
-namespace functions {
-    namespace scalar {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_4);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/scalar_p5.cpp b/libnd4j/include/loops/cpu/compilation_units/scalar_p5.cpp
deleted file mode 100644
index cadad858e..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/scalar_p5.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// Created by raver on 9/28/2018.
-//
-
-#include "../scalar.hpp"
-
-namespace functions {
-    namespace scalar {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_5);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/scalar_p6.cpp b/libnd4j/include/loops/cpu/compilation_units/scalar_p6.cpp
deleted file mode 100644
index 7e56f65c7..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/scalar_p6.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// Created by raver on 9/28/2018.
-//
-
-#include "../scalar.hpp"
-
-namespace functions {
-    namespace scalar {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_6);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/scalar_p7.cpp b/libnd4j/include/loops/cpu/compilation_units/scalar_p7.cpp
deleted file mode 100644
index 85cedcecd..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/scalar_p7.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// Created by raver on 9/28/2018.
-//
-
-#include "../scalar.hpp"
-
-namespace functions {
-    namespace scalar {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_7);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/scalar_p8.cpp b/libnd4j/include/loops/cpu/compilation_units/scalar_p8.cpp
deleted file mode 100644
index d593889b8..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/scalar_p8.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// Created by raver on 9/28/2018.
-//
-
-#include "../scalar.hpp"
-
-namespace functions {
-    namespace scalar {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_8);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/scalar_p9.cpp b/libnd4j/include/loops/cpu/compilation_units/scalar_p9.cpp
deleted file mode 100644
index 14eb788d7..000000000
--- a/libnd4j/include/loops/cpu/compilation_units/scalar_p9.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// Created by raver on 9/28/2018.
-//
-
-#include "../scalar.hpp"
-
-namespace functions {
-    namespace scalar {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_9);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_0.cu b/libnd4j/include/loops/cuda/compilation_units/broadcasting.cu.in
similarity index 89%
rename from libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_0.cu
rename to libnd4j/include/loops/cuda/compilation_units/broadcasting.cu.in
index d7902af87..6349dcfc9 100644
--- a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_0.cu
+++ b/libnd4j/include/loops/cuda/compilation_units/broadcasting.cu.in
@@ -18,10 +18,10 @@
 // @author raver119@gmail.com
 //
 
-#include "../../broadcasting.chpp"
-
+#include
+#cmakedefine PAIRWISE_TYPE_GEN
 namespace functions {
     namespace broadcast {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_0);
+        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_@FL_TYPE_INDEX@);
     }
 }
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_1.cu b/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_1.cu
deleted file mode 100644
index b24ebdb6c..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_1.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../broadcasting.chpp"
-
-namespace functions {
-    namespace broadcast {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_1);
-    }
-}
\ No newline at end of file
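Splitting the instantiations across PAIRWISE_TYPES_0 through PAIRWISE_TYPES_12 is a compile-time tactic: each translation unit stays small enough to bound per-unit compiler memory, and the thirteen units can build in parallel. The C++ mechanism underneath is pairing extern template declarations (suppress instantiation here) with explicit instantiation definitions (emit it here). A minimal sketch, with ToyBroadcast as a hypothetical stand-in rather than libnd4j's actual class:

```cpp
// split_instantiations.cpp -- why instantiations can be sharded across units.
#include <iostream>

template <typename X>
struct ToyBroadcast {
    static X apply(X a, X b) { return a > b ? a : b; }
};

// In a shared header, this tells every other unit NOT to instantiate the class:
extern template struct ToyBroadcast<double>;

// Exactly one unit (e.g. the one generated for chunk 0) provides the definition:
template struct ToyBroadcast<double>;

int main() {
    std::cout << ToyBroadcast<double>::apply(1.5, 2.5) << "\n"; // prints 2.5
    return 0;
}
```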
diff --git a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_10.cu b/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_10.cu
deleted file mode 100644
index 4d19a893c..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_10.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../broadcasting.chpp"
-
-namespace functions {
-    namespace broadcast {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_10);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_11.cu b/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_11.cu
deleted file mode 100644
index 8b643965b..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_11.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../broadcasting.chpp"
-
-namespace functions {
-    namespace broadcast {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_11);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_12.cu b/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_12.cu
deleted file mode 100644
index 935297a53..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_12.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../broadcasting.chpp"
-
-namespace functions {
-    namespace broadcast {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_12);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_2.cu b/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_2.cu
deleted file mode 100644
index 7d7fdc1b6..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_2.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../broadcasting.chpp"
-
-namespace functions {
-    namespace broadcast {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_2);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_3.cu b/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_3.cu
deleted file mode 100644
index d5c09f114..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_3.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../broadcasting.chpp"
-
-namespace functions {
-    namespace broadcast {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_3);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_4.cu b/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_4.cu
deleted file mode 100644
index f3c64a91a..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_4.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../broadcasting.chpp"
-
-namespace functions {
-    namespace broadcast {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_4);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_5.cu b/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_5.cu
deleted file mode 100644
index 5ca557a30..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_5.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../broadcasting.chpp"
-
-namespace functions {
-    namespace broadcast {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_5);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_6.cu b/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_6.cu
deleted file mode 100644
index 9c53e8b36..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_6.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../broadcasting.chpp"
-
-namespace functions {
-    namespace broadcast {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_6);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_7.cu b/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_7.cu
deleted file mode 100644
index a64b6f0d3..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_7.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../broadcasting.chpp"
-
-namespace functions {
-    namespace broadcast {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_7);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_8.cu b/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_8.cu
deleted file mode 100644
index 4404fed7c..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_8.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../broadcasting.chpp"
-
-namespace functions {
-    namespace broadcast {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_8);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_9.cu b/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_9.cu
deleted file mode 100644
index dbb560f5c..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_9.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../broadcasting.chpp"
-
-namespace functions {
-    namespace broadcast {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_9);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_0.cu b/libnd4j/include/loops/cuda/compilation_units/pairwise.cu.in
similarity index 88%
rename from libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_0.cu
rename to libnd4j/include/loops/cuda/compilation_units/pairwise.cu.in
index e57433ae2..312ed7416 100644
--- a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_0.cu
+++ b/libnd4j/include/loops/cuda/compilation_units/pairwise.cu.in
@@ -18,10 +18,10 @@
 // @author raver119@gmail.com
 //
 
-#include "../../pairwise.chpp"
-
+#include
+#cmakedefine PAIRWISE_TYPE_GEN
 namespace functions {
     namespace pairwise_transforms {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_0);
+        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_@FL_TYPE_INDEX@);
     }
 }
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_1.cu b/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_1.cu
deleted file mode 100644
index 513a2c056..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_1.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../pairwise.chpp"
-
-namespace functions {
-    namespace pairwise_transforms {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_1);
-    }
-}
\ No newline at end of file
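A dispatch layer still has to route a runtime dtype to the matching instantiation; libnd4j does this with selector macros built over the same type lists, so the chunked units and the dispatcher always agree on which specializations exist. The sketch below shows the idea with a plain switch over a hypothetical dtype enum; the names are illustrative, not libnd4j's API.

```cpp
// dtype_dispatch.cpp -- generic sketch of enum-to-template routing.
#include <iostream>

enum class ToyDType { FLOAT32, INT32 };

template <typename X>
struct ToyPairWiseTransform {
    // Doubles the input value; stands in for a real pairwise kernel.
    static void exec(const void* in, void* out) {
        *static_cast<X*>(out) = *static_cast<const X*>(in) * X(2);
    }
};

void execByDType(ToyDType t, const void* in, void* out) {
    switch (t) {
        case ToyDType::FLOAT32: ToyPairWiseTransform<float>::exec(in, out); break;
        case ToyDType::INT32:   ToyPairWiseTransform<int>::exec(in, out);   break;
    }
}

int main() {
    float in = 3.5f, out = 0.0f;
    execByDType(ToyDType::FLOAT32, &in, &out);
    std::cout << out << "\n"; // prints 7
    return 0;
}
```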
diff --git a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_10.cu b/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_10.cu
deleted file mode 100644
index fac835b18..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_10.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../pairwise.chpp"
-
-namespace functions {
-    namespace pairwise_transforms {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_10);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_11.cu b/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_11.cu
deleted file mode 100644
index f01ef7eb3..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_11.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../pairwise.chpp"
-
-namespace functions {
-    namespace pairwise_transforms {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_11);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_12.cu b/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_12.cu
deleted file mode 100644
index 8cf8c367f..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_12.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../pairwise.chpp"
-
-namespace functions {
-    namespace pairwise_transforms {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_12);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_2.cu b/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_2.cu
deleted file mode 100644
index 8e0261d14..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_2.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../pairwise.chpp"
-
-namespace functions {
-    namespace pairwise_transforms {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_2);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_3.cu b/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_3.cu
deleted file mode 100644
index 86c23344a..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_3.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../pairwise.chpp"
-
-namespace functions {
-    namespace pairwise_transforms {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_3);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_4.cu b/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_4.cu
deleted file mode 100644
index 1ac28891f..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_4.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../pairwise.chpp"
-
-namespace functions {
-    namespace pairwise_transforms {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_4);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_5.cu b/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_5.cu
deleted file mode 100644
index 713fe344c..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_5.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../pairwise.chpp"
-
-namespace functions {
-    namespace pairwise_transforms {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_5);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_6.cu b/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_6.cu
deleted file mode 100644
index 0983be1e9..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_6.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../pairwise.chpp"
-
-namespace functions {
-    namespace pairwise_transforms {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_6);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_7.cu b/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_7.cu
deleted file mode 100644
index b12d82eac..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_7.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../pairwise.chpp"
-
-namespace functions {
-    namespace pairwise_transforms {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_7);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_8.cu b/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_8.cu
deleted file mode 100644
index fc1876f3d..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_8.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../pairwise.chpp"
-
-namespace functions {
-    namespace pairwise_transforms {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_8);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_9.cu b/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_9.cu
deleted file mode 100644
index f13c28e85..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_9.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../pairwise.chpp"
-
-namespace functions {
-    namespace pairwise_transforms {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_9);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/reduce3.cu.in b/libnd4j/include/loops/cuda/compilation_units/reduce3.cu.in
new file mode 100644
index 000000000..dd7472836
--- /dev/null
+++ b/libnd4j/include/loops/cuda/compilation_units/reduce3.cu.in
@@ -0,0 +1,27 @@
+/*******************************************************************************
+ * Copyright (c) 2015-2018 Skymind, Inc.
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Apache License, Version 2.0 which is available at
+ * https://www.apache.org/licenses/LICENSE-2.0.
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ ******************************************************************************/
+
+//
+// @author raver119@gmail.com
+//
+
+#include
+#cmakedefine FLOAT_TYPE_GEN
+namespace functions {
+    namespace reduce3 {
+        BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES, FLOAT_TYPES_@FL_TYPE_INDEX@);
+    }
+}
\ No newline at end of file
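The reduce3 template differs from the pairwise ones in one respect: BUILD_DOUBLE_TEMPLATE instantiates Reduce3<X, Y> over a cross product, every X in LIBND4J_TYPES against every Y in the FLOAT_TYPES_N chunk, so each generated unit covers a slice of a two-dimensional grid rather than a flat list. A self-contained model of that expansion, with hypothetical two-entry lists standing in for the real ones:

```cpp
// toy_double_template.cpp -- two-axis instantiation, illustrative only.
#include <cstdio>

// Inner axis: every X type, with the Y type threaded through.
#define TOY_LIBND4J_TYPES(CB, Y) CB(float, Y) CB(int, Y)
// Outer axis: a FLOAT_TYPES-style chunk of Y types.
#define TOY_FLOAT_TYPES_0(CB) TOY_LIBND4J_TYPES(CB, float) TOY_LIBND4J_TYPES(CB, double)

template <typename X, typename Y>
struct ToyReduce3 {
    static Y dot(const X* a, const X* b, int n) {
        Y s = Y(0);
        for (int i = 0; i < n; ++i) s += static_cast<Y>(a[i]) * static_cast<Y>(b[i]);
        return s;
    }
};

#define TOY_INSTANTIATE(X, Y) template struct ToyReduce3<X, Y>;
TOY_FLOAT_TYPES_0(TOY_INSTANTIATE) // emits the 2x2 cross product of instantiations

int main() {
    float a[] = {1.0f, 2.0f}, b[] = {3.0f, 4.0f};
    std::printf("%f\n", ToyReduce3<float, double>::dot(a, b, 2)); // prints 11.000000
    return 0;
}
```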
diff --git a/libnd4j/include/loops/cuda/compilation_units/reduce3/reduce3_0.cu b/libnd4j/include/loops/cuda/compilation_units/reduce3/reduce3_0.cu
deleted file mode 100644
index d3aeadb5f..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/reduce3/reduce3_0.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../reduce3.chpp"
-
-namespace functions {
-    namespace reduce3 {
-        BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES, FLOAT_TYPES_0);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/reduce3/reduce3_1.cu b/libnd4j/include/loops/cuda/compilation_units/reduce3/reduce3_1.cu
deleted file mode 100644
index cfc7cb5f3..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/reduce3/reduce3_1.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../reduce3.chpp"
-
-namespace functions {
-    namespace reduce3 {
-        BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES, FLOAT_TYPES_1);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/reduce3/reduce3_2.cu b/libnd4j/include/loops/cuda/compilation_units/reduce3/reduce3_2.cu
deleted file mode 100644
index 754ac9f52..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/reduce3/reduce3_2.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../reduce3.chpp"
-
-namespace functions {
-    namespace reduce3 {
-        BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES, FLOAT_TYPES_2);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/reduce3/reduce3_3.cu b/libnd4j/include/loops/cuda/compilation_units/reduce3/reduce3_3.cu
deleted file mode 100644
index 340698b34..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/reduce3/reduce3_3.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../reduce3.chpp"
-
-namespace functions {
-    namespace reduce3 {
-        BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES, FLOAT_TYPES_3);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cpu/compilation_units/reduce_float_1.cpp b/libnd4j/include/loops/cuda/compilation_units/reduce_float.cu.in
similarity index 86%
rename from libnd4j/include/loops/cpu/compilation_units/reduce_float_1.cpp
rename to libnd4j/include/loops/cuda/compilation_units/reduce_float.cu.in
index bfa88bc3b..34c2bf8ca 100644
--- a/libnd4j/include/loops/cpu/compilation_units/reduce_float_1.cpp
+++ b/libnd4j/include/loops/cuda/compilation_units/reduce_float.cu.in
@@ -1,6 +1,5 @@
 /*******************************************************************************
  * Copyright (c) 2015-2018 Skymind, Inc.
- * Copyright (c) 2019 Konduit K.K.
  *
  * This program and the accompanying materials are made available under the
  * terms of the Apache License, Version 2.0 which is available at
@@ -19,10 +18,10 @@
 // @author raver119@gmail.com
 //
 
-#include "../reduce/reduce_float.hpp"
-
+#include
+#cmakedefine FLOAT_TYPE_GEN
 namespace functions {
     namespace reduce {
-        BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT ReduceFloatFunction, , LIBND4J_TYPES, FLOAT_TYPES_1);
+        BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT ReduceFloatFunction, , LIBND4J_TYPES, FLOAT_TYPES_@FL_TYPE_INDEX@);
     }
-}
+}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/reduce_float/reduce_float_0.cu b/libnd4j/include/loops/cuda/compilation_units/reduce_float/reduce_float_0.cu
deleted file mode 100644
index dd893939d..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/reduce_float/reduce_float_0.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../reduce/reduce_float.chpp"
-
-namespace functions {
-    namespace reduce {
-        BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT ReduceFloatFunction, , LIBND4J_TYPES, FLOAT_TYPES_0);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/reduce_float/reduce_float_1.cu b/libnd4j/include/loops/cuda/compilation_units/reduce_float/reduce_float_1.cu
deleted file mode 100644
index 4d98cb61c..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/reduce_float/reduce_float_1.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../reduce/reduce_float.chpp"
-
-namespace functions {
-    namespace reduce {
-        BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT ReduceFloatFunction, , LIBND4J_TYPES, FLOAT_TYPES_1);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/reduce_float/reduce_float_2.cu b/libnd4j/include/loops/cuda/compilation_units/reduce_float/reduce_float_2.cu
deleted file mode 100644
index 346627563..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/reduce_float/reduce_float_2.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../reduce/reduce_float.chpp"
-
-namespace functions {
-    namespace reduce {
-        BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT ReduceFloatFunction, , LIBND4J_TYPES, FLOAT_TYPES_2);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/reduce_float/reduce_float_3.cu b/libnd4j/include/loops/cuda/compilation_units/reduce_float/reduce_float_3.cu
deleted file mode 100644
index 2852063ad..000000000
--- a/libnd4j/include/loops/cuda/compilation_units/reduce_float/reduce_float_3.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015-2018 Skymind, Inc.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Apache License, Version 2.0 which is available at
- * https://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- ******************************************************************************/
-
-//
-// @author raver119@gmail.com
-//
-
-#include "../../reduce/reduce_float.chpp"
-
-namespace functions {
-    namespace reduce {
-        BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT ReduceFloatFunction, , LIBND4J_TYPES, FLOAT_TYPES_3);
-    }
-}
\ No newline at end of file
diff --git a/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_0.cu b/libnd4j/include/loops/cuda/compilation_units/scalar.cu.in
similarity index 89%
rename from libnd4j/include/loops/cuda/compilation_units/scalar/scalar_0.cu
rename to libnd4j/include/loops/cuda/compilation_units/scalar.cu.in
index 28f754b14..15608bdd1 100644
--- a/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_0.cu
+++ b/libnd4j/include/loops/cuda/compilation_units/scalar.cu.in
@@ -18,10 +18,10 @@
 // @author raver119@gmail.com
 //
 
-#include "../../scalar.chpp"
-
+#include
+#cmakedefine PAIRWISE_TYPE_GEN
 namespace functions {
     namespace scalar {
-        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_0);
+        BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_@FL_TYPE_INDEX@);
     }
 }
\ No newline at end of file
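A note on the #cmakedefine lines in these templates: when CMake configures the file, "#cmakedefine PAIRWISE_TYPE_GEN" becomes "#define PAIRWISE_TYPE_GEN" if the variable is set and "/* #undef PAIRWISE_TYPE_GEN */" otherwise; that much is standard configure_file() behavior. The hunks shown here do not reveal what consumes the macro, so the guard below is only a guess at how such a flag could compile a disabled chunk down to nothing.

```cpp
// cmakedefine_demo.cpp -- simulating the configured output of '#cmakedefine'.
#include <cstdio>

#define PAIRWISE_TYPE_GEN // stands in for the configured result when the chunk is enabled

int main() {
#ifdef PAIRWISE_TYPE_GEN
    // Hypothetical: with the flag defined, a generated unit would emit its instantiations.
    std::puts("chunk enabled: instantiations emitted");
#else
    std::puts("chunk disabled: unit compiles to nothing");
#endif
    return 0;
}
```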
See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../../scalar.chpp" - -namespace functions { - namespace scalar { - BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_1); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_10.cu b/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_10.cu deleted file mode 100644 index e06cad235..000000000 --- a/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_10.cu +++ /dev/null @@ -1,27 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../../scalar.chpp" - -namespace functions { - namespace scalar { - BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_10); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_11.cu b/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_11.cu deleted file mode 100644 index 3c5549339..000000000 --- a/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_11.cu +++ /dev/null @@ -1,27 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../../scalar.chpp" - -namespace functions { - namespace scalar { - BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_11); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_12.cu b/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_12.cu deleted file mode 100644 index 7f7f74156..000000000 --- a/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_12.cu +++ /dev/null @@ -1,27 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. 
- * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../../scalar.chpp" - -namespace functions { - namespace scalar { - BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_12); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_2.cu b/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_2.cu deleted file mode 100644 index af2de5b0e..000000000 --- a/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_2.cu +++ /dev/null @@ -1,27 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../../scalar.chpp" - -namespace functions { - namespace scalar { - BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_2); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_3.cu b/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_3.cu deleted file mode 100644 index a50cee507..000000000 --- a/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_3.cu +++ /dev/null @@ -1,27 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../../scalar.chpp" - -namespace functions { - namespace scalar { - BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_3); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_4.cu b/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_4.cu deleted file mode 100644 index 7f99764d8..000000000 --- a/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_4.cu +++ /dev/null @@ -1,27 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../../scalar.chpp" - -namespace functions { - namespace scalar { - BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_4); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_5.cu b/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_5.cu deleted file mode 100644 index 10e93e14c..000000000 --- a/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_5.cu +++ /dev/null @@ -1,27 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../../scalar.chpp" - -namespace functions { - namespace scalar { - BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_5); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_6.cu b/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_6.cu deleted file mode 100644 index a1a98cf41..000000000 --- a/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_6.cu +++ /dev/null @@ -1,27 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. 
- * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../../scalar.chpp" - -namespace functions { - namespace scalar { - BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_6); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_7.cu b/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_7.cu deleted file mode 100644 index f29d26c44..000000000 --- a/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_7.cu +++ /dev/null @@ -1,27 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../../scalar.chpp" - -namespace functions { - namespace scalar { - BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_7); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_8.cu b/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_8.cu deleted file mode 100644 index 38d275b6f..000000000 --- a/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_8.cu +++ /dev/null @@ -1,27 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../../scalar.chpp" - -namespace functions { - namespace scalar { - BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_8); - } -} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_9.cu b/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_9.cu deleted file mode 100644 index be7c66956..000000000 --- a/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_9.cu +++ /dev/null @@ -1,27 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../../scalar.chpp" - -namespace functions { - namespace scalar { - BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_9); - } -} \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argamax.cpp.in b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argamax.cpp.in index 533a94aab..3cefacb37 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argamax.cpp.in +++ b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argamax.cpp.in @@ -19,10 +19,18 @@ #include + +#cmakedefine LIBND4J_TYPE_GEN + +#if defined(PAIRWISE_TYPE_GEN) || defined(INT_TYPE_GEN) || defined(FLOAT_TYPE_GEN) || defined(LIBND4J_TYPE_GEN) namespace sd { namespace ops { namespace helpers { +#if defined(LIBND4J_TYPE_GEN) BUILD_DOUBLE_TEMPLATE(template void argAbsMax_, (const NDArray& input, NDArray& output, const std::vector& dimensions), LIBND4J_TYPES_@FL_TYPE_INDEX@, INDEXING_TYPES); +#endif + } } -} \ No newline at end of file +} +#endif diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argamin.cpp.in b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argamin.cpp.in index 4f7c78505..9de76d1de 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argamin.cpp.in +++ b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argamin.cpp.in @@ -16,6 +16,7 @@ // // @author AbdelRauf // +#cmakedefine LIBND4J_TYPE_GEN #include diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argmax.cpp.in b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argmax.cpp.in index 770f155f4..112a91f9f 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argmax.cpp.in +++ b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argmax.cpp.in @@ -16,6 +16,7 @@ // // @author AbdelRauf // +#cmakedefine LIBND4J_TYPE_GEN #include diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argmin.cpp.in 
b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argmin.cpp.in index 0149b890e..ff8ba1bf2 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argmin.cpp.in +++ b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/argmin.cpp.in @@ -17,6 +17,8 @@ // @author AbdelRauf // +#cmakedefine LIBND4J_TYPE_GEN + #include namespace sd { diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_3.cpp b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize.cpp.in similarity index 84% rename from libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_3.cpp rename to libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize.cpp.in index 11175a02d..b0cdafebd 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_3.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize.cpp.in @@ -1,5 +1,6 @@ /******************************************************************************* * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2020 Konduit K.K. * * This program and the accompanying materials are made available under the * terms of the Apache License, Version 2.0 which is available at @@ -19,12 +20,14 @@ // #include -#include "ops/declarable/helpers/cpu/crop_and_resize.hpp" +#include + +#cmakedefine LIBND4J_TYPE_GEN namespace sd { namespace ops { namespace helpers { - BUILD_TRIPLE_TEMPLATE(template void cropAndResizeFunctor_, (NDArray const *images, NDArray const *boxes, NDArray const *indices, NDArray const *cropSize, int method, double extrapolationVal, NDArray *crops), NUMERIC_TYPES_3, FLOAT_TYPES, INTEGER_TYPES); + BUILD_TRIPLE_TEMPLATE(template void cropAndResizeFunctor_, (NDArray const *images, NDArray const *boxes, NDArray const *indices, NDArray const *cropSize, int method, double extrapolationVal, NDArray *crops), LIBND4J_TYPES_@FL_TYPE_INDEX@, FLOAT_TYPES, INTEGER_TYPES); } } } \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_0.cpp b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_0.cpp deleted file mode 100644 index 22258266b..000000000 --- a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_0.cpp +++ /dev/null @@ -1,30 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
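The argamax/argamin/argmax/argmin units above pair the #cmakedefine with a preprocessor guard, so a generated file compiles to an empty translation unit unless its *_TYPE_GEN flag was defined for that unit. A hypothetical generated argamax unit for index 5 is sketched below; the include target and the std::vector element type (int) are assumptions, since both are elided in the hunks above, while the guard and the BUILD_DOUBLE_TEMPLATE line are copied from the argamax hunk.

// Hypothetical argamax_5.cpp as emitted by configure_file() with
// FL_TYPE_INDEX = 5 and LIBND4J_TYPE_GEN enabled. The include path and the
// vector element type are assumptions; the guard and the template line
// mirror the argamax.cpp.in hunk above.
#include <ops/declarable/helpers/reductions.h>   // assumed target

#define LIBND4J_TYPE_GEN

#if defined(PAIRWISE_TYPE_GEN) || defined(INT_TYPE_GEN) || defined(FLOAT_TYPE_GEN) || defined(LIBND4J_TYPE_GEN)
namespace sd {
    namespace ops {
        namespace helpers {
#if defined(LIBND4J_TYPE_GEN)
            BUILD_DOUBLE_TEMPLATE(template void argAbsMax_, (const NDArray& input, NDArray& output, const std::vector<int>& dimensions), LIBND4J_TYPES_5, INDEXING_TYPES);
#endif
        }
    }
}
#endif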
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author sgazeos@gmail.com -// - -#include -#include "ops/declarable/helpers/cpu/crop_and_resize.hpp" - -namespace sd { - namespace ops { - namespace helpers { - BUILD_TRIPLE_TEMPLATE(template void cropAndResizeFunctor_, (NDArray const *images, NDArray const *boxes, NDArray const *indices, NDArray const *cropSize, int method, double extrapolationVal, NDArray *crops), NUMERIC_TYPES_0, FLOAT_TYPES, INTEGER_TYPES); - } - } -} \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_1.cpp b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_1.cpp deleted file mode 100644 index f2b891d5e..000000000 --- a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_1.cpp +++ /dev/null @@ -1,30 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author sgazeos@gmail.com -// - -#include -#include "ops/declarable/helpers/cpu/crop_and_resize.hpp" - -namespace sd { - namespace ops { - namespace helpers { - BUILD_TRIPLE_TEMPLATE(template void cropAndResizeFunctor_, (NDArray const *images, NDArray const *boxes, NDArray const *indices, NDArray const *cropSize, int method, double extrapolationVal, NDArray *crops), NUMERIC_TYPES_1, FLOAT_TYPES, INTEGER_TYPES); - } - } -} \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_2.cpp b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_2.cpp deleted file mode 100644 index c475d994c..000000000 --- a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_2.cpp +++ /dev/null @@ -1,30 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author sgazeos@gmail.com -// - -#include -#include "ops/declarable/helpers/cpu/crop_and_resize.hpp" - -namespace sd { - namespace ops { - namespace helpers { - BUILD_TRIPLE_TEMPLATE(template void cropAndResizeFunctor_, (NDArray const *images, NDArray const *boxes, NDArray const *indices, NDArray const *cropSize, int method, double extrapolationVal, NDArray *crops), NUMERIC_TYPES_2, FLOAT_TYPES, INTEGER_TYPES); - } - } -} \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_4.cpp b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_4.cpp deleted file mode 100644 index cea328084..000000000 --- a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_4.cpp +++ /dev/null @@ -1,30 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author sgazeos@gmail.com -// - -#include -#include "ops/declarable/helpers/cpu/crop_and_resize.hpp" - -namespace sd { - namespace ops { - namespace helpers { - BUILD_TRIPLE_TEMPLATE(template void cropAndResizeFunctor_, (NDArray const *images, NDArray const *boxes, NDArray const *indices, NDArray const *cropSize, int method, double extrapolationVal, NDArray *crops), NUMERIC_TYPES_4, FLOAT_TYPES, INTEGER_TYPES); - } - } -} \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_5.cpp b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_5.cpp deleted file mode 100644 index 81bb8e897..000000000 --- a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_5.cpp +++ /dev/null @@ -1,30 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author sgazeos@gmail.com -// - -#include -#include "ops/declarable/helpers/cpu/crop_and_resize.hpp" - -namespace sd { - namespace ops { - namespace helpers { - BUILD_TRIPLE_TEMPLATE(template void cropAndResizeFunctor_, (NDArray const *images, NDArray const *boxes, NDArray const *indices, NDArray const *cropSize, int method, double extrapolationVal, NDArray *crops), NUMERIC_TYPES_5, FLOAT_TYPES, INTEGER_TYPES); - } - } -} \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_6.cpp b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_6.cpp deleted file mode 100644 index 415ab39e2..000000000 --- a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_6.cpp +++ /dev/null @@ -1,30 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author sgazeos@gmail.com -// - -#include -#include "ops/declarable/helpers/cpu/crop_and_resize.hpp" - -namespace sd { - namespace ops { - namespace helpers { - BUILD_TRIPLE_TEMPLATE(template void cropAndResizeFunctor_, (NDArray const *images, NDArray const *boxes, NDArray const *indices, NDArray const *cropSize, int method, double extrapolationVal, NDArray *crops), NUMERIC_TYPES_6, FLOAT_TYPES, INTEGER_TYPES); - } - } -} \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_7.cpp b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_7.cpp deleted file mode 100644 index 47d16e6db..000000000 --- a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_7.cpp +++ /dev/null @@ -1,30 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author sgazeos@gmail.com -// - -#include -#include "ops/declarable/helpers/cpu/crop_and_resize.hpp" - -namespace sd { - namespace ops { - namespace helpers { - BUILD_TRIPLE_TEMPLATE(template void cropAndResizeFunctor_, (NDArray const *images, NDArray const *boxes, NDArray const *indices, NDArray const *cropSize, int method, double extrapolationVal, NDArray *crops), NUMERIC_TYPES_7, FLOAT_TYPES, INTEGER_TYPES); - } - } -} \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_8.cpp b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_8.cpp deleted file mode 100644 index 902ade68c..000000000 --- a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_8.cpp +++ /dev/null @@ -1,30 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author sgazeos@gmail.com -// - -#include -#include "ops/declarable/helpers/cpu/crop_and_resize.hpp" - -namespace sd { - namespace ops { - namespace helpers { - BUILD_TRIPLE_TEMPLATE(template void cropAndResizeFunctor_, (NDArray const *images, NDArray const *boxes, NDArray const *indices, NDArray const *cropSize, int method, double extrapolationVal, NDArray *crops), NUMERIC_TYPES_8, FLOAT_TYPES, INTEGER_TYPES); - } - } -} \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_9.cpp b/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_9.cpp deleted file mode 100644 index 559564903..000000000 --- a/libnd4j/include/ops/declarable/helpers/cpu/compilation_units/crop_and_resize/crop_and_resize_9.cpp +++ /dev/null @@ -1,30 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author sgazeos@gmail.com -// - -#include -#include "ops/declarable/helpers/cpu/crop_and_resize.hpp" - -namespace sd { - namespace ops { - namespace helpers { - BUILD_TRIPLE_TEMPLATE(template void cropAndResizeFunctor_, (NDArray const *images, NDArray const *boxes, NDArray const *indices, NDArray const *cropSize, int method, double extrapolationVal, NDArray *crops), NUMERIC_TYPES_9, FLOAT_TYPES, INTEGER_TYPES); - } - } -} \ No newline at end of file diff --git a/libnd4j/include/ops/impl/compilation_units/specials_double_0.cpp b/libnd4j/include/ops/impl/compilation_units/specials_double.cpp.in similarity index 86% rename from libnd4j/include/ops/impl/compilation_units/specials_double_0.cpp rename to libnd4j/include/ops/impl/compilation_units/specials_double.cpp.in index e9d262f58..00e0883f7 100644 --- a/libnd4j/include/ops/impl/compilation_units/specials_double_0.cpp +++ b/libnd4j/include/ops/impl/compilation_units/specials_double.cpp.in @@ -19,10 +19,10 @@ // @author raver119@gmail.com // -#include "../specials_double.hpp" - +#include +#cmakedefine LIBND4J_TYPE_GEN namespace sd { - BUILD_DOUBLE_TEMPLATE(template class DoubleMethods, , LIBND4J_TYPES, LIBND4J_TYPES_0); + BUILD_DOUBLE_TEMPLATE(template class DoubleMethods, , LIBND4J_TYPES, LIBND4J_TYPES_@FL_TYPE_INDEX@); - BUILD_DOUBLE_TEMPLATE(template void SpecialTypeConverter::convertGeneric, (Nd4jPointer * extras, void *dx, Nd4jLong N, void *dz), LIBND4J_TYPES, LIBND4J_TYPES); + BUILD_DOUBLE_TEMPLATE(template void SpecialTypeConverter::convertGeneric, (Nd4jPointer * extras, void *dx, Nd4jLong N, void *dz), LIBND4J_TYPES, LIBND4J_TYPES_@FL_TYPE_INDEX@); } \ No newline at end of file diff --git a/libnd4j/include/ops/impl/compilation_units/specials_double_3.cpp b/libnd4j/include/ops/impl/compilation_units/specials_double_3.cpp deleted file mode 100644 index 7690749bf..000000000 --- a/libnd4j/include/ops/impl/compilation_units/specials_double_3.cpp +++ /dev/null @@ -1,26 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019-2020 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../specials_double.hpp" - -namespace sd { - BUILD_DOUBLE_TEMPLATE(template class DoubleMethods, , LIBND4J_TYPES, LIBND4J_TYPES_3); -} \ No newline at end of file diff --git a/libnd4j/include/ops/impl/compilation_units/specials_double_4.cpp b/libnd4j/include/ops/impl/compilation_units/specials_double_4.cpp deleted file mode 100644 index 505ea9921..000000000 --- a/libnd4j/include/ops/impl/compilation_units/specials_double_4.cpp +++ /dev/null @@ -1,26 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. 
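Note what the specials_double template changes besides the file count: the convertGeneric instantiation previously paired LIBND4J_TYPES with the full LIBND4J_TYPES list inside a single unit, and now pairs it with the per-index shard, so the type-pair cartesian product is spread evenly across the ten generated units. Roughly what the macro stamps out for one type pair is shown below; the pair float/int is chosen purely for illustration, and each BUILD_DOUBLE_TEMPLATE call above emits one such line per combination drawn from its two type lists.

// Illustrative expansions for a single (float, int) combination; the real
// macro iterates every combination of the two lists it is given.
template class DoubleMethods<float, int>;
template void SpecialTypeConverter::convertGeneric<float, int>(Nd4jPointer *extras, void *dx, Nd4jLong N, void *dz);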
- * Copyright (c) 2019-2020 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../specials_double.hpp" - -namespace sd { - BUILD_DOUBLE_TEMPLATE(template class DoubleMethods, , LIBND4J_TYPES, LIBND4J_TYPES_4); -} \ No newline at end of file diff --git a/libnd4j/include/ops/impl/compilation_units/specials_double_5.cpp b/libnd4j/include/ops/impl/compilation_units/specials_double_5.cpp deleted file mode 100644 index caa9d2dfa..000000000 --- a/libnd4j/include/ops/impl/compilation_units/specials_double_5.cpp +++ /dev/null @@ -1,26 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019-2020 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../specials_double.hpp" - -namespace sd { - BUILD_DOUBLE_TEMPLATE(template class DoubleMethods, , LIBND4J_TYPES, LIBND4J_TYPES_5); -} \ No newline at end of file diff --git a/libnd4j/include/ops/impl/compilation_units/specials_double_6.cpp b/libnd4j/include/ops/impl/compilation_units/specials_double_6.cpp deleted file mode 100644 index 9646534a9..000000000 --- a/libnd4j/include/ops/impl/compilation_units/specials_double_6.cpp +++ /dev/null @@ -1,26 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019-2020 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../specials_double.hpp" - -namespace sd { - BUILD_DOUBLE_TEMPLATE(template class DoubleMethods, , LIBND4J_TYPES, LIBND4J_TYPES_6); -} \ No newline at end of file diff --git a/libnd4j/include/ops/impl/compilation_units/specials_double_7.cpp b/libnd4j/include/ops/impl/compilation_units/specials_double_7.cpp deleted file mode 100644 index 3230c1fbc..000000000 --- a/libnd4j/include/ops/impl/compilation_units/specials_double_7.cpp +++ /dev/null @@ -1,26 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019-2020 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../specials_double.hpp" - -namespace sd { - BUILD_DOUBLE_TEMPLATE(template class DoubleMethods, , LIBND4J_TYPES, LIBND4J_TYPES_7); -} \ No newline at end of file diff --git a/libnd4j/include/ops/impl/compilation_units/specials_double_8.cpp b/libnd4j/include/ops/impl/compilation_units/specials_double_8.cpp deleted file mode 100644 index a56b335b6..000000000 --- a/libnd4j/include/ops/impl/compilation_units/specials_double_8.cpp +++ /dev/null @@ -1,26 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019-2020 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../specials_double.hpp" - -namespace sd { - BUILD_DOUBLE_TEMPLATE(template class DoubleMethods, , LIBND4J_TYPES, LIBND4J_TYPES_8); -} \ No newline at end of file diff --git a/libnd4j/include/ops/impl/compilation_units/specials_double_9.cpp b/libnd4j/include/ops/impl/compilation_units/specials_double_9.cpp deleted file mode 100644 index bb13c0415..000000000 --- a/libnd4j/include/ops/impl/compilation_units/specials_double_9.cpp +++ /dev/null @@ -1,26 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019-2020 Konduit K.K. 
- * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../specials_double.hpp" - -namespace sd { - BUILD_DOUBLE_TEMPLATE(template class DoubleMethods, , LIBND4J_TYPES, LIBND4J_TYPES_9); -} \ No newline at end of file diff --git a/libnd4j/include/ops/impl/compilation_units/specials_single_0.cpp b/libnd4j/include/ops/impl/compilation_units/specials_single.cpp.in similarity index 90% rename from libnd4j/include/ops/impl/compilation_units/specials_single_0.cpp rename to libnd4j/include/ops/impl/compilation_units/specials_single.cpp.in index f74717f05..49110d829 100644 --- a/libnd4j/include/ops/impl/compilation_units/specials_single_0.cpp +++ b/libnd4j/include/ops/impl/compilation_units/specials_single.cpp.in @@ -19,8 +19,8 @@ // @author raver119@gmail.com // -#include "../specials_single.hpp" - +#include +#cmakedefine LIBND4J_TYPE_GEN namespace sd { - BUILD_SINGLE_TEMPLATE(template class SpecialMethods, , LIBND4J_TYPES_0); + BUILD_SINGLE_TEMPLATE(template class SpecialMethods, , LIBND4J_TYPES_@FL_TYPE_INDEX@); } \ No newline at end of file diff --git a/libnd4j/include/ops/impl/compilation_units/specials_single_1.cpp b/libnd4j/include/ops/impl/compilation_units/specials_single_1.cpp deleted file mode 100644 index cbacbb60e..000000000 --- a/libnd4j/include/ops/impl/compilation_units/specials_single_1.cpp +++ /dev/null @@ -1,26 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019-2020 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../specials_single.hpp" - -namespace sd { - BUILD_SINGLE_TEMPLATE(template class SpecialMethods, , LIBND4J_TYPES_1); -} \ No newline at end of file diff --git a/libnd4j/include/ops/impl/compilation_units/specials_single_2.cpp b/libnd4j/include/ops/impl/compilation_units/specials_single_2.cpp deleted file mode 100644 index b1c7c0db6..000000000 --- a/libnd4j/include/ops/impl/compilation_units/specials_single_2.cpp +++ /dev/null @@ -1,26 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019-2020 Konduit K.K. 
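The single-type variant is simpler: BUILD_SINGLE_TEMPLATE iterates one sharded list, so each generated specials_single_N.cpp instantiates only the few types in LIBND4J_TYPES_N. If, purely as an assumption for illustration, LIBND4J_TYPES_4 held float and double, the generated unit would reduce to the explicit instantiations below.

// Assumed shard contents, for illustration only; the real LIBND4J_TYPES_4
// list is defined elsewhere in the build.
template class SpecialMethods<float>;
template class SpecialMethods<double>;

Keeping a handful of types per translation unit appears to be the motivation for the auto-generation change as a whole: each unit stays small and cheap to compile, while the full set of instantiations across all generated units is unchanged.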
- * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../specials_single.hpp" - -namespace sd { - BUILD_SINGLE_TEMPLATE(template class SpecialMethods, , LIBND4J_TYPES_2); -} \ No newline at end of file diff --git a/libnd4j/include/ops/impl/compilation_units/specials_single_3.cpp b/libnd4j/include/ops/impl/compilation_units/specials_single_3.cpp deleted file mode 100644 index d340500e5..000000000 --- a/libnd4j/include/ops/impl/compilation_units/specials_single_3.cpp +++ /dev/null @@ -1,26 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019-2020 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../specials_single.hpp" - -namespace sd { - BUILD_SINGLE_TEMPLATE(template class SpecialMethods, , LIBND4J_TYPES_3); -} \ No newline at end of file diff --git a/libnd4j/include/ops/impl/compilation_units/specials_single_4.cpp b/libnd4j/include/ops/impl/compilation_units/specials_single_4.cpp deleted file mode 100644 index b8ea2a933..000000000 --- a/libnd4j/include/ops/impl/compilation_units/specials_single_4.cpp +++ /dev/null @@ -1,26 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019-2020 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../specials_single.hpp" - -namespace sd { - BUILD_SINGLE_TEMPLATE(template class SpecialMethods, , LIBND4J_TYPES_4); -} \ No newline at end of file diff --git a/libnd4j/include/ops/impl/compilation_units/specials_single_5.cpp b/libnd4j/include/ops/impl/compilation_units/specials_single_5.cpp deleted file mode 100644 index cc3fe3f0b..000000000 --- a/libnd4j/include/ops/impl/compilation_units/specials_single_5.cpp +++ /dev/null @@ -1,26 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019-2020 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../specials_single.hpp" - -namespace sd { - BUILD_SINGLE_TEMPLATE(template class SpecialMethods, , LIBND4J_TYPES_5); -} \ No newline at end of file diff --git a/libnd4j/include/ops/impl/compilation_units/specials_single_6.cpp b/libnd4j/include/ops/impl/compilation_units/specials_single_6.cpp deleted file mode 100644 index 4e0b96a82..000000000 --- a/libnd4j/include/ops/impl/compilation_units/specials_single_6.cpp +++ /dev/null @@ -1,26 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019-2020 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../specials_single.hpp" - -namespace sd { - BUILD_SINGLE_TEMPLATE(template class SpecialMethods, , LIBND4J_TYPES_6); -} \ No newline at end of file diff --git a/libnd4j/include/ops/impl/compilation_units/specials_single_7.cpp b/libnd4j/include/ops/impl/compilation_units/specials_single_7.cpp deleted file mode 100644 index e8bd8d950..000000000 --- a/libnd4j/include/ops/impl/compilation_units/specials_single_7.cpp +++ /dev/null @@ -1,26 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019-2020 Konduit K.K. 
- * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../specials_single.hpp" - -namespace sd { - BUILD_SINGLE_TEMPLATE(template class SpecialMethods, , LIBND4J_TYPES_7); -} \ No newline at end of file diff --git a/libnd4j/include/ops/impl/compilation_units/specials_single_8.cpp b/libnd4j/include/ops/impl/compilation_units/specials_single_8.cpp deleted file mode 100644 index b2581352e..000000000 --- a/libnd4j/include/ops/impl/compilation_units/specials_single_8.cpp +++ /dev/null @@ -1,26 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019-2020 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../specials_single.hpp" - -namespace sd { - BUILD_SINGLE_TEMPLATE(template class SpecialMethods, , LIBND4J_TYPES_8); -} \ No newline at end of file diff --git a/libnd4j/include/ops/impl/compilation_units/specials_single_9.cpp b/libnd4j/include/ops/impl/compilation_units/specials_single_9.cpp deleted file mode 100644 index 5105affa8..000000000 --- a/libnd4j/include/ops/impl/compilation_units/specials_single_9.cpp +++ /dev/null @@ -1,26 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019-2020 Konduit K.K. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// @author raver119@gmail.com -// - -#include "../specials_single.hpp" - -namespace sd { - BUILD_SINGLE_TEMPLATE(template class SpecialMethods, , LIBND4J_TYPES_9); -} \ No newline at end of file diff --git a/libnd4j/tests_cpu/libnd4j_tests/CMakeLists.txt b/libnd4j/tests_cpu/libnd4j_tests/CMakeLists.txt index 92084ef74..7e01e2847 100644 --- a/libnd4j/tests_cpu/libnd4j_tests/CMakeLists.txt +++ b/libnd4j/tests_cpu/libnd4j_tests/CMakeLists.txt @@ -225,17 +225,12 @@ if (CMAKE_BUILD_TYPE STREQUAL "Debug" AND NOT(MINGW) AND NOT(APPLE)) SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -export-dynamic") endif() -file(GLOB_RECURSE COMPILATION_UNITS false ../../include/ops/declarable/helpers/cpu/compilation_units/*.cpp.in) -foreach(FL_ITEM ${COMPILATION_UNITS}) - string(REGEX MATCH "^(.*)\\.cpp\.in$" dummy ${FL_ITEM}) - set(FL_ITEM_WLE ${CMAKE_MATCH_1}) - foreach(FL_TYPE_INDEX RANGE 0 9) - #message( "${FL_ITEM_WLE}_${FL_TYPE_INDEX}.cpp") - configure_file( "${FL_ITEM}" "${FL_ITEM_WLE}_${FL_TYPE_INDEX}.cpp" @ONLY) - LIST(APPEND CUSTOMOPS_GENERIC_SOURCES ${FL_ITEM_WLE}_${FL_TYPE_INDEX}.cpp ) - endforeach() -endforeach() + file(GLOB_RECURSE COMPILATION_UNITS false ../include/ops/declarable/helpers/cpu/compilation_units/*.cpp.in + ../include/loops/cpu/compilation_units/*.cpp.in ../include/helpers/cpu/loops/*.cpp.in) + foreach(FL_ITEM ${COMPILATION_UNITS}) + genCompilation(FL_ITEM) + endforeach() # this function strips path from file name, basically making up short file name, i.e. file.cpp function(SHORTNAME LONG_NAME OUTPUT) From 45ebd4899c009cf7776abe275c2ef5269819245a Mon Sep 17 00:00:00 2001 From: raver119 Date: Tue, 2 Jun 2020 10:43:12 +0300 Subject: [PATCH 19/21] CUDA small sort tests (#482) * couple of C++ sort tests Signed-off-by: raver119@gmail.com * Java sort test Signed-off-by: raver119@gmail.com --- libnd4j/include/array/NDArray.h | 8 +-- .../layers_tests/LegacyOpsCudaTests.cu | 52 +++++++++++++++++++ .../java/org/nd4j/nativeblas/Nd4jCuda.java | 15 +++--- .../java/org/nd4j/nativeblas/Nd4jCpu.java | 15 +++--- .../test/java/org/nd4j/linalg/Nd4jTestsC.java | 8 +++ 5 files changed, 82 insertions(+), 16 deletions(-) diff --git a/libnd4j/include/array/NDArray.h b/libnd4j/include/array/NDArray.h index 04500a987..c314d25b6 100644 --- a/libnd4j/include/array/NDArray.h +++ b/libnd4j/include/array/NDArray.h @@ -354,11 +354,11 @@ namespace sd { * @param writeList * @param readList */ - static void registerSpecialUse(const std::vector& writeList, const std::vector& readList); - static void prepareSpecialUse(const std::vector& writeList, const std::vector& readList, bool synchronizeWritables = false); + static void registerSpecialUse(const std::vector& writeList, const std::vector& readList = {}); + static void prepareSpecialUse(const std::vector& writeList, const std::vector& readList = {}, bool synchronizeWritables = false); - static void registerPrimaryUse(const std::vector& writeList, const std::vector& readList); - static void preparePrimaryUse(const std::vector& writeList, const std::vector& readList, bool synchronizeWritables = false); + static void registerPrimaryUse(const std::vector& writeList, const std::vector& readList = {}); + static void preparePrimaryUse(const std::vector& writeList, const std::vector& readList = {}, bool synchronizeWritables = false); /** * This method returns buffer pointer offset by given number of elements, wrt own data type diff --git 
a/libnd4j/tests_cpu/layers_tests/LegacyOpsCudaTests.cu b/libnd4j/tests_cpu/layers_tests/LegacyOpsCudaTests.cu index 53179cd68..622ce9fbb 100644 --- a/libnd4j/tests_cpu/layers_tests/LegacyOpsCudaTests.cu +++ b/libnd4j/tests_cpu/layers_tests/LegacyOpsCudaTests.cu @@ -58,3 +58,55 @@ TEST_F(LegacyOpsCudaTests, test_sortTad_1) { ASSERT_EQ(e, x); } + +TEST_F(LegacyOpsCudaTests, test_sort_1) { + auto x = NDArrayFactory::create<float>('c', {4}, {4.f, 2.f, 1.f, 3.f}); + auto e = NDArrayFactory::create<float>('c', {4}, {1.f, 2.f, 3.f, 4.f}); + + Nd4jPointer extras[2] = {nullptr, LaunchContext::defaultContext()->getCudaStream()}; + + NDArray::prepareSpecialUse({&x}, {&x}); + ::sort(extras, x.buffer(), x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), false); + NDArray::registerSpecialUse({&x}); + + ASSERT_EQ(e, x); +} + +TEST_F(LegacyOpsCudaTests, test_sort_2) { + auto x = NDArrayFactory::create<float>('c', {4}, {4.f, 2.f, 1.f, 3.f}); + auto e = NDArrayFactory::create<float>('c', {4}, {4.f, 3.f, 2.f, 1.f}); + + Nd4jPointer extras[2] = {nullptr, LaunchContext::defaultContext()->getCudaStream()}; + + NDArray::prepareSpecialUse({&x}, {&x}); + ::sort(extras, x.buffer(), x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), true); + NDArray::registerSpecialUse({&x}); + + ASSERT_EQ(e, x); +} + +TEST_F(LegacyOpsCudaTests, test_sort_3) { + auto x = NDArrayFactory::create<double>('c', {4}, {0.5, 0.4, 0.1, 0.2}); + auto e = NDArrayFactory::create<double>('c', {4}, {0.1, 0.2, 0.4, 0.5}); + + Nd4jPointer extras[2] = {nullptr, LaunchContext::defaultContext()->getCudaStream()}; + + NDArray::prepareSpecialUse({&x}, {&x}); + ::sort(extras, x.buffer(), x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), false); + NDArray::registerSpecialUse({&x}); + + ASSERT_EQ(e, x); +} + +TEST_F(LegacyOpsCudaTests, test_sort_4) { + auto x = NDArrayFactory::create<int>('c', {4}, {7, 4, 9, 2}); + auto e = NDArrayFactory::create<int>('c', {4}, {2, 4, 7, 9}); + + Nd4jPointer extras[2] = {nullptr, LaunchContext::defaultContext()->getCudaStream()}; + + NDArray::prepareSpecialUse({&x}, {&x}); + ::sort(extras, x.buffer(), x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), false); + NDArray::registerSpecialUse({&x}); + + ASSERT_EQ(e, x); +} \ No newline at end of file diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCuda.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCuda.java index ad9503849..cc6ffc19a 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCuda.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCuda.java @@ -3849,13 +3849,15 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); * @param writeList * @param readList */ - public static native void registerSpecialUse(@Const @ByRef ConstNDArrayVector writeList, @Const @ByRef ConstNDArrayVector readList); - public static native void prepareSpecialUse(@Const @ByRef ConstNDArrayVector writeList, @Const @ByRef ConstNDArrayVector readList, @Cast("bool") boolean synchronizeWritables/*=false*/); - public static native void prepareSpecialUse(@Const @ByRef ConstNDArrayVector writeList, @Const @ByRef ConstNDArrayVector readList); + public static native void registerSpecialUse(@Const @ByRef ConstNDArrayVector writeList, @Const @ByRef(nullValue = "std::vector({})") ConstNDArrayVector readList); + public static native void registerSpecialUse(@Const @ByRef ConstNDArrayVector writeList); + public static native void 
prepareSpecialUse(@Const @ByRef ConstNDArrayVector writeList, @Const @ByRef(nullValue = "std::vector({})") ConstNDArrayVector readList, @Cast("bool") boolean synchronizeWritables/*=false*/); + public static native void prepareSpecialUse(@Const @ByRef ConstNDArrayVector writeList); - public static native void registerPrimaryUse(@Const @ByRef ConstNDArrayVector writeList, @Const @ByRef ConstNDArrayVector readList); - public static native void preparePrimaryUse(@Const @ByRef ConstNDArrayVector writeList, @Const @ByRef ConstNDArrayVector readList, @Cast("bool") boolean synchronizeWritables/*=false*/); - public static native void preparePrimaryUse(@Const @ByRef ConstNDArrayVector writeList, @Const @ByRef ConstNDArrayVector readList); + public static native void registerPrimaryUse(@Const @ByRef ConstNDArrayVector writeList, @Const @ByRef(nullValue = "std::vector({})") ConstNDArrayVector readList); + public static native void registerPrimaryUse(@Const @ByRef ConstNDArrayVector writeList); + public static native void preparePrimaryUse(@Const @ByRef ConstNDArrayVector writeList, @Const @ByRef(nullValue = "std::vector({})") ConstNDArrayVector readList, @Cast("bool") boolean synchronizeWritables/*=false*/); + public static native void preparePrimaryUse(@Const @ByRef ConstNDArrayVector writeList); /** * This method returns buffer pointer offset by given number of elements, wrt own data type @@ -5043,6 +5045,7 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); // #define LIBND4J_GRAPH_RNG_H // #include +// #include // #include // #include // #include diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java index 402b096c6..f17f11093 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java @@ -3853,13 +3853,15 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); * @param writeList * @param readList */ - public static native void registerSpecialUse(@Const @ByRef ConstNDArrayVector writeList, @Const @ByRef ConstNDArrayVector readList); - public static native void prepareSpecialUse(@Const @ByRef ConstNDArrayVector writeList, @Const @ByRef ConstNDArrayVector readList, @Cast("bool") boolean synchronizeWritables/*=false*/); - public static native void prepareSpecialUse(@Const @ByRef ConstNDArrayVector writeList, @Const @ByRef ConstNDArrayVector readList); + public static native void registerSpecialUse(@Const @ByRef ConstNDArrayVector writeList, @Const @ByRef(nullValue = "std::vector({})") ConstNDArrayVector readList); + public static native void registerSpecialUse(@Const @ByRef ConstNDArrayVector writeList); + public static native void prepareSpecialUse(@Const @ByRef ConstNDArrayVector writeList, @Const @ByRef(nullValue = "std::vector({})") ConstNDArrayVector readList, @Cast("bool") boolean synchronizeWritables/*=false*/); + public static native void prepareSpecialUse(@Const @ByRef ConstNDArrayVector writeList); - public static native void registerPrimaryUse(@Const @ByRef ConstNDArrayVector writeList, @Const @ByRef ConstNDArrayVector readList); - public static native void preparePrimaryUse(@Const @ByRef ConstNDArrayVector writeList, @Const @ByRef ConstNDArrayVector readList, @Cast("bool") boolean synchronizeWritables/*=false*/); - public static native void 
preparePrimaryUse(@Const @ByRef ConstNDArrayVector writeList, @Const @ByRef ConstNDArrayVector readList); + public static native void registerPrimaryUse(@Const @ByRef ConstNDArrayVector writeList, @Const @ByRef(nullValue = "std::vector({})") ConstNDArrayVector readList); + public static native void registerPrimaryUse(@Const @ByRef ConstNDArrayVector writeList); + public static native void preparePrimaryUse(@Const @ByRef ConstNDArrayVector writeList, @Const @ByRef(nullValue = "std::vector({})") ConstNDArrayVector readList, @Cast("bool") boolean synchronizeWritables/*=false*/); + public static native void preparePrimaryUse(@Const @ByRef ConstNDArrayVector writeList); /** * This method returns buffer pointer offset by given number of elements, wrt own data type @@ -5047,6 +5049,7 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); // #define LIBND4J_GRAPH_RNG_H // #include +// #include // #include // #include // #include diff --git a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/Nd4jTestsC.java b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/Nd4jTestsC.java index c9f5cef6f..e6c380b31 100644 --- a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/Nd4jTestsC.java +++ b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/Nd4jTestsC.java @@ -8484,6 +8484,14 @@ public class Nd4jTestsC extends BaseNd4jTest { } } + @Test + public void testSmallSort(){ + INDArray arr = Nd4j.createFromArray(0.5, 0.4, 0.1, 0.2); + INDArray expected = Nd4j.createFromArray(0.1, 0.2, 0.4, 0.5); + INDArray sorted = Nd4j.sort(arr, true); + assertEquals(expected, sorted); + } + @Override public char ordering() { return 'c'; From ee3e059b12ac4994289f17319792f557982391c9 Mon Sep 17 00:00:00 2001 From: Alex Black Date: Fri, 5 Jun 2020 11:49:02 +1000 Subject: [PATCH 20/21] DL4J/DataVec: Fix Yolo2OutputLayer and ObjectDetectionRecordReader support for NHWC data format (#483) * Fix Yolo2OutputLayer for NHWC data format Signed-off-by: Alex Black * ObjectDetectionRecordReader NHWC support Signed-off-by: Alex Black --- .../ObjectDetectionRecordReader.java | 57 +++- .../TestObjectDetectionRecordReader.java | 281 +++++++++--------- .../gradientcheck/YoloGradientCheckTests.java | 39 ++- .../layers/objdetect/Yolo2OutputLayer.java | 6 +- .../nn/layers/objdetect/Yolo2OutputLayer.java | 15 +- .../nn/layers/objdetect/YoloUtils.java | 20 +- 6 files changed, 260 insertions(+), 158 deletions(-) diff --git a/datavec/datavec-data/datavec-data-image/src/main/java/org/datavec/image/recordreader/objdetect/ObjectDetectionRecordReader.java b/datavec/datavec-data/datavec-data-image/src/main/java/org/datavec/image/recordreader/objdetect/ObjectDetectionRecordReader.java index 1a53a05ac..38afd6adf 100644 --- a/datavec/datavec-data/datavec-data-image/src/main/java/org/datavec/image/recordreader/objdetect/ObjectDetectionRecordReader.java +++ b/datavec/datavec-data/datavec-data-image/src/main/java/org/datavec/image/recordreader/objdetect/ObjectDetectionRecordReader.java @@ -49,7 +49,7 @@ import static org.nd4j.linalg.indexing.NDArrayIndex.point; /** * An image record reader for object detection. *

- * Format of returned values: 4d array, with dimensions [minibatch, 4+C, h, w] + * Format of returned values: 4d array, with dimensions [minibatch, 4+C, h, w] (nchw) or [minibatch, h, w, 4+C] (nhwc) * Where the image is quantized into h x w grid locations. *
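For example (editor's illustration, not part of the patch): with C = 2 classes, label channels
* 0..3 hold an object's x1, y1, x2, y2 box coordinates scaled to grid units, and channels 4..5
* hold the one-hot class indicator - exactly the layout built by the putScalar calls in the updated test below.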

* Note that this matches the format required for Deeplearning4j's Yolo2OutputLayer @@ -61,42 +61,67 @@ public class ObjectDetectionRecordReader extends BaseImageRecordReader { private final int gridW; private final int gridH; private final ImageObjectLabelProvider labelProvider; + private final boolean nchw; protected Image currentImage; /** + * As per {@link #ObjectDetectionRecordReader(int, int, int, int, int, boolean, ImageObjectLabelProvider)} but hardcoded + * to NCHW format + */ + public ObjectDetectionRecordReader(int height, int width, int channels, int gridH, int gridW, ImageObjectLabelProvider labelProvider) { + this(height, width, channels, gridH, gridW, true, labelProvider); + } + + /** + * Create an ObjectDetectionRecordReader with a configurable (NCHW or NHWC) label format. * * @param height Height of the output images * @param width Width of the output images * @param channels Number of channels for the output images * @param gridH Grid/quantization size (along height dimension) - Y axis * @param gridW Grid/quantization size (along height dimension) - X axis + * @param nchw If true: return NCHW format labels with array shape [minibatch, 4+C, h, w]; if false, return + * NHWC format labels with array shape [minibatch, h, w, 4+C] * @param labelProvider ImageObjectLabelProvider - used to look up which objects are in each image */ - public ObjectDetectionRecordReader(int height, int width, int channels, int gridH, int gridW, ImageObjectLabelProvider labelProvider) { + public ObjectDetectionRecordReader(int height, int width, int channels, int gridH, int gridW, boolean nchw, ImageObjectLabelProvider labelProvider) { super(height, width, channels, null, null); this.gridW = gridW; this.gridH = gridH; + this.nchw = nchw; this.labelProvider = labelProvider; this.appendLabel = labelProvider != null; } /** - * When imageTransform != null, object is removed if new center is outside of transformed image bounds. - * - * @param height Height of the output images - * @param width Width of the output images - * @param channels Number of channels for the output images - * @param gridH Grid/quantization size (along height dimension) - Y axis - * @param gridW Grid/quantization size (along height dimension) - X axis - * @param labelProvider ImageObjectLabelProvider - used to look up which objects are in each image - * @param imageTransform ImageTransform - used to transform image and coordinates + * As per {@link #ObjectDetectionRecordReader(int, int, int, int, int, boolean, ImageObjectLabelProvider, ImageTransform)} + * but hardcoded to NCHW format */ public ObjectDetectionRecordReader(int height, int width, int channels, int gridH, int gridW, - ImageObjectLabelProvider labelProvider, ImageTransform imageTransform) { + ImageObjectLabelProvider labelProvider, ImageTransform imageTransform) { + this(height, width, channels, gridH, gridW, true, labelProvider, imageTransform); + } + + /** + * When imageTransform != null, object is removed if new center is outside of transformed image bounds. 
+ * + * @param height Height of the output images + * @param width Width of the output images + * @param channels Number of channels for the output images + * @param gridH Grid/quantization size (along height dimension) - Y axis + * @param gridW Grid/quantization size (along width dimension) - X axis + * @param nchw If true: return NCHW format labels with array shape [minibatch, 4+C, h, w]; if false, return + * NHWC format labels with array shape [minibatch, h, w, 4+C] + * @param labelProvider ImageObjectLabelProvider - used to look up which objects are in each image + * @param imageTransform ImageTransform - used to transform image and coordinates + */ + public ObjectDetectionRecordReader(int height, int width, int channels, int gridH, int gridW, boolean nchw, + ImageObjectLabelProvider labelProvider, ImageTransform imageTransform) { super(height, width, channels, null, null); this.gridW = gridW; this.gridH = gridH; + this.nchw = nchw; this.labelProvider = labelProvider; this.appendLabel = labelProvider != null; this.imageTransform = imageTransform; @@ -182,6 +207,10 @@ public class ObjectDetectionRecordReader extends BaseImageRecordReader { exampleNum++; } + if(!nchw) { + outImg = outImg.permute(0, 2, 3, 1); //NCHW to NHWC + outLabel = outLabel.permute(0, 2, 3, 1); + } return new NDArrayRecordBatch(Arrays.asList(outImg, outLabel)); } @@ -256,6 +285,8 @@ public class ObjectDetectionRecordReader extends BaseImageRecordReader { imageLoader = new NativeImageLoader(height, width, channels, imageTransform); } Image image = this.imageLoader.asImageMatrix(dataInputStream); + if(!nchw) + image.setImage(image.getImage().permute(0,2,3,1)); Nd4j.getAffinityManager().ensureLocation(image.getImage(), AffinityManager.Location.DEVICE); List<Writable> ret = RecordConverter.toRecord(image.getImage()); @@ -264,6 +295,8 @@ int nClasses = labels.size(); INDArray outLabel = Nd4j.create(1, 4 + nClasses, gridH, gridW); label(image, imageObjectsForPath, outLabel, 0); + if(!nchw) + outLabel = outLabel.permute(0,2,3,1); //NCHW to NHWC ret.add(new NDArrayWritable(outLabel)); } return ret; diff --git a/datavec/datavec-data/datavec-data-image/src/test/java/org/datavec/image/recordreader/TestObjectDetectionRecordReader.java b/datavec/datavec-data/datavec-data-image/src/test/java/org/datavec/image/recordreader/TestObjectDetectionRecordReader.java index d8620096a..5e4598005 100644 --- a/datavec/datavec-data/datavec-data-image/src/test/java/org/datavec/image/recordreader/TestObjectDetectionRecordReader.java +++ b/datavec/datavec-data/datavec-data-image/src/test/java/org/datavec/image/recordreader/TestObjectDetectionRecordReader.java @@ -56,168 +56,179 @@ public class TestObjectDetectionRecordReader { @Test public void test() throws Exception { - ImageObjectLabelProvider lp = new TestImageObjectDetectionLabelProvider(); + for(boolean nchw : new boolean[]{true, false}) { + ImageObjectLabelProvider lp = new TestImageObjectDetectionLabelProvider(); - File f = testDir.newFolder(); - new ClassPathResource("datavec-data-image/objdetect/").copyDirectory(f); + File f = testDir.newFolder(); + new ClassPathResource("datavec-data-image/objdetect/").copyDirectory(f); - String path = new File(f, "000012.jpg").getParent(); + String path = new File(f, "000012.jpg").getParent(); - int h = 32; - int w = 32; - int c = 3; - int gW = 13; - int gH = 10; + int h = 32; + int w = 32; + int c = 3; + int gW = 13; + int gH = 10; - //Enforce consistent iteration order for tests - URI[] 
u = new FileSplit(new File(path)).locations(); - Arrays.sort(u); + //Enforce consistent iteration order for tests + URI[] u = new FileSplit(new File(path)).locations(); + Arrays.sort(u); - RecordReader rr = new ObjectDetectionRecordReader(h, w, c, gH, gW, lp); - rr.initialize(new CollectionInputSplit(u)); + RecordReader rr = new ObjectDetectionRecordReader(h, w, c, gH, gW, nchw, lp); + rr.initialize(new CollectionInputSplit(u)); - RecordReader imgRR = new ImageRecordReader(h, w, c); - imgRR.initialize(new CollectionInputSplit(u)); + RecordReader imgRR = new ImageRecordReader(h, w, c, nchw); + imgRR.initialize(new CollectionInputSplit(u)); - List labels = rr.getLabels(); - assertEquals(Arrays.asList("car", "cat"), labels); + List labels = rr.getLabels(); + assertEquals(Arrays.asList("car", "cat"), labels); - //000012.jpg - originally 500x333 - //000019.jpg - originally 500x375 - double[] origW = new double[]{500, 500}; - double[] origH = new double[]{333, 375}; - List> l = Arrays.asList( - Collections.singletonList(new ImageObject(156, 97, 351, 270, "car")), - Arrays.asList(new ImageObject(11, 113, 266, 259, "cat"), new ImageObject(231, 88, 483, 256, "cat")) - ); + //000012.jpg - originally 500x333 + //000019.jpg - originally 500x375 + double[] origW = new double[]{500, 500}; + double[] origH = new double[]{333, 375}; + List> l = Arrays.asList( + Collections.singletonList(new ImageObject(156, 97, 351, 270, "car")), + Arrays.asList(new ImageObject(11, 113, 266, 259, "cat"), new ImageObject(231, 88, 483, 256, "cat")) + ); - for (int idx = 0; idx < 2; idx++) { - assertTrue(rr.hasNext()); - List next = rr.next(); - List nextImgRR = imgRR.next(); + for (int idx = 0; idx < 2; idx++) { + assertTrue(rr.hasNext()); + List next = rr.next(); + List nextImgRR = imgRR.next(); - //Check features: - assertEquals(next.get(0), nextImgRR.get(0)); + //Check features: + assertEquals(next.get(0), nextImgRR.get(0)); - //Check labels - assertEquals(2, next.size()); - assertTrue(next.get(0) instanceof NDArrayWritable); - assertTrue(next.get(1) instanceof NDArrayWritable); + //Check labels + assertEquals(2, next.size()); + assertTrue(next.get(0) instanceof NDArrayWritable); + assertTrue(next.get(1) instanceof NDArrayWritable); - List objects = l.get(idx); + List objects = l.get(idx); - INDArray expLabels = Nd4j.create(1, 4 + 2, gH, gW); - for (ImageObject io : objects) { - double fracImageX1 = io.getX1() / origW[idx]; - double fracImageY1 = io.getY1() / origH[idx]; - double fracImageX2 = io.getX2() / origW[idx]; - double fracImageY2 = io.getY2() / origH[idx]; + INDArray expLabels = Nd4j.create(1, 4 + 2, gH, gW); + for (ImageObject io : objects) { + double fracImageX1 = io.getX1() / origW[idx]; + double fracImageY1 = io.getY1() / origH[idx]; + double fracImageX2 = io.getX2() / origW[idx]; + double fracImageY2 = io.getY2() / origH[idx]; - double x1C = (fracImageX1 + fracImageX2) / 2.0; - double y1C = (fracImageY1 + fracImageY2) / 2.0; + double x1C = (fracImageX1 + fracImageX2) / 2.0; + double y1C = (fracImageY1 + fracImageY2) / 2.0; - int labelGridX = (int) (x1C * gW); - int labelGridY = (int) (y1C * gH); + int labelGridX = (int) (x1C * gW); + int labelGridY = (int) (y1C * gH); - int labelIdx; - if (io.getLabel().equals("car")) { - labelIdx = 4; - } else { - labelIdx = 5; + int labelIdx; + if (io.getLabel().equals("car")) { + labelIdx = 4; + } else { + labelIdx = 5; + } + expLabels.putScalar(0, labelIdx, labelGridY, labelGridX, 1.0); + + expLabels.putScalar(0, 0, labelGridY, labelGridX, fracImageX1 * gW); + 
expLabels.putScalar(0, 1, labelGridY, labelGridX, fracImageY1 * gH); + expLabels.putScalar(0, 2, labelGridY, labelGridX, fracImageX2 * gW); + expLabels.putScalar(0, 3, labelGridY, labelGridX, fracImageY2 * gH); } - expLabels.putScalar(0, labelIdx, labelGridY, labelGridX, 1.0); - expLabels.putScalar(0, 0, labelGridY, labelGridX, fracImageX1 * gW); - expLabels.putScalar(0, 1, labelGridY, labelGridX, fracImageY1 * gH); - expLabels.putScalar(0, 2, labelGridY, labelGridX, fracImageX2 * gW); - expLabels.putScalar(0, 3, labelGridY, labelGridX, fracImageY2 * gH); + INDArray lArr = ((NDArrayWritable) next.get(1)).get(); + if(nchw) { + assertArrayEquals(new long[]{1, 4 + 2, gH, gW}, lArr.shape()); + } else { + assertArrayEquals(new long[]{1, gH, gW, 4 + 2}, lArr.shape()); + } + + if(!nchw) + expLabels = expLabels.permute(0,2,3,1); //NCHW to NHWC + + assertEquals(expLabels, lArr); } - INDArray lArr = ((NDArrayWritable) next.get(1)).get(); - assertArrayEquals(new long[]{1, 4 + 2, gH, gW}, lArr.shape()); - assertEquals(expLabels, lArr); - } + rr.reset(); + Record record = rr.nextRecord(); + RecordMetaDataImageURI metadata = (RecordMetaDataImageURI) record.getMetaData(); + assertEquals(new File(path, "000012.jpg"), new File(metadata.getURI())); + assertEquals(3, metadata.getOrigC()); + assertEquals((int) origH[0], metadata.getOrigH()); + assertEquals((int) origW[0], metadata.getOrigW()); - rr.reset(); - Record record = rr.nextRecord(); - RecordMetaDataImageURI metadata = (RecordMetaDataImageURI)record.getMetaData(); - assertEquals(new File(path, "000012.jpg"), new File(metadata.getURI())); - assertEquals(3, metadata.getOrigC()); - assertEquals((int)origH[0], metadata.getOrigH()); - assertEquals((int)origW[0], metadata.getOrigW()); + List out = new ArrayList<>(); + List meta = new ArrayList<>(); + out.add(record); + meta.add(metadata); + record = rr.nextRecord(); + metadata = (RecordMetaDataImageURI) record.getMetaData(); + out.add(record); + meta.add(metadata); - List out = new ArrayList<>(); - List meta = new ArrayList<>(); - out.add(record); - meta.add(metadata); - record = rr.nextRecord(); - metadata = (RecordMetaDataImageURI)record.getMetaData(); - out.add(record); - meta.add(metadata); + List fromMeta = rr.loadFromMetaData(meta); + assertEquals(out, fromMeta); - List fromMeta = rr.loadFromMetaData(meta); - assertEquals(out, fromMeta); + // make sure we don't lose objects just by explicitly resizing + int i = 0; + int[] nonzeroCount = {5, 10}; - // make sure we don't lose objects just by explicitly resizing - int i = 0; - int[] nonzeroCount = {5, 10}; + ImageTransform transform = new ResizeImageTransform(37, 42); + RecordReader rrTransform = new ObjectDetectionRecordReader(42, 37, c, gH, gW, nchw, lp, transform); + rrTransform.initialize(new CollectionInputSplit(u)); + i = 0; + while (rrTransform.hasNext()) { + List next = rrTransform.next(); + assertEquals(37, transform.getCurrentImage().getWidth()); + assertEquals(42, transform.getCurrentImage().getHeight()); + INDArray labelArray = ((NDArrayWritable) next.get(1)).get(); + BooleanIndexing.replaceWhere(labelArray, 1, Conditions.notEquals(0)); + assertEquals(nonzeroCount[i++], labelArray.sum().getInt(0)); + } - ImageTransform transform = new ResizeImageTransform(37, 42); - RecordReader rrTransform = new ObjectDetectionRecordReader(42, 37, c, gH, gW, lp, transform); - rrTransform.initialize(new CollectionInputSplit(u)); - i = 0; - while (rrTransform.hasNext()) { - List next = rrTransform.next(); - assertEquals(37, 
transform.getCurrentImage().getWidth()); - assertEquals(42, transform.getCurrentImage().getHeight()); - INDArray labelArray = ((NDArrayWritable)next.get(1)).get(); - BooleanIndexing.replaceWhere(labelArray, 1, Conditions.notEquals(0)); - assertEquals(nonzeroCount[i++], labelArray.sum().getInt(0)); - } + ImageTransform transform2 = new ResizeImageTransform(1024, 2048); + RecordReader rrTransform2 = new ObjectDetectionRecordReader(2048, 1024, c, gH, gW, nchw, lp, transform2); + rrTransform2.initialize(new CollectionInputSplit(u)); + i = 0; + while (rrTransform2.hasNext()) { + List next = rrTransform2.next(); + assertEquals(1024, transform2.getCurrentImage().getWidth()); + assertEquals(2048, transform2.getCurrentImage().getHeight()); + INDArray labelArray = ((NDArrayWritable) next.get(1)).get(); + BooleanIndexing.replaceWhere(labelArray, 1, Conditions.notEquals(0)); + assertEquals(nonzeroCount[i++], labelArray.sum().getInt(0)); + } + + //Make sure image flip does not break labels and are correct for new image size dimensions: + ImageTransform transform3 = new PipelineImageTransform( + new ResizeImageTransform(2048, 4096), + new FlipImageTransform(-1) + ); + RecordReader rrTransform3 = new ObjectDetectionRecordReader(2048, 1024, c, gH, gW, nchw, lp, transform3); + rrTransform3.initialize(new CollectionInputSplit(u)); + i = 0; + while (rrTransform3.hasNext()) { + List next = rrTransform3.next(); + INDArray labelArray = ((NDArrayWritable) next.get(1)).get(); + BooleanIndexing.replaceWhere(labelArray, 1, Conditions.notEquals(0)); + assertEquals(nonzeroCount[i++], labelArray.sum().getInt(0)); + } + + //Test that doing a downscale with the native image loader directly instead of a transform does not cause an exception: + ImageTransform transform4 = new FlipImageTransform(-1); + RecordReader rrTransform4 = new ObjectDetectionRecordReader(128, 128, c, gH, gW, nchw, lp, transform4); + rrTransform4.initialize(new CollectionInputSplit(u)); + i = 0; + while (rrTransform4.hasNext()) { + List next = rrTransform4.next(); + + assertEquals((int) origW[i], transform4.getCurrentImage().getWidth()); + assertEquals((int) origH[i], transform4.getCurrentImage().getHeight()); + + INDArray labelArray = ((NDArrayWritable) next.get(1)).get(); + BooleanIndexing.replaceWhere(labelArray, 1, Conditions.notEquals(0)); + assertEquals(nonzeroCount[i++], labelArray.sum().getInt(0)); + } - ImageTransform transform2 = new ResizeImageTransform(1024, 2048); - RecordReader rrTransform2 = new ObjectDetectionRecordReader(2048, 1024, c, gH, gW, lp, transform2); - rrTransform2.initialize(new CollectionInputSplit(u)); - i = 0; - while (rrTransform2.hasNext()) { - List next = rrTransform2.next(); - assertEquals(1024, transform2.getCurrentImage().getWidth()); - assertEquals(2048, transform2.getCurrentImage().getHeight()); - INDArray labelArray = ((NDArrayWritable)next.get(1)).get(); - BooleanIndexing.replaceWhere(labelArray, 1, Conditions.notEquals(0)); - assertEquals(nonzeroCount[i++], labelArray.sum().getInt(0)); - } - - //Make sure image flip does not break labels and are correct for new image size dimensions: - ImageTransform transform3 = new PipelineImageTransform( - new ResizeImageTransform(2048, 4096), - new FlipImageTransform(-1) - ); - RecordReader rrTransform3 = new ObjectDetectionRecordReader(2048, 1024, c, gH, gW, lp, transform3); - rrTransform3.initialize(new CollectionInputSplit(u)); - i = 0; - while (rrTransform3.hasNext()) { - List next = rrTransform3.next(); - INDArray labelArray = ((NDArrayWritable)next.get(1)).get(); - 
BooleanIndexing.replaceWhere(labelArray, 1, Conditions.notEquals(0)); - assertEquals(nonzeroCount[i++], labelArray.sum().getInt(0)); - } - - //Test that doing a downscale with the native image loader directly instead of a transform does not cause an exception: - ImageTransform transform4 = new FlipImageTransform(-1); - RecordReader rrTransform4 = new ObjectDetectionRecordReader(128, 128, c, gH, gW, lp, transform4); - rrTransform4.initialize(new CollectionInputSplit(u)); - i = 0; - while (rrTransform4.hasNext()) { - List next = rrTransform4.next(); - - assertEquals((int) origW[i], transform4.getCurrentImage().getWidth()); - assertEquals((int) origH[i], transform4.getCurrentImage().getHeight()); - - INDArray labelArray = ((NDArrayWritable)next.get(1)).get(); - BooleanIndexing.replaceWhere(labelArray, 1, Conditions.notEquals(0)); - assertEquals(nonzeroCount[i++], labelArray.sum().getInt(0)); } } diff --git a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/YoloGradientCheckTests.java b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/YoloGradientCheckTests.java index 5646b6519..47c040c12 100644 --- a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/YoloGradientCheckTests.java +++ b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/YoloGradientCheckTests.java @@ -24,9 +24,7 @@ import org.datavec.image.recordreader.objdetect.impl.VocLabelProvider; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.TestUtils; import org.deeplearning4j.datasets.datavec.RecordReaderDataSetIterator; -import org.deeplearning4j.nn.conf.ConvolutionMode; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.*; import org.deeplearning4j.nn.conf.distribution.GaussianDistribution; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; @@ -36,6 +34,8 @@ import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; @@ -50,17 +50,28 @@ import java.io.File; import java.io.FileOutputStream; import java.io.InputStream; +import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertTrue; /** * @author Alex Black */ +@RunWith(Parameterized.class) public class YoloGradientCheckTests extends BaseDL4JTest { static { Nd4j.setDataType(DataType.DOUBLE); } + private CNN2DFormat format; + public YoloGradientCheckTests(CNN2DFormat format){ + this.format = format; + } + @Parameterized.Parameters(name = "{0}") + public static Object[] params(){ + return CNN2DFormat.values(); + } + @Rule public TemporaryFolder testDir = new TemporaryFolder(); @@ -97,8 +108,14 @@ public class YoloGradientCheckTests extends BaseDL4JTest { Nd4j.getRandom().setSeed(12345); - INDArray input = Nd4j.rand(new int[]{mb, depthIn, h, w}); - INDArray labels = yoloLabels(mb, c, h, w); + INDArray input, labels; + if(format == CNN2DFormat.NCHW){ + input = Nd4j.rand(DataType.DOUBLE, mb, depthIn, h, w); + labels = yoloLabels(mb, c, h, w); + } else { + input = Nd4j.rand(DataType.DOUBLE, mb, h, w, depthIn); + labels = yoloLabels(mb, c, h, w).permute(0,2,3,1); + } 
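Note (editorial, not part of the patch): both the record reader changes above and this gradient check rely on the same axis permutation, (0, 2, 3, 1), to move the channel dimension last. A minimal C++ sketch of that transform in libnd4j's NDArray API follows; the include path and free-standing function are assumptions for illustration only:

```c++
// Illustrative only: NCHW -> NHWC by permuting axes (0, 2, 3, 1).
#include <array/NDArrayFactory.h>   // assumed include path

void nchwToNhwcExample() {
    // [minibatch, channels, h, w] = {2, 9, 4, 5}
    auto nchw = sd::NDArrayFactory::create<float>('c', {2, 9, 4, 5});
    // the channel axis (1) moves to the end: [minibatch, h, w, channels]
    auto nhwc = nchw.permute({0, 2, 3, 1});   // shape is now {2, 4, 5, 9}
}
```

The Java side in the hunks above does the same with INDArray.permute(0, 2, 3, 1).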
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) .dataType(DataType.DOUBLE) @@ -112,6 +129,7 @@ public class YoloGradientCheckTests extends BaseDL4JTest { .layer(new Yolo2OutputLayer.Builder() .boundingBoxPriors(bbPrior) .build()) + .setInputType(InputType.convolutional(h, w, depthIn, format)) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -120,7 +138,18 @@ public class YoloGradientCheckTests extends BaseDL4JTest { String msg = "testYoloOutputLayer() - minibatch = " + mb + ", w=" + w + ", h=" + h + ", l1=" + l1[i] + ", l2=" + l2[i]; System.out.println(msg); + INDArray out = net.output(input); + if(format == CNN2DFormat.NCHW){ + assertArrayEquals(new long[]{mb, yoloDepth, h, w}, out.shape()); + } else { + assertArrayEquals(new long[]{mb, h, w, yoloDepth}, out.shape()); + } + + net.fit(input, labels); + + boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.MLNConfig().net(net).input(input) + .minAbsoluteError(1e-6) .labels(labels).subset(true).maxPerParam(100)); assertTrue(msg, gradOK); diff --git a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/objdetect/Yolo2OutputLayer.java b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/objdetect/Yolo2OutputLayer.java index 24bda07f6..6ffb92978 100644 --- a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/objdetect/Yolo2OutputLayer.java +++ b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/objdetect/Yolo2OutputLayer.java @@ -21,6 +21,7 @@ import lombok.Getter; import lombok.Setter; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; +import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.GradientNormalization; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -80,6 +81,8 @@ public class Yolo2OutputLayer extends org.deeplearning4j.nn.conf.layers.Layer { @JsonDeserialize(using = BoundingBoxesDeserializer.class) private INDArray boundingBoxes; + private CNN2DFormat format = CNN2DFormat.NCHW; //Default for serialization of old formats + private Yolo2OutputLayer() { //No-arg constructor for Jackson JSON } @@ -119,7 +122,8 @@ public class Yolo2OutputLayer extends org.deeplearning4j.nn.conf.layers.Layer { @Override public void setNIn(InputType inputType, boolean override) { - //No op + InputType.InputTypeConvolutional c = (InputType.InputTypeConvolutional) inputType; + this.format = c.getFormat(); } @Override diff --git a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/objdetect/Yolo2OutputLayer.java b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/objdetect/Yolo2OutputLayer.java index eb5a4d19e..4d118c62b 100644 --- a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/objdetect/Yolo2OutputLayer.java +++ b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/objdetect/Yolo2OutputLayer.java @@ -19,6 +19,7 @@ package org.deeplearning4j.nn.layers.objdetect; import lombok.*; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.layers.IOutputLayer; +import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; @@ -110,6 +111,12 @@ public class Yolo2OutputLayer extends 
AbstractLayer Date: Sat, 6 Jun 2020 15:26:55 +0300 Subject: [PATCH 21/21] C++ rearrangements (#485) * initial commit Signed-off-by: raver119@gmail.com * some minor singleton changes Signed-off-by: raver119@gmail.com * more iterations Signed-off-by: raver119 * more singletons updated Signed-off-by: raver119 * more singletons updated Signed-off-by: raver119 * more changes Signed-off-by: raver119@gmail.com * CUDA updates Signed-off-by: raver119@gmail.com * Java side update Signed-off-by: raver119@gmail.com * one commented out test Signed-off-by: raver119@gmail.com --- libnd4j/UnderstandingGraph.md | 2 +- libnd4j/include/array/ConstantDataBuffer.h | 29 +- libnd4j/include/array/ConstantOffsetsBuffer.h | 49 ++++ libnd4j/include/array/ConstantShapeBuffer.h | 49 ++++ .../include/array/CudaPointerDeallocator.h | 38 +++ libnd4j/include/array/DataTypeUtils.h | 6 +- libnd4j/include/array/NDArray.h | 21 +- libnd4j/include/array/NDArray.hXX | 169 ++++++------ libnd4j/include/array/PointerDeallocator.h | 39 +++ libnd4j/include/array/PointerWrapper.h | 49 ++++ .../include/array/PrimaryPointerDeallocator.h | 38 +++ libnd4j/include/array/TadPack.h | 9 +- libnd4j/include/array/cpu/NDArray.cpp | 2 +- .../array/cuda/CudaPointerDeallocator.cu | 29 ++ libnd4j/include/array/cuda/DataBuffer.cu | 12 +- libnd4j/include/array/cuda/NDArray.cu | 2 +- .../include/array/impl/ConstantDataBuffer.cpp | 57 ++-- .../array/impl/ConstantOffsetsBuffer.cpp | 51 ++++ .../array/impl/ConstantShapeBuffer.cpp | 51 ++++ libnd4j/include/array/impl/DataBuffer.cpp | 22 +- .../include/array/impl/PointerDeallocator.cpp | 29 ++ libnd4j/include/array/impl/PointerWrapper.cpp | 37 +++ .../array/impl/PrimaryPointerDeallocator.cpp | 29 ++ libnd4j/include/array/impl/TadPack.cpp | 16 +- libnd4j/include/execution/ThreadPool.h | 6 +- libnd4j/include/execution/Threads.h | 16 +- .../include/execution/cpu/LaunchContext.cpp | 19 +- .../include/execution/cuda/LaunchContext.cu | 52 ++-- libnd4j/include/execution/impl/ThreadPool.cpp | 30 +- libnd4j/include/execution/impl/Threads.cpp | 14 +- libnd4j/include/execution/impl/Ticket.cpp | 6 +- libnd4j/include/graph/ContextPrototype.h | 2 +- libnd4j/include/graph/GraphHolder.h | 3 +- .../graph/execution/impl/LogicReturn.cpp | 4 +- .../graph/execution/impl/LogicWhile.cpp | 2 +- libnd4j/include/graph/impl/Context.cpp | 2 +- libnd4j/include/graph/impl/Graph.cpp | 14 +- .../include/graph/impl/GraphExecutioner.cpp | 26 +- libnd4j/include/graph/impl/GraphHolder.cpp | 10 +- libnd4j/include/graph/impl/Node.cpp | 2 +- libnd4j/include/helpers/BlasHelper.h | 4 +- libnd4j/include/helpers/ConstantHelper.h | 5 +- libnd4j/include/helpers/ConstantShapeHelper.h | 18 +- libnd4j/include/helpers/ConstantTadHelper.h | 4 +- libnd4j/include/helpers/DebugHelper.h | 2 +- libnd4j/include/helpers/LoopKind.h | 2 +- libnd4j/include/helpers/Loops.h | 8 +- libnd4j/include/helpers/OpTracker.h | 4 +- .../helpers/benchmark/BroadcastBenchmark.h | 12 +- .../helpers/benchmark/DeclarableBenchmark.h | 2 +- .../helpers/benchmark/ReductionBenchmark.h | 6 +- .../include/helpers/cpu/ConstantHelper.cpp | 25 +- .../helpers/cpu/ConstantShapeHelper.cpp | 63 ++--- .../include/helpers/cpu/ConstantTadHelper.cpp | 59 +--- libnd4j/include/helpers/cpu/MmulHelper.cpp | 16 +- libnd4j/include/helpers/cpu/cublasHelper.cpp | 11 +- libnd4j/include/helpers/cublasHelper.h | 5 +- .../include/helpers/cuda/ConstantHelper.cu | 29 +- .../helpers/cuda/ConstantShapeHelper.cu | 51 ++-- .../include/helpers/cuda/ConstantTadHelper.cu | 29 +- .../include/helpers/cuda_off/MmulHelper.cu 
| 8 +- .../include/helpers/cuda_off/cublasHelper.cu | 13 +- libnd4j/include/helpers/helper_hash.h | 4 +- libnd4j/include/helpers/impl/BlasHelper.cpp | 29 +- .../include/helpers/impl/OmpLaunchHelper.cpp | 12 +- libnd4j/include/helpers/impl/OpTracker.cpp | 10 +- libnd4j/include/helpers/impl/ShapeUtils.cpp | 30 +- libnd4j/include/helpers/impl/helper_hash.cpp | 10 +- libnd4j/include/helpers/logger.h | 6 +- libnd4j/include/helpers/shape.h | 8 +- libnd4j/include/legacy/NativeOps.h | 10 +- .../legacy/cpu/NativeOpExecutioner.cpp | 54 ++-- libnd4j/include/legacy/cpu/NativeOps.cpp | 80 +++--- .../legacy/cuda/NativeOpExecutioner.cu | 22 +- libnd4j/include/legacy/cuda/NativeOps.cu | 246 +++++++++-------- libnd4j/include/legacy/impl/Environment.cpp | 23 +- libnd4j/include/loops/cpu/broadcasting.hpp | 6 +- .../include/loops/cpu/broadcasting_bool.hpp | 8 +- .../include/loops/cpu/broadcasting_int.hpp | 8 +- libnd4j/include/loops/cpu/indexreduce.hpp | 4 +- .../include/loops/cpu/reduce/reduce_bool.cpp | 4 +- .../include/loops/cpu/reduce/reduce_float.hpp | 6 +- .../include/loops/cpu/reduce/reduce_long.cpp | 6 +- .../include/loops/cpu/reduce/reduce_same.cpp | 6 +- libnd4j/include/loops/cpu/reduce3.hpp | 2 +- libnd4j/include/loops/cpu/scalar.hpp | 2 +- libnd4j/include/loops/cpu/scalar_bool.cpp | 2 +- libnd4j/include/loops/cpu/scalar_int.cpp | 2 +- .../include/loops/cpu/summarystatsreduce.cpp | 2 +- .../loops/cuda/legacy/transform.legacy | 2 +- libnd4j/include/loops/cuda/scalar.chpp | 2 +- libnd4j/include/loops/cuda/scalar_bool.cu | 2 +- libnd4j/include/loops/cuda/scalar_int.cu | 2 +- .../include/loops/cuda/summarystatsreduce.cu | 6 +- libnd4j/include/memory/MemoryCounter.h | 4 +- libnd4j/include/memory/MemoryRegistrator.h | 3 +- libnd4j/include/memory/MemoryTracker.h | 3 +- libnd4j/include/memory/impl/MemoryCounter.cpp | 14 +- .../include/memory/impl/MemoryRegistrator.cpp | 11 +- libnd4j/include/memory/impl/MemoryTracker.cpp | 14 +- .../include/ops/declarable/OpRegistrator.h | 2 +- .../generic/bitwise/bits_hamming_distance.cpp | 2 +- .../declarable/generic/blas/batched_gemm.cpp | 4 +- .../ops/declarable/generic/blas/matmul.cpp | 2 +- .../declarable/generic/blas/tensormmul.cpp | 2 +- .../ops/declarable/generic/boolean/choose.cpp | 4 +- .../ops/declarable/generic/boolean/where.cpp | 2 +- .../declarable/generic/boolean/where_np.cpp | 4 +- .../generic/compat/compat_sparse_to_dense.cpp | 2 +- .../generic/compat/compat_string_split.cpp | 4 +- .../declarable/generic/compression/bitmap.cpp | 4 +- .../generic/compression/threshold.cpp | 2 +- .../declarable/generic/datatypes/bitcast.cpp | 8 +- .../ops/declarable/generic/datatypes/cast.cpp | 2 +- .../generic/helpers/BroadcastHelper.h | 2 +- .../generic/images/crop_and_resize.cpp | 2 +- .../generic/images/image_resize.cpp | 2 +- .../generic/images/resize_images.cpp | 2 +- .../declarable/generic/images/rgbToGrs.cpp | 2 +- .../generic/kernels/knn_mindistance.cpp | 2 +- .../declarable/generic/linalg/diagPart.cpp | 2 +- .../ops/declarable/generic/linalg/eye.cpp | 2 +- .../ops/declarable/generic/linalg/lstsq.cpp | 8 +- .../generic/linalg/matrixDiagPart.cpp | 2 +- .../generic/linalg/matrix_determinant.cpp | 18 +- .../ops/declarable/generic/linalg/qr.cpp | 8 +- .../generic/linalg/sufficient_statistics.cpp | 4 +- .../ops/declarable/generic/linalg/svd.cpp | 4 +- .../ops/declarable/generic/linalg/trace.cpp | 2 +- .../ops/declarable/generic/linalg/tri.cpp | 2 +- .../generic/loss/absoluteDifference.cpp | 4 +- .../generic/loss/cosineDistance.cpp | 2 +- 
.../ops/declarable/generic/loss/hingeLoss.cpp | 4 +- .../ops/declarable/generic/loss/huberLoss.cpp | 4 +- .../ops/declarable/generic/loss/l2_loss.cpp | 2 +- .../ops/declarable/generic/loss/logLoss.cpp | 4 +- .../generic/loss/log_poisson_loss.cpp | 4 +- .../generic/loss/meanPairWsSqErr.cpp | 2 +- .../ops/declarable/generic/loss/meanSqErr.cpp | 4 +- .../generic/loss/sigmCrossEntropy.cpp | 4 +- .../generic/loss/softmaxCrossEntropy.cpp | 8 +- .../loss/softmaxCrossEntropyWithLogits.cpp | 4 +- .../generic/nn/activations/crelu.cpp | 4 +- .../ops/declarable/generic/nn/batchnorm.cpp | 4 +- .../ops/declarable/generic/nn/bias_add.cpp | 2 +- .../declarable/generic/nn/convo/deconv2d.cpp | 2 +- .../generic/nn/convo/deconv2d_tf.cpp | 2 +- .../generic/nn/convo/dilation2d.cpp | 4 +- .../generic/nn/dot_product_attention.cpp | 4 +- .../generic/nn/embedding_lookup.cpp | 4 +- .../nn/multi_head_dot_product_attention.cpp | 4 +- .../generic/nn/pooling/avgpool2d.cpp | 4 +- .../generic/nn/pooling/avgpool3d.cpp | 4 +- .../generic/nn/pooling/maxpool2d.cpp | 4 +- .../generic/nn/pooling/maxpool3d.cpp | 4 +- .../nn/pooling/maxpool_with_argmax.cpp | 4 +- .../generic/nn/pooling/pnormpool2d.cpp | 4 +- .../declarable/generic/nn/recurrent/gru.cpp | 12 +- .../generic/nn/recurrent/gruCell.cpp | 2 +- .../generic/nn/recurrent/lstmCell.cpp | 2 +- .../generic/nn/recurrent/lstmLayer.cpp | 6 +- .../declarable/generic/nn/recurrent/sru.cpp | 12 +- .../generic/nn/recurrent/sruCell.cpp | 2 +- .../generic/parity_ops/bincount.cpp | 2 +- .../parity_ops/broadcast_dynamic_shape.cpp | 2 +- .../generic/parity_ops/check_numerics.cpp | 2 +- .../parity_ops/compare_and_bitpack.cpp | 2 +- .../generic/parity_ops/confusion_matrix.cpp | 2 +- .../declarable/generic/parity_ops/expose.cpp | 2 +- .../generic/parity_ops/in_top_k.cpp | 2 +- .../generic/parity_ops/listdiff.cpp | 4 +- .../parity_ops/non_max_suppression.cpp | 4 +- .../non_max_suppression_overlaps.cpp | 2 +- .../generic/parity_ops/nth_element.cpp | 4 +- .../declarable/generic/parity_ops/onehot.cpp | 2 +- .../declarable/generic/parity_ops/top_k.cpp | 2 +- .../declarable/generic/parity_ops/unique.cpp | 12 +- .../generic/parity_ops/zero_fraction.cpp | 2 +- .../declarable/generic/random/bernoulli.cpp | 2 +- .../declarable/generic/random/exponential.cpp | 2 +- .../ops/declarable/generic/random/gamma.cpp | 2 +- .../declarable/generic/random/get_seed.cpp | 2 +- .../declarable/generic/random/multinomial.cpp | 2 +- .../ops/declarable/generic/random/normal.cpp | 2 +- .../ops/declarable/generic/random/poisson.cpp | 2 +- .../declarable/generic/random/random_crop.cpp | 2 +- .../declarable/generic/random/set_seed.cpp | 2 +- .../ops/declarable/generic/random/uniform.cpp | 2 +- .../ops/declarable/generic/reduce/argamax.cpp | 2 +- .../ops/declarable/generic/reduce/argamin.cpp | 2 +- .../ops/declarable/generic/reduce/argmax.cpp | 2 +- .../ops/declarable/generic/reduce/argmin.cpp | 2 +- .../declarable/generic/shape/broadcast_to.cpp | 2 +- .../shape/evaluate_reduction_shape.cpp | 4 +- .../declarable/generic/shape/expand_dims.cpp | 6 +- .../ops/declarable/generic/shape/flatten.cpp | 2 +- .../ops/declarable/generic/shape/order.cpp | 2 +- .../ops/declarable/generic/shape/rank.cpp | 2 +- .../ops/declarable/generic/shape/reshape.cpp | 4 +- .../ops/declarable/generic/shape/shape.cpp | 2 +- .../ops/declarable/generic/shape/shapes.cpp | 2 +- .../ops/declarable/generic/shape/size.cpp | 2 +- .../ops/declarable/generic/shape/size_at.cpp | 2 +- .../ops/declarable/generic/shape/squeeze.cpp | 6 +- 
.../generic/shape/tile_to_shape.cpp | 2 +- .../ops/declarable/generic/tensor/create.cpp | 2 +- .../ops/declarable/generic/tensor/fill.cpp | 2 +- .../declarable/generic/tensor/lin_space.cpp | 2 +- .../ops/declarable/generic/tensor/ones_as.cpp | 2 +- .../ops/declarable/generic/tensor/range.cpp | 14 +- .../generic/tensor/strided_slice.cpp | 14 +- .../declarable/generic/tensor/zeros_as.cpp | 2 +- .../declarable/generic/tests/test_scalar.cpp | 2 +- .../declarable/generic/tests/testcustom.cpp | 2 +- .../generic/thrid_party/firas_sparse.cpp | 2 +- .../generic/transforms/batch_to_space.cpp | 2 +- .../generic/transforms/batch_to_space_nd.cpp | 2 +- .../transforms/clip_by_global_norm.cpp | 2 +- .../declarable/generic/transforms/concat.cpp | 16 +- .../generic/transforms/depth_to_space.cpp | 2 +- .../generic/transforms/dynamic_stitch.cpp | 2 +- .../declarable/generic/transforms/gather.cpp | 2 +- .../generic/transforms/hashcode.cpp | 2 +- .../generic/transforms/histogram.cpp | 2 +- .../transforms/histogram_fixed_width.cpp | 2 +- .../generic/transforms/merge_add.cpp | 2 +- .../generic/transforms/merge_avg.cpp | 2 +- .../generic/transforms/merge_max.cpp | 2 +- .../generic/transforms/mirrorPad.cpp | 2 +- .../ops/declarable/generic/transforms/pad.cpp | 2 +- .../declarable/generic/transforms/repeat.cpp | 2 +- .../declarable/generic/transforms/slice.cpp | 8 +- .../generic/transforms/space_to_batch.cpp | 2 +- .../generic/transforms/space_to_batch_nd.cpp | 2 +- .../generic/transforms/space_to_depth.cpp | 2 +- .../declarable/generic/transforms/split.cpp | 4 +- .../declarable/generic/transforms/split_v.cpp | 2 +- .../declarable/generic/transforms/stack.cpp | 8 +- .../declarable/generic/transforms/tear.cpp | 4 +- .../declarable/generic/transforms/tile.cpp | 2 +- .../declarable/generic/transforms/unstack.cpp | 6 +- .../generic/util/print_affinity.cpp | 2 +- .../generic/util/print_variable.cpp | 4 +- .../ops/declarable/helpers/cpu/addBias.cpp | 4 +- .../ops/declarable/helpers/cpu/adjust_hue.cpp | 4 +- .../helpers/cpu/adjust_saturation.cpp | 4 +- .../declarable/helpers/cpu/batched_gemm.cpp | 6 +- .../ops/declarable/helpers/cpu/dynamic.cpp | 4 +- .../ops/declarable/helpers/cpu/gather.cpp | 16 +- .../helpers/cpu/gatherTransforms.cpp | 2 +- .../declarable/helpers/cpu/imagesHelpers.cpp | 12 +- .../helpers/cpu/indexReductions.hpp | 4 +- .../ops/declarable/helpers/cpu/ismax.cpp | 4 +- .../ops/declarable/helpers/cpu/lrn.cpp | 8 +- .../ops/declarable/helpers/cpu/lup.cpp | 2 +- .../declarable/helpers/cpu/nth_element.cpp | 2 +- .../ops/declarable/helpers/cpu/one_hot.cpp | 2 +- .../declarable/helpers/cpu/randomShuffle.cpp | 6 +- .../ops/declarable/helpers/cpu/roll.cpp | 4 +- .../ops/declarable/helpers/cpu/scatter.cpp | 8 +- .../ops/declarable/helpers/cpu/softmax.cpp | 2 +- .../ops/declarable/helpers/cpu/stack.cpp | 12 +- .../declarable/helpers/cuda/activations.cu | 4 +- .../ops/declarable/helpers/cuda/adjust_hue.cu | 18 +- .../helpers/cuda/adjust_saturation.cu | 18 +- .../ops/declarable/helpers/cuda/batchnorm.cu | 6 +- .../ops/declarable/helpers/cuda/confusion.cu | 2 +- .../ops/declarable/helpers/cuda/dynamic.cu | 8 +- .../helpers/cuda/extract_patches.cu | 4 +- .../helpers/cuda/histogramFixedWidth.cu | 6 +- .../declarable/helpers/cuda/imagesHelpers.cu | 24 +- .../helpers/cuda/indexReductions.cu | 8 +- .../ops/declarable/helpers/cuda/ismax.cu | 2 +- .../ops/declarable/helpers/cuda/lrn.cu | 8 +- .../ops/declarable/helpers/cuda/lstsq.cu | 2 +- .../ops/declarable/helpers/cuda/lup.cu | 24 +- 
.../declarable/helpers/cuda/matrix_band.cu | 4 +- .../helpers/cuda/matrix_diag_part.cu | 4 +- .../ops/declarable/helpers/cuda/meshgrid.cu | 2 +- .../declarable/helpers/cuda/nth_element.cu | 2 +- .../ops/declarable/helpers/cuda/percentile.cu | 2 +- .../ops/declarable/helpers/cuda/prefix.cu | 4 +- .../include/ops/declarable/helpers/cuda/qr.cu | 6 +- .../ops/declarable/helpers/cuda/reverse.cu | 4 +- .../ops/declarable/helpers/cuda/roll.cu | 2 +- .../ops/declarable/helpers/cuda/scatter.cu | 40 +-- .../declarable/helpers/cuda/scatter_simple.cu | 2 +- .../declarable/helpers/cuda/scatter_update.cu | 4 +- .../declarable/helpers/cuda/segment_max.cu | 24 +- .../declarable/helpers/cuda/segment_mean.cu | 24 +- .../declarable/helpers/cuda/segment_min.cu | 24 +- .../declarable/helpers/cuda/segment_prod.cu | 24 +- .../declarable/helpers/cuda/segment_sqrtn.cu | 12 +- .../declarable/helpers/cuda/segment_sum.cu | 20 +- .../ops/declarable/helpers/cuda/solve.cu | 10 +- .../ops/declarable/helpers/cuda/stack.cu | 8 +- .../ops/declarable/helpers/cuda/top_k.cu | 8 +- .../ops/declarable/helpers/cuda/transforms.cu | 2 +- .../helpers/cuda/triangular_solve.cu | 10 +- .../include/ops/declarable/impl/BooleanOp.cpp | 2 +- .../declarable/impl/BroadcastableBoolOp.cpp | 18 +- .../ops/declarable/impl/BroadcastableOp.cpp | 20 +- .../ops/declarable/impl/DeclarableListOp.cpp | 2 +- .../ops/declarable/impl/DeclarableOp.cpp | 30 +- .../declarable/impl/DeclarableReductionOp.cpp | 2 +- .../declarable/impl/LegacyBroadcastBoolOp.cpp | 14 +- .../ops/declarable/impl/LegacyBroadcastOp.cpp | 12 +- .../declarable/impl/LegacyIndexReduceOp.cpp | 14 +- .../impl/LegacyPairwiseTransformBoolOp.cpp | 2 +- .../ops/declarable/impl/LegacyRandomOp.cpp | 2 +- .../ops/declarable/impl/LegacyReduce3Op.cpp | 12 +- .../declarable/impl/LegacyReduceBoolOp.cpp | 12 +- .../declarable/impl/LegacyReduceFloatOp.cpp | 12 +- .../declarable/impl/LegacyReduceLongOp.cpp | 12 +- .../declarable/impl/LegacyReduceSameOp.cpp | 12 +- .../ops/declarable/impl/LegacyScalarOp.cpp | 2 +- .../ops/declarable/impl/LegacyStatsOp.cpp | 6 +- .../declarable/impl/LegacyTransformBoolOp.cpp | 2 +- .../ops/declarable/impl/OpDescriptor.cpp | 6 +- .../ops/declarable/impl/OpRegistrator.cpp | 32 +-- .../ops/declarable/impl/PlatformHelper.cpp | 2 +- libnd4j/include/ops/impl/gemm.cpp | 2 +- libnd4j/include/ops/impl/specials_double.hpp | 8 +- libnd4j/include/ops/special_random_ops.h | 12 +- libnd4j/include/system/Environment.h | 6 +- libnd4j/include/system/op_boilerplate.h | 34 +-- libnd4j/include/system/platform_boilerplate.h | 2 +- libnd4j/minifier/minifier.cpp | 2 +- libnd4j/server/GraphServer.cpp | 10 +- .../layers_tests/ConditionalTests.cpp | 12 +- .../layers_tests/ConstantShapeHelperTests.cpp | 40 +-- .../layers_tests/ConvolutionTests1.cpp | 1 - .../layers_tests/CudaBasicsTests1.cu | 26 +- .../layers_tests/CudaBasicsTests2.cu | 20 +- .../layers_tests/DataBufferTests.cpp | 26 +- .../layers_tests/DataBufferTestsCuda.cu | 34 +-- .../layers_tests/DeclarableOpsTests1.cpp | 24 +- .../layers_tests/DeclarableOpsTests12.cpp | 8 +- .../layers_tests/DeclarableOpsTests13.cpp | 6 +- .../layers_tests/DeclarableOpsTests14.cpp | 2 +- .../layers_tests/DeclarableOpsTests3.cpp | 20 +- .../layers_tests/DeclarableOpsTests4.cpp | 2 +- libnd4j/tests_cpu/layers_tests/EmptyTests.cpp | 2 +- .../layers_tests/ExtraArgumentsTests.cpp | 2 +- .../layers_tests/FlatBuffersTests.cpp | 24 +- .../layers_tests/GraphHolderTests.cpp | 28 +- .../layers_tests/GraphStateTests.cpp | 8 +- 
libnd4j/tests_cpu/layers_tests/GraphTests.cpp | 8 +- .../tests_cpu/layers_tests/HashUtilsTests.cpp | 4 +- .../layers_tests/JavaInteropTests.cpp | 48 ++-- .../layers_tests/LegacyOpsCudaTests.cu | 2 +- .../tests_cpu/layers_tests/LegacyOpsTests.cpp | 28 +- libnd4j/tests_cpu/layers_tests/MmapTests.cpp | 2 +- .../layers_tests/MultiDataTypeTests.cpp | 80 +++--- .../layers_tests/NDArrayCudaBasicsTests.cu | 6 +- .../tests_cpu/layers_tests/NativeOpsTests.cpp | 28 +- .../layers_tests/OmpLaunchHelperTests.cpp | 12 +- .../tests_cpu/layers_tests/OpTrackerTests.cpp | 12 +- .../layers_tests/PlaygroundTests.cpp | 16 +- .../layers_tests/ServerRelatedTests.cpp | 26 +- .../tests_cpu/layers_tests/SortCpuTests.cpp | 8 +- libnd4j/tests_cpu/layers_tests/TadTests.cpp | 4 +- .../tests_cpu/layers_tests/ThreadsTests.cpp | 24 +- .../tests_cpu/layers_tests/WorkspaceTests.cpp | 24 +- .../java/org/nd4j/nativeblas/NativeOps.java | 9 +- .../nativeblas/OpaqueConstantShapeBuffer.java | 27 ++ .../ops/executioner/CudaExecutioner.java | 10 +- .../java/org/nd4j/nativeblas/Nd4jCuda.java | 227 +++++++++++++-- .../org/nd4j/nativeblas/Nd4jCudaPresets.java | 8 +- .../nativecpu/ops/NativeOpExecutioner.java | 6 +- .../java/org/nd4j/nativeblas/Nd4jCpu.java | 259 +++++++++++++++--- .../org/nd4j/nativeblas/Nd4jCpuPresets.java | 8 +- .../profiling/PerformanceTrackerTests.java | 1 + 373 files changed, 2698 insertions(+), 1836 deletions(-) create mode 100644 libnd4j/include/array/ConstantOffsetsBuffer.h create mode 100644 libnd4j/include/array/ConstantShapeBuffer.h create mode 100644 libnd4j/include/array/CudaPointerDeallocator.h create mode 100644 libnd4j/include/array/PointerDeallocator.h create mode 100644 libnd4j/include/array/PointerWrapper.h create mode 100644 libnd4j/include/array/PrimaryPointerDeallocator.h create mode 100644 libnd4j/include/array/cuda/CudaPointerDeallocator.cu create mode 100644 libnd4j/include/array/impl/ConstantOffsetsBuffer.cpp create mode 100644 libnd4j/include/array/impl/ConstantShapeBuffer.cpp create mode 100644 libnd4j/include/array/impl/PointerDeallocator.cpp create mode 100644 libnd4j/include/array/impl/PointerWrapper.cpp create mode 100644 libnd4j/include/array/impl/PrimaryPointerDeallocator.cpp create mode 100644 nd4j/nd4j-backends/nd4j-api-parent/nd4j-native-api/src/main/java/org/nd4j/nativeblas/OpaqueConstantShapeBuffer.java diff --git a/libnd4j/UnderstandingGraph.md b/libnd4j/UnderstandingGraph.md index 7e2231c08..d1c51b428 100644 --- a/libnd4j/UnderstandingGraph.md +++ b/libnd4j/UnderstandingGraph.md @@ -77,7 +77,7 @@ If you're adding new ops, and want to make sure they run ok on your specific dev Despite being simple - it still provides you with time spent in various parts of Graph. 
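Note (editorial): among the "singleton changes" this patch lists, Environment::getInstance() now returns a reference rather than a pointer, which is why call sites switch from "->" to "." in the hunk below. A minimal sketch of the assumed shape of that refactor, not the actual implementation:

```c++
// Assumed pattern: a function-local static ("Meyers singleton") lets
// getInstance() return a reference, so callers write getInstance().foo()
// instead of getInstance()->foo(); initialization is thread-safe in C++11.
class Environment {
 public:
  static Environment& getInstance() {
    static Environment instance;   // constructed once, on first use
    return instance;
  }
  void setProfiling(bool enabled) { _profiling = enabled; }
 private:
  Environment() = default;
  bool _profiling = false;
};
```

The UnderstandingGraph.md hunk that follows shows the corresponding call-site update.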
```c++ -Environment::getInstance()->setProfiling(true); +Environment::getInstance().setProfiling(true); auto graph = GraphExecutioner::importFromFlatBuffers("./resources/ae_00.fb"); auto profile = GraphProfilingHelper::profile(graph, 1000); diff --git a/libnd4j/include/array/ConstantDataBuffer.h b/libnd4j/include/array/ConstantDataBuffer.h index e8bafe114..197b93307 100644 --- a/libnd4j/include/array/ConstantDataBuffer.h +++ b/libnd4j/include/array/ConstantDataBuffer.h @@ -22,37 +22,40 @@ #include #include +#include +#include +#include namespace sd { class ND4J_EXPORT ConstantDataBuffer { private: - Nd4jPointer _primaryBuffer = nullptr; - Nd4jPointer _specialBuffer = nullptr; - Nd4jLong _length = 0; - Nd4jLong _sizeOf = 0; + std::shared_ptr<PointerWrapper> _primaryBuffer; + std::shared_ptr<PointerWrapper> _specialBuffer = nullptr; + uint64_t _length = 0; + uint8_t _sizeOf = 0; public: - ConstantDataBuffer(Nd4jPointer primary, Nd4jPointer special, Nd4jLong numEelements, Nd4jLong sizeOf); + ConstantDataBuffer(const std::shared_ptr<PointerWrapper>& primary, uint64_t numElements, DataType dtype); + ConstantDataBuffer(const std::shared_ptr<PointerWrapper>& primary, const std::shared_ptr<PointerWrapper>& special, uint64_t numElements, DataType dtype); ConstantDataBuffer(const ConstantDataBuffer &other); ConstantDataBuffer() = default; ~ConstantDataBuffer() = default; - Nd4jLong sizeOf() const; - Nd4jLong length() const; + uint8_t sizeOf() const; + uint64_t length() const; - Nd4jPointer primary() const; - Nd4jPointer special() const; + void* primary() const; + void* special() const; ConstantDataBuffer& operator=(const ConstantDataBuffer& other) = default; ConstantDataBuffer& operator=(ConstantDataBuffer&& other) noexcept = default; + template <typename T> + T* primaryAsT() const; template <typename T> - T* primaryAsT(); - - template <typename T> - T* specialAsT(); + T* specialAsT() const; }; } diff --git a/libnd4j/include/array/ConstantOffsetsBuffer.h b/libnd4j/include/array/ConstantOffsetsBuffer.h new file mode 100644 index 000000000..61c1e381f --- /dev/null +++ b/libnd4j/include/array/ConstantOffsetsBuffer.h @@ -0,0 +1,49 @@ +/******************************************************************************* + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
diff --git a/libnd4j/include/array/ConstantOffsetsBuffer.h b/libnd4j/include/array/ConstantOffsetsBuffer.h new file mode 100644 index 000000000..61c1e381f --- /dev/null +++ b/libnd4j/include/array/ConstantOffsetsBuffer.h @@ -0,0 +1,49 @@ +/******************************************************************************* + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#ifndef SD_ARRAY_CONSTANTOFFSETSBUFFER_H_ +#define SD_ARRAY_CONSTANTOFFSETSBUFFER_H_ + +#include +#include +#include +#include + +namespace sd { + +class ND4J_EXPORT ConstantOffsetsBuffer { + private: + std::shared_ptr<PointerWrapper> _primaryOffsets; + std::shared_ptr<PointerWrapper> _specialOffsets; + + public: + ConstantOffsetsBuffer(const std::shared_ptr<PointerWrapper> &primary); + ConstantOffsetsBuffer(const std::shared_ptr<PointerWrapper> &primary, const std::shared_ptr<PointerWrapper> &special); + ConstantOffsetsBuffer() = default; + ~ConstantOffsetsBuffer() = default; + + const Nd4jLong* primary() const; + const Nd4jLong* special() const; + const Nd4jLong* platform() const; +}; + +} // namespace sd + +#endif //SD_ARRAY_CONSTANTOFFSETSBUFFER_H_
diff --git a/libnd4j/include/array/ConstantShapeBuffer.h b/libnd4j/include/array/ConstantShapeBuffer.h new file mode 100644 index 000000000..299653271 --- /dev/null +++ b/libnd4j/include/array/ConstantShapeBuffer.h @@ -0,0 +1,49 @@ +/******************************************************************************* + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#ifndef SD_ARRAY_CONSTANTSHAPEBUFFER_H_ +#define SD_ARRAY_CONSTANTSHAPEBUFFER_H_ + +#include +#include +#include +#include + +namespace sd { + +class ND4J_EXPORT ConstantShapeBuffer { + private: + std::shared_ptr<PointerWrapper> _primaryShapeInfo; + std::shared_ptr<PointerWrapper> _specialShapeInfo; + + public: + ConstantShapeBuffer(const std::shared_ptr<PointerWrapper> &primary); + ConstantShapeBuffer(const std::shared_ptr<PointerWrapper> &primary, const std::shared_ptr<PointerWrapper> &special); + ConstantShapeBuffer() = default; + ~ConstantShapeBuffer() = default; + + const Nd4jLong* primary() const; + const Nd4jLong* special() const; + const Nd4jLong* platform() const; +}; + +} // namespace sd + +#endif //SD_ARRAY_CONSTANTSHAPEBUFFER_H_
diff --git a/libnd4j/include/array/CudaPointerDeallocator.h b/libnd4j/include/array/CudaPointerDeallocator.h new file mode 100644 index 000000000..c5c817aeb --- /dev/null +++ b/libnd4j/include/array/CudaPointerDeallocator.h @@ -0,0 +1,38 @@ +/******************************************************************************* + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License.
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#ifndef SD_CUDAYPOINTERDEALLOCATOR_H_ +#define SD_CUDAYPOINTERDEALLOCATOR_H_ + +#include +#include +#include + +namespace sd { +class ND4J_EXPORT CudaPointerDeallocator : public PointerDeallocator { + public: + CudaPointerDeallocator() = default; + ~CudaPointerDeallocator() = default; + + void release(void* ptr) override; +}; +} + +#endif //SD_CUDAYPOINTERDEALLOCATOR_H_ diff --git a/libnd4j/include/array/DataTypeUtils.h b/libnd4j/include/array/DataTypeUtils.h index bd89605d1..686b5bc97 100644 --- a/libnd4j/include/array/DataTypeUtils.h +++ b/libnd4j/include/array/DataTypeUtils.h @@ -110,7 +110,7 @@ namespace sd { // if proposed dataType is already floating point - return it if (isR(typeX)) return typeX; - return Environment::getInstance()->defaultFloatDataType(); + return Environment::getInstance().defaultFloatDataType(); } FORCEINLINE bool DataTypeUtils::isR(sd::DataType dataType) { @@ -154,7 +154,7 @@ namespace sd { // if both data types are float - return biggest one if (rX && rY) { // if we allow precision boost, then we pick bigger data type - if (sd::Environment::getInstance()->precisionBoostAllowed()) { + if (sd::Environment::getInstance().precisionBoostAllowed()) { return nd4j_max(typeX, typeY); } else { // and we return first operand otherwise @@ -165,7 +165,7 @@ namespace sd { // if that's not real type, we apply same rules if (!rX && !rY) { - if (sd::Environment::getInstance()->precisionBoostAllowed()) { + if (sd::Environment::getInstance().precisionBoostAllowed()) { return nd4j_max(typeX, typeY); } else { // and we return first operand otherwise diff --git a/libnd4j/include/array/NDArray.h b/libnd4j/include/array/NDArray.h index c314d25b6..7b32b7d49 100644 --- a/libnd4j/include/array/NDArray.h +++ b/libnd4j/include/array/NDArray.h @@ -45,6 +45,7 @@ #include #include #include +#include namespace sd { @@ -155,8 +156,8 @@ namespace sd { /** * contains shape info: matrix rank, numbers of elements per each dimension, dimensions strides, element-wise-stride, c-like or fortan-like order */ - Nd4jLong *_shapeInfo = nullptr; - Nd4jLong *_shapeInfoD = nullptr; + const Nd4jLong *_shapeInfo = nullptr; + const Nd4jLong *_shapeInfoD = nullptr; /** * pointer on device launch context (with all data needed there). 
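With ConstantShapeBuffer in place, NDArray stops owning its shape info: the `_shapeInfo`/`_shapeInfoD` members above become const pointers into a process-wide cache. An illustrative fragment of why the constness matters (libnd4j context assumed; the calls mirror the hunks in this patch):

```c++
// Shape infos now come from ConstantShapeHelper's cache and are shared by
// every array with the same shape, so NDArray must treat them as read-only.
void inspectShape(Nd4jLong* shapeInfo) {
  auto buffer = sd::ConstantShapeHelper::getInstance().bufferForShapeInfo(shapeInfo);
  const Nd4jLong* host = buffer.primary();    // shared, read-only
  const Nd4jLong* device = buffer.special();  // nullptr on CPU-only builds
  // host[0] = 3;  // never: every array sharing this cached shape would see it
  (void)host; (void)device;
}
```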
@@ -1219,7 +1220,7 @@ namespace sd { void setShapeInfo(const Nd4jLong *shapeInfo); void setShapeInfo(const Nd4jLong *shapeInfo, const sd::DataType dtype); void setShapeInfo(const ShapeDescriptor& descriptor); - void setShapeInfo(const ConstantDataBuffer& shapeBuffer); + void setShapeInfo(const ConstantShapeBuffer& shapeBuffer); /** * returns absolute offset which corresponds to given sequential index @@ -1516,9 +1517,9 @@ FORCEINLINE R NDArray::templatedGet(void const* buffer, Nd4jLong index) const { ////////////////////////////////////////////////////////////////////////// void NDArray::setShapeInfo(Nd4jLong *shapeInfo) { - auto buffer = ConstantShapeHelper::getInstance()->bufferForShapeInfo(shapeInfo); - _shapeInfo = buffer.primaryAsT(); - _shapeInfoD = buffer.specialAsT(); + auto buffer = ConstantShapeHelper::getInstance().bufferForShapeInfo(shapeInfo); + _shapeInfo = buffer.primary(); + _shapeInfoD = buffer.special(); if (shapeInfo != nullptr) { _dataType = ArrayOptions::dataType(_shapeInfo); @@ -1535,9 +1536,9 @@ void NDArray::setShapeInfo(Nd4jLong *shapeInfo) { ////////////////////////////////////////////////////////////////////////// void NDArray::setShapeInfo(Nd4jLong *shapeInfo, const sd::DataType dtype) { - auto buffer = ConstantShapeHelper::getInstance()->bufferForShapeInfo(shapeInfo); - _shapeInfo = buffer.primaryAsT(); - _shapeInfoD = buffer.specialAsT(); + auto buffer = ConstantShapeHelper::getInstance().bufferForShapeInfo(shapeInfo); + _shapeInfo = buffer.primary(); + _shapeInfoD = buffer.special(); if (shapeInfo != nullptr) { _dataType = dtype; @@ -1623,7 +1624,7 @@ bool NDArray::nonNull() const { if (isEmpty()) return true; - if(!Environment::getInstance()->isCPU()) + if(!Environment::getInstance().isCPU()) return getDataBuffer()->special() != nullptr && specialShapeInfo() != nullptr; return getDataBuffer()->primary() != nullptr && shapeInfo() != nullptr; diff --git a/libnd4j/include/array/NDArray.hXX b/libnd4j/include/array/NDArray.hXX index 9e48b05de..eefe169cf 100644 --- a/libnd4j/include/array/NDArray.hXX +++ b/libnd4j/include/array/NDArray.hXX @@ -181,7 +181,7 @@ NDArray::NDArray(sd::DataType dtype, sd::LaunchContext* context, const bool isSc _buffer->setToZeroBuffers(); } else - setShapeInfo(ConstantShapeHelper::getInstance()->emptyShapeInfo(dtype)); + setShapeInfo(ConstantShapeHelper::getInstance().emptyShapeInfo(dtype)); } ////////////////////////////////////////////////////////////////////////// @@ -1088,9 +1088,11 @@ void NDArray::streamline(char o) { char order = o == 'a' ? 
this->ordering() : o; syncToDevice(); std::shared_ptr newBuffer = std::make_shared(this->lengthOf() * sizeOfT(), dataType(), getContext()->getWorkspace()); - auto shapeBuffer = ConstantShapeHelper::getInstance()->bufferForShapeInfo(dataType(), order, rankOf(), shapeOf()); - NativeOpExecutioner::execTransformSame(getContext(), transform::Copy, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), newBuffer->primary(), static_cast(shapeBuffer.primary()), newBuffer->special(), static_cast(shapeBuffer.special()), nullptr, nullptr, nullptr); - setShapeInfo(static_cast(shapeBuffer.primary())); + auto shapeBuffer = ConstantShapeHelper::getInstance().bufferForShapeInfo(dataType(), order, rankOf(), shapeOf()); + NativeOpExecutioner::execTransformSame(getContext(), transform::Copy, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), newBuffer->primary(), + shapeBuffer.primary(), newBuffer->special(), + shapeBuffer.special(), nullptr, nullptr, nullptr); + setShapeInfo(shapeBuffer); _buffer = newBuffer; _offset = 0; tickWriteDevice(); @@ -1355,7 +1357,7 @@ NDArray NDArray::reduceAlongDimension(sd::reduce::FloatOps op, const std::vector std::vector copy(dimensions); - auto newShape = ShapeUtils::evalReduceShapeInfo('c', copy, *this, isR() ? dataType() : Environment::getInstance()->defaultFloatDataType(), keepDims, supportOldShapes, getContext()->getWorkspace()); + auto newShape = ShapeUtils::evalReduceShapeInfo('c', copy, *this, isR() ? dataType() : Environment::getInstance().defaultFloatDataType(), keepDims, supportOldShapes, getContext()->getWorkspace()); NDArray result(newShape, true, getContext()); @@ -1432,7 +1434,7 @@ NDArray NDArray::reduceNumber(sd::reduce::FloatOps op, void *extraParams) const if (isS()) throw std::runtime_error("NDArray::reduceNumber FloatOps: you can't use this method on String array!"); - auto shape = ConstantShapeHelper::getInstance()->scalarShapeInfo(DataTypeUtils::pickFloatingType(dataType())); + auto shape = ConstantShapeHelper::getInstance().scalarShapeInfo(DataTypeUtils::pickFloatingType(dataType())); NDArray result(shape, true, this->getContext()); NDArray::prepareSpecialUse({&result}, {this}); @@ -1461,7 +1463,7 @@ NDArray NDArray::reduceNumber(sd::reduce::BoolOps op, void *extraParams) const { if (isS()) throw std::runtime_error("NDArray::reduceNumber BoolOps: you can't use this method on String array!"); - auto shape = ConstantShapeHelper::getInstance()->scalarShapeInfo(DataType::BOOL); + auto shape = ConstantShapeHelper::getInstance().scalarShapeInfo(DataType::BOOL); NDArray result(shape, true, this->getContext()); NDArray::prepareSpecialUse({&result}, {this}); @@ -1476,7 +1478,7 @@ NDArray NDArray::reduceNumber(sd::reduce::LongOps op, void *extraParams) const { if (isS()) throw std::runtime_error("NDArray::reduceNumber LongOps: you can't use this method on String array!"); - auto shape = ConstantShapeHelper::getInstance()->scalarShapeInfo(DataType::INT64); + auto shape = ConstantShapeHelper::getInstance().scalarShapeInfo(DataType::INT64); NDArray result(shape, true, this->getContext()); NDArray::prepareSpecialUse({&result}, {this}); @@ -1854,8 +1856,7 @@ void NDArray::setAttached(bool reallyAttached) { ////////////////////////////////////////////////////////////////////////// // calculate strides void NDArray::updateStrides(const char order) { - shape::updateStrides(_shapeInfo, order); - syncShape(); + throw std::runtime_error("Forbidden method"); } ////////////////////////////////////////////////////////////////////////// @@ -2456,7 +2457,7 @@ 
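The `reduceNumber()` overloads above all share one idiom: fetch a cached scalar shapeInfo of the result type, then build the output array from it before the scalar reduction kernel runs. Sketched in isolation, with `ctx` standing in for `this->getContext()` and the constructor flags mirroring the hunks above:

```c++
// Scalar-result idiom from the reduceNumber() overloads (fragment,
// libnd4j context assumed).
auto shape = sd::ConstantShapeHelper::getInstance().scalarShapeInfo(sd::DataType::INT64);
sd::NDArray result(shape, true, ctx);  // flags as in the calls above
// ...the matching NativeOpExecutioner::execReduce*Scalar() then fills result
```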
void NDArray::operator+=(const NDArray& other) { if (isS()) throw std::runtime_error("NDArray::operator+=: you can't use this method on String array!"); - if (!Environment::getInstance()->isExperimentalBuild() && this->dataType() != other.dataType() && (this->dataType() != DataType::BOOL || other.dataType() != BOOL)) + if (!Environment::getInstance().isExperimentalBuild() && this->dataType() != other.dataType() && (this->dataType() != DataType::BOOL || other.dataType() != BOOL)) throw sd::datatype_exception::build("NDArray operator+=: Cannot add different types", this->dataType(), other.dataType()); if (this->lengthOf() != 1 && other.lengthOf() == 1) { @@ -2490,7 +2491,7 @@ void NDArray::operator-=(const NDArray& other) { if (isS()) throw std::runtime_error("NDArray::operator-=: you can't use this method on String array!"); - if (!Environment::getInstance()->isExperimentalBuild() && this->dataType() != other.dataType() && (this->dataType() != DataType::BOOL || other.dataType() != BOOL)) + if (!Environment::getInstance().isExperimentalBuild() && this->dataType() != other.dataType() && (this->dataType() != DataType::BOOL || other.dataType() != BOOL)) throw sd::datatype_exception::build("NDArray operator-=: Cannot subtract different types", this->dataType(), other.dataType()); if (lengthOf() != 1 && other.lengthOf() == 1) { @@ -2523,7 +2524,7 @@ void NDArray::operator-=(const NDArray& other) { void NDArray::operator*=(const NDArray& other) { if (isS()) throw std::runtime_error("NDArray::operator*=: you can't use this method on String array!"); - if (!Environment::getInstance()->isExperimentalBuild() && this->dataType() != other.dataType() && (this->dataType() != DataType::BOOL || other.dataType() != BOOL)) + if (!Environment::getInstance().isExperimentalBuild() && this->dataType() != other.dataType() && (this->dataType() != DataType::BOOL || other.dataType() != BOOL)) throw sd::datatype_exception::build("NDArray operator*=: Cannot multiply different types", this->dataType(), other.dataType()); if (lengthOf() != 1 && other.lengthOf() == 1) { @@ -2559,7 +2560,7 @@ void NDArray::operator/=(const NDArray& other) { if (other.isB()) throw std::runtime_error("NDArray::operator/=: you can't divide by bool array!"); - if (!Environment::getInstance()->isExperimentalBuild() && this->dataType() != other.dataType()) { + if (!Environment::getInstance().isExperimentalBuild() && this->dataType() != other.dataType()) { throw sd::datatype_exception::build("NDArray operator/=: Cannot divide different types", this->dataType(), other.dataType()); } @@ -2832,14 +2833,14 @@ void NDArray::applyTrueBroadcast(sd::BroadcastOpsTuple op, const NDArray& other, Nd4jLong const* yShapeInfoD = other.specialShapeInfo(); if(!isSameShape(target)) { - auto xPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), shapeInfo(), getContext()->getWorkspace()); - xShapeInfoH = reinterpret_cast(xPack.primary()); - xShapeInfoD = reinterpret_cast(xPack.special()); + auto xPack = ConstantShapeHelper::getInstance().createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), shapeInfo(), getContext()->getWorkspace()); + xShapeInfoH = xPack.primary(); + xShapeInfoD = xPack.special(); } if(!other.isSameShape(target)) { - auto yPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), other.shapeInfo(), other.getContext()->getWorkspace()); - yShapeInfoH = reinterpret_cast(yPack.primary()); - yShapeInfoD = reinterpret_cast(yPack.special()); + auto yPack = 
ConstantShapeHelper::getInstance().createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), other.shapeInfo(), other.getContext()->getWorkspace()); + yShapeInfoH = yPack.primary(); + yShapeInfoD = yPack.special(); } NDArray::prepareSpecialUse({&target}, {this, &other}); @@ -2883,14 +2884,14 @@ void NDArray::applyTrueBroadcast(sd::BroadcastBoolOpsTuple op, const NDArray& ot Nd4jLong const* yShapeInfoD = other.specialShapeInfo(); if(!isSameShape(target)) { - auto xPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), shapeInfo(), getContext()->getWorkspace()); - xShapeInfoH = reinterpret_cast(xPack.primary()); - xShapeInfoD = reinterpret_cast(xPack.special()); + auto xPack = ConstantShapeHelper::getInstance().createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), shapeInfo(), getContext()->getWorkspace()); + xShapeInfoH = xPack.primary(); + xShapeInfoD = xPack.special(); } if(!other.isSameShape(target)) { - auto yPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), other.shapeInfo(), other.getContext()->getWorkspace()); - yShapeInfoH = reinterpret_cast(yPack.primary()); - yShapeInfoD = reinterpret_cast(yPack.special()); + auto yPack = ConstantShapeHelper::getInstance().createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), other.shapeInfo(), other.getContext()->getWorkspace()); + yShapeInfoH = yPack.primary(); + yShapeInfoD = yPack.special(); } NDArray::prepareSpecialUse({&target}, {this, &other}); @@ -2934,12 +2935,12 @@ void NDArray::applyTrueBroadcast(sd::BroadcastIntOpsTuple op, const NDArray& oth Nd4jLong const* yShapeInfoD = other.specialShapeInfo(); if(!isSameShape(target)) { - auto xPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), shapeInfo(), getContext()->getWorkspace()); + auto xPack = ConstantShapeHelper::getInstance().createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), shapeInfo(), getContext()->getWorkspace()); xShapeInfoH = reinterpret_cast(xPack.primary()); xShapeInfoD = reinterpret_cast(xPack.special()); } if(!other.isSameShape(target)) { - auto yPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), other.shapeInfo(), other.getContext()->getWorkspace()); + auto yPack = ConstantShapeHelper::getInstance().createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), other.shapeInfo(), other.getContext()->getWorkspace()); yShapeInfoH = reinterpret_cast(yPack.primary()); yShapeInfoD = reinterpret_cast(yPack.special()); } @@ -3067,7 +3068,7 @@ void NDArray::applyBroadcast(sd::broadcast::Ops op, const std::vector& dime // if (other.lengthOf() == lengthOf() && this->rankOf() == other.rankOf()) { // NDArray::prepareSpecialUse({&target}, {this, &other}); - // NativeOpExecutioner::execPairwiseTransform(getContext(), fromBroadcastToPairwise(op), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), nullptr); + // NativeOpExecutioner::execPairwiseTransform(getContext(), fromBroadcastToPairwise(op), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.special(), nullptr); // NDArray::registerSpecialUse({&target}, {this, &other}); // return; // } @@ 
-3088,12 +3089,12 @@ void NDArray::applyBroadcast(sd::broadcast::Ops op, const std::vector& dime Nd4jLong const* yShapeInfoD = other.specialShapeInfo(); if(!isSameShape(target)) { - auto xPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), shapeInfo(), getContext()->getWorkspace(), copy); + auto xPack = ConstantShapeHelper::getInstance().createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), shapeInfo(), getContext()->getWorkspace(), copy); xShapeInfoH = reinterpret_cast(xPack.primary()); xShapeInfoD = reinterpret_cast(xPack.special()); } if(!other.isSameShape(target)) { - auto yPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), other.shapeInfo(), other.getContext()->getWorkspace(), copy); + auto yPack = ConstantShapeHelper::getInstance().createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), other.shapeInfo(), other.getContext()->getWorkspace(), copy); yShapeInfoH = reinterpret_cast(yPack.primary()); yShapeInfoD = reinterpret_cast(yPack.special()); } @@ -3119,7 +3120,7 @@ void NDArray::applyBroadcast(sd::broadcast::BoolOps op, const std::vector& // if (other.lengthOf() == lengthOf() && this->rankOf() == other.rankOf()) { // NDArray::prepareSpecialUse({&target}, {this, &other}); - // NativeOpExecutioner::execPairwiseBoolTransform(getContext(), fromBroadcastToPairwiseBool(op), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), nullptr); + // NativeOpExecutioner::execPairwiseBoolTransform(getContext(), fromBroadcastToPairwiseBool(op), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.special(), nullptr); // NDArray::registerSpecialUse({&target}, {this, &other}); // return; // } @@ -3142,12 +3143,12 @@ void NDArray::applyBroadcast(sd::broadcast::BoolOps op, const std::vector& Nd4jLong const* yShapeInfoD = other.specialShapeInfo(); if(!isSameShape(target)) { - auto xPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), shapeInfo(), getContext()->getWorkspace(), copy); + auto xPack = ConstantShapeHelper::getInstance().createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), shapeInfo(), getContext()->getWorkspace(), copy); xShapeInfoH = reinterpret_cast(xPack.primary()); xShapeInfoD = reinterpret_cast(xPack.special()); } if(!other.isSameShape(target)) { - auto yPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), other.shapeInfo(), other.getContext()->getWorkspace(), copy); + auto yPack = ConstantShapeHelper::getInstance().createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), other.shapeInfo(), other.getContext()->getWorkspace(), copy); yShapeInfoH = reinterpret_cast(yPack.primary()); yShapeInfoD = reinterpret_cast(yPack.special()); } @@ -3174,7 +3175,7 @@ void NDArray::applyBroadcast(sd::broadcast::IntOps op, const std::vector& d // if (other.lengthOf() == lengthOf() && this->rankOf() == other.rankOf()) { // NDArray::prepareSpecialUse({&target}, {this, &other}); - // NativeOpExecutioner::execPairwiseIntTransform(getContext(), fromBroadcastToPairwiseInt(op), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), 
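`createShapeInfoWithUnitiesForBroadcast()`, used by all of the `applyTrueBroadcast()`/`applyBroadcast()` variants in this file, aligns the smaller operand to the target's rank. A toy version of the shape part only (the real helper also builds full shapeInfo buffers with strides and honors the dimension list passed as `copy`):

```c++
#include <cstdint>
#include <vector>

// Align 'operand' to the rank of 'target' by inserting size-1 axes, so a
// broadcast kernel can walk both shapes with a single rank.
std::vector<int64_t> withUnities(const std::vector<int64_t>& target,
                                 const std::vector<int64_t>& operand) {
  std::vector<int64_t> padded(target.size(), 1);         // all unities
  const std::size_t shift = target.size() - operand.size();  // right-align
  for (std::size_t i = 0; i < operand.size(); ++i)
    padded[shift + i] = operand[i];
  return padded;  // withUnities({2,3,4}, {3,4}) -> {1,3,4}
}
```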
other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), nullptr); + // NativeOpExecutioner::execPairwiseIntTransform(getContext(), fromBroadcastToPairwiseInt(op), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.special(), nullptr); // NDArray::registerSpecialUse({&target}, {this, &other}); // return; // } @@ -3197,12 +3198,12 @@ void NDArray::applyBroadcast(sd::broadcast::IntOps op, const std::vector& d Nd4jLong const* yShapeInfoD = other.specialShapeInfo(); if(!isSameShape(target)) { - auto xPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), shapeInfo(), getContext()->getWorkspace(), copy); + auto xPack = ConstantShapeHelper::getInstance().createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), shapeInfo(), getContext()->getWorkspace(), copy); xShapeInfoH = reinterpret_cast(xPack.primary()); xShapeInfoD = reinterpret_cast(xPack.special()); } if(!other.isSameShape(target)) { - auto yPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), other.shapeInfo(), other.getContext()->getWorkspace(), copy); + auto yPack = ConstantShapeHelper::getInstance().createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), other.shapeInfo(), other.getContext()->getWorkspace(), copy); yShapeInfoH = reinterpret_cast(yPack.primary()); yShapeInfoD = reinterpret_cast(yPack.special()); } @@ -3220,8 +3221,8 @@ void NDArray::applyBroadcast(sd::broadcast::Ops op, const std::initializer_list< //////////////////////////////////////////////////////////////////////// void* NDArray::operator new(size_t i) { - if (sd::memory::MemoryRegistrator::getInstance()->hasWorkspaceAttached()) { - sd::memory::Workspace* ws = sd::memory::MemoryRegistrator::getInstance()->getWorkspace(); + if (sd::memory::MemoryRegistrator::getInstance().hasWorkspaceAttached()) { + sd::memory::Workspace* ws = sd::memory::MemoryRegistrator::getInstance().getWorkspace(); return ws->allocateBytes((Nd4jLong) i); } else { @@ -3233,7 +3234,7 @@ void* NDArray::operator new(size_t i) { //////////////////////////////////////////////////////////////////////// void NDArray::operator delete(void* p) { - if (!sd::memory::MemoryRegistrator::getInstance()->hasWorkspaceAttached()) + if (!sd::memory::MemoryRegistrator::getInstance().hasWorkspaceAttached()) free(p); } @@ -3439,8 +3440,8 @@ void NDArray::varianceAlongDimension(sd::variance::Ops op, NDArray& target, cons NativeOpExecutioner::execSummaryStatsScalar(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), nullptr, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), biasCorrected); else { std::vector copy(dimensions); - auto pDims = sd::Environment::getInstance()->isCPU() ? copy.data() : nullptr; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->shapeInfo(), dimensions); + auto pDims = sd::Environment::getInstance().isCPU() ? 
copy.data() : nullptr; + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(this->shapeInfo(), dimensions); NativeOpExecutioner::execSummaryStats(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), nullptr, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), pDims, dimensions.size(), packX.platformShapeInfo(), packX.platformOffsets(), biasCorrected); synchronize("NDArray::varianceAlongDimension"); } @@ -4109,8 +4110,8 @@ void NDArray::applyIndexReduce(sd::indexreduce::Ops op, NDArray& target, const s else { std::vector copy = dimensions; shape::checkDimensions(rankOf(), copy); - auto pDims = sd::Environment::getInstance()->isCPU() ? copy.data() : nullptr; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(shapeInfo(), copy); + auto pDims = sd::Environment::getInstance().isCPU() ? copy.data() : nullptr; + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(shapeInfo(), copy); NativeOpExecutioner::execIndexReduce(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), params, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), pDims, copy.size(), packX.platformShapeInfo(), packX.platformOffsets()); synchronize("NDArray::applyIndexReduce"); } @@ -4183,10 +4184,10 @@ NDArray NDArray::applyReduce3(sd::reduce3::Ops op, const NDArray& other, const s } else { - auto pDims = sd::Environment::getInstance()->isCPU() ? copy.data() : nullptr; + auto pDims = sd::Environment::getInstance().isCPU() ? copy.data() : nullptr; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(shapeInfo(), copy); - auto packY = sd::ConstantTadHelper::getInstance()->tadForDimensions(other.shapeInfo(), copy); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(shapeInfo(), copy); + auto packY = sd::ConstantTadHelper::getInstance().tadForDimensions(other.shapeInfo(), copy); if(!shape::equalsSoft(packX.primaryShapeInfo(), packY.primaryShapeInfo()) || (packX.numberOfTads() != packY.numberOfTads() && packX.numberOfTads() != 1 && packY.numberOfTads() != 1)) throw std::runtime_error("NDArray::applyReduce3 cuda method: arrays tads are inconsistent !"); @@ -4212,15 +4213,15 @@ NDArray NDArray::applyAllReduce3(sd::reduce3::Ops op, const NDArray& other, cons shape::checkDimensions(rankOf(), copy); shape::checkDimensions(other.rankOf(), copy); - auto packX = ConstantTadHelper::getInstance()->tadForDimensions(shapeInfo(), copy); - auto packY = ConstantTadHelper::getInstance()->tadForDimensions(other.shapeInfo(), copy); + auto packX = ConstantTadHelper::getInstance().tadForDimensions(shapeInfo(), copy); + auto packY = ConstantTadHelper::getInstance().tadForDimensions(other.shapeInfo(), copy); // check tads shapes if(!shape::equalsSoft(packX.primaryShapeInfo(), packY.primaryShapeInfo())) throw std::runtime_error("NDArray::applyAllReduce3 method: the shapes of array tads are different !"); // set newShape for output array - auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(DataTypeUtils::pickFloatingType(dataType()), 'c', {packX.numberOfTads(), packY.numberOfTads()}); + auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(DataTypeUtils::pickFloatingType(dataType()), 'c', {packX.numberOfTads(), packY.numberOfTads()}); // create output array NDArray result(newShape, true, getContext()); @@ -4228,7 +4229,7 @@ NDArray NDArray::applyAllReduce3(sd::reduce3::Ops op, const NDArray& other, cons // create dynamic array 
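These reduce and index-reduce methods all lean on ConstantTadHelper, which caches, per (shapeInfo, dimensions) pair, one sub-array (TAD, tensor-along-dimensions) shapeInfo plus an offset per sub-array. A hedged fragment, with `array` standing in for an NDArray; for a [4, 5] input reduced along dimension 1 there are 4 TADs of length 5:

```c++
// TAD plumbing shared by the reduction methods (fragment, libnd4j context
// assumed; accessor names as in the hunks above).
auto pack = sd::ConstantTadHelper::getInstance().tadForDimensions(array.shapeInfo(), {1});
for (Nd4jLong t = 0; t < pack.numberOfTads(); ++t) {
  // the t-th sub-array starts at buffer + pack.platformOffsets()[t]
  // and is described by pack.platformShapeInfo()
}
```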
of extra parameters if array extraParams is empty (==nullptr) void* params = extraParams != nullptr ? const_cast(extraParams)->argumentsAsT(dataType()) : nullptr; - auto pDims = sd::Environment::getInstance()->isCPU() ? copy.data() : nullptr; + auto pDims = sd::Environment::getInstance().isCPU() ? copy.data() : nullptr; NDArray::prepareSpecialUse({&result}, {this, &other}); NativeOpExecutioner::execReduce3All(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), params, other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo(), pDims, copy.size(), packX.platformShapeInfo(), packX.platformOffsets(), packY.platformShapeInfo(), packY.platformOffsets()); @@ -4260,7 +4261,7 @@ void NDArray::reduceAlongDimension(sd::reduce::FloatOps op, NDArray& target, con NativeOpExecutioner::execReduceFloatScalar(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(),nullptr, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo()); } else { - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(shapeInfo(), copy); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(shapeInfo(), copy); NativeOpExecutioner::execReduceFloat(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), nullptr, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), copy.data(), copy.size(), packX.platformShapeInfo(), packX.platformOffsets()); } synchronize("NDArray::reduceAlongDimension FloatOps"); @@ -4291,8 +4292,8 @@ void NDArray::reduceAlongDimension(sd::reduce::SameOps op, NDArray& target, cons NativeOpExecutioner::execReduceSameScalar(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), nullptr, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo()); } else { //if (!isEmpty()) { - auto pDims = sd::Environment::getInstance()->isCPU() ? copy.data() : nullptr; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->shapeInfo(), copy); + auto pDims = sd::Environment::getInstance().isCPU() ? copy.data() : nullptr; + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(this->shapeInfo(), copy); NativeOpExecutioner::execReduceSame(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), nullptr, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), pDims, copy.size(), packX.platformShapeInfo(), packX.platformOffsets()); } synchronize("NDArray::reduceAlongDimension SameOps"); @@ -4323,8 +4324,8 @@ void NDArray::reduceAlongDimension(sd::reduce::LongOps op, NDArray& target, cons NativeOpExecutioner::execReduceLongScalar(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), nullptr, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo()); } else { - auto pDims = sd::Environment::getInstance()->isCPU() ? copy.data() : nullptr; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->shapeInfo(), copy); + auto pDims = sd::Environment::getInstance().isCPU() ? 
copy.data() : nullptr; + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(this->shapeInfo(), copy); NativeOpExecutioner::execReduceLong(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), nullptr, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), pDims, copy.size(), packX.platformShapeInfo(), packX.platformOffsets()); } synchronize("NDArray::reduceAlongDimension LongOps"); @@ -4355,8 +4356,8 @@ void NDArray::reduceAlongDimension(sd::reduce::BoolOps op, NDArray& target, cons NativeOpExecutioner::execReduceBoolScalar(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), nullptr, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo()); } else { - auto pDims = sd::Environment::getInstance()->isCPU() ? copy.data() : nullptr; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->shapeInfo(), copy); + auto pDims = sd::Environment::getInstance().isCPU() ? copy.data() : nullptr; + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(this->shapeInfo(), copy); NativeOpExecutioner::execReduceBool(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), nullptr, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), pDims, copy.size(), packX.platformShapeInfo(), packX.platformOffsets()); } synchronize("NDArray::reduceAlongDimension LongOps"); @@ -4524,7 +4525,7 @@ void NDArray::addRowVector(const NDArray& row, NDArray& target) const { int dimension = 1; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->shapeInfo(), dimension); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(this->shapeInfo(), dimension); NDArray::prepareSpecialUse({&target}, {this, &row}); NativeOpExecutioner::execBroadcast(getContext(), sd::broadcast::Ops::Add, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), row.buffer(), row.shapeInfo(), row.specialBuffer(), row.specialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), nullptr, 1, packX.platformShapeInfo(), packX.platformOffsets(), nullptr, nullptr); @@ -4543,7 +4544,7 @@ void NDArray::subRowVector(const NDArray& row, NDArray& target) const { int dimension = 1; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->shapeInfo(), dimension); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(this->shapeInfo(), dimension); NDArray::prepareSpecialUse({&target}, {this, &row}); NativeOpExecutioner::execBroadcast(getContext(), sd::broadcast::Ops::Subtract, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), row.buffer(), row.shapeInfo(), row.specialBuffer(), row.specialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), &dimension, 1, packX.platformShapeInfo(), packX.platformOffsets(), nullptr, nullptr); @@ -4563,7 +4564,7 @@ void NDArray::mulRowVector(const NDArray &row, NDArray &target) const { int dimension = 1; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->shapeInfo(), dimension); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(this->shapeInfo(), dimension); NDArray::prepareSpecialUse({&target}, {this, &row}); NativeOpExecutioner::execBroadcast(getContext(), sd::broadcast::Ops::Multiply, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), row.buffer(), row.shapeInfo(), row.specialBuffer(), 
row.specialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), nullptr, 1, packX.platformShapeInfo(), packX.platformOffsets(), nullptr, nullptr); @@ -4584,7 +4585,7 @@ void NDArray::divRowVector(const NDArray &row, NDArray &target) const { int dimension = 1; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->shapeInfo(), dimension); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(this->shapeInfo(), dimension); NDArray::prepareSpecialUse({&target}, {this, &row}); NativeOpExecutioner::execBroadcast(getContext(), sd::broadcast::Divide, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), row.buffer(), row.shapeInfo(), row.specialBuffer(), row.specialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), nullptr, 1, packX.platformShapeInfo(), packX.platformOffsets(), nullptr, nullptr); @@ -4602,7 +4603,7 @@ void NDArray::addiRowVector(const NDArray& row) { int dimension = 1; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->shapeInfo(), dimension); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(this->shapeInfo(), dimension); NDArray::prepareSpecialUse({this}, {&row}); NativeOpExecutioner::execBroadcast(getContext(), sd::broadcast::Ops::Add, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), row.buffer(), row.shapeInfo(), row.specialBuffer(), row.specialShapeInfo(), this->buffer(), this->shapeInfo(), this->specialBuffer(), this->specialShapeInfo(), nullptr, 1, packX.platformShapeInfo(), packX.platformOffsets(), nullptr, nullptr); @@ -4620,7 +4621,7 @@ void NDArray::addColumnVector(const NDArray &column, NDArray &target) const { int dimension = 0; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->shapeInfo(), dimension); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(this->shapeInfo(), dimension); NDArray::prepareSpecialUse({&target}, {this, &column}); NativeOpExecutioner::execBroadcast(getContext(), sd::broadcast::Ops::Add, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), column.buffer(), column.shapeInfo(), column.specialBuffer(), column.specialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), nullptr, 1, packX.platformShapeInfo(), packX.platformOffsets(), nullptr, nullptr); @@ -4637,7 +4638,7 @@ void NDArray::addiColumnVector(const NDArray &column) { int dimension = 0; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->shapeInfo(), dimension); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(this->shapeInfo(), dimension); NDArray::prepareSpecialUse({this}, {&column}); NativeOpExecutioner::execBroadcast(getContext(), sd::broadcast::Ops::Add, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), column.buffer(), column.shapeInfo(), column.specialBuffer(), column.specialShapeInfo(), this->buffer(), this->shapeInfo(), this->specialBuffer(), this->specialShapeInfo(), nullptr, 1, packX.platformShapeInfo(), packX.platformOffsets(), nullptr, nullptr); @@ -4654,7 +4655,7 @@ void NDArray::muliColumnVector(const NDArray& column) { int dimension = 0; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->shapeInfo(), dimension); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(this->shapeInfo(), dimension); NDArray::prepareSpecialUse({this}, {&column}); NativeOpExecutioner::execBroadcast(getContext(), 
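The row- and column-vector helpers in this stretch are thin wrappers over `execBroadcast` with a one-dimensional TAD pack. For reference, the computation `addRowVector()` dispatches, written out as a standalone plain-loop sketch:

```c++
#include <cstddef>
#include <vector>

// out[i][j] = m[i][j] + row[j]; each row of the matrix is one TAD along
// dimension 1, combined with the same row vector.
void addRowVector(const std::vector<float>& m, const std::vector<float>& row,
                  std::vector<float>& out, std::size_t rows, std::size_t cols) {
  for (std::size_t i = 0; i < rows; ++i)    // one TAD per row
    for (std::size_t j = 0; j < cols; ++j)
      out[i * cols + j] = m[i * cols + j] + row[j];
}
```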
sd::broadcast::Ops::Multiply, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), column.buffer(), column.shapeInfo(), column.specialBuffer(), column.specialShapeInfo(), this->buffer(), this->shapeInfo(), this->specialBuffer(), this->specialShapeInfo(), nullptr, 1, packX.platformShapeInfo(), packX.platformOffsets(), nullptr, nullptr); @@ -4695,7 +4696,7 @@ ResultSet NDArray::multipleTensorsAlongDimension(const std::vector &indices if (indices.size() == 0) return result; - auto pack = ConstantTadHelper::getInstance()->tadForDimensions(shapeInfo(), const_cast(dimensions.data()), dimensions.size()); + auto pack = ConstantTadHelper::getInstance().tadForDimensions(shapeInfo(), const_cast(dimensions.data()), dimensions.size()); auto tadLength = shape::length(pack.primaryShapeInfo()); auto numTads = lengthOf() / tadLength; @@ -4816,7 +4817,7 @@ ResultSet NDArray::allTensorsAlongDimension(const std::vector &dimensions) throw std::runtime_error("NDArray::allTensorsAlongDimension static function: all input dimensions must be smaller than rank of input array !"); - auto pack = ConstantTadHelper::getInstance()->tadForDimensions(_shapeInfo, const_cast(dimensions.data()), dimensions.size()); + auto pack = ConstantTadHelper::getInstance().tadForDimensions(_shapeInfo, const_cast(dimensions.data()), dimensions.size()); auto numTads = pack.numberOfTads(); for (Nd4jLong idx = 0; idx < numTads; idx++ ) { @@ -4929,11 +4930,11 @@ void NDArray::setShapeInfo(const Nd4jLong *shapeInfo) { if (shapeInfo != nullptr) { ShapeDescriptor descriptor(shapeInfo); - auto shapeBuffer = ConstantShapeHelper::getInstance()->bufferForShapeInfo(descriptor); + auto shapeBuffer = ConstantShapeHelper::getInstance().bufferForShapeInfo(descriptor); - _shapeInfo = reinterpret_cast(shapeBuffer.primary()); + _shapeInfo = shapeBuffer.primary(); #ifdef __CUDABLAS__ - _shapeInfoD = reinterpret_cast(shapeBuffer.special()); + _shapeInfoD = shapeBuffer.special(); #endif if(ArrayOptions::arrayType(_shapeInfo) == ArrayType::EMPTY) @@ -4956,11 +4957,11 @@ void NDArray::setShapeInfo(const Nd4jLong *shapeInfo, const sd::DataType dtype) Nd4jLong* shapeInfoTemp = ShapeBuilders::copyShapeInfoAndType(shapeInfo, dtype, true, getContext()->getWorkspace()); ShapeDescriptor descriptor(shapeInfoTemp); - auto shapeBuffer = ConstantShapeHelper::getInstance()->bufferForShapeInfo(descriptor); + auto shapeBuffer = ConstantShapeHelper::getInstance().bufferForShapeInfo(descriptor); - _shapeInfo = reinterpret_cast(shapeBuffer.primary()); + _shapeInfo = shapeBuffer.primary(); #ifdef __CUDABLAS__ - _shapeInfoD = reinterpret_cast(shapeBuffer.special()); + _shapeInfoD = shapeBuffer.special(); #endif if(ArrayOptions::arrayType(_shapeInfo) == ArrayType::EMPTY) @@ -4979,11 +4980,11 @@ void NDArray::setShapeInfo(const Nd4jLong *shapeInfo, const sd::DataType dtype) ////////////////////////////////////////////////////////////////////////// void NDArray::setShapeInfo(const ShapeDescriptor& descriptor) { - auto shapeBuffer = ConstantShapeHelper::getInstance()->bufferForShapeInfo(const_cast(descriptor)); + auto shapeBuffer = ConstantShapeHelper::getInstance().bufferForShapeInfo(const_cast(descriptor)); - _shapeInfo = reinterpret_cast(shapeBuffer.primary()); + _shapeInfo = shapeBuffer.primary(); #ifdef __CUDABLAS__ - _shapeInfoD = reinterpret_cast(shapeBuffer.special()); + _shapeInfoD = shapeBuffer.special(); #endif if(ArrayOptions::arrayType(_shapeInfo) == ArrayType::EMPTY) @@ -4995,11 +4996,11 @@ void NDArray::setShapeInfo(const ShapeDescriptor& descriptor) { } 
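After this patch, all of the `setShapeInfo()` overloads reduce to the same assignment, because the cached ConstantShapeBuffer hands back typed const pointers and the old `reinterpret_cast`/`const_cast` dance disappears. Condensed into one hypothetical helper:

```c++
// Condensed form of the shared pattern (illustrative; not a real libnd4j
// function). The device copy is owned by the shape cache, not the array.
void assignFrom(const sd::ConstantShapeBuffer& shapeBuffer,
                const Nd4jLong*& hostShape, const Nd4jLong*& deviceShape) {
  hostShape = shapeBuffer.primary();
#ifdef __CUDABLAS__
  deviceShape = shapeBuffer.special();
#endif
}
```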
////////////////////////////////////////////////////////////////////////// -void NDArray::setShapeInfo(const ConstantDataBuffer& shapeBuffer) { +void NDArray::setShapeInfo(const ConstantShapeBuffer& shapeBuffer) { - _shapeInfo = reinterpret_cast(const_cast(shapeBuffer).primary()); + _shapeInfo = shapeBuffer.primary(); #ifdef __CUDABLAS__ - _shapeInfoD = reinterpret_cast(const_cast(shapeBuffer).special()); + _shapeInfoD = shapeBuffer.special(); #endif if(ArrayOptions::arrayType(_shapeInfo) == ArrayType::EMPTY) @@ -5350,7 +5351,7 @@ NDArray operator+(T1&& arr1, T2&& arr2) { if (arr1.isS() || arr2.isS()) throw std::runtime_error("operator+(T&& arr1, T&& arr2): you can't use this method on String arrays!"); - if (!Environment::getInstance()->isExperimentalBuild() && arr1.dataType() != arr2.dataType() && (arr1.dataType() != DataType::BOOL || arr2.dataType() != BOOL)) + if (!Environment::getInstance().isExperimentalBuild() && arr1.dataType() != arr2.dataType() && (arr1.dataType() != DataType::BOOL || arr2.dataType() != BOOL)) throw sd::datatype_exception::build("operator+(T&& arr1, T&& arr2): Cannot multiply different types", arr1.dataType(), arr2.dataType()); PointersManager pointersManager(arr1.getContext(), "operator+(T&& arr1, T&& arr2)"); @@ -5400,7 +5401,7 @@ NDArray operator-(T1&& arr1, T2&& arr2) { if (arr1.isS() || arr2.isS()) throw std::runtime_error("operator-(T&& arr1, T&& arr2): you can't use this method on String arrays!"); - if (!Environment::getInstance()->isExperimentalBuild() && arr1.dataType() != arr2.dataType() && (arr1.dataType() != DataType::BOOL || arr2.dataType() != BOOL)) + if (!Environment::getInstance().isExperimentalBuild() && arr1.dataType() != arr2.dataType() && (arr1.dataType() != DataType::BOOL || arr2.dataType() != BOOL)) throw sd::datatype_exception::build("operator-(T&& arr1, T&& arr2): Cannot multiply different types", arr1.dataType(), arr2.dataType()); PointersManager pointersManager(arr1.getContext(), "operator-(T&& arr1, T&& arr2)"); @@ -5450,7 +5451,7 @@ NDArray operator*(T1&& arr1, T2&& arr2) { if (arr1.isS() || arr2.isS()) throw std::runtime_error("operator*(T&& arr1, T&& arr2): you can't use this method on String arrays!"); - if (!Environment::getInstance()->isExperimentalBuild() && arr1.dataType() != arr2.dataType() && (arr1.dataType() != DataType::BOOL || arr2.dataType() != BOOL)) + if (!Environment::getInstance().isExperimentalBuild() && arr1.dataType() != arr2.dataType() && (arr1.dataType() != DataType::BOOL || arr2.dataType() != BOOL)) throw sd::datatype_exception::build("operator*(T&& arr1, T&& arr2): Cannot multiply different types", arr1.dataType(), arr2.dataType()); PointersManager pointersManager(arr1.getContext(), "operator*(T&& arr1, T&& arr2)"); @@ -5500,7 +5501,7 @@ NDArray operator/(T1&& arr1, T2&& arr2) { if (arr1.isS() || arr2.isS()) throw std::runtime_error("operator/(T&& arr1, T&& arr2): you can't use this method on String arrays!"); - if (!Environment::getInstance()->isExperimentalBuild() && arr1.dataType() != arr2.dataType() && (arr1.dataType() != DataType::BOOL || arr2.dataType() != BOOL)) + if (!Environment::getInstance().isExperimentalBuild() && arr1.dataType() != arr2.dataType() && (arr1.dataType() != DataType::BOOL || arr2.dataType() != BOOL)) throw sd::datatype_exception::build("operator/(T&& arr1, T&& arr2): Cannot multiply different types", arr1.dataType(), arr2.dataType()); PointersManager pointersManager(arr1.getContext(), "operator/(T&& arr1, T&& arr2)"); diff --git a/libnd4j/include/array/PointerDeallocator.h 
b/libnd4j/include/array/PointerDeallocator.h new file mode 100644 index 000000000..5bf820421 --- /dev/null +++ b/libnd4j/include/array/PointerDeallocator.h @@ -0,0 +1,39 @@ +/******************************************************************************* + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#ifndef SD_POINTERDEALLOCATOR_H_ +#define SD_POINTERDEALLOCATOR_H_ + +#include +#include + +namespace sd { + +class ND4J_EXPORT PointerDeallocator { + public: + PointerDeallocator() = default; + ~PointerDeallocator() = default; + + virtual void release(void* ptr); +}; + +} + +#endif //SD_POINTERDEALLOCATOR_H_
diff --git a/libnd4j/include/array/PointerWrapper.h b/libnd4j/include/array/PointerWrapper.h new file mode 100644 index 000000000..9e15aaaa3 --- /dev/null +++ b/libnd4j/include/array/PointerWrapper.h @@ -0,0 +1,49 @@ +/******************************************************************************* + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#ifndef SD_ARRAY_POINTER_H_ +#define SD_ARRAY_POINTER_H_ + +#include +#include +#include +#include + +namespace sd { +class ND4J_EXPORT PointerWrapper { + private: + void* _pointer = nullptr; + std::shared_ptr<PointerDeallocator> _deallocator; + + public: + PointerWrapper(void* ptr, const std::shared_ptr<PointerDeallocator> &deallocator = {}); + PointerWrapper() = default; + ~PointerWrapper(); + + void* pointer() const; + + template <typename T> + T* pointerAsT() const { + return reinterpret_cast<T*>(pointer()); + } +}; +} // namespace sd + +#endif //SD_ARRAY_POINTER_H_
diff --git a/libnd4j/include/array/PrimaryPointerDeallocator.h b/libnd4j/include/array/PrimaryPointerDeallocator.h new file mode 100644 index 000000000..b4fe34764 --- /dev/null +++ b/libnd4j/include/array/PrimaryPointerDeallocator.h @@ -0,0 +1,38 @@ +/******************************************************************************* + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0.
+ * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#ifndef SD_PRIMARYPOINTERDEALLOCATOR_H_ +#define SD_PRIMARYPOINTERDEALLOCATOR_H_ + +#include +#include +#include + +namespace sd { +class ND4J_EXPORT PrimaryPointerDeallocator : public PointerDeallocator { + public: + PrimaryPointerDeallocator() = default; + ~PrimaryPointerDeallocator() = default; + + void release(void* ptr) override; +}; +} + +#endif //SD_PRIMARYPOINTERDEALLOCATOR_H_ diff --git a/libnd4j/include/array/TadPack.h b/libnd4j/include/array/TadPack.h index 3cd95fa59..f7ca15fd9 100644 --- a/libnd4j/include/array/TadPack.h +++ b/libnd4j/include/array/TadPack.h @@ -21,17 +21,18 @@ #ifndef DEV_TESTS_TADPACK_H #define DEV_TESTS_TADPACK_H -#include "ConstantDataBuffer.h" +#include +#include namespace sd { class ND4J_EXPORT TadPack { private: - ConstantDataBuffer _tadShape; - ConstantDataBuffer _tadOffsets; + ConstantShapeBuffer _tadShape; + ConstantOffsetsBuffer _tadOffsets; Nd4jLong _numTads = 0 ; int _shapeInfoLength = 0; public: - explicit TadPack(ConstantDataBuffer &shapes, ConstantDataBuffer &offets, Nd4jLong numTads); + explicit TadPack(const ConstantShapeBuffer &shapes, const ConstantOffsetsBuffer &offets, Nd4jLong numTads); TadPack() = default; ~TadPack() = default; diff --git a/libnd4j/include/array/cpu/NDArray.cpp b/libnd4j/include/array/cpu/NDArray.cpp index 873b3fec9..398ebe5e8 100644 --- a/libnd4j/include/array/cpu/NDArray.cpp +++ b/libnd4j/include/array/cpu/NDArray.cpp @@ -338,7 +338,7 @@ void NDArray::tile(const std::vector& reps, NDArray& target) const { const int ews = target.ews(); const auto targetLen = target.lengthOf(); if(target.ordering() == 'c' && ews == 1) { // ews == 1 always here -//#pragma omp parallel for simd if(targetLen > Environment::getInstance()->elementwiseThreshold()) schedule(guided) +//#pragma omp parallel for simd if(targetLen > Environment::getInstance().elementwiseThreshold()) schedule(guided) for(Nd4jLong i=0; i + +namespace sd { + +void CudaPointerDeallocator::release(void *ptr) { + cudaFree(ptr); +} + +} // namespace sd diff --git a/libnd4j/include/array/cuda/DataBuffer.cu b/libnd4j/include/array/cuda/DataBuffer.cu index 922b6967b..7e88e06ba 100644 --- a/libnd4j/include/array/cuda/DataBuffer.cu +++ b/libnd4j/include/array/cuda/DataBuffer.cu @@ -70,16 +70,16 @@ void DataBuffer::allocateSpecial() { auto deviceId = sd::AffinityManager::currentDeviceId(); if (_workspace == nullptr) - if (!sd::memory::MemoryCounter::getInstance()->validate(getLenInBytes())) - throw sd::allocation_exception::build("Requested amount exceeds device limits", sd::memory::MemoryCounter::getInstance()->deviceLimit(deviceId), getLenInBytes()); + if (!sd::memory::MemoryCounter::getInstance().validate(getLenInBytes())) + throw sd::allocation_exception::build("Requested amount exceeds device limits", sd::memory::MemoryCounter::getInstance().deviceLimit(deviceId), getLenInBytes()); ALLOCATE_SPECIAL(_specialBuffer, _workspace, getLenInBytes(), int8_t); _isOwnerSpecial = true; if (_workspace == nullptr) { - sd::memory::MemoryCounter::getInstance()->countIn(deviceId, getLenInBytes()); 
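The pieces above complete the deallocation story: PointerWrapper owns a raw pointer plus a polymorphic PointerDeallocator, so cached buffers can be released with `free()`, `cudaFree()`, or not at all, without the cache knowing which backend allocated them. A self-contained sketch with assumed semantics (the real `release()` bodies live in the `.cpp`/`.cu` files of this patch):

```c++
#include <cstdlib>
#include <memory>

// Simplified stand-ins for the sd:: classes above, not the real ones.
struct PointerDeallocator {
  virtual ~PointerDeallocator() = default;
  virtual void release(void*) { /* base: assumed no-op for non-owned memory */ }
};
struct PrimaryPointerDeallocator : PointerDeallocator {
  void release(void* ptr) override { std::free(ptr); }  // host memory stand-in
};
// The CUDA backend's override calls cudaFree(ptr) instead, as the .cu shows.

class PointerWrapper {
  void* _ptr = nullptr;
  std::shared_ptr<PointerDeallocator> _deallocator;
 public:
  PointerWrapper(void* p, std::shared_ptr<PointerDeallocator> d)
      : _ptr(p), _deallocator(std::move(d)) {}
  ~PointerWrapper() { if (_deallocator) _deallocator->release(_ptr); }
  void* pointer() const { return _ptr; }
};
```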
- sd::memory::MemoryCounter::getInstance()->countIn(sd::memory::MemoryType::DEVICE, getLenInBytes()); + sd::memory::MemoryCounter::getInstance().countIn(deviceId, getLenInBytes()); + sd::memory::MemoryCounter::getInstance().countIn(sd::memory::MemoryType::DEVICE, getLenInBytes()); } } } @@ -135,8 +135,8 @@ void DataBuffer::deleteSpecial() { // count out towards DataBuffer device, only if we're not in workspace if (_workspace == nullptr) { - sd::memory::MemoryCounter::getInstance()->countOut(_deviceId, getLenInBytes()); - sd::memory::MemoryCounter::getInstance()->countOut(sd::memory::MemoryType::DEVICE, getLenInBytes()); + sd::memory::MemoryCounter::getInstance().countOut(_deviceId, getLenInBytes()); + sd::memory::MemoryCounter::getInstance().countOut(sd::memory::MemoryType::DEVICE, getLenInBytes()); } } }
diff --git a/libnd4j/include/array/cuda/NDArray.cu b/libnd4j/include/array/cuda/NDArray.cu index 8ed3eceeb..f28e2ba22 100644 --- a/libnd4j/include/array/cuda/NDArray.cu +++ b/libnd4j/include/array/cuda/NDArray.cu @@ -53,7 +53,7 @@ void* NDArray::platformBuffer() { return specialBuffer(); } void const* NDArray::platformBuffer() const { return specialBuffer(); } Nd4jLong const* NDArray::platformShapeInfo() const { return specialShapeInfo(); } -//Nd4jLong const* NDArray::platformShapeInfo() { return specialShapeInfo(); } +//Nd4jLong const* NDArray::platform() { return special(); } void NDArray::syncToDevice() const { auto currentDeviceId = AffinityManager::currentDeviceId();
diff --git a/libnd4j/include/array/impl/ConstantDataBuffer.cpp b/libnd4j/include/array/impl/ConstantDataBuffer.cpp index 20c842266..2aeda3b6d 100644 --- a/libnd4j/include/array/impl/ConstantDataBuffer.cpp +++ b/libnd4j/include/array/impl/ConstantDataBuffer.cpp @@ -18,29 +18,38 @@ // @author raver119@gmail.com // -#include "../ConstantDataBuffer.h" +#include +#include namespace sd { - ConstantDataBuffer::ConstantDataBuffer(Nd4jPointer primary, Nd4jPointer special, Nd4jLong numEelements, Nd4jLong sizeOf) { - _primaryBuffer = primary; - _specialBuffer = special; - _length = numEelements; - _sizeOf = sizeOf; +ConstantDataBuffer::ConstantDataBuffer( + const std::shared_ptr<PointerWrapper>& primary, + uint64_t numEelements, + DataType dtype) : ConstantDataBuffer(primary, {}, numEelements, dtype) { + // +} + +ConstantDataBuffer::ConstantDataBuffer( + const std::shared_ptr<PointerWrapper>& primary, + const std::shared_ptr<PointerWrapper>& special, + uint64_t numEelements, + DataType dtype) : _primaryBuffer(primary), _specialBuffer(special), _length(numEelements) { + _sizeOf = DataTypeUtils::sizeOf(dtype); } - Nd4jPointer ConstantDataBuffer::primary() const { - return _primaryBuffer; + void* ConstantDataBuffer::primary() const { + return _primaryBuffer->pointer(); } - Nd4jPointer ConstantDataBuffer::special() const { - return _specialBuffer; + void* ConstantDataBuffer::special() const { + return _specialBuffer ? _specialBuffer->pointer() : nullptr; } - Nd4jLong ConstantDataBuffer::sizeOf() const { + uint8_t ConstantDataBuffer::sizeOf() const { return _sizeOf; } - Nd4jLong ConstantDataBuffer::length() const { + uint64_t ConstantDataBuffer::length() const { return _length; } @@ -52,21 +61,21 @@ namespace sd { } template <typename T> - T* ConstantDataBuffer::primaryAsT() { - return reinterpret_cast<T*>(_primaryBuffer); + T* ConstantDataBuffer::primaryAsT() const { + return reinterpret_cast<T*>(_primaryBuffer->pointer()); } - template ND4J_EXPORT float* ConstantDataBuffer::primaryAsT<float>(); - template ND4J_EXPORT double* ConstantDataBuffer::primaryAsT<double>(); - template ND4J_EXPORT int* ConstantDataBuffer::primaryAsT<int>(); - template ND4J_EXPORT Nd4jLong* ConstantDataBuffer::primaryAsT<Nd4jLong>(); + template ND4J_EXPORT float* ConstantDataBuffer::primaryAsT<float>() const; + template ND4J_EXPORT double* ConstantDataBuffer::primaryAsT<double>() const; + template ND4J_EXPORT int* ConstantDataBuffer::primaryAsT<int>() const; + template ND4J_EXPORT Nd4jLong* ConstantDataBuffer::primaryAsT<Nd4jLong>() const; template <typename T> - T* ConstantDataBuffer::specialAsT() { - return reinterpret_cast<T*>(_specialBuffer); + T* ConstantDataBuffer::specialAsT() const { + return reinterpret_cast<T*>(special()); } - template ND4J_EXPORT float* ConstantDataBuffer::specialAsT<float>(); - template ND4J_EXPORT double* ConstantDataBuffer::specialAsT<double>(); - template ND4J_EXPORT int* ConstantDataBuffer::specialAsT<int>(); - template ND4J_EXPORT Nd4jLong* ConstantDataBuffer::specialAsT<Nd4jLong>(); + template ND4J_EXPORT float* ConstantDataBuffer::specialAsT<float>() const; + template ND4J_EXPORT double* ConstantDataBuffer::specialAsT<double>() const; + template ND4J_EXPORT int* ConstantDataBuffer::specialAsT<int>() const; + template ND4J_EXPORT Nd4jLong* ConstantDataBuffer::specialAsT<Nd4jLong>() const; }
diff --git a/libnd4j/include/array/impl/ConstantOffsetsBuffer.cpp b/libnd4j/include/array/impl/ConstantOffsetsBuffer.cpp new file mode 100644 index 000000000..38b516a84 --- /dev/null +++ b/libnd4j/include/array/impl/ConstantOffsetsBuffer.cpp @@ -0,0 +1,51 @@ +/******************************************************************************* + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include + +namespace sd { +ConstantOffsetsBuffer::ConstantOffsetsBuffer(const std::shared_ptr<PointerWrapper> &primary) : + ConstantOffsetsBuffer(primary, std::shared_ptr<PointerWrapper>(nullptr)) { + // +} + +ConstantOffsetsBuffer::ConstantOffsetsBuffer(const std::shared_ptr<PointerWrapper> &primary, + const std::shared_ptr<PointerWrapper> &special) { + _primaryOffsets = primary; + _specialOffsets = special; +} + +const Nd4jLong *ConstantOffsetsBuffer::primary() const { + return reinterpret_cast<const Nd4jLong*>(_primaryOffsets->pointer()); +} + +const Nd4jLong *ConstantOffsetsBuffer::special() const { + return _specialOffsets ? reinterpret_cast<const Nd4jLong*>(_specialOffsets->pointer()) : nullptr; +}
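`primaryAsT()`/`specialAsT()` above are member templates defined in a `.cpp` file, which is why the diff carries one explicit instantiation per exported type. The same pattern in miniature:

```c++
#include <cstdint>

// A template defined outside the header must be explicitly instantiated
// for every type other translation units will link against.
struct Holder {
  void* _ptr = nullptr;
  template <typename T>
  T* asT() const { return reinterpret_cast<T*>(_ptr); }
};

// Without these lines, callers of asT<float>() elsewhere fail at link time.
template float* Holder::asT<float>() const;
template int64_t* Holder::asT<int64_t>() const;
```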
diff --git a/libnd4j/include/array/impl/ConstantOffsetsBuffer.cpp b/libnd4j/include/array/impl/ConstantOffsetsBuffer.cpp new file mode 100644 index 000000000..38b516a84 --- /dev/null +++ b/libnd4j/include/array/impl/ConstantOffsetsBuffer.cpp @@ -0,0 +1,51 @@ +/******************************************************************************* + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include <array/ConstantOffsetsBuffer.h> + +namespace sd { +ConstantOffsetsBuffer::ConstantOffsetsBuffer(const std::shared_ptr<PointerWrapper> &primary) : + ConstantOffsetsBuffer(primary, std::shared_ptr<PointerWrapper>(nullptr)) { + // +} + +ConstantOffsetsBuffer::ConstantOffsetsBuffer(const std::shared_ptr<PointerWrapper> &primary, + const std::shared_ptr<PointerWrapper> &special) { + _primaryOffsets = primary; + _specialOffsets = special; +} + +const Nd4jLong *ConstantOffsetsBuffer::primary() const { + return reinterpret_cast<Nd4jLong *>(_primaryOffsets->pointer()); +} + +const Nd4jLong *ConstantOffsetsBuffer::special() const { + return _specialOffsets ? reinterpret_cast<Nd4jLong *>(_specialOffsets->pointer()) : nullptr; +} + +const Nd4jLong *ConstantOffsetsBuffer::platform() const { +#ifdef __CUDABLAS__ + return special(); +#else + return primary(); +#endif // CUDABLAS +} + +} // namespace sd
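ConstantOffsetsBuffer above, and ConstantShapeBuffer right after it, expose the same primary/special/platform triple: platform() picks the device-side view in CUDA builds and the host-side view otherwise, decided at compile time by the __CUDABLAS__ guard. A small sketch of the idiom, with BACKEND_CUDA standing in for the real define:

#include <cstdio>

struct ShapeViews {
    const long long *host;    // always populated
    const long long *device;  // only meaningful in a CUDA build

    const long long *platform() const {
#ifdef BACKEND_CUDA
        return device;  // kernels consume the device copy
#else
        return host;    // CPU build: there is no device copy
#endif
    }
};

int main() {
    long long shapeInfo[4] = {2, 3, 4, 99};
    ShapeViews v{shapeInfo, nullptr};
    std::printf("leading value: %lld\n", v.platform()[0]);
    return 0;
}

Callers get one accessor that is always correct for the backend they were compiled against, instead of repeating the #ifdef at every call site.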
diff --git a/libnd4j/include/array/impl/ConstantShapeBuffer.cpp b/libnd4j/include/array/impl/ConstantShapeBuffer.cpp new file mode 100644 index 000000000..528101100 --- /dev/null +++ b/libnd4j/include/array/impl/ConstantShapeBuffer.cpp @@ -0,0 +1,51 @@ +/******************************************************************************* + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include <array/ConstantShapeBuffer.h> + +namespace sd { +ConstantShapeBuffer::ConstantShapeBuffer(const std::shared_ptr<PointerWrapper> &primary) : + ConstantShapeBuffer(primary, std::shared_ptr<PointerWrapper>(nullptr)) { + // +} + +ConstantShapeBuffer::ConstantShapeBuffer(const std::shared_ptr<PointerWrapper> &primary, + const std::shared_ptr<PointerWrapper> &special) { + _primaryShapeInfo = primary; + _specialShapeInfo = special; +} + +const Nd4jLong *ConstantShapeBuffer::primary() const { + return reinterpret_cast<Nd4jLong *>(_primaryShapeInfo->pointer()); +} + +const Nd4jLong *ConstantShapeBuffer::special() const { + return _specialShapeInfo ? reinterpret_cast<Nd4jLong *>(_specialShapeInfo->pointer()) : nullptr; +} + +const Nd4jLong *ConstantShapeBuffer::platform() const { +#ifdef __CUDABLAS__ + return special(); +#else + return primary(); +#endif // CUDABLAS +} + +} // namespace sd diff --git a/libnd4j/include/array/impl/DataBuffer.cpp b/libnd4j/include/array/impl/DataBuffer.cpp index 262460e8c..89c386c3d 100644 --- a/libnd4j/include/array/impl/DataBuffer.cpp +++ b/libnd4j/include/array/impl/DataBuffer.cpp @@ -237,14 +237,14 @@ namespace sd { auto deviceId = sd::AffinityManager::currentDeviceId(); // check if this allocation won't bring us above limit if (_workspace == nullptr) { - if (Environment::getInstance()->isCPU()) { + if (Environment::getInstance().isCPU()) { // on cpu backend we validate against device 0 for now - if (!sd::memory::MemoryCounter::getInstance()->validate(getLenInBytes())) - throw sd::allocation_exception::build("Requested amount exceeds HOST device limits", sd::memory::MemoryCounter::getInstance()->deviceLimit(deviceId), getLenInBytes()); + if (!sd::memory::MemoryCounter::getInstance().validate(getLenInBytes())) + throw sd::allocation_exception::build("Requested amount exceeds HOST device limits", sd::memory::MemoryCounter::getInstance().deviceLimit(deviceId), getLenInBytes()); } else { // in heterogeneous mode we validate against device group - if (!sd::memory::MemoryCounter::getInstance()->validateGroup(sd::memory::MemoryType::HOST, getLenInBytes())) - throw sd::allocation_exception::build("Requested amount exceeds HOST group limits", sd::memory::MemoryCounter::getInstance()->groupLimit(sd::memory::MemoryType::HOST), getLenInBytes()); + if (!sd::memory::MemoryCounter::getInstance().validateGroup(sd::memory::MemoryType::HOST, getLenInBytes())) + throw sd::allocation_exception::build("Requested amount exceeds HOST group limits", sd::memory::MemoryCounter::getInstance().groupLimit(sd::memory::MemoryType::HOST), getLenInBytes()); } } @@ -253,10 +253,10 @@ namespace sd { // count in towards current deviceId if we're not in workspace mode if (_workspace == nullptr) { - if (Environment::getInstance()->isCPU()) // we don't want this counter to be added to CUDA device - sd::memory::MemoryCounter::getInstance()->countIn(deviceId, getLenInBytes()); + if (Environment::getInstance().isCPU()) // we don't want this counter to be added to CUDA device + sd::memory::MemoryCounter::getInstance().countIn(deviceId, getLenInBytes()); - sd::memory::MemoryCounter::getInstance()->countIn(sd::memory::MemoryType::HOST, getLenInBytes()); + sd::memory::MemoryCounter::getInstance().countIn(sd::memory::MemoryType::HOST, getLenInBytes()); } } } @@ -279,10 +279,10 @@ namespace sd { // count out towards DataBuffer device, only if we're not in workspace if (_workspace == nullptr) { - if (Environment::getInstance()->isCPU()) - sd::memory::MemoryCounter::getInstance()->countOut(_deviceId, getLenInBytes()); + if (Environment::getInstance().isCPU()) + sd::memory::MemoryCounter::getInstance().countOut(_deviceId, getLenInBytes()); - sd::memory::MemoryCounter::getInstance()->countOut(sd::memory::MemoryType::HOST, getLenInBytes()); + sd::memory::MemoryCounter::getInstance().countOut(sd::memory::MemoryType::HOST, getLenInBytes()); } } }
diff --git a/libnd4j/include/array/impl/PointerDeallocator.cpp b/libnd4j/include/array/impl/PointerDeallocator.cpp new file mode 100644 index 000000000..2cd41cdda --- /dev/null +++ b/libnd4j/include/array/impl/PointerDeallocator.cpp @@ -0,0 +1,29 @@ +/******************************************************************************* + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include <array/PointerDeallocator.h> + +namespace sd { + +void PointerDeallocator::release(void *ptr) { + // noop +} + +} // namespace sd diff --git a/libnd4j/include/array/impl/PointerWrapper.cpp b/libnd4j/include/array/impl/PointerWrapper.cpp new file mode 100644 index 000000000..b39cb54aa --- /dev/null +++ b/libnd4j/include/array/impl/PointerWrapper.cpp @@ -0,0 +1,37 @@ +/******************************************************************************* + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include <array/PointerWrapper.h> + +namespace sd { +PointerWrapper::PointerWrapper(void *ptr, const std::shared_ptr<PointerDeallocator> &deallocator): _pointer(ptr), _deallocator(deallocator) { + // +} + +PointerWrapper::~PointerWrapper() { + if (_deallocator.get() != nullptr) + _deallocator->release(_pointer); +} + +void *PointerWrapper::pointer() const { + return _pointer; +} + +} // namespace sd
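PointerWrapper above couples a raw pointer with a pluggable deallocator: the base PointerDeallocator::release is deliberately a no-op, for memory the wrapper merely references, while subclasses such as PrimaryPointerDeallocator (next file) actually free host arrays. A usage sketch under those assumptions, with hypothetical Wrapped/HostDeallocator names:

#include <cstdint>
#include <cstdio>
#include <memory>

struct Deallocator {
    virtual ~Deallocator() = default;
    virtual void release(void *) {}  // no-op: wrapper does not own the memory
};

struct HostDeallocator : Deallocator {
    void release(void *ptr) override { delete[] static_cast<int8_t *>(ptr); }
};

class Wrapped {
public:
    Wrapped(void *ptr, std::shared_ptr<Deallocator> d)
            : _ptr(ptr), _dealloc(std::move(d)) {}
    ~Wrapped() { if (_dealloc) _dealloc->release(_ptr); }
    void *pointer() const { return _ptr; }
private:
    void *_ptr;
    std::shared_ptr<Deallocator> _dealloc;
};

int main() {
    // the heap block is reclaimed by HostDeallocator when w leaves scope;
    // passing a plain Deallocator instead would leave cleanup to someone else
    Wrapped w(new int8_t[128], std::make_shared<HostDeallocator>());
    std::printf("wrapped %p\n", w.pointer());
    return 0;
}

The same shape lets a CUDA build plug in a cudaFree-backed deallocator without touching the wrapper itself.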
diff --git a/libnd4j/include/array/impl/PrimaryPointerDeallocator.cpp b/libnd4j/include/array/impl/PrimaryPointerDeallocator.cpp new file mode 100644 index 000000000..edd58d610 --- /dev/null +++ b/libnd4j/include/array/impl/PrimaryPointerDeallocator.cpp @@ -0,0 +1,29 @@ +/******************************************************************************* + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include <array/PrimaryPointerDeallocator.h> + +namespace sd { + +void PrimaryPointerDeallocator::release(void *ptr) { + delete[] reinterpret_cast<int8_t *>(ptr); +} + +} // namespace sd diff --git a/libnd4j/include/array/impl/TadPack.cpp b/libnd4j/include/array/impl/TadPack.cpp index 7a3bdbe36..e489d0e83 100644 --- a/libnd4j/include/array/impl/TadPack.cpp +++ b/libnd4j/include/array/impl/TadPack.cpp @@ -23,26 +23,24 @@ #include <helpers/shape.h> namespace sd { - TadPack::TadPack(ConstantDataBuffer &shapes, ConstantDataBuffer &offets, Nd4jLong numTads) { - _tadShape = shapes; - _tadOffsets = offets; + TadPack::TadPack(const ConstantShapeBuffer &shapes, const ConstantOffsetsBuffer &offsets, Nd4jLong numTads) : _tadShape(shapes), _tadOffsets(offsets) { _numTads = numTads; } const Nd4jLong* TadPack::primaryShapeInfo() const { - return reinterpret_cast<Nd4jLong *>(_tadShape.primary()); + return _tadShape.primary(); } const Nd4jLong* TadPack::primaryOffsets() const { - return reinterpret_cast<Nd4jLong *>(_tadOffsets.primary()); + return _tadOffsets.primary(); } const Nd4jLong* TadPack::specialShapeInfo() const { - return reinterpret_cast<Nd4jLong *>(_tadShape.special()); + return _tadShape.special(); } const Nd4jLong* TadPack::specialOffsets() const { - return reinterpret_cast<Nd4jLong *>(_tadOffsets.special()); + return _tadOffsets.special(); } Nd4jLong TadPack::numberOfTads() const { @@ -50,11 +48,11 @@ namespace sd { } const Nd4jLong* TadPack::platformShapeInfo() const { - return sd::Environment::getInstance()->isCPU() ? primaryShapeInfo() : specialShapeInfo(); + return sd::Environment::getInstance().isCPU() ? primaryShapeInfo() : specialShapeInfo(); } const Nd4jLong* TadPack::platformOffsets() const { - return sd::Environment::getInstance()->isCPU() ? primaryOffsets() : specialOffsets(); + return sd::Environment::getInstance().isCPU() ? primaryOffsets() : specialOffsets(); } int TadPack::shapeInfoLength() const {
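The getInstance() conversions that run through the remainder of this patch (the Environment calls above, then ThreadPool, GraphHolder, BlasHelper and the Constant*Helper classes) all follow one recipe: delete the static _INSTANCE pointer and return a reference to a function-local static, which C++11 guarantees is initialized exactly once, thread-safely. A minimal sketch of the pattern:

#include <cstdio>

class Registry {
public:
    static Registry &getInstance() {
        static Registry instance;  // constructed once, on first use, under an implicit lock
        return instance;
    }
    Registry(const Registry &) = delete;
    Registry &operator=(const Registry &) = delete;
    void bump() { ++_count; }
    int count() const { return _count; }
private:
    Registry() = default;   // no public construction
    ~Registry() = default;
    int _count = 0;
};

int main() {
    Registry::getInstance().bump();
    std::printf("%d\n", Registry::getInstance().count());  // prints 1
    return 0;
}

Unlike the leaked heap instance, the local static is destroyed at process exit, which is presumably why the ThreadPool destructor later in this patch drains its ticket queue and detaches its worker threads.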
diff --git a/libnd4j/include/execution/ThreadPool.h b/libnd4j/include/execution/ThreadPool.h index 6811f1b1c..ce44d5ae2 100644 --- a/libnd4j/include/execution/ThreadPool.h +++ b/libnd4j/include/execution/ThreadPool.h @@ -35,9 +35,7 @@ namespace samediff { class ND4J_EXPORT ThreadPool { private: - static ThreadPool* _INSTANCE; - - std::vector<std::thread*> _threads; + std::vector<std::thread> _threads; std::vector<BlockingQueue<CallableWithArguments*>*> _queues; std::vector<CallableInterface*> _interfaces; @@ -48,7 +46,7 @@ ThreadPool(); ~ThreadPool(); public: - static ThreadPool* getInstance(); + static ThreadPool& getInstance(); /** * This method returns list of pointers to threads ONLY if num_threads of threads were available upon request, returning empty list otherwise diff --git a/libnd4j/include/execution/Threads.h b/libnd4j/include/execution/Threads.h index 2ea8295a8..bf35de089 100644 --- a/libnd4j/include/execution/Threads.h +++ b/libnd4j/include/execution/Threads.h @@ -107,7 +107,7 @@ namespace samediff { * @param increment * @return */ - static int parallel_for(FUNC_1D function, int64_t start, int64_t stop, int64_t increment = 1, uint32_t numThreads = sd::Environment::getInstance()->maxMasterThreads()); + static int parallel_for(FUNC_1D function, int64_t start, int64_t stop, int64_t increment = 1, uint32_t numThreads = sd::Environment::getInstance().maxMasterThreads()); /** * This function executes 1 dimensional loop for a given number of threads * @param function * @param start * @param stop * @param increment * @param numThreads * @return */ - static int parallel_tad(FUNC_1D function, int64_t start, int64_t stop, int64_t increment = 1, uint32_t numThreads = sd::Environment::getInstance()->maxMasterThreads()); + static int parallel_tad(FUNC_1D function, int64_t start, int64_t stop, int64_t increment = 1, uint32_t numThreads = sd::Environment::getInstance().maxMasterThreads()); /** * This method will execute function splitting 2 nested loops space with multiple threads * @param function * @param start_x * @param stop_x * @param inc_x * @param start_y * @param stop_y * @param inc_y * @return */ - static int parallel_for(FUNC_2D function, int64_t start_x, int64_t stop_x, int64_t inc_x, int64_t start_y, int64_t stop_y, int64_t inc_y, uint64_t numThreads = sd::Environment::getInstance()->maxMasterThreads(), bool debug = false); + static int parallel_for(FUNC_2D function, int64_t start_x, int64_t stop_x, int64_t inc_x, int64_t start_y, int64_t stop_y, int64_t inc_y, uint64_t numThreads = sd::Environment::getInstance().maxMasterThreads(), bool debug = false); /** * This method will execute function splitting 3 nested loops space with multiple threads * @param function * @param start_x * @param stop_x * @param inc_x * @param start_y * @param stop_y * @param inc_y * @param start_z * @param stop_z * @param inc_z * @return */ - static int parallel_for(FUNC_3D function, int64_t start_x, int64_t stop_x, int64_t inc_x, int64_t start_y, int64_t stop_y, int64_t inc_y, int64_t start_z, int64_t stop_z, int64_t inc_z, uint64_t numThreads = sd::Environment::getInstance()->maxMasterThreads()); + static int parallel_for(FUNC_3D function, int64_t start_x, int64_t stop_x, int64_t inc_x, int64_t start_y, int64_t stop_y, int64_t inc_y, int64_t start_z, int64_t stop_z, int64_t inc_z, uint64_t numThreads = sd::Environment::getInstance().maxMasterThreads()); /** * * @param function * @param numThreads * @return */ - static int parallel_do(FUNC_DO function, uint64_t numThreads = sd::Environment::getInstance()->maxMasterThreads()); + static int parallel_do(FUNC_DO function, uint64_t numThreads = sd::Environment::getInstance().maxMasterThreads()); - static
int64_t parallel_long(FUNC_RL function, FUNC_AL aggregator, int64_t start, int64_t stop, int64_t increment = 1, uint64_t numThreads = sd::Environment::getInstance()->maxMasterThreads()); + static int64_t parallel_long(FUNC_RL function, FUNC_AL aggregator, int64_t start, int64_t stop, int64_t increment = 1, uint64_t numThreads = sd::Environment::getInstance().maxMasterThreads()); - static double parallel_double(FUNC_RD function, FUNC_AD aggregator, int64_t start, int64_t stop, int64_t increment = 1, uint64_t numThreads = sd::Environment::getInstance()->maxMasterThreads()); + static double parallel_double(FUNC_RD function, FUNC_AD aggregator, int64_t start, int64_t stop, int64_t increment = 1, uint64_t numThreads = sd::Environment::getInstance().maxMasterThreads()); /** * This method will execute function in parallel preserving the parts to be aligned increment size * PLEASE NOTE: this function can use smaller number of threads than requested. * */ - static int parallel_aligned_increment(FUNC_1D function, int64_t start, int64_t stop, int64_t increment, size_t type_size = sizeof(float), uint32_t req_numThreads = sd::Environment::getInstance()->maxMasterThreads()); + static int parallel_aligned_increment(FUNC_1D function, int64_t start, int64_t stop, int64_t increment, size_t type_size = sizeof(float), uint32_t req_numThreads = sd::Environment::getInstance().maxMasterThreads()); }; } diff --git a/libnd4j/include/execution/cpu/LaunchContext.cpp b/libnd4j/include/execution/cpu/LaunchContext.cpp index 23e78c350..31cb6889d 100644 --- a/libnd4j/include/execution/cpu/LaunchContext.cpp +++ b/libnd4j/include/execution/cpu/LaunchContext.cpp @@ -61,14 +61,19 @@ namespace sd { } - LaunchContext* LaunchContext::defaultContext() { - // TODO: we need it to be device-aware, but only once we add NUMA support for cpu - if (LaunchContext::_contexts.empty()) { - LaunchContext::_contexts.emplace_back(std::make_shared()); - } + static std::mutex _lock; - // return context for current device - return LaunchContext::_contexts[0].get(); + LaunchContext* LaunchContext::defaultContext() { + { + // synchronous block goes here + std::lock_guard lock(_lock); + // TODO: we need it to be device-aware, but only once we add NUMA support for cpu + if (LaunchContext::_contexts.empty()) + LaunchContext::_contexts.emplace_back(std::make_shared()); + } + + // return context for current device + return LaunchContext::_contexts[0].get(); } std::mutex* LaunchContext::deviceMutex() { diff --git a/libnd4j/include/execution/cuda/LaunchContext.cu b/libnd4j/include/execution/cuda/LaunchContext.cu index 8380e50bf..bd51c3504 100644 --- a/libnd4j/include/execution/cuda/LaunchContext.cu +++ b/libnd4j/include/execution/cuda/LaunchContext.cu @@ -1,5 +1,6 @@ /******************************************************************************* * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2020 Konduit K.K. * * This program and the accompanying materials are made available under the * terms of the Apache License, Version 2.0 which is available at @@ -15,7 +16,7 @@ ******************************************************************************/ // -// Created by raver119 on 30.11.17. +// @author raver119@gmail.com // #include @@ -75,36 +76,37 @@ LaunchContext::LaunchContext() { } LaunchContext* LaunchContext::defaultContext() { - /** - * This method returns LaunchContext, that has multiple entities within: - * 1) temporary buffers. they must be per-thread - * 2) CUDA stream. it must be either per-thread or per-device - * 3) cuBLAS handle. 
it must be per-device - */ - auto deviceId = AffinityManager::currentDeviceId(); + /** + * This method returns LaunchContext, that has multiple entities within: + * 1) temporary buffers. they must be per-thread + * 2) CUDA stream. it must be either per-thread or per-device + * 3) cuBLAS handle. it must be per-device + */ + auto deviceId = AffinityManager::currentDeviceId(); + { // we need this block synchronous, to avoid double initialization etc - _mutex.lock(); + std::lock_guard lock(_mutex); if (LaunchContext::_contexts.empty()) { - // create one context per device - auto numDevices = AffinityManager::numberOfDevices(); + // create one context per device + auto numDevices = AffinityManager::numberOfDevices(); - _contexts.resize(numDevices); - for (int e = 0; e < numDevices; e++) { - _deviceMutexes[e] = new std::mutex(); + _contexts.resize(numDevices); + for (int e = 0; e < numDevices; e++) { + _deviceMutexes[e] = new std::mutex(); - AffinityManager::setCurrentNativeDevice(e); + AffinityManager::setCurrentNativeDevice(e); - LaunchContext::_contexts[e] = std::make_shared(); - } + LaunchContext::_contexts[e] = std::make_shared(); + } - // don't forget to restore device back again - AffinityManager::setCurrentNativeDevice(deviceId); + // don't forget to restore device back again + AffinityManager::setCurrentNativeDevice(deviceId); } - _mutex.unlock(); + } - // return context for current device - return LaunchContext::_contexts[deviceId].get(); + // return context for current device + return LaunchContext::_contexts[deviceId].get(); } @@ -121,11 +123,11 @@ LaunchContext::LaunchContext() { }; void* LaunchContext::getCublasHandle() const { - return CublasHelper::getInstance()->handle(); + return CublasHelper::getInstance().handle(); }; void* LaunchContext::getCusolverHandle() const { - return CublasHelper::getInstance()->solver(); + return CublasHelper::getInstance().solver(); }; cudaStream_t* LaunchContext::getCudaStream() const { @@ -175,7 +177,7 @@ LaunchContext::LaunchContext() { } void* LaunchContext::getCuDnnHandle() const { - return CublasHelper::getInstance()->cudnn(); + return CublasHelper::getInstance().cudnn(); } sd::ErrorReference* LaunchContext::errorReference() { diff --git a/libnd4j/include/execution/impl/ThreadPool.cpp b/libnd4j/include/execution/impl/ThreadPool.cpp index b02c4c4d5..f6c3fdaca 100644 --- a/libnd4j/include/execution/impl/ThreadPool.cpp +++ b/libnd4j/include/execution/impl/ThreadPool.cpp @@ -78,7 +78,7 @@ namespace samediff { ThreadPool::ThreadPool() { // TODO: number of threads must reflect number of cores for UMA system. 
In case of NUMA it should be per-device pool // FIXME: on mobile phones this feature must NOT be used - _available = sd::Environment::getInstance()->maxThreads(); + _available = sd::Environment::getInstance().maxThreads(); _queues.resize(_available.load()); _threads.resize(_available.load()); @@ -88,7 +88,7 @@ namespace samediff { for (int e = 0; e < _available.load(); e++) { _queues[e] = new BlockingQueue(2); _interfaces[e] = new CallableInterface(); - _threads[e] = new std::thread(executionLoopWithInterface_, e, _interfaces[e]); + _threads[e] = std::thread(executionLoopWithInterface_, e, _interfaces[e]); _tickets.push(new Ticket()); // _threads[e] = new std::thread(executionLoop_, e, _queues[e]); @@ -125,19 +125,22 @@ namespace samediff { // stop each and every thread // release queue and thread - //delete _queues[e]; - //delete _threads[e]; + delete _queues[e]; + _threads[e].detach(); + //delete _interfaces[e]; } + + while (!_tickets.empty()) { + auto t = _tickets.front(); + _tickets.pop(); + delete t; + } + } - static std::mutex _lmutex; - - ThreadPool* ThreadPool::getInstance() { - std::unique_lock lock(_lmutex); - if (!_INSTANCE) - _INSTANCE = new ThreadPool(); - - return _INSTANCE; + ThreadPool& ThreadPool::getInstance() { + static ThreadPool instance; + return instance; } void ThreadPool::release(int numThreads) { @@ -188,7 +191,4 @@ namespace samediff { std::unique_lock lock(_lock); _tickets.push(ticket); } - - - ThreadPool* ThreadPool::_INSTANCE = 0; } diff --git a/libnd4j/include/execution/impl/Threads.cpp b/libnd4j/include/execution/impl/Threads.cpp index 51339abf1..90dd519b1 100644 --- a/libnd4j/include/execution/impl/Threads.cpp +++ b/libnd4j/include/execution/impl/Threads.cpp @@ -357,7 +357,7 @@ namespace samediff { return 1; } - auto ticket = ThreadPool::getInstance()->tryAcquire(numThreads); + auto ticket = ThreadPool::getInstance().tryAcquire(numThreads); if (ticket != nullptr) { // if we got our threads - we'll run our jobs here auto span = delta / numThreads; @@ -449,7 +449,7 @@ namespace samediff { // but we still mimic multithreaded execution return numThreads; } else { - auto ticket = ThreadPool::getInstance()->tryAcquire(numThreads); + auto ticket = ThreadPool::getInstance().tryAcquire(numThreads); if (ticket != nullptr) { for (int e = 0; e < numThreads; e++) { @@ -499,7 +499,7 @@ namespace samediff { return 1; } - auto ticket = ThreadPool::getInstance()->tryAcquire(numThreads); + auto ticket = ThreadPool::getInstance().tryAcquire(numThreads); if (ticket != nullptr) { auto splitLoop = ThreadsHelper::pickLoop3d(numThreads, itersX, itersY, itersZ); @@ -526,7 +526,7 @@ namespace samediff { } int Threads::parallel_do(FUNC_DO function, uint64_t numThreads) { - auto ticket = ThreadPool::getInstance()->tryAcquire(numThreads - 1); + auto ticket = ThreadPool::getInstance().tryAcquire(numThreads - 1); if (ticket != nullptr) { // submit tasks one by one @@ -565,7 +565,7 @@ namespace samediff { if (numThreads == 1) return function(0, start, stop, increment); - auto ticket = ThreadPool::getInstance()->tryAcquire(numThreads - 1); + auto ticket = ThreadPool::getInstance().tryAcquire(numThreads - 1); if (ticket == nullptr) return function(0, start, stop, increment); @@ -609,7 +609,7 @@ namespace samediff { if (numThreads == 1) return function(0, start, stop, increment); - auto ticket = ThreadPool::getInstance()->tryAcquire(numThreads - 1); + auto ticket = ThreadPool::getInstance().tryAcquire(numThreads - 1); if (ticket == nullptr) return function(0, start, stop, increment); @@ 
-668,7 +668,7 @@ namespace samediff { numThreads = static_cast(std::ceil((double)delta / spand)); auto span = static_cast(spand); - auto ticket = samediff::ThreadPool::getInstance()->tryAcquire(numThreads); + auto ticket = samediff::ThreadPool::getInstance().tryAcquire(numThreads); if (ticket != nullptr) { //tail_add is additional value of the last part //it could be negative or positive diff --git a/libnd4j/include/execution/impl/Ticket.cpp b/libnd4j/include/execution/impl/Ticket.cpp index 98cb05376..b50b8f771 100644 --- a/libnd4j/include/execution/impl/Ticket.cpp +++ b/libnd4j/include/execution/impl/Ticket.cpp @@ -31,7 +31,7 @@ namespace samediff { Ticket::Ticket() { _acquired = true; - _interfaces.resize(sd::Environment::getInstance()->maxThreads()); + _interfaces.resize(sd::Environment::getInstance().maxThreads()); } bool Ticket::acquired() { @@ -80,11 +80,11 @@ namespace samediff { _interfaces[e]->markAvailable(); // increment availability counter - ThreadPool::getInstance()->release(); + ThreadPool::getInstance().release(); } // return this ticket back to the pool - ThreadPool::getInstance()->release(this); + ThreadPool::getInstance().release(this); } diff --git a/libnd4j/include/graph/ContextPrototype.h b/libnd4j/include/graph/ContextPrototype.h index 57d773dbb..e61831fa7 100644 --- a/libnd4j/include/graph/ContextPrototype.h +++ b/libnd4j/include/graph/ContextPrototype.h @@ -61,7 +61,7 @@ namespace sd { std::vector _dataTypes; sd::ops::OpDescriptor* _opDescriptor; - bool _useMKLDNN = sd::Environment::getInstance()->isUseMKLDNN(); + bool _useMKLDNN = sd::Environment::getInstance().isUseMKLDNN(); // target engine for execution samediff::Engine _engine = DEFAULT_ENGINE; diff --git a/libnd4j/include/graph/GraphHolder.h b/libnd4j/include/graph/GraphHolder.h index 07e091f42..84aebd694 100644 --- a/libnd4j/include/graph/GraphHolder.h +++ b/libnd4j/include/graph/GraphHolder.h @@ -30,7 +30,6 @@ namespace sd { namespace graph { class ND4J_EXPORT GraphHolder { private: - static GraphHolder *_INSTANCE; MAP_IMPL _graphF; MAP_IMPL _locks; @@ -38,7 +37,7 @@ namespace sd { GraphHolder() = default; ~GraphHolder() = default; public: - static GraphHolder* getInstance(); + static GraphHolder& getInstance(); void registerGraph(Nd4jLong graphId, Graph *graph); diff --git a/libnd4j/include/graph/execution/impl/LogicReturn.cpp b/libnd4j/include/graph/execution/impl/LogicReturn.cpp index c9dbafd6d..0ee62e945 100644 --- a/libnd4j/include/graph/execution/impl/LogicReturn.cpp +++ b/libnd4j/include/graph/execution/impl/LogicReturn.cpp @@ -34,7 +34,7 @@ namespace sd { // FIXME!! 
outputAddr.second = e; - if (Environment::getInstance()->isDebugAndVerbose()) + if (Environment::getInstance().isDebugAndVerbose()) nd4j_debug("Return input: <%i, %i>; Return output: <%i, %i>\n", inputAddr.first, inputAddr.second, outputAddr.first, outputAddr.second); auto varIn = __variableSpace->getVariable(inputAddr); @@ -45,7 +45,7 @@ namespace sd { // FIXME: this is obviously wrong, we should keep depth track for backprop here varOut->getNDArray()->assign(varIn->getNDArray()); - if (Environment::getInstance()->isDebugAndVerbose()) + if (Environment::getInstance().isDebugAndVerbose()) nd4j_debug("In after: [%f]; Out after: [%f]\n", varIn->getNDArray()->meanNumber().e(0), varOut->getNDArray()->meanNumber().e(0)); } diff --git a/libnd4j/include/graph/execution/impl/LogicWhile.cpp b/libnd4j/include/graph/execution/impl/LogicWhile.cpp index 1dfd3aaf2..fec9a0d30 100644 --- a/libnd4j/include/graph/execution/impl/LogicWhile.cpp +++ b/libnd4j/include/graph/execution/impl/LogicWhile.cpp @@ -96,7 +96,7 @@ namespace sd { // now we should take result of the Scope run, and evaluate it auto result = __variableSpace->getVariable(lastNode)->getNDArray(); - if (Environment::getInstance()->isDebugAndVerbose()) + if (Environment::getInstance().isDebugAndVerbose()) result->printBuffer("Result of the last node:"); // if result evaluates to 0.0 - condition returned FALSE diff --git a/libnd4j/include/graph/impl/Context.cpp b/libnd4j/include/graph/impl/Context.cpp index ae5bc59a0..f76f66bbe 100644 --- a/libnd4j/include/graph/impl/Context.cpp +++ b/libnd4j/include/graph/impl/Context.cpp @@ -236,7 +236,7 @@ namespace sd { auto v = variable(p); - if (Environment::getInstance()->isDebugAndVerbose() && v != nullptr && v->getNDArray() != nullptr) { + if (Environment::getInstance().isDebugAndVerbose() && v != nullptr && v->getNDArray() != nullptr) { auto array = v->getNDArray(); std::string shape_ = ShapeUtils::shapeAsString(array); auto type = DataTypeUtils::asString(array->dataType()); diff --git a/libnd4j/include/graph/impl/Graph.cpp b/libnd4j/include/graph/impl/Graph.cpp index 177adbe07..a50d1f4b6 100644 --- a/libnd4j/include/graph/impl/Graph.cpp +++ b/libnd4j/include/graph/impl/Graph.cpp @@ -166,7 +166,7 @@ namespace sd { // aNewShape[5] = 8192; // set type as FLOAT32 by default // aNewShape[6] = 1; // aNewShape[7] = 99; - newShape = ConstantShapeHelper::getInstance()->createShapeInfo(DataType::FLOAT32, 'c', {1,1}); + newShape = ConstantShapeHelper::getInstance().createShapeInfo(DataType::FLOAT32, 'c', {1,1}); } else { auto in = node->input()->at(0); @@ -184,7 +184,7 @@ namespace sd { //shape::TAD tad(oldShape, node->getDimensions()->data(), node->getDimensions()->size()); auto numTads = shape::tadLength(oldShape, node->getDimensions()->data(), node->getDimensions()->size()); Nd4jLong shape[2] = {1, (int) numTads}; - newShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(oldShape), 'c', 2, shape); + newShape = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(oldShape), 'c', 2, shape); } std::pair pairAddr(node->id(), 0); @@ -805,7 +805,7 @@ namespace sd { // we're adding final nodes of the graph. those, not used as input anywhere nd4j_debug("Paring nodes... 
\n", ""); - if (Environment::getInstance()->isDebugAndVerbose()) { + if (Environment::getInstance().isDebugAndVerbose()) { // nd4j_printv("current _output", _output); } //_output.clear(); @@ -852,7 +852,7 @@ namespace sd { if (std::find(_output.begin(), _output.end(), node->id()) == _output.end()) _output.emplace_back(node->id()); - } else if (Environment::getInstance()->isDebugAndVerbose()) { + } else if (Environment::getInstance().isDebugAndVerbose()) { nd4j_debug("Node [%i:<%s>] has %i outputs announced:\n", v, node->name()->c_str(), node->output()->size()); printf("{"); for (auto s : *node->output()) { @@ -1202,7 +1202,7 @@ namespace sd { } break; default: { - opNameStr = std::string(EnumUtils::_OpTypeToString(node->opType()))+"{" + ops::OpRegistrator::getInstance()->local_to_string((int) node->opNum()) + "}"; + opNameStr = std::string(EnumUtils::_OpTypeToString(node->opType()))+"{" + ops::OpRegistrator::getInstance().local_to_string((int) node->opNum()) + "}"; } } @@ -1250,7 +1250,7 @@ namespace sd { } break; default: { - opNameStr = std::string(EnumUtils::_OpTypeToString(node->opType()))+"{" + ops::OpRegistrator::getInstance()->local_to_string((int) node->opNum()) + "}"; + opNameStr = std::string(EnumUtils::_OpTypeToString(node->opType()))+"{" + ops::OpRegistrator::getInstance().local_to_string((int) node->opNum()) + "}"; } } @@ -1447,7 +1447,7 @@ namespace sd { } - hash = ops::HashHelper::getInstance()->getLongHash(localStamp); + hash = ops::HashHelper::getInstance().getLongHash(localStamp); nd4j_debug("Graph hash: %lld\n", hash); diff --git a/libnd4j/include/graph/impl/GraphExecutioner.cpp b/libnd4j/include/graph/impl/GraphExecutioner.cpp index c673d2b31..abc3b2e0c 100644 --- a/libnd4j/include/graph/impl/GraphExecutioner.cpp +++ b/libnd4j/include/graph/impl/GraphExecutioner.cpp @@ -88,7 +88,7 @@ namespace graph { Context context(node->getContextPrototype(), variableSpace); - if (sd::Environment::getInstance()->isDebugAndVerbose()) { + if (sd::Environment::getInstance().isDebugAndVerbose()) { //nd4j_debug("Input variables: %i\n", node->input()->size()); printf(" Inputs: {"); for (int e = 0; e < node->input()->size(); e++) { @@ -215,10 +215,10 @@ Nd4jStatus GraphExecutioner::execute(Graph *graph, VariableSpace* variableSpace) } auto flowPath = __variableSpace->flowPath(); - Nd4jLong tb0 = Environment::getInstance()->isProfiling() ? GraphProfile::currentTime() : 0L; + Nd4jLong tb0 = Environment::getInstance().isProfiling() ? GraphProfile::currentTime() : 0L; graph->buildGraph(); - auto footprintForward = sd::memory::MemoryRegistrator::getInstance()->getGraphMemoryFootprint(graph->hashCode()); + auto footprintForward = sd::memory::MemoryRegistrator::getInstance().getGraphMemoryFootprint(graph->hashCode()); if (footprintForward > 0) { if (__variableSpace->launchContext()->getWorkspace() != nullptr) { // this method will work only if current workspace size is smaller then proposed value @@ -228,10 +228,10 @@ Nd4jStatus GraphExecutioner::execute(Graph *graph, VariableSpace* variableSpace) } // optionally saving graph build time - if (Environment::getInstance()->isProfiling()) + if (Environment::getInstance().isProfiling()) flowPath->profile()->setBuildTime(GraphProfile::relativeTime(tb0)); - Nd4jLong timeStart = Environment::getInstance()->isProfiling() ? GraphProfile::currentTime() : 0L; + Nd4jLong timeStart = Environment::getInstance().isProfiling() ? 
GraphProfile::currentTime() : 0L; bool pe = graph->getExecutorConfiguration()->_executionMode == ExecutionMode_AUTO; @@ -259,10 +259,10 @@ Nd4jStatus GraphExecutioner::execute(Graph *graph, VariableSpace* variableSpace) Node* node = graph->getOnion()->at(l)->at(n); - if (Environment::getInstance()->isProfiling()) + if (Environment::getInstance().isProfiling()) flowPath->profile()->nodeById(node->id(), node->name()->c_str()); - if (lastId != node->id() && Environment::getInstance()->isProfiling()) { + if (lastId != node->id() && Environment::getInstance().isProfiling()) { if (lastId != -10000000) flowPath->profile()->nodeById(lastId)->setTotalTime(GraphProfile::relativeTime(nodeTime)); @@ -458,7 +458,7 @@ Nd4jStatus GraphExecutioner::execute(Graph *graph, VariableSpace* variableSpace) // now we skip all branches except of this active one } - if (sd::Environment::getInstance()->isDebugAndVerbose()) { + if (sd::Environment::getInstance().isDebugAndVerbose()) { if (__variableSpace->getVariable(node->id())->hasNDArray()) { auto array = __variableSpace->getVariable(node->id())->getNDArray(); @@ -481,7 +481,7 @@ Nd4jStatus GraphExecutioner::execute(Graph *graph, VariableSpace* variableSpace) } // optionally saving execution time - if (Environment::getInstance()->isProfiling()) { + if (Environment::getInstance().isProfiling()) { flowPath->profile()->nodeById(lastId)->setTotalTime(GraphProfile::relativeTime(nodeTime)); flowPath->profile()->setExecutionTime(GraphProfile::relativeTime(timeStart)); //flowPath->profile().printOut(); @@ -491,7 +491,7 @@ Nd4jStatus GraphExecutioner::execute(Graph *graph, VariableSpace* variableSpace) if (__variableSpace->launchContext()->getWorkspace() != nullptr) { auto m = __variableSpace->launchContext()->getWorkspace()->getAllocatedSize(); auto h = graph->hashCode(); - sd::memory::MemoryRegistrator::getInstance()->setGraphMemoryFootprintIfGreater(h, m); + sd::memory::MemoryRegistrator::getInstance().setGraphMemoryFootprintIfGreater(h, m); } if (tempFlow) { @@ -523,7 +523,7 @@ Nd4jStatus GraphExecutioner::execute(Graph *graph, VariableSpace* variableSpace) // converting FlatGraph to internal representation auto nativeGraph = new Graph(restoredGraph); - if (Environment::getInstance()->isDebugAndVerbose()) { + if (Environment::getInstance().isDebugAndVerbose()) { nativeGraph->printOut(); } @@ -742,7 +742,7 @@ Graph* GraphExecutioner::importFromTensorFlow(const char *fileName) { nd4j_verbose("Node id: [%i]; name: [%s]; opName: [%s]\n", n + 1, node.name().c_str(), node.op().c_str()); - sd::ops::DeclarableOp *op = sd::ops::OpRegistrator::getInstance()->getOperationFloat(node.op().c_str()); + sd::ops::DeclarableOp *op = sd::ops::OpRegistrator::getInstance().getOperationFloat(node.op().c_str()); if (op == nullptr) { nd4j_verbose("Op wasn't found: %s\n", node.op().c_str()); @@ -859,7 +859,7 @@ flatbuffers::Offset GraphExecutioner::execute(Graph *graph, flatbuff } } - if (Environment::getInstance()->isDebugAndVerbose()) + if (Environment::getInstance().isDebugAndVerbose()) graph->printOut(); auto status = GraphExecutioner::execute(graph); diff --git a/libnd4j/include/graph/impl/GraphHolder.cpp b/libnd4j/include/graph/impl/GraphHolder.cpp index c480508f5..13c4e3896 100644 --- a/libnd4j/include/graph/impl/GraphHolder.cpp +++ b/libnd4j/include/graph/impl/GraphHolder.cpp @@ -25,11 +25,9 @@ namespace sd { namespace graph { - GraphHolder* GraphHolder::getInstance() { - if (_INSTANCE == 0) - _INSTANCE = new GraphHolder(); - - return _INSTANCE; + GraphHolder& 
GraphHolder::getInstance() { + static GraphHolder instance; + return instance; }; void GraphHolder::registerGraph(Nd4jLong graphId, Graph* graph) { @@ -126,7 +124,5 @@ namespace sd { return res; } - - GraphHolder* GraphHolder::_INSTANCE = 0; } } diff --git a/libnd4j/include/graph/impl/Node.cpp b/libnd4j/include/graph/impl/Node.cpp index e3ea75ef9..a3baf1a9b 100644 --- a/libnd4j/include/graph/impl/Node.cpp +++ b/libnd4j/include/graph/impl/Node.cpp @@ -636,7 +636,7 @@ namespace sd { block->setOpDescriptor(this->getCustomOp()->getOpDescriptor()); } } else if (this->_opType == OpType_CUSTOM) { - auto op = sd::ops::OpRegistrator::getInstance()->getOperation(this->opNum()); + auto op = sd::ops::OpRegistrator::getInstance().getOperation(this->opNum()); if (op == nullptr) { nd4j_verbose("Can't find operation: %lld\n", this->opNum()); throw std::runtime_error("Can't find requested operation"); diff --git a/libnd4j/include/helpers/BlasHelper.h b/libnd4j/include/helpers/BlasHelper.h index b2fe7b60c..038df67b5 100644 --- a/libnd4j/include/helpers/BlasHelper.h +++ b/libnd4j/include/helpers/BlasHelper.h @@ -364,8 +364,6 @@ namespace sd { class BlasHelper { private: - static BlasHelper* _instance; - bool _hasHgemv = false; bool _hasHgemm = false; bool _hasHgemmBatch = false; @@ -404,7 +402,7 @@ namespace sd { CusolverDnDgesvd cusolverDnDgesvd; public: - static BlasHelper* getInstance(); + static BlasHelper& getInstance(); void initializeFunctions(Nd4jPointer *functions); void initializeDeviceFunctions(Nd4jPointer *functions); diff --git a/libnd4j/include/helpers/ConstantHelper.h b/libnd4j/include/helpers/ConstantHelper.h index 3e5681fb6..7d4446d34 100644 --- a/libnd4j/include/helpers/ConstantHelper.h +++ b/libnd4j/include/helpers/ConstantHelper.h @@ -35,7 +35,6 @@ namespace sd { class ND4J_EXPORT ConstantHelper { private: - static ConstantHelper* _INSTANCE; ConstantHelper(); std::vector> _cache; @@ -48,9 +47,9 @@ namespace sd { std::vector _counters; public: - ~ConstantHelper() = default; + ~ConstantHelper(); - static ConstantHelper* getInstance(); + static ConstantHelper& getInstance(); static int getCurrentDevice(); static int getNumberOfDevices(); void* replicatePointer(void *src, size_t numBytes, memory::Workspace *workspace = nullptr); diff --git a/libnd4j/include/helpers/ConstantShapeHelper.h b/libnd4j/include/helpers/ConstantShapeHelper.h index 73281c507..25440e05c 100644 --- a/libnd4j/include/helpers/ConstantShapeHelper.h +++ b/libnd4j/include/helpers/ConstantShapeHelper.h @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include #include @@ -35,24 +35,22 @@ namespace sd { class ND4J_EXPORT ConstantShapeHelper { private: - static ConstantShapeHelper *_INSTANCE; - std::mutex _mutex; - std::vector> _cache; + std::vector> _cache; ConstantShapeHelper(); public: ~ConstantShapeHelper() = default; - static ConstantShapeHelper* getInstance(); + static ConstantShapeHelper & getInstance(); - ConstantDataBuffer bufferForShapeInfo(sd::DataType dataType, char order, const std::vector &shape); - ConstantDataBuffer bufferForShapeInfo(const ShapeDescriptor &descriptor); - ConstantDataBuffer bufferForShapeInfo(const Nd4jLong *shapeInfo); - ConstantDataBuffer bufferForShapeInfo(sd::DataType dataType, char order, int rank, const Nd4jLong* shape); - ConstantDataBuffer createShapeInfoWithUnitiesForBroadcast(const Nd4jLong* maxShapeInfo, const Nd4jLong* minShapeInfo, sd::memory::Workspace* workspace = nullptr, const std::vector &dimensions = {}); + ConstantShapeBuffer& bufferForShapeInfo(sd::DataType 
dataType, char order, const std::vector &shape); + ConstantShapeBuffer& bufferForShapeInfo(const ShapeDescriptor &descriptor); + ConstantShapeBuffer& bufferForShapeInfo(const Nd4jLong *shapeInfo); + ConstantShapeBuffer& bufferForShapeInfo(sd::DataType dataType, char order, int rank, const Nd4jLong* shape); + ConstantShapeBuffer& createShapeInfoWithUnitiesForBroadcast(const Nd4jLong* maxShapeInfo, const Nd4jLong* minShapeInfo, sd::memory::Workspace* workspace = nullptr, const std::vector &dimensions = {}); const Nd4jLong* emptyShapeInfo(sd::DataType dataType); diff --git a/libnd4j/include/helpers/ConstantTadHelper.h b/libnd4j/include/helpers/ConstantTadHelper.h index 80efaa86f..10bdd108d 100644 --- a/libnd4j/include/helpers/ConstantTadHelper.h +++ b/libnd4j/include/helpers/ConstantTadHelper.h @@ -35,8 +35,6 @@ namespace sd { class ND4J_EXPORT ConstantTadHelper { private: - static ConstantTadHelper *_INSTANCE; - std::mutex _mutex; std::vector> _cache; @@ -44,7 +42,7 @@ namespace sd { public: ~ConstantTadHelper() = default; - static ConstantTadHelper* getInstance(); + static ConstantTadHelper & getInstance(); /** * These methods calculate Tensor-Along-Dimension(s) shape and offsets diff --git a/libnd4j/include/helpers/DebugHelper.h b/libnd4j/include/helpers/DebugHelper.h index b0387dd8c..10bb1dc90 100644 --- a/libnd4j/include/helpers/DebugHelper.h +++ b/libnd4j/include/helpers/DebugHelper.h @@ -44,7 +44,7 @@ namespace sd { // cuda-specific debug functions #ifdef __CUDACC__ static FORCEINLINE void checkErrorCode(cudaStream_t *stream, int opNum = 0) { - if (Environment::getInstance()->isDebug()) { + if (Environment::getInstance().isDebug()) { cudaError_t res = cudaStreamSynchronize(*stream); if (res != 0) { diff --git a/libnd4j/include/helpers/LoopKind.h b/libnd4j/include/helpers/LoopKind.h index e3ca932b3..4efbea43a 100644 --- a/libnd4j/include/helpers/LoopKind.h +++ b/libnd4j/include/helpers/LoopKind.h @@ -206,7 +206,7 @@ LoopKind::Kind LoopKind::deduceKindOfLoopTadXZ(const Nd4jLong* xShapeInfo, const const bool tVectorOrC = shape::isCommonVector(tadShapeInfo, temp) || tOrder == 'c'; const bool zVectorOrC = shape::isCommonVector(zShapeInfo, temp) || zOrder == 'c';; - if(shape::length(tadShapeInfo) * shape::length(zShapeInfo) <= Environment::getInstance()->elementwiseThreshold() && xEws == 1 && xOrder == 'c' && xRank == 2 && + if(shape::length(tadShapeInfo) * shape::length(zShapeInfo) <= Environment::getInstance().elementwiseThreshold() && xEws == 1 && xOrder == 'c' && xRank == 2 && tEws > 1 && zEws == 1 && (allC || (tVectorOrC && zVectorOrC))) return SMALLARR2DX; if(tEws == 1 && zEws == 1 && (allC || (tVectorOrC && zVectorOrC))) diff --git a/libnd4j/include/helpers/Loops.h b/libnd4j/include/helpers/Loops.h index f18bcc63d..9bf3daede 100644 --- a/libnd4j/include/helpers/Loops.h +++ b/libnd4j/include/helpers/Loops.h @@ -702,21 +702,21 @@ namespace sd { std::vector zeroOffsets; if (xLen == yLen) { - tadPackX = sd::ConstantTadHelper::getInstance()->tadForDimensions(xShapeInfo, dims, dimsLen); - tadPackY = sd::ConstantTadHelper::getInstance()->tadForDimensions(yShapeInfo, dims, dimsLen); + tadPackX = sd::ConstantTadHelper::getInstance().tadForDimensions(xShapeInfo, dims, dimsLen); + tadPackY = sd::ConstantTadHelper::getInstance().tadForDimensions(yShapeInfo, dims, dimsLen); xTadShapeInfo = tadPackX.primaryShapeInfo(); yTadShapeInfo = tadPackY.primaryShapeInfo(); xTadOffsets = tadPackX.primaryOffsets(); yTadOffsets = tadPackY.primaryOffsets(); } else if (yLen > xLen) { - tadPackY = 
sd::ConstantTadHelper::getInstance()->tadForDimensions(yShapeInfo, dims, dimsLen); + tadPackY = sd::ConstantTadHelper::getInstance().tadForDimensions(yShapeInfo, dims, dimsLen); xTadShapeInfo = xShapeInfo; yTadShapeInfo = tadPackY.primaryShapeInfo(); yTadOffsets = tadPackY.primaryOffsets(); } else { - tadPackX = sd::ConstantTadHelper::getInstance()->tadForDimensions(xShapeInfo, dims, dimsLen); + tadPackX = sd::ConstantTadHelper::getInstance().tadForDimensions(xShapeInfo, dims, dimsLen); yTadShapeInfo = yShapeInfo; xTadShapeInfo = tadPackX.primaryShapeInfo(); xTadOffsets = tadPackX.primaryOffsets(); diff --git a/libnd4j/include/helpers/OpTracker.h b/libnd4j/include/helpers/OpTracker.h index 122f4f32b..dfccf5e5d 100644 --- a/libnd4j/include/helpers/OpTracker.h +++ b/libnd4j/include/helpers/OpTracker.h @@ -32,8 +32,6 @@ namespace sd { class ND4J_EXPORT OpTracker { private: - static OpTracker* _INSTANCE; - std::string _export; int _operations = 0; @@ -45,7 +43,7 @@ namespace sd { template std::string local_to_string(T value); public: - static OpTracker* getInstance(); + static OpTracker& getInstance(); int totalGroups(); int totalOperations(); diff --git a/libnd4j/include/helpers/benchmark/BroadcastBenchmark.h b/libnd4j/include/helpers/benchmark/BroadcastBenchmark.h index 3a043be59..8c61bda23 100644 --- a/libnd4j/include/helpers/benchmark/BroadcastBenchmark.h +++ b/libnd4j/include/helpers/benchmark/BroadcastBenchmark.h @@ -69,14 +69,14 @@ namespace sd { void executeOnce() override { PointersManager manager(LaunchContext::defaultContext(), "BroadcastBM"); - auto packX = ConstantTadHelper::getInstance()->tadForDimensions(_x->shapeInfo(), _axis); - auto packZ = ConstantTadHelper::getInstance()->tadForDimensions(_z->shapeInfo(), _axis); + auto packX = ConstantTadHelper::getInstance().tadForDimensions(_x->shapeInfo(), _axis); + auto packZ = ConstantTadHelper::getInstance().tadForDimensions(_z->shapeInfo(), _axis); - auto tadOnlyShapeInfo = Environment::getInstance()->isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo(); - auto tadOffsets = Environment::getInstance()->isCPU() ? packX.primaryOffsets() : packX.specialOffsets(); + auto tadOnlyShapeInfo = Environment::getInstance().isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo(); + auto tadOffsets = Environment::getInstance().isCPU() ? packX.primaryOffsets() : packX.specialOffsets(); - auto tadOnlyShapeInfoZ = Environment::getInstance()->isCPU() ? packZ.primaryShapeInfo() : packZ.specialShapeInfo(); - auto tadOffsetsZ = Environment::getInstance()->isCPU() ? packZ.primaryOffsets() : packZ.specialOffsets(); + auto tadOnlyShapeInfoZ = Environment::getInstance().isCPU() ? packZ.primaryShapeInfo() : packZ.specialShapeInfo(); + auto tadOffsetsZ = Environment::getInstance().isCPU() ? 
packZ.primaryOffsets() : packZ.specialOffsets(); NativeOpExecutioner::execBroadcast(LaunchContext::defaultContext(), _opNum, _x->buffer(), _x->shapeInfo(), _x->specialBuffer(), _x->specialShapeInfo(), _y->buffer(), _y->shapeInfo(), _y->specialBuffer(), _y->specialShapeInfo(), _z->buffer(), _z->shapeInfo(), _z->specialBuffer(), _z->specialShapeInfo(), nullptr, _axis.size(), /*Nd4jLong **/ tadOnlyShapeInfo, /*Nd4jLong */ tadOffsets, /*Nd4jLong */ tadOnlyShapeInfoZ, /*Nd4jLong */ tadOffsetsZ); diff --git a/libnd4j/include/helpers/benchmark/DeclarableBenchmark.h b/libnd4j/include/helpers/benchmark/DeclarableBenchmark.h index f9347eb05..58c018a5b 100644 --- a/libnd4j/include/helpers/benchmark/DeclarableBenchmark.h +++ b/libnd4j/include/helpers/benchmark/DeclarableBenchmark.h @@ -36,7 +36,7 @@ namespace sd { sd::graph::Context *_context = nullptr; public: DeclarableBenchmark(sd::ops::DeclarableOp &op, std::string name = 0) : OpBenchmark() { - _op = &op; //ops::OpRegistrator::getInstance()->getOperation(op.getOpHash()); + _op = &op; //ops::OpRegistrator::getInstance().getOperation(op.getOpHash()); _testName = name; } diff --git a/libnd4j/include/helpers/benchmark/ReductionBenchmark.h b/libnd4j/include/helpers/benchmark/ReductionBenchmark.h index a1dc0126f..d87c20d3c 100644 --- a/libnd4j/include/helpers/benchmark/ReductionBenchmark.h +++ b/libnd4j/include/helpers/benchmark/ReductionBenchmark.h @@ -88,10 +88,10 @@ namespace sd { else NativeOpExecutioner::execReduceSameScalar(LaunchContext::defaultContext(), _opNum, _x->buffer(), _x->shapeInfo(), _x->specialBuffer(), _x->specialShapeInfo(), nullptr, _z->buffer(), _z->shapeInfo(), _z->specialBuffer(), _z->specialShapeInfo()); else { - auto pack = ConstantTadHelper::getInstance()->tadForDimensions(_x->shapeInfo(), _axis); + auto pack = ConstantTadHelper::getInstance().tadForDimensions(_x->shapeInfo(), _axis); - auto tadOnlyShapeInfo = Environment::getInstance()->isCPU() ? pack.primaryShapeInfo() : pack.specialShapeInfo(); - auto tadOffsets = Environment::getInstance()->isCPU() ? pack.primaryOffsets() : pack.specialOffsets(); + auto tadOnlyShapeInfo = Environment::getInstance().isCPU() ? pack.primaryShapeInfo() : pack.specialShapeInfo(); + auto tadOffsets = Environment::getInstance().isCPU() ? 
pack.primaryOffsets() : pack.specialOffsets(); if (_opType == 0) NativeOpExecutioner::execReduceFloat(LaunchContext::defaultContext(), _opNum, _x->buffer(), _x->shapeInfo(), _x->specialBuffer(), _x->specialShapeInfo(), nullptr, _z->buffer(), _z->shapeInfo(), _z->specialBuffer(), _z->specialShapeInfo(), nullptr, _axis.size(), tadOnlyShapeInfo, tadOffsets); diff --git a/libnd4j/include/helpers/cpu/ConstantHelper.cpp b/libnd4j/include/helpers/cpu/ConstantHelper.cpp index 10b8a52c3..be6eff65c 100644 --- a/libnd4j/include/helpers/cpu/ConstantHelper.cpp +++ b/libnd4j/include/helpers/cpu/ConstantHelper.cpp @@ -27,6 +27,7 @@ #include #include #include +#include namespace sd { @@ -42,11 +43,17 @@ namespace sd { } } - ConstantHelper* ConstantHelper::getInstance() { - if (!_INSTANCE) - _INSTANCE = new sd::ConstantHelper(); +ConstantHelper::~ConstantHelper() { + for (const auto &v:_cache) { + for (const auto &c:v) { + delete c.second; + } + } +} - return _INSTANCE; +ConstantHelper& ConstantHelper::getInstance() { + static ConstantHelper instance; + return instance; } void* ConstantHelper::replicatePointer(void *src, size_t numBytes, memory::Workspace *workspace) { @@ -95,17 +102,17 @@ namespace sd { result = holder->getConstantDataBuffer(dataType); else { auto size = descriptor.length() * DataTypeUtils::sizeOf(dataType); - auto cbuff = new int8_t[size]; + auto cbuff = std::make_shared(new int8_t[size], std::make_shared()); _counters[deviceId] += size; // create buffer with this dtype if (descriptor.isFloat()) { - BUILD_DOUBLE_SELECTOR(sd::DataType::DOUBLE, dataType, sd::TypeCast::convertGeneric, (nullptr, const_cast(descriptor.floatValues().data()), descriptor.length(), cbuff), (sd::DataType::DOUBLE, double), LIBND4J_TYPES); + BUILD_DOUBLE_SELECTOR(sd::DataType::DOUBLE, dataType, sd::TypeCast::convertGeneric, (nullptr, const_cast(descriptor.floatValues().data()), descriptor.length(), cbuff->pointer()), (sd::DataType::DOUBLE, double), LIBND4J_TYPES); } else if (descriptor.isInteger()) { - BUILD_DOUBLE_SELECTOR(sd::DataType::INT64, dataType, sd::TypeCast::convertGeneric, (nullptr, const_cast(descriptor.integerValues().data()), descriptor.length(), cbuff), (sd::DataType::INT64, Nd4jLong), LIBND4J_TYPES); + BUILD_DOUBLE_SELECTOR(sd::DataType::INT64, dataType, sd::TypeCast::convertGeneric, (nullptr, const_cast(descriptor.integerValues().data()), descriptor.length(), cbuff->pointer()), (sd::DataType::INT64, Nd4jLong), LIBND4J_TYPES); } - ConstantDataBuffer dataBuffer(cbuff, nullptr, descriptor.length(), DataTypeUtils::sizeOf(dataType)); + ConstantDataBuffer dataBuffer(cbuff, descriptor.length(), dataType); holder->addBuffer(dataBuffer, dataType); result = holder->getConstantDataBuffer(dataType); @@ -122,8 +129,6 @@ namespace sd { else return _counters[deviceId]; } - - sd::ConstantHelper* sd::ConstantHelper::_INSTANCE = 0; } #endif \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/ConstantShapeHelper.cpp b/libnd4j/include/helpers/cpu/ConstantShapeHelper.cpp index fc8abe8aa..528527f36 100644 --- a/libnd4j/include/helpers/cpu/ConstantShapeHelper.cpp +++ b/libnd4j/include/helpers/cpu/ConstantShapeHelper.cpp @@ -24,51 +24,50 @@ #include #include #include +#include namespace sd { ConstantShapeHelper::ConstantShapeHelper() { _cache.resize(32); for (int e = 0; e < 32; e++) { - MAP_IMPL cache; + MAP_IMPL cache; _cache[e] = cache; } } - ConstantShapeHelper* ConstantShapeHelper::getInstance() { - if (!_INSTANCE) - _INSTANCE = new ConstantShapeHelper(); - - return _INSTANCE; + ConstantShapeHelper& 
ConstantShapeHelper::getInstance() { + static ConstantShapeHelper instance; + return instance; } - ConstantDataBuffer ConstantShapeHelper::bufferForShapeInfo(sd::DataType dataType, char order, const std::vector &shape) { +ConstantShapeBuffer& ConstantShapeHelper::bufferForShapeInfo(sd::DataType dataType, char order, const std::vector &shape) { ShapeDescriptor descriptor(dataType, order, shape); return bufferForShapeInfo(descriptor); } - ConstantDataBuffer ConstantShapeHelper::bufferForShapeInfo(const sd::DataType dataType, const char order, const int rank, const Nd4jLong* shape) { +ConstantShapeBuffer& ConstantShapeHelper::bufferForShapeInfo(const sd::DataType dataType, const char order, const int rank, const Nd4jLong* shape) { ShapeDescriptor descriptor(dataType, order, shape, rank); return bufferForShapeInfo(descriptor); } - ConstantDataBuffer ConstantShapeHelper::bufferForShapeInfo(const ShapeDescriptor &descriptor) { - int deviceId = 0; +ConstantShapeBuffer& ConstantShapeHelper::bufferForShapeInfo(const ShapeDescriptor &descriptor) { + int deviceId = 0; - std::lock_guard lock(_mutex); + std::lock_guard lock(_mutex); - if (_cache[deviceId].count(descriptor) == 0) { - auto hPtr = descriptor.toShapeInfo(); - ConstantDataBuffer buffer(hPtr, nullptr, shape::shapeInfoLength(hPtr)*sizeof(Nd4jLong), DataType::INT64); - ShapeDescriptor descriptor1(descriptor); - _cache[deviceId][descriptor1] = buffer; - return _cache[deviceId][descriptor1]; - } else { - return _cache[deviceId].at(descriptor); - } - } + if (_cache[deviceId].count(descriptor) == 0) { + auto hPtr = std::make_shared(descriptor.toShapeInfo(), std::make_shared()); + ConstantShapeBuffer buffer(hPtr); + ShapeDescriptor descriptor1(descriptor); + _cache[deviceId][descriptor1] = buffer; + return _cache[deviceId][descriptor1]; + } else { + return _cache[deviceId].at(descriptor); + } +} - ConstantDataBuffer ConstantShapeHelper::bufferForShapeInfo(const Nd4jLong *shapeInfo) { +ConstantShapeBuffer& ConstantShapeHelper::bufferForShapeInfo(const Nd4jLong *shapeInfo) { ShapeDescriptor descriptor(shapeInfo); return bufferForShapeInfo(descriptor); } @@ -83,7 +82,7 @@ namespace sd { const Nd4jLong* ConstantShapeHelper::createShapeInfo(const sd::DataType dataType, const char order, const int rank, const Nd4jLong* shape) { ShapeDescriptor descriptor(dataType, order, shape, rank); - return bufferForShapeInfo(descriptor).primaryAsT(); + return bufferForShapeInfo(descriptor).primary(); } const Nd4jLong* ConstantShapeHelper::createShapeInfo(const sd::DataType dataType, const Nd4jLong* shapeInfo) { @@ -92,26 +91,26 @@ namespace sd { const Nd4jLong* ConstantShapeHelper::emptyShapeInfo(const sd::DataType dataType) { auto descriptor = ShapeDescriptor::emptyDescriptor(dataType); - return bufferForShapeInfo(descriptor).primaryAsT(); + return bufferForShapeInfo(descriptor).primary(); } const Nd4jLong* ConstantShapeHelper::scalarShapeInfo(const sd::DataType dataType) { auto descriptor = ShapeDescriptor::scalarDescriptor(dataType); - return bufferForShapeInfo(descriptor).primaryAsT(); + return bufferForShapeInfo(descriptor).primary(); } const Nd4jLong* ConstantShapeHelper::vectorShapeInfo(const Nd4jLong length, const sd::DataType dataType) { auto descriptor = ShapeDescriptor::vectorDescriptor(length, dataType); - return bufferForShapeInfo(descriptor).primaryAsT(); + return bufferForShapeInfo(descriptor).primary(); } const Nd4jLong* ConstantShapeHelper::createShapeInfo(const sd::DataType dataType, const char order, const std::vector &shape) { ShapeDescriptor 
descriptor(dataType, order, shape); - return bufferForShapeInfo(descriptor).primaryAsT(); + return bufferForShapeInfo(descriptor).primary(); } const Nd4jLong* ConstantShapeHelper::createShapeInfo(const ShapeDescriptor &descriptor) { - return bufferForShapeInfo(descriptor).primaryAsT(); + return bufferForShapeInfo(descriptor).primary(); } const Nd4jLong* ConstantShapeHelper::createFromExisting(Nd4jLong *shapeInfo, bool destroyOriginal) { @@ -135,7 +134,7 @@ namespace sd { //////////////////////////////////////////////////////////////////////// -ConstantDataBuffer ConstantShapeHelper::createShapeInfoWithUnitiesForBroadcast(const Nd4jLong* maxShapeInfo, const Nd4jLong* minShapeInfo, sd::memory::Workspace* workspace, const std::vector &dimensions) { +ConstantShapeBuffer& ConstantShapeHelper::createShapeInfoWithUnitiesForBroadcast(const Nd4jLong* maxShapeInfo, const Nd4jLong* minShapeInfo, sd::memory::Workspace* workspace, const std::vector &dimensions) { Nd4jLong* newShapeInfo = nullptr; ALLOCATE(newShapeInfo, workspace, shape::shapeInfoLength(shape::rank(maxShapeInfo)), Nd4jLong); @@ -185,10 +184,6 @@ ConstantDataBuffer ConstantShapeHelper::createShapeInfoWithUnitiesForBroadcast(c return bufferForShapeInfo(descriptor); } - - -sd::ConstantShapeHelper* sd::ConstantShapeHelper::_INSTANCE = 0; - -} +} // namespace sd #endif \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/ConstantTadHelper.cpp b/libnd4j/include/helpers/cpu/ConstantTadHelper.cpp index ea32db7e6..9f859ee3e 100644 --- a/libnd4j/include/helpers/cpu/ConstantTadHelper.cpp +++ b/libnd4j/include/helpers/cpu/ConstantTadHelper.cpp @@ -21,6 +21,8 @@ #include "../ConstantTadHelper.h" #include #include +#include +#include #ifndef __CUDABLAS__ @@ -32,11 +34,9 @@ namespace sd { _cache.emplace_back(pack); } - ConstantTadHelper* ConstantTadHelper::getInstance() { - if (!_INSTANCE) - _INSTANCE = new ConstantTadHelper(); - - return _INSTANCE; + ConstantTadHelper& ConstantTadHelper::getInstance() { + static ConstantTadHelper instance; + return instance; } TadPack ConstantTadHelper::tadForDimensions(const Nd4jLong *originalShape, int dimension, const bool keepUnitiesInShape) { @@ -60,60 +60,31 @@ namespace sd { TadPack ConstantTadHelper::tadForDimensions(TadDescriptor &descriptor) { const int deviceId = 0; - _mutex.lock(); + std::lock_guard lock(_mutex); if (_cache[deviceId].count(descriptor) == 0) { - + // if there's no TadPack matching this descriptor - create one const auto shapeInfo = descriptor.originalShape().toShapeInfo(); const int rank = shape::rank(shapeInfo); const std::vector dimsToExclude = ShapeUtils::evalDimsToExclude(rank, descriptor.axis()); const Nd4jLong numOfSubArrs = ShapeUtils::getNumOfSubArrs(shapeInfo, dimsToExclude); const int subArrRank = (rank == dimsToExclude.size() || descriptor.areUnitiesinShape()) ? 
rank : rank - dimsToExclude.size(); - auto sPtr = new Nd4jLong[shape::shapeInfoLength(subArrRank)]; // shape of sub-arrays (same for all for them) - auto oPtr = new Nd4jLong[numOfSubArrs]; + auto sPtr = std::make_shared(new Nd4jLong[shape::shapeInfoLength(subArrRank)], std::make_shared()); // shape of sub-arrays (same for all of them) + auto oPtr = std::make_shared(new Nd4jLong[numOfSubArrs], std::make_shared()); if (numOfSubArrs > 0) - shape::calcSubArrsShapeInfoAndOffsets(shapeInfo, numOfSubArrs, dimsToExclude.size(), dimsToExclude.data(), sPtr, oPtr, descriptor.areUnitiesinShape()); - - - ConstantDataBuffer shapesBuffer(sPtr, nullptr, shape::shapeInfoLength(subArrRank)*sizeof(Nd4jLong), DataType::INT64); - ConstantDataBuffer offsetsBuffer(oPtr, nullptr, numOfSubArrs*sizeof(Nd4jLong), DataType::INT64); - TadPack t(shapesBuffer, offsetsBuffer, numOfSubArrs); - - - - // auto shapeInfo = descriptor.originalShape().toShapeInfo(); - // shape::TAD tad; - // tad.init(shapeInfo, descriptor.axis().data(), descriptor.axis().size()); - // tad.createTadOnlyShapeInfo(); - // tad.createOffsets(); - - // auto sPtr = new Nd4jLong[shape::shapeInfoLength(tad.tadOnlyShapeInfo)]; - // auto oPtr = new Nd4jLong[tad.numTads]; - - // memcpy(sPtr, tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); - // memcpy(oPtr, tad.tadOffsets, tad.numTads * sizeof(Nd4jLong)); - - // TadPack t(shapesBuffer, offsetsBuffer, tad.numTads); - + shape::calcSubArrsShapeInfoAndOffsets(shapeInfo, numOfSubArrs, dimsToExclude.size(), dimsToExclude.data(), sPtr->pointerAsT(), oPtr->pointerAsT(), descriptor.areUnitiesinShape()); + ConstantShapeBuffer shapeBuffer(sPtr); + ConstantOffsetsBuffer offsetsBuffer(oPtr); + TadPack t(shapeBuffer, offsetsBuffer, numOfSubArrs); _cache[deviceId][descriptor] = t; - TadPack &r = _cache[deviceId][descriptor]; - _mutex.unlock(); - delete[] shapeInfo; - - return r; - } else { - TadPack r = _cache[deviceId][descriptor]; - _mutex.unlock(); - - return r; } - } - sd::ConstantTadHelper* sd::ConstantTadHelper::_INSTANCE = 0; + return _cache[deviceId][descriptor]; + } } #endif \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/MmulHelper.cpp b/libnd4j/include/helpers/cpu/MmulHelper.cpp index 26a6643c3..437eebe1d 100644 --- a/libnd4j/include/helpers/cpu/MmulHelper.cpp +++ b/libnd4j/include/helpers/cpu/MmulHelper.cpp @@ -162,7 +162,7 @@ static void usualDot(const Nd4jLong length, const double alpha, const void* vX, const bool betaPersent = beta; T3 sum = 0; - PRAGMA_OMP_PARALLEL_FOR_ARGS(OMP_IF(length > Environment::getInstance()->elementwiseThreshold()) schedule(guided) reduction(OMP_SUMT:sum)) + PRAGMA_OMP_PARALLEL_FOR_ARGS(OMP_IF(length > Environment::getInstance().elementwiseThreshold()) schedule(guided) reduction(OMP_SUMT:sum)) for(Nd4jLong i = 0; i < length; ++i) sum += X[i * incx] * Y[i * incy]; @@ -210,7 +210,7 @@ NDArray* MmulHelper::mmulMxM(const NDArray* A, const NDArray* B, NDArray* C, con const auto cType = C->dataType(); const bool AB(aType == bType), AC(aType == cType), ABC(AB && AC); - const bool hasGemm = BlasHelper::getInstance()->hasGEMM(aType); + const bool hasGemm = BlasHelper::getInstance().hasGEMM(aType); const bool typeDouble = hasGemm && ABC && aType == DataType::DOUBLE; const bool typeFloat = hasGemm && ABC && aType == DataType::FLOAT32; @@ -261,10 +261,10 @@ NDArray* MmulHelper::mmulMxM(const NDArray* A, const NDArray* B, NDArray* C, con const int ldc = (cMcont && cNcont) ? M : !cMcont ?
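The raw new Nd4jLong[] buffers above previously had to be deleted by hand (or leaked on the cached path); wrapping them in shared pointers lets every TadPack copy share the arrays and release them exactly once. The idea reduced to standard C++, without libnd4j's wrapper and deallocator types (their template arguments are elided by the diff):

#include <cstddef>
#include <memory>

using SketchLong = long long;  // stand-in for Nd4jLong

// std::shared_ptr<T[]> (C++17) calls delete[] when the last owner disappears,
// so a cached pack and all copies handed to callers can share one array safely
std::shared_ptr<SketchLong[]> makeOffsets(std::size_t numOfSubArrs) {
    return std::shared_ptr<SketchLong[]>(new SketchLong[numOfSubArrs]);
}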
pC->strideAt(0) : pC->strideAt(1); if(typeFloat) { - BlasHelper::getInstance()->sgemm()(blasOrder, transAblas, transBblas, M, N, K, (float) alpha, pA->bufferAsT(), lda, pB->bufferAsT(), ldb, (float) beta, pC->bufferAsT(), ldc); + BlasHelper::getInstance().sgemm()(blasOrder, transAblas, transBblas, M, N, K, (float) alpha, pA->bufferAsT(), lda, pB->bufferAsT(), ldb, (float) beta, pC->bufferAsT(), ldc); } else if(typeDouble) { - BlasHelper::getInstance()->dgemm()(blasOrder, transAblas, transBblas, M, N, K, (double) alpha, pA->bufferAsT(), lda, pB->bufferAsT(), ldb, (double) beta, pC->bufferAsT(), ldc); + BlasHelper::getInstance().dgemm()(blasOrder, transAblas, transBblas, M, N, K, (double) alpha, pA->bufferAsT(), lda, pB->bufferAsT(), ldb, (double) beta, pC->bufferAsT(), ldc); } if(pC != C) { @@ -321,7 +321,7 @@ NDArray* MmulHelper::mmulMxV(const NDArray* A, const NDArray* X, sd::NDArray* Y, const auto yType = Y->dataType(); const bool AX(aType == xType), AY(aType == yType), AXY(AX && AY); - const bool hasGemv = BlasHelper::getInstance()->hasGEMV(aType); + const bool hasGemv = BlasHelper::getInstance().hasGEMV(aType); const bool typeDouble = hasGemv && AXY && aType == DataType::DOUBLE; const bool typeFloat = hasGemv && AXY && aType == DataType::FLOAT32; @@ -347,10 +347,10 @@ NDArray* MmulHelper::mmulMxV(const NDArray* A, const NDArray* X, sd::NDArray* Y, // choose appropriate cuda gemm api depending on data types if(typeDouble) { - BlasHelper::getInstance()->dgemv()(blasOrder, CblasNoTrans, M, N, alpha, (double*)pA->buffer(), lda, (double*)X->buffer(), incx, beta, (double*)Y->buffer(), incy); + BlasHelper::getInstance().dgemv()(blasOrder, CblasNoTrans, M, N, alpha, (double*)pA->buffer(), lda, (double*)X->buffer(), incx, beta, (double*)Y->buffer(), incy); } else if(typeFloat) { - BlasHelper::getInstance()->sgemv()(blasOrder, CblasNoTrans, M, N, (float)alpha, (float*)pA->buffer(), lda, (float*)X->buffer(), incx, (float)beta, (float*)Y->buffer(), incy); + BlasHelper::getInstance().sgemv()(blasOrder, CblasNoTrans, M, N, (float)alpha, (float*)pA->buffer(), lda, (float*)X->buffer(), incx, (float)beta, (float*)Y->buffer(), incy); } if(pA != A) @@ -617,7 +617,7 @@ static void usualGemm(const char cOrder, const bool transA, const bool transB, c const bool flagA = (flagC && transA) || (!flagC && !transA); const bool flagB = (flagC && transB) || (!flagC && !transB); - // PRAGMA_OMP_PARALLEL_FOR_ARGS(OMP_IF(M*N > Environment::getInstance()->elementwiseThreshold()) schedule(guided)) + // PRAGMA_OMP_PARALLEL_FOR_ARGS(OMP_IF(M*N > Environment::getInstance().elementwiseThreshold()) schedule(guided)) // for(uint row = 0; row < M; ++row) { // T3* c = flagC ? 
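mmulMxM above calls into vendor BLAS only when a GEMM exists for the type and all three operands agree on it; everything else falls through to the generic loops. The guard, isolated (DType is an illustrative enum, not libnd4j's DataType):

enum class DType { FLOAT32, DOUBLE, OTHER };

bool canUseVendorGemm(DType a, DType b, DType c, bool hasVendorGemm) {
    const bool sameType = (a == b) && (a == c);  // the ABC flag in the hunk above
    return hasVendorGemm && sameType && (a == DType::FLOAT32 || a == DType::DOUBLE);
}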
(C + row) : (C + row * ldc); diff --git a/libnd4j/include/helpers/cpu/cublasHelper.cpp b/libnd4j/include/helpers/cpu/cublasHelper.cpp index f6f718702..4b17e601d 100644 --- a/libnd4j/include/helpers/cpu/cublasHelper.cpp +++ b/libnd4j/include/helpers/cpu/cublasHelper.cpp @@ -37,11 +37,9 @@ namespace sd { } - CublasHelper* CublasHelper::getInstance() { - if (!_INSTANCE) - _INSTANCE = new sd::CublasHelper(); - - return _INSTANCE; + CublasHelper& CublasHelper::getInstance() { + static CublasHelper instance; + return instance; } void* CublasHelper::handle() { @@ -55,7 +53,4 @@ namespace sd { void* CublasHelper::handle(int deviceId) { return nullptr; } - - - sd::CublasHelper* sd::CublasHelper::_INSTANCE = 0; } \ No newline at end of file diff --git a/libnd4j/include/helpers/cublasHelper.h b/libnd4j/include/helpers/cublasHelper.h index 0300f3698..8ebdc66a7 100644 --- a/libnd4j/include/helpers/cublasHelper.h +++ b/libnd4j/include/helpers/cublasHelper.h @@ -29,7 +29,6 @@ namespace sd { class ND4J_EXPORT CublasHelper { private: - static CublasHelper *_INSTANCE; static std::mutex _mutex; std::vector _cache; @@ -37,9 +36,9 @@ namespace sd { std::vector _cudnn; CublasHelper(); - ~CublasHelper(); public: - static CublasHelper* getInstance(); + ~CublasHelper(); + static CublasHelper& getInstance(); void* cudnn(); void* solver(); diff --git a/libnd4j/include/helpers/cuda/ConstantHelper.cu b/libnd4j/include/helpers/cuda/ConstantHelper.cu index 62d932489..7eb9273e5 100644 --- a/libnd4j/include/helpers/cuda/ConstantHelper.cu +++ b/libnd4j/include/helpers/cuda/ConstantHelper.cu @@ -29,6 +29,7 @@ #include #include #include +#include #define CONSTANT_LIMIT 49152 @@ -84,11 +85,17 @@ namespace sd { throw cuda_exception::build("Final cudaSetDevice failed", res); } - ConstantHelper* ConstantHelper::getInstance() { - if (!_INSTANCE) - _INSTANCE = new sd::ConstantHelper(); +ConstantHelper::~ConstantHelper() { + for (const auto &v:_cache) { + for (const auto &c:v) { + delete c.second; + } + } +} - return _INSTANCE; + ConstantHelper& ConstantHelper::getInstance() { + static ConstantHelper instance; + return instance; } void* ConstantHelper::replicatePointer(void *src, size_t numBytes, memory::Workspace *workspace) { @@ -156,19 +163,21 @@ namespace sd { result = holder->getConstantDataBuffer(dataType); } else { auto numBytes = descriptor.length() * DataTypeUtils::sizeOf(dataType); - auto cbuff = new int8_t[numBytes]; + auto cbuff = std::make_shared(new int8_t[numBytes], std::make_shared()); _counters[deviceId] += numBytes; // create buffer with this dtype if (descriptor.isFloat()) { - BUILD_DOUBLE_SELECTOR(sd::DataType::DOUBLE, dataType, sd::SpecialTypeConverter::convertGeneric, (nullptr, const_cast(descriptor.floatValues().data()), descriptor.length(), cbuff), (sd::DataType::DOUBLE, double), LIBND4J_TYPES); + BUILD_DOUBLE_SELECTOR(sd::DataType::DOUBLE, dataType, sd::SpecialTypeConverter::convertGeneric, (nullptr, const_cast(descriptor.floatValues().data()), descriptor.length(), cbuff->pointer()), (sd::DataType::DOUBLE, double), LIBND4J_TYPES); } else if (descriptor.isInteger()) { - BUILD_DOUBLE_SELECTOR(sd::DataType::INT64, dataType, sd::SpecialTypeConverter::convertGeneric, (nullptr, const_cast(descriptor.integerValues().data()), descriptor.length(), cbuff), (sd::DataType::INT64, Nd4jLong), LIBND4J_TYPES); + BUILD_DOUBLE_SELECTOR(sd::DataType::INT64, dataType, sd::SpecialTypeConverter::convertGeneric, (nullptr, const_cast(descriptor.integerValues().data()), descriptor.length(), cbuff->pointer()), (sd::DataType::INT64, 
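A consequence of the singleton conversion shows up in the new ConstantHelper destructor above: a function-local static is destroyed at process exit, so destructor cleanup now actually runs, whereas the old new-ed _INSTANCE was never deleted. The shape of that change, with illustrative types:

#include <map>
#include <vector>

struct Holder { /* owns per-dtype constant buffers */ };

class HelperSketch {
public:
    static HelperSketch& getInstance() {
        static HelperSketch instance;  // destroyed at exit, unlike a leaked new-ed pointer
        return instance;
    }
    ~HelperSketch() {  // this body is now genuinely executed at shutdown
        for (auto& perDevice : _cache)
            for (auto& entry : perDevice)
                delete entry.second;
    }
private:
    HelperSketch() = default;
    std::vector<std::map<int, Holder*>> _cache;
};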
Nd4jLong), LIBND4J_TYPES); } - auto dbuff = replicatePointer(cbuff, descriptor.length() * DataTypeUtils::sizeOf(dataType)); + // we don't have deallocator here. + // TODO: we probably want to make use deallocator here, if we're not using constant memory + auto dbuff = std::make_shared(replicatePointer(cbuff->pointer(), descriptor.length() * DataTypeUtils::sizeOf(dataType))); - ConstantDataBuffer dataBuffer(cbuff, dbuff, descriptor.length(), DataTypeUtils::sizeOf(dataType)); + ConstantDataBuffer dataBuffer(cbuff, dbuff, descriptor.length(), dataType); holder->addBuffer(dataBuffer, dataType); result = holder->getConstantDataBuffer(dataType); @@ -184,6 +193,4 @@ namespace sd { else return _counters[deviceId]; } - - sd::ConstantHelper* sd::ConstantHelper::_INSTANCE = 0; } \ No newline at end of file diff --git a/libnd4j/include/helpers/cuda/ConstantShapeHelper.cu b/libnd4j/include/helpers/cuda/ConstantShapeHelper.cu index 2026dbb04..35ba60ca9 100644 --- a/libnd4j/include/helpers/cuda/ConstantShapeHelper.cu +++ b/libnd4j/include/helpers/cuda/ConstantShapeHelper.cu @@ -24,6 +24,8 @@ #include #include #include +#include +#include namespace sd { @@ -32,46 +34,44 @@ namespace sd { _cache.resize(numDevices); for (int e = 0; e < numDevices; e++) { - MAP_IMPL cache; + MAP_IMPL cache; _cache[e] = cache; } } - ConstantShapeHelper* ConstantShapeHelper::getInstance() { - if (!_INSTANCE) - _INSTANCE = new ConstantShapeHelper(); - - return _INSTANCE; + ConstantShapeHelper& ConstantShapeHelper::getInstance() { + static ConstantShapeHelper instance; + return instance; } - ConstantDataBuffer ConstantShapeHelper::bufferForShapeInfo(sd::DataType dataType, char order, const std::vector &shape) { + ConstantShapeBuffer& ConstantShapeHelper::bufferForShapeInfo(sd::DataType dataType, char order, const std::vector &shape) { ShapeDescriptor descriptor(dataType, order, shape); return bufferForShapeInfo(descriptor); } - ConstantDataBuffer ConstantShapeHelper::bufferForShapeInfo(const sd::DataType dataType, const char order, const int rank, const Nd4jLong* shape) { +ConstantShapeBuffer& ConstantShapeHelper::bufferForShapeInfo(const sd::DataType dataType, const char order, const int rank, const Nd4jLong* shape) { ShapeDescriptor descriptor(dataType, order, shape, rank); return bufferForShapeInfo(descriptor); } - ConstantDataBuffer ConstantShapeHelper::bufferForShapeInfo(const ShapeDescriptor &descriptor) { +ConstantShapeBuffer& ConstantShapeHelper::bufferForShapeInfo(const ShapeDescriptor &descriptor) { int deviceId = AffinityManager::currentDeviceId(); std::lock_guard lock(_mutex); if (_cache[deviceId].count(descriptor) == 0) { - auto hPtr = descriptor.toShapeInfo(); - auto dPtr = ConstantHelper::getInstance()->replicatePointer(hPtr, shape::shapeInfoByteLength(hPtr)); - ConstantDataBuffer buffer(hPtr, dPtr, shape::shapeInfoLength(hPtr) * sizeof(Nd4jLong), DataType::INT64); - ShapeDescriptor descriptor1(descriptor); - _cache[deviceId][descriptor1] = buffer; - return _cache[deviceId][descriptor1]; + auto hPtr = std::make_shared(descriptor.toShapeInfo(), std::make_shared()); + auto dPtr = std::make_shared(ConstantHelper::getInstance().replicatePointer(hPtr->pointer(), shape::shapeInfoByteLength(hPtr->pointerAsT())), std::make_shared()); + ConstantShapeBuffer buffer(hPtr, dPtr); + ShapeDescriptor descriptor1(descriptor); + _cache[deviceId][descriptor1] = buffer; + return _cache[deviceId][descriptor1]; } else { - return _cache[deviceId].at(descriptor); + return _cache[deviceId].at(descriptor); } } - ConstantDataBuffer 
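The BUILD_DOUBLE_SELECTOR calls above dispatch SpecialTypeConverter::convertGeneric, which fills the freshly allocated host buffer by casting the stored double (or int64) constants into the requested dtype. Functionally it comes down to an elementwise cast loop along these lines (a sketch, not the macro-expanded implementation):

#include <cstddef>

template <typename SrcT, typename DstT>
void convertGenericSketch(const SrcT* src, std::size_t length, DstT* dst) {
    for (std::size_t i = 0; i < length; ++i)
        dst[i] = static_cast<DstT>(src[i]);  // per-element cast into the target dtype buffer
}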
ConstantShapeHelper::bufferForShapeInfo(const Nd4jLong *shapeInfo) { +ConstantShapeBuffer& ConstantShapeHelper::bufferForShapeInfo(const Nd4jLong *shapeInfo) { ShapeDescriptor descriptor(shapeInfo); return bufferForShapeInfo(descriptor); } @@ -85,7 +85,7 @@ namespace sd { Nd4jLong const* ConstantShapeHelper::createShapeInfo(const sd::DataType dataType, const char order, const int rank, const Nd4jLong* shape) { ShapeDescriptor descriptor(dataType, order, shape, rank); - return bufferForShapeInfo(descriptor).primaryAsT(); + return bufferForShapeInfo(descriptor).primary(); } Nd4jLong const* ConstantShapeHelper::createShapeInfo(const sd::DataType dataType, const Nd4jLong* shapeInfo) { @@ -94,26 +94,26 @@ namespace sd { Nd4jLong const* ConstantShapeHelper::emptyShapeInfo(const sd::DataType dataType) { auto descriptor = ShapeDescriptor::emptyDescriptor(dataType); - return bufferForShapeInfo(descriptor).primaryAsT(); + return bufferForShapeInfo(descriptor).primary(); } Nd4jLong const* ConstantShapeHelper::scalarShapeInfo(const sd::DataType dataType) { auto descriptor = ShapeDescriptor::scalarDescriptor(dataType); - return bufferForShapeInfo(descriptor).primaryAsT(); + return bufferForShapeInfo(descriptor).primary(); } Nd4jLong const* ConstantShapeHelper::vectorShapeInfo(const Nd4jLong length, const sd::DataType dataType) { auto descriptor = ShapeDescriptor::vectorDescriptor(length, dataType); - return bufferForShapeInfo(descriptor).primaryAsT(); + return bufferForShapeInfo(descriptor).primary(); } Nd4jLong const* ConstantShapeHelper::createShapeInfo(const sd::DataType dataType, const char order, const std::vector &shape) { ShapeDescriptor descriptor(dataType, order, shape); - return bufferForShapeInfo(descriptor).primaryAsT(); + return bufferForShapeInfo(descriptor).primary(); } Nd4jLong const* ConstantShapeHelper::createShapeInfo(const ShapeDescriptor &descriptor) { - return bufferForShapeInfo(descriptor).primaryAsT(); + return bufferForShapeInfo(descriptor).primary(); } Nd4jLong const* ConstantShapeHelper::createFromExisting(Nd4jLong *shapeInfo, bool destroyOriginal) { @@ -136,7 +136,7 @@ namespace sd { } //////////////////////////////////////////////////////////////////////// -ConstantDataBuffer ConstantShapeHelper::createShapeInfoWithUnitiesForBroadcast(const Nd4jLong* maxShapeInfo, const Nd4jLong* minShapeInfo, sd::memory::Workspace* workspace, const std::vector& dimensions) { +ConstantShapeBuffer& ConstantShapeHelper::createShapeInfoWithUnitiesForBroadcast(const Nd4jLong* maxShapeInfo, const Nd4jLong* minShapeInfo, sd::memory::Workspace* workspace, const std::vector& dimensions) { Nd4jLong* newShapeInfo = nullptr; ALLOCATE(newShapeInfo, workspace, shape::shapeInfoLength(shape::rank(maxShapeInfo)), Nd4jLong); @@ -187,7 +187,4 @@ ConstantDataBuffer ConstantShapeHelper::createShapeInfoWithUnitiesForBroadcast(c return bufferForShapeInfo(descriptor); } - -sd::ConstantShapeHelper* sd::ConstantShapeHelper::_INSTANCE = 0; - } \ No newline at end of file diff --git a/libnd4j/include/helpers/cuda/ConstantTadHelper.cu b/libnd4j/include/helpers/cuda/ConstantTadHelper.cu index 8463bab9c..662c99e7c 100644 --- a/libnd4j/include/helpers/cuda/ConstantTadHelper.cu +++ b/libnd4j/include/helpers/cuda/ConstantTadHelper.cu @@ -25,6 +25,8 @@ #include #include #include +#include +#include namespace sd { ConstantTadHelper::ConstantTadHelper() { @@ -36,11 +38,9 @@ namespace sd { } } - ConstantTadHelper* ConstantTadHelper::getInstance() { - if (!_INSTANCE) - _INSTANCE = new ConstantTadHelper(); - - return _INSTANCE; 
+ ConstantTadHelper& ConstantTadHelper::getInstance() { + static ConstantTadHelper instance; + return instance; } TadPack ConstantTadHelper::tadForDimensions(const Nd4jLong *originalShape, int dimension, const bool keepUnitiesInShape) { @@ -73,25 +73,28 @@ namespace sd { const Nd4jLong numOfSubArrs = ShapeUtils::getNumOfSubArrs(shapeInfo, dimsToExclude); const int subArrRank = (rank == dimsToExclude.size() || descriptor.areUnitiesinShape()) ? rank : rank - dimsToExclude.size(); - auto sPtr = new Nd4jLong[shape::shapeInfoLength(subArrRank)]; - auto oPtr = new Nd4jLong[numOfSubArrs]; + auto sPtr = std::make_shared(new Nd4jLong[shape::shapeInfoLength(subArrRank)], std::make_shared()); + auto oPtr = std::make_shared(new Nd4jLong[numOfSubArrs], std::make_shared()); if (numOfSubArrs > 0) - shape::calcSubArrsShapeInfoAndOffsets(shapeInfo, numOfSubArrs, dimsToExclude.size(), dimsToExclude.data(), sPtr, oPtr, descriptor.areUnitiesinShape()); + shape::calcSubArrsShapeInfoAndOffsets(shapeInfo, numOfSubArrs, dimsToExclude.size(), dimsToExclude.data(), sPtr->pointerAsT(), oPtr->pointerAsT(), descriptor.areUnitiesinShape()); Nd4jPointer soPtr; auto res = cudaMalloc(reinterpret_cast(&soPtr), numOfSubArrs * sizeof(Nd4jLong)); if (res != 0) throw cuda_exception::build("Memory allocation for tadOffsets failed", res); - res = cudaMemcpy(soPtr, oPtr, numOfSubArrs * sizeof(Nd4jLong), cudaMemcpyHostToDevice); + res = cudaMemcpy(soPtr, oPtr->pointer(), numOfSubArrs * sizeof(Nd4jLong), cudaMemcpyHostToDevice); if (res != 0) throw cuda_exception::build("tadOffsets copy failed", res); - auto ssPtr = ConstantHelper::getInstance()->replicatePointer(sPtr, shape::shapeInfoByteLength(subArrRank)); + // TODO: add deallocator here? + auto ssPtr = std::make_shared(ConstantHelper::getInstance().replicatePointer(sPtr->pointer(), shape::shapeInfoByteLength(subArrRank))); - ConstantDataBuffer shapesBuffer(sPtr, ssPtr, shape::shapeInfoLength(subArrRank) * sizeof(Nd4jLong), DataType::INT64); - ConstantDataBuffer offsetsBuffer(oPtr, soPtr, numOfSubArrs * sizeof(Nd4jLong), DataType::INT64); + + + ConstantShapeBuffer shapesBuffer(sPtr, ssPtr); + ConstantOffsetsBuffer offsetsBuffer(oPtr, std::make_shared(soPtr, std::make_shared())); TadPack t(shapesBuffer, offsetsBuffer, numOfSubArrs); _cache[deviceId][descriptor] = t; @@ -107,6 +110,4 @@ namespace sd { return r; } } - - sd::ConstantTadHelper* sd::ConstantTadHelper::_INSTANCE = 0; } \ No newline at end of file diff --git a/libnd4j/include/helpers/cuda_off/MmulHelper.cu b/libnd4j/include/helpers/cuda_off/MmulHelper.cu index 0a3b466bc..d1122d794 100644 --- a/libnd4j/include/helpers/cuda_off/MmulHelper.cu +++ b/libnd4j/include/helpers/cuda_off/MmulHelper.cu @@ -238,7 +238,7 @@ NDArray* MmulHelper::mmulMxM(const NDArray* A, const NDArray* B, NDArray* C, dou if (C->isEmpty()) return C; - const int major = Environment::getInstance()->capabilities()[AffinityManager::currentDeviceId()].first(); + const int major = Environment::getInstance().capabilities()[AffinityManager::currentDeviceId()].first(); const auto aType = A->dataType(); const auto bType = B->dataType(); @@ -268,7 +268,7 @@ NDArray* MmulHelper::mmulMxM(const NDArray* A, const NDArray* B, NDArray* C, dou const int sharedMem = threadsPerBlock * sizeof(int) * 6 + 128; // 6 = aRank + bRank + cRank NDArray::prepareSpecialUse({C}, {A, B}); - // BUILD_TRIPLE_SELECTOR(aType, bType, cType, usualGemm, (blocksPerGrid, threadsPerBlock, sharedMem, stream, A->specialBuffer(), A->specialShapeInfo(), B->specialBuffer(), B->specialShapeInfo(), 
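The device path of tadForDimensions above allocates the offsets array with cudaMalloc, copies the host offsets over, and turns failures into exceptions. The same sequence in isolation, with std::runtime_error standing in for sd::cuda_exception:

#include <cuda_runtime.h>
#include <stdexcept>
#include <string>
#include <vector>

void* uploadOffsets(const std::vector<long long>& hostOffsets) {
    void* devPtr = nullptr;
    const size_t bytes = hostOffsets.size() * sizeof(long long);
    cudaError_t res = cudaMalloc(&devPtr, bytes);
    if (res != cudaSuccess)
        throw std::runtime_error("Memory allocation for tadOffsets failed: " + std::to_string(static_cast<int>(res)));
    res = cudaMemcpy(devPtr, hostOffsets.data(), bytes, cudaMemcpyHostToDevice);
    if (res != cudaSuccess) {
        cudaFree(devPtr);  // avoid leaking the allocation when only the copy fails
        throw std::runtime_error("tadOffsets copy failed: " + std::to_string(static_cast<int>(res)));
    }
    return devPtr;
}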
C->specialBuffer(), C->specialShapeInfo(), 0, 1, 0, 1, 0, 1, alpha, beta), NUMERIC_TYPES, NUMERIC_TYPES, FLOAT_TYPES); + // BUILD_TRIPLE_SELECTOR(aType, bType, cType, usualGemm, (blocksPerGrid, threadsPerBlock, sharedMem, stream, A->specialBuffer(), A->specialShapeInfo(), B->specialBuffer(), B->specialShapeInfo(), C->specialBuffer(), C->special(), 0, 1, 0, 1, 0, 1, alpha, beta), NUMERIC_TYPES, NUMERIC_TYPES, FLOAT_TYPES); BUILD_SINGLE_SELECTOR_THRICE(aType, usualGemm, (blocksPerGrid, threadsPerBlock, sharedMem, stream, A->specialBuffer(), A->specialShapeInfo(), B->specialBuffer(), B->specialShapeInfo(), C->specialBuffer(), C->specialShapeInfo(), 0, 1, 0, 1, 0, 1, alpha, beta), NUMERIC_TYPES) NDArray::registerSpecialUse({C}, {A, B}); @@ -411,7 +411,7 @@ NDArray* MmulHelper::mmulMxV(const NDArray* A, const NDArray* X, sd::NDArray* Y, const int blocksPerGrid = (M + threadsPerBlock - 1) / threadsPerBlock; NDArray::prepareSpecialUse({Y}, {A, X}); - // BUILD_TRIPLE_SELECTOR(aType, xType, yType, usualGemv, (blocksPerGrid, threadsPerBlock, stream, A->specialBuffer(), A->specialShapeInfo(), X->specialBuffer(), X->specialShapeInfo(), Y->specialBuffer(), Y->specialShapeInfo(), incx, incy, 0, alpha, beta), NUMERIC_TYPES, NUMERIC_TYPES, FLOAT_TYPES); + // BUILD_TRIPLE_SELECTOR(aType, xType, yType, usualGemv, (blocksPerGrid, threadsPerBlock, stream, A->specialBuffer(), A->specialShapeInfo(), X->specialBuffer(), X->specialShapeInfo(), Y->specialBuffer(), Y->special(), incx, incy, 0, alpha, beta), NUMERIC_TYPES, NUMERIC_TYPES, FLOAT_TYPES); BUILD_SINGLE_SELECTOR_THRICE(xType, usualGemv, (blocksPerGrid, threadsPerBlock, stream, A->specialBuffer(), A->specialShapeInfo(), X->specialBuffer(), X->specialShapeInfo(), Y->specialBuffer(), Y->specialShapeInfo(), incx, incy, 0, alpha, beta), NUMERIC_TYPES) NDArray::registerSpecialUse({Y}, {A, X}); @@ -667,7 +667,7 @@ NDArray* MmulHelper::mmulNxN(const NDArray* A, const NDArray* B, NDArray* C, con cBatchDims = reinterpret_cast(manager.replicatePointer(ShapeUtils::evalDimsToExclude(cRank, {cMaxis, cNaxis}).data(), (cRank - 2) * sizeof(int))); NDArray::prepareSpecialUse({C}, {A, B}); - // BUILD_TRIPLE_SELECTOR(A->dataType(), b->dataType(), C->dataType(), batchedGemm, (blocksPerGrid, threadsPerBlock, A->getContext()->getCudaStream(), A->specialBuffer(), A->specialShapeInfo(), B->specialBuffer(), B->specialShapeInfo(), C->specialBuffer(), C->specialShapeInfo(), aMaxis, aKaxis, bKaxis, bNaxis, cMaxis, cNaxis, alpha, beta), NUMERIC_TYPES, NUMERIC_TYPES, FLOAT_TYPES); + // BUILD_TRIPLE_SELECTOR(A->dataType(), b->dataType(), C->dataType(), batchedGemm, (blocksPerGrid, threadsPerBlock, A->getContext()->getCudaStream(), A->specialBuffer(), A->specialShapeInfo(), B->specialBuffer(), B->specialShapeInfo(), C->specialBuffer(), C->special(), aMaxis, aKaxis, bKaxis, bNaxis, cMaxis, cNaxis, alpha, beta), NUMERIC_TYPES, NUMERIC_TYPES, FLOAT_TYPES); BUILD_SINGLE_SELECTOR_THRICE(A->dataType(), batchedGemm, (blocksPerGrid, threadsPerBlock, sharedMem, A->getContext()->getCudaStream(), A->specialBuffer(), A->specialShapeInfo(), B->specialBuffer(), B->specialShapeInfo(), C->specialBuffer(), C->specialShapeInfo(), aBatchDims, bBatchDims, cBatchDims, aMaxis, aKaxis, bKaxis, bNaxis, cMaxis, cNaxis, alpha, beta), NUMERIC_TYPES) NDArray::registerSpecialUse({C}, {A, B}); diff --git a/libnd4j/include/helpers/cuda_off/cublasHelper.cu b/libnd4j/include/helpers/cuda_off/cublasHelper.cu index 7ab2d7d63..1773937ea 100644 --- a/libnd4j/include/helpers/cuda_off/cublasHelper.cu +++ 
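Every kernel launch in this file sizes its grid with the same ceil-division idiom, (M + threadsPerBlock - 1) / threadsPerBlock; a one-line helper states the intent:

// smallest number of blocks of threadsPerBlock threads that covers n items
inline int blocksFor(long long n, int threadsPerBlock) {
    return static_cast<int>((n + threadsPerBlock - 1) / threadsPerBlock);
}
// blocksFor(1000, 256) == 4: three full blocks plus one partially filled block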
b/libnd4j/include/helpers/cuda_off/cublasHelper.cu @@ -102,13 +102,9 @@ namespace sd { destroyHandle_(_cache[e]); } - CublasHelper* CublasHelper::getInstance() { - _mutex.lock(); - if (!_INSTANCE) - _INSTANCE = new sd::CublasHelper(); - _mutex.unlock(); - - return _INSTANCE; + CublasHelper& CublasHelper::getInstance() { + static CublasHelper instance; + return instance; } void* CublasHelper::cudnn() { @@ -138,7 +134,4 @@ namespace sd { return _cache[deviceId]; } - - - sd::CublasHelper* sd::CublasHelper::_INSTANCE = 0; } \ No newline at end of file diff --git a/libnd4j/include/helpers/helper_hash.h b/libnd4j/include/helpers/helper_hash.h index 1b032238f..fa44b04b7 100644 --- a/libnd4j/include/helpers/helper_hash.h +++ b/libnd4j/include/helpers/helper_hash.h @@ -31,8 +31,6 @@ namespace sd { namespace ops { class ND4J_EXPORT HashHelper { private: - static HashHelper* _INSTANCE; - Nd4jLong _byteTable[256]; const Nd4jLong HSTART = 0xBB40E64DA205B064L; const Nd4jLong HMULT = 7664345821815920749L; @@ -41,7 +39,7 @@ namespace sd { std::mutex _locker; public: - static HashHelper* getInstance(); + static HashHelper& getInstance(); Nd4jLong getLongHash(std::string& str); }; } diff --git a/libnd4j/include/helpers/impl/BlasHelper.cpp b/libnd4j/include/helpers/impl/BlasHelper.cpp index 378c8a6f1..70839fe2d 100644 --- a/libnd4j/include/helpers/impl/BlasHelper.cpp +++ b/libnd4j/include/helpers/impl/BlasHelper.cpp @@ -20,10 +20,9 @@ #include namespace sd { - BlasHelper* BlasHelper::getInstance() { - if (_instance == 0) - _instance = new BlasHelper(); - return _instance; + BlasHelper& BlasHelper::getInstance() { + static BlasHelper instance; + return instance; } @@ -74,7 +73,7 @@ namespace sd { template <> bool BlasHelper::hasGEMV() { - if (sd::Environment::getInstance()->blasFallback()) + if (sd::Environment::getInstance().blasFallback()) return false; #if defined(__EXTERNAL_BLAS__) || defined(HAVE_OPENBLAS) @@ -86,7 +85,7 @@ namespace sd { template <> bool BlasHelper::hasGEMV() { - if (sd::Environment::getInstance()->blasFallback()) + if (sd::Environment::getInstance().blasFallback()) return false; #if defined(__EXTERNAL_BLAS__) || defined(HAVE_OPENBLAS) @@ -138,7 +137,7 @@ namespace sd { bool BlasHelper::hasGEMV(const sd::DataType dtype) { if(dtype == DataType::FLOAT32) { - if (sd::Environment::getInstance()->blasFallback()) + if (sd::Environment::getInstance().blasFallback()) return false; #if defined(__EXTERNAL_BLAS__) || defined(HAVE_OPENBLAS) @@ -148,7 +147,7 @@ namespace sd { #endif } if(dtype == DataType::DOUBLE) { - if (sd::Environment::getInstance()->blasFallback()) + if (sd::Environment::getInstance().blasFallback()) return false; #if defined(__EXTERNAL_BLAS__) || defined(HAVE_OPENBLAS) @@ -162,7 +161,7 @@ namespace sd { template <> bool BlasHelper::hasGEMM() { - if (sd::Environment::getInstance()->blasFallback()) + if (sd::Environment::getInstance().blasFallback()) return false; #if defined(__EXTERNAL_BLAS__) || defined(HAVE_OPENBLAS) @@ -174,7 +173,7 @@ namespace sd { template <> bool BlasHelper::hasGEMM() { - if (sd::Environment::getInstance()->blasFallback()) + if (sd::Environment::getInstance().blasFallback()) return false; #if defined(__EXTERNAL_BLAS__) || defined(HAVE_OPENBLAS) @@ -226,7 +225,7 @@ namespace sd { bool BlasHelper:: hasGEMM(const sd::DataType dtype) { if(dtype == DataType::FLOAT32) { - if (sd::Environment::getInstance()->blasFallback()) + if (sd::Environment::getInstance().blasFallback()) return false; #if defined(__EXTERNAL_BLAS__) || defined(HAVE_OPENBLAS) @@ -236,7 +235,7 
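The deleted lock/unlock pair in CublasHelper::getInstance() is why this refactoring is safe: since C++11 the initialization of a function-local static is guaranteed to happen exactly once even under concurrent first calls, so the hand-rolled mutex (and the double-checked-locking pitfalls it invites) becomes unnecessary:

class LazyHelper {
public:
    static LazyHelper& getInstance() {
        // the compiler emits the one-time, thread-safe initialization that the
        // removed _mutex.lock()/unlock() pair used to approximate by hand
        static LazyHelper instance;
        return instance;
    }
private:
    LazyHelper() = default;
};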
@@ namespace sd { #endif } if(dtype == DataType::DOUBLE) { - if (sd::Environment::getInstance()->blasFallback()) + if (sd::Environment::getInstance().blasFallback()) return false; #if defined(__EXTERNAL_BLAS__) || defined(HAVE_OPENBLAS) @@ -251,7 +250,7 @@ namespace sd { template <> bool BlasHelper::hasBatchedGEMM() { - if (sd::Environment::getInstance()->blasFallback()) + if (sd::Environment::getInstance().blasFallback()) return false; return _hasSgemmBatch; @@ -259,7 +258,7 @@ namespace sd { template <> bool BlasHelper::hasBatchedGEMM() { - if (sd::Environment::getInstance()->blasFallback()) + if (sd::Environment::getInstance().blasFallback()) return false; return _hasDgemmBatch; @@ -362,6 +361,4 @@ namespace sd { // destructor BlasHelper::~BlasHelper() noexcept { } - - BlasHelper* BlasHelper::_instance = 0; } diff --git a/libnd4j/include/helpers/impl/OmpLaunchHelper.cpp b/libnd4j/include/helpers/impl/OmpLaunchHelper.cpp index 0e409a952..b0ef97457 100644 --- a/libnd4j/include/helpers/impl/OmpLaunchHelper.cpp +++ b/libnd4j/include/helpers/impl/OmpLaunchHelper.cpp @@ -32,7 +32,7 @@ namespace sd { //////////////////////////////////////////////////////////////////////////////// OmpLaunchHelper::OmpLaunchHelper(const Nd4jLong N, float desiredNumThreads) { - auto maxItersPerThread = Environment::getInstance()->elementwiseThreshold(); + auto maxItersPerThread = Environment::getInstance().elementwiseThreshold(); if(N < maxItersPerThread) _numThreads = 1; @@ -45,7 +45,7 @@ OmpLaunchHelper::OmpLaunchHelper(const Nd4jLong N, float desiredNumThreads) { else desiredNumThreads = sd::math::nd4j_min(omp_get_max_threads(), desiredNumThreads); #else - desiredNumThreads = sd::Environment::getInstance()->maxThreads(); + desiredNumThreads = sd::Environment::getInstance().maxThreads(); #endif _numThreads = sd::math::nd4j_min(N / maxItersPerThread, desiredNumThreads); } @@ -75,12 +75,12 @@ Nd4jLong OmpLaunchHelper::betterSpan(Nd4jLong N) { #ifdef _OPENMP return betterThreads(N, omp_get_max_threads()); #else - return betterThreads(N, sd::Environment::getInstance()->maxThreads());; + return betterThreads(N, sd::Environment::getInstance().maxThreads());; #endif } int OmpLaunchHelper::betterThreads(Nd4jLong N, int maxThreads) { - auto t = Environment::getInstance()->elementwiseThreshold(); + auto t = Environment::getInstance().elementwiseThreshold(); if (N < t) return 1; else { @@ -92,7 +92,7 @@ Nd4jLong OmpLaunchHelper::betterSpan(Nd4jLong N) { #ifdef _OPENMP auto maxThreads = omp_get_max_threads(); #else - auto maxThreads = sd::Environment::getInstance()->maxThreads(); + auto maxThreads = sd::Environment::getInstance().maxThreads(); #endif // if there's only 1 thread allowed - nothing to do here @@ -102,7 +102,7 @@ Nd4jLong OmpLaunchHelper::betterSpan(Nd4jLong N) { auto totalLength = tadLength * numTads; // if array is tiny - no need to spawn any threeds - if (totalLength < Environment::getInstance()->elementwiseThreshold()) + if (totalLength < Environment::getInstance().elementwiseThreshold()) return 1; // by default we're spawning as many threads we can, but not more than number of TADs diff --git a/libnd4j/include/helpers/impl/OpTracker.cpp b/libnd4j/include/helpers/impl/OpTracker.cpp index bb82ab0d1..e36d4ab5a 100644 --- a/libnd4j/include/helpers/impl/OpTracker.cpp +++ b/libnd4j/include/helpers/impl/OpTracker.cpp @@ -29,11 +29,9 @@ using namespace sd::graph; namespace sd { - OpTracker* OpTracker::getInstance() { - if (_INSTANCE == 0) - _INSTANCE = new OpTracker(); - - return _INSTANCE; + OpTracker& 
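The OmpLaunchHelper hunks above keep applying one heuristic: stay single-threaded until the workload clears the elementwise threshold, then grant roughly one thread per threshold-sized chunk, capped by the environment's limit. Extracted as a standalone function (the exact rounding in libnd4j may differ):

#include <algorithm>

int betterThreadsSketch(long long N, int maxThreads, long long elementwiseThreshold) {
    if (N < elementwiseThreshold)
        return 1;  // tiny arrays: not worth spawning threads
    const long long byWork = N / elementwiseThreshold;
    return static_cast<int>(std::min<long long>(byWork, maxThreads));
}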
OpTracker::getInstance() { + static OpTracker instance; + return instance; } void OpTracker::storeOperation(sd::graph::OpType opType, const OpDescriptor& descriptor) { @@ -118,6 +116,4 @@ namespace sd { return _export.c_str(); } - - sd::OpTracker* sd::OpTracker::_INSTANCE = 0; } diff --git a/libnd4j/include/helpers/impl/ShapeUtils.cpp b/libnd4j/include/helpers/impl/ShapeUtils.cpp index c327004bd..2c189cff1 100644 --- a/libnd4j/include/helpers/impl/ShapeUtils.cpp +++ b/libnd4j/include/helpers/impl/ShapeUtils.cpp @@ -130,7 +130,7 @@ std::vector ShapeUtils::evalShapeForTensorDot(const NDArray* a, cons Nd4jLong* outShapeInfo = ShapeBuilders::copyShapeInfoAndType(shapeInfo, dataType, true, workspace); ShapeDescriptor descriptor(outShapeInfo, dataType); RELEASE(outShapeInfo, workspace); - return ConstantShapeHelper::getInstance()->bufferForShapeInfo(descriptor).primaryAsT(); + return ConstantShapeHelper::getInstance().bufferForShapeInfo(descriptor).primary(); } const int rank = shape::rank(shapeInfo); @@ -168,7 +168,7 @@ std::vector ShapeUtils::evalShapeForTensorDot(const NDArray* a, cons ShapeDescriptor descriptor(outShapeInfo, dataType); RELEASE(outShapeInfo, workspace); - return ConstantShapeHelper::getInstance()->bufferForShapeInfo(descriptor).primaryAsT(); + return ConstantShapeHelper::getInstance().bufferForShapeInfo(descriptor).primary(); } const Nd4jLong* ShapeUtils::evalReduceShapeInfo(const char order, std::vector& dimsToExclude, const NDArray& arr, const bool keepDims, const bool supportOldShapes, sd::memory::Workspace* workspace) { @@ -207,20 +207,20 @@ std::vector ShapeUtils::evalShapeForTensorDot(const NDArray* a, cons ShapeDescriptor descriptor(newShapeInfo, dataType); RELEASE(newShapeInfo, workspace); - return ConstantShapeHelper::getInstance()->bufferForShapeInfo(descriptor).primaryAsT(); + return ConstantShapeHelper::getInstance().bufferForShapeInfo(descriptor).primary(); } else if(supportOldShapes) { ALLOCATE(newShapeInfo, workspace, shape::shapeInfoLength(2), Nd4jLong); shape::shapeOldScalar(dataType, newShapeInfo, 'c'); ShapeDescriptor descriptor(newShapeInfo, dataType); RELEASE(newShapeInfo, workspace); - return ConstantShapeHelper::getInstance()->bufferForShapeInfo(descriptor).primaryAsT(); + return ConstantShapeHelper::getInstance().bufferForShapeInfo(descriptor).primary(); } else { newShapeInfo = ShapeBuilders::createScalarShapeInfo(dataType, workspace); ShapeDescriptor descriptor(newShapeInfo, dataType); RELEASE(newShapeInfo, workspace); - return ConstantShapeHelper::getInstance()->bufferForShapeInfo(descriptor).primaryAsT(); + return ConstantShapeHelper::getInstance().bufferForShapeInfo(descriptor).primary(); } } @@ -241,7 +241,7 @@ std::vector ShapeUtils::evalShapeForTensorDot(const NDArray* a, cons ShapeUtils::updateStridesAndType(newShapeInfo, shapeInfo, order); ShapeDescriptor descriptor(newShapeInfo, dataType); RELEASE(newShapeInfo, workspace); - return ConstantShapeHelper::getInstance()->bufferForShapeInfo(descriptor).primaryAsT(); + return ConstantShapeHelper::getInstance().bufferForShapeInfo(descriptor).primary(); } int newRank = rank - dimSize; @@ -252,13 +252,13 @@ std::vector ShapeUtils::evalShapeForTensorDot(const NDArray* a, cons shape::shapeOldScalar(ArrayOptions::dataType(shapeInfo), newShapeInfo, 'c'); ShapeDescriptor descriptor(newShapeInfo, dataType); RELEASE(newShapeInfo, workspace); - return ConstantShapeHelper::getInstance()->bufferForShapeInfo(descriptor).primaryAsT(); + return 
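Each ShapeUtils hunk above follows the same lifetime discipline: build a transient shapeInfo (often in a workspace), register it with the constant-shape cache, RELEASE the transient copy, and return the cache-owned pointer. A toy intern table shows why the returned pointer outlives the local buffer (single-threaded illustration only):

#include <set>
#include <vector>

// stand-in for the ConstantShapeHelper cache: one stable copy per distinct shape
const std::vector<long long>& internShape(const std::vector<long long>& s) {
    static std::set<std::vector<long long>> table;  // no locking: sketch only
    return *table.insert(s).first;  // set nodes never move, references stay valid
}

const long long* makeReducedShape(std::vector<long long> tmpShapeInfo) {
    const auto& cached = internShape(tmpShapeInfo);
    // tmpShapeInfo (the RELEASE'd workspace buffer in the real code) may now die;
    // the pointer below is owned by the cache for the rest of the process
    return cached.data();
}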
ConstantShapeHelper::getInstance().bufferForShapeInfo(descriptor).primary(); } else { newShapeInfo = ShapeBuilders::createScalarShapeInfo(ArrayOptions::dataType(shapeInfo), workspace); ShapeDescriptor descriptor(newShapeInfo, dataType); RELEASE(newShapeInfo, workspace); - return ConstantShapeHelper::getInstance()->bufferForShapeInfo(descriptor).primaryAsT(); + return ConstantShapeHelper::getInstance().bufferForShapeInfo(descriptor).primary(); } } @@ -289,7 +289,7 @@ std::vector ShapeUtils::evalShapeForTensorDot(const NDArray* a, cons ShapeDescriptor descriptor(newShapeInfo, dataType); RELEASE(newShapeInfo, workspace); - return ConstantShapeHelper::getInstance()->bufferForShapeInfo(descriptor).primaryAsT(); + return ConstantShapeHelper::getInstance().bufferForShapeInfo(descriptor).primary(); } ////////////////////////////////////////////////////////////////////////// @@ -341,7 +341,7 @@ std::vector ShapeUtils::evalRepeatShape(int axis, const std::vectorbufferForShapeInfo(descriptor).primaryAsT(); + return ConstantShapeHelper::getInstance().bufferForShapeInfo(descriptor).primary(); } ////////////////////////////////////////////////////////////////////////// @@ -486,7 +486,7 @@ bool ShapeUtils::areShapesBroadcastable(const Nd4jLong *shapeInfo1, const Nd4jLo ShapeDescriptor descriptor(tmpShapeInfo); RELEASE(tmpShapeInfo, workspace); - resultShapeInfo = ConstantShapeHelper::getInstance()->bufferForShapeInfo(descriptor).primaryAsT(); + resultShapeInfo = ConstantShapeHelper::getInstance().bufferForShapeInfo(descriptor).primary(); return true; } @@ -525,7 +525,7 @@ bool ShapeUtils::areShapesBroadcastable(const Nd4jLong *shapeInfo1, const Nd4jLo ShapeDescriptor descriptor(tmpShapeInfo); RELEASE(tmpShapeInfo, workspace); - resultShapeInfo = const_cast(ConstantShapeHelper::getInstance()->createShapeInfo(descriptor)); + resultShapeInfo = const_cast(ConstantShapeHelper::getInstance().createShapeInfo(descriptor)); return true; } @@ -594,7 +594,7 @@ bool ShapeUtils::areShapesBroadcastable(const Nd4jLong *shapeInfo1, const Nd4jLo ShapeDescriptor descriptor(newShapeInfo); RELEASE(newShapeInfo, workspace); - return ConstantShapeHelper::getInstance()->bufferForShapeInfo(descriptor).primaryAsT(); + return ConstantShapeHelper::getInstance().bufferForShapeInfo(descriptor).primary(); } std::vector ShapeUtils::pullShapeFromShapeInfo(const Nd4jLong *shapeInfo) { @@ -745,7 +745,7 @@ std::vector ShapeUtils::shapeAsVector(const Nd4jLong* shapeInfo) { ShapeUtils::updateStridesAndType(outputShapeInfo, shapeInfo, shape::order(shapeInfo)); - auto result = ConstantShapeHelper::getInstance()->createShapeInfo(outputShapeInfo); + auto result = ConstantShapeHelper::getInstance().createShapeInfo(outputShapeInfo); RELEASE(outputShapeInfo, workspace); return result; } @@ -832,7 +832,7 @@ std::vector ShapeUtils::evalBroadcastBackwardAxis(const Nd4jLong *operandSh shape[1] = 1; } - auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(dtype, 'f', 2, shape); + auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(dtype, 'f', 2, shape); RELEASE(shape, workspace); diff --git a/libnd4j/include/helpers/impl/helper_hash.cpp b/libnd4j/include/helpers/impl/helper_hash.cpp index b12acb273..4fde919cd 100644 --- a/libnd4j/include/helpers/impl/helper_hash.cpp +++ b/libnd4j/include/helpers/impl/helper_hash.cpp @@ -24,11 +24,9 @@ namespace sd { namespace ops { - HashHelper* HashHelper::getInstance() { - if (_INSTANCE == 0) - _INSTANCE = new HashHelper(); - - return _INSTANCE; + HashHelper& HashHelper::getInstance() { 
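areShapesBroadcastable and the broadcast-shape evaluators above implement the usual rule: compare extents from the trailing axis, and each pair must match or contain a 1. Stated in isolation (not necessarily libnd4j's exact edge-case handling for empty or scalar shapes):

#include <vector>

bool broadcastableSketch(const std::vector<long long>& a, const std::vector<long long>& b) {
    auto ia = a.rbegin(), ib = b.rbegin();
    for (; ia != a.rend() && ib != b.rend(); ++ia, ++ib)
        if (*ia != *ib && *ia != 1 && *ib != 1)
            return false;  // e.g. {2,3} vs {4,3} fails on the leading axis
    return true;           // the shorter shape is implicitly padded with 1s
}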
+ static HashHelper instance; + return instance; } Nd4jLong HashHelper::getLongHash(std::string& str) { @@ -64,8 +62,6 @@ namespace sd { return h; } - - sd::ops::HashHelper* sd::ops::HashHelper::_INSTANCE = 0; } } diff --git a/libnd4j/include/helpers/logger.h b/libnd4j/include/helpers/logger.h index c13785ff7..b7ed88c1d 100644 --- a/libnd4j/include/helpers/logger.h +++ b/libnd4j/include/helpers/logger.h @@ -32,9 +32,9 @@ #ifndef __CUDA_ARCH__ -#define nd4j_debug(FORMAT, ...) if (sd::Environment::getInstance()->isDebug() && sd::Environment::getInstance()->isVerbose()) sd::Logger::info(FORMAT, __VA_ARGS__); -#define nd4j_logger(FORMAT, ...) if (sd::Environment::getInstance()->isDebug() && sd::Environment::getInstance()->isVerbose()) sd::Logger::info(FORMAT, __VA_ARGS__); -#define nd4j_verbose(FORMAT, ...) if (sd::Environment::getInstance()->isVerbose()) sd::Logger::info(FORMAT, __VA_ARGS__); +#define nd4j_debug(FORMAT, ...) if (sd::Environment::getInstance().isDebug() && sd::Environment::getInstance().isVerbose()) sd::Logger::info(FORMAT, __VA_ARGS__); +#define nd4j_logger(FORMAT, ...) if (sd::Environment::getInstance().isDebug() && sd::Environment::getInstance().isVerbose()) sd::Logger::info(FORMAT, __VA_ARGS__); +#define nd4j_verbose(FORMAT, ...) if (sd::Environment::getInstance().isVerbose()) sd::Logger::info(FORMAT, __VA_ARGS__); #define nd4j_printf(FORMAT, ...) sd::Logger::info(FORMAT, __VA_ARGS__); #define nd4j_printv(FORMAT, VECTOR) sd::Logger::printv(FORMAT, VECTOR); diff --git a/libnd4j/include/helpers/shape.h b/libnd4j/include/helpers/shape.h index 65cf29b66..719b086cb 100644 --- a/libnd4j/include/helpers/shape.h +++ b/libnd4j/include/helpers/shape.h @@ -384,9 +384,9 @@ namespace shape { * @param rank the rank of the shape */ - ND4J_EXPORT _CUDA_HD int isMatrix(Nd4jLong *shape, int rank); + ND4J_EXPORT _CUDA_HD int isMatrix(const Nd4jLong *shape, int rank); - INLINEDEF _CUDA_HD int isMatrix(Nd4jLong *shapeInfo); + INLINEDEF _CUDA_HD int isMatrix(const Nd4jLong *shapeInfo); /** * Returns the shape portion of an information * buffer @@ -2346,7 +2346,7 @@ INLINEDEF _CUDA_HD int numOfNonUnitDims(const int rank, const Nd4jLong* inShape) * @param shape the shape of the array * @param rank the rank of the shape */ - INLINEDEF _CUDA_HD int isMatrix(Nd4jLong *shape, int rank) { + INLINEDEF _CUDA_HD int isMatrix(const Nd4jLong *shape, int rank) { if (rank > 2) return 0; else if (rank <= 2) { @@ -2357,7 +2357,7 @@ INLINEDEF _CUDA_HD int numOfNonUnitDims(const int rank, const Nd4jLong* inShape) return 1; } - INLINEDEF _CUDA_HD int isMatrix(Nd4jLong *shapeInfo) { + INLINEDEF _CUDA_HD int isMatrix(const Nd4jLong *shapeInfo) { return isMatrix(shape::shapeOf(shapeInfo),shape::rank(shapeInfo)); } diff --git a/libnd4j/include/legacy/NativeOps.h b/libnd4j/include/legacy/NativeOps.h index c72b0d535..29c629b5a 100755 --- a/libnd4j/include/legacy/NativeOps.h +++ b/libnd4j/include/legacy/NativeOps.h @@ -1567,8 +1567,9 @@ ND4J_EXPORT void inspectArray(Nd4jPointer *extraPointers, Nd4jPointer buffer, Nd typedef sd::ConstantDataBuffer OpaqueConstantDataBuffer; +typedef sd::ConstantShapeBuffer OpaqueConstantShapeBuffer; -ND4J_EXPORT OpaqueConstantDataBuffer* shapeBuffer(int rank, Nd4jLong *shape, Nd4jLong *strides, sd::DataType dtype, char order, Nd4jLong ews, bool empty); +ND4J_EXPORT OpaqueConstantShapeBuffer* shapeBuffer(int rank, Nd4jLong *shape, Nd4jLong *strides, sd::DataType dtype, char order, Nd4jLong ews, bool empty); ND4J_EXPORT OpaqueConstantDataBuffer* constantBufferLong(sd::DataType dtype, 
Nd4jLong const* data, int length); ND4J_EXPORT OpaqueConstantDataBuffer* constantBufferDouble(sd::DataType dtype, double *data, int length); @@ -1577,9 +1578,12 @@ ND4J_EXPORT OpaqueConstantDataBuffer* constantBuffer(sd::DataType dtype, sd::Con ND4J_EXPORT Nd4jPointer getConstantDataBufferPrimary(OpaqueConstantDataBuffer* dbf); ND4J_EXPORT Nd4jPointer getConstantDataBufferSpecial(OpaqueConstantDataBuffer* dbf); ND4J_EXPORT Nd4jLong getConstantDataBufferLength(OpaqueConstantDataBuffer* dbf); -ND4J_EXPORT Nd4jLong getConstantDataBufferSizeOf(OpaqueConstantDataBuffer* dbf); -ND4J_EXPORT void deleteShapeBuffer(OpaqueConstantDataBuffer* ptr); +ND4J_EXPORT Nd4jPointer getConstantShapeBufferPrimary(OpaqueConstantShapeBuffer* dbf); +ND4J_EXPORT Nd4jPointer getConstantShapeBufferSpecial(OpaqueConstantShapeBuffer* dbf); + +ND4J_EXPORT void deleteConstantShapeBuffer(OpaqueConstantShapeBuffer* ptr); +ND4J_EXPORT void deleteConstantDataBuffer(OpaqueConstantDataBuffer* ptr); typedef sd::graph::Context OpaqueContext; typedef sd::graph::RandomGenerator OpaqueRandomGenerator; diff --git a/libnd4j/include/legacy/cpu/NativeOpExecutioner.cpp b/libnd4j/include/legacy/cpu/NativeOpExecutioner.cpp index ad75922e4..6b6c51a13 100644 --- a/libnd4j/include/legacy/cpu/NativeOpExecutioner.cpp +++ b/libnd4j/include/legacy/cpu/NativeOpExecutioner.cpp @@ -245,7 +245,7 @@ void NativeOpExecutioner::execInverseBroadcast(sd::LaunchContext *lc, if (shape::isEmpty(hXShapeInfo) || shape::isEmpty(hYShapeInfo)) return; - if (!sd::Environment::getInstance()->isExperimentalBuild()) + if (!sd::Environment::getInstance().isExperimentalBuild()) if ((yType != xType && yType != sd::DataType::BOOL) || xType != zType) throw sd::datatype_exception::build("NativeOps::execBroadcast both operands must have same data type", xType, yType); @@ -338,7 +338,7 @@ void NativeOpExecutioner::execInverseBroadcastBool(sd::LaunchContext *lc, if (shape::isEmpty(hXShapeInfo) || shape::isEmpty(hYShapeInfo)) return; - if (!sd::Environment::getInstance()->isExperimentalBuild()) + if (!sd::Environment::getInstance().isExperimentalBuild()) if (yType != xType || sd::DataType::BOOL != zType) throw sd::datatype_exception::build("NativeOps::execInverseBroadcastBool both operands must have same data type", xType, yType); @@ -496,7 +496,7 @@ void NativeOpExecutioner::execPairwiseTransform(sd::LaunchContext *lc, }; auto zLen = shape::length(hZShapeInfo); - samediff::Threads::parallel_for(func, 0, zLen, 1, sd::math::nd4j_max(1, sd::math::nd4j_min(zLen / 1024, sd::Environment::getInstance()->maxMasterThreads()))); + samediff::Threads::parallel_for(func, 0, zLen, 1, sd::math::nd4j_max(1, sd::math::nd4j_min(zLen / 1024, sd::Environment::getInstance().maxMasterThreads()))); #endif } @@ -531,7 +531,7 @@ void NativeOpExecutioner::execPairwiseBoolTransform(sd::LaunchContext *lc, }; auto zLen = shape::length(hZShapeInfo); - samediff::Threads::parallel_for(func, 0, zLen, 1, sd::math::nd4j_max(1, sd::math::nd4j_min(zLen / 1024, sd::Environment::getInstance()->maxMasterThreads()))); + samediff::Threads::parallel_for(func, 0, zLen, 1, sd::math::nd4j_max(1, sd::math::nd4j_min(zLen / 1024, sd::Environment::getInstance().maxMasterThreads()))); } @@ -564,7 +564,7 @@ void NativeOpExecutioner::execPairwiseIntTransform(sd::LaunchContext *lc, }; auto zLen = shape::length(hZShapeInfo); - samediff::Threads::parallel_for(func, 0, zLen, 1, sd::math::nd4j_max(1, sd::math::nd4j_min(zLen / 1024, sd::Environment::getInstance()->maxMasterThreads()))); + samediff::Threads::parallel_for(func, 0, 
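getConstantShapeBufferPrimary/Special and the delete functions above extend the library's existing C-ABI convention: callers (the Java bindings) hold opaque pointers, and every accessor or deleter crosses the boundary as a plain exported function. The pattern, minimized with stand-in names:

// public header side: callers only ever see an incomplete type
struct OpaqueBuf;
extern "C" void* getBufPrimary(OpaqueBuf* h);
extern "C" void* getBufSpecial(OpaqueBuf* h);
extern "C" void  deleteBuf(OpaqueBuf* h);

// implementation side: the full definition stays private to the library
struct OpaqueBuf { void* primary; void* special; };
extern "C" void* getBufPrimary(OpaqueBuf* h) { return h->primary; }
extern "C" void* getBufSpecial(OpaqueBuf* h) { return h->special; }
extern "C" void  deleteBuf(OpaqueBuf* h)     { delete h; }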
zLen, 1, sd::math::nd4j_max(1, sd::math::nd4j_min(zLen / 1024, sd::Environment::getInstance().maxMasterThreads()))); } @@ -603,7 +603,7 @@ void NativeOpExecutioner::execReduceFloat(sd::LaunchContext *lc, const sd::LoopKind::Kind kindOfLoop = sd::LoopKind::deduceKindOfLoopTadXZ(hXShapeInfo, hZShapeInfo, tadShapeInfo); - samediff::Threads::parallel_tad(func, 0, shape::length(hZShapeInfo), 1, kindOfLoop == sd::LoopKind::Kind::SMALLARR2DX ? 1 : sd::Environment::getInstance()->maxMasterThreads()); + samediff::Threads::parallel_tad(func, 0, shape::length(hZShapeInfo), 1, kindOfLoop == sd::LoopKind::Kind::SMALLARR2DX ? 1 : sd::Environment::getInstance().maxMasterThreads()); } //////////////////////////////////////////////////////////////////////// @@ -631,7 +631,7 @@ void NativeOpExecutioner::execReduceSame(sd::LaunchContext *lc, const sd::LoopKind::Kind kindOfLoop = sd::LoopKind::deduceKindOfLoopTadXZ(hXShapeInfo, hZShapeInfo, tadShapeInfo); - samediff::Threads::parallel_tad(func, 0, shape::length(hZShapeInfo), 1, kindOfLoop == sd::LoopKind::Kind::SMALLARR2DX ? 1 : sd::Environment::getInstance()->maxMasterThreads()); + samediff::Threads::parallel_tad(func, 0, shape::length(hZShapeInfo), 1, kindOfLoop == sd::LoopKind::Kind::SMALLARR2DX ? 1 : sd::Environment::getInstance().maxMasterThreads()); } //////////////////////////////////////////////////////////////////////// @@ -659,7 +659,7 @@ void NativeOpExecutioner::execReduceBool(sd::LaunchContext *lc, const sd::LoopKind::Kind kindOfLoop = sd::LoopKind::deduceKindOfLoopTadXZ(hXShapeInfo, hZShapeInfo, tadShapeInfo); - samediff::Threads::parallel_tad(func, 0, shape::length(hZShapeInfo), 1, kindOfLoop == sd::LoopKind::Kind::SMALLARR2DX ? 1 : sd::Environment::getInstance()->maxMasterThreads()); + samediff::Threads::parallel_tad(func, 0, shape::length(hZShapeInfo), 1, kindOfLoop == sd::LoopKind::Kind::SMALLARR2DX ? 1 : sd::Environment::getInstance().maxMasterThreads()); } //////////////////////////////////////////////////////////////////////// @@ -687,7 +687,7 @@ void NativeOpExecutioner::execReduceLong(sd::LaunchContext *lc, const sd::LoopKind::Kind kindOfLoop = sd::LoopKind::deduceKindOfLoopTadXZ(hXShapeInfo, hZShapeInfo, tadShapeInfo); - samediff::Threads::parallel_tad(func, 0, shape::length(hZShapeInfo), 1, kindOfLoop == sd::LoopKind::Kind::SMALLARR2DX ? 1 : sd::Environment::getInstance()->maxMasterThreads()); + samediff::Threads::parallel_tad(func, 0, shape::length(hZShapeInfo), 1, kindOfLoop == sd::LoopKind::Kind::SMALLARR2DX ? 
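The pairwise and scalar launches in this file all cap parallelism the same way: roughly one thread per 1024 output elements, never fewer than one, never more than the configured master-thread limit. As a standalone helper:

#include <algorithm>

int threadsForLength(long long zLen, int maxMasterThreads) {
    const long long byWork = std::min(zLen / 1024, static_cast<long long>(maxMasterThreads));
    return static_cast<int>(std::max(1LL, byWork));
}
// threadsForLength(500, 8) == 1; threadsForLength(100000, 8) == 8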
1 : sd::Environment::getInstance().maxMasterThreads()); } //////////////////////////////////////////////////////////////////////// @@ -844,13 +844,13 @@ void NativeOpExecutioner::execReduce3(sd::LaunchContext *lc, sd::TadPack tadPack; if(xLen == yLen) { - tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, dimensionLength); + tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(hXShapeInfo, dimension, dimensionLength); } else if(yLen > xLen) { - tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(hYShapeInfo, dimension, dimensionLength); + tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(hYShapeInfo, dimension, dimensionLength); } else { - tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, dimensionLength); + tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(hXShapeInfo, dimension, dimensionLength); } auto func = PRAGMA_THREADS_FOR { @@ -878,7 +878,7 @@ void NativeOpExecutioner::execReduce3All(sd::LaunchContext *lc, auto xType = sd::ArrayOptions::dataType(hXShapeInfo); auto zType = sd::ArrayOptions::dataType(hZShapeInfo); - auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, dimensionLength); + auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(hXShapeInfo, dimension, dimensionLength); // TODO: make it 2d auto func = PRAGMA_THREADS_FOR { @@ -911,13 +911,13 @@ void NativeOpExecutioner::execReduce3TAD(sd::LaunchContext *lc, sd::TadPack tadPack; if(xLen == yLen) { - tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, dimensionLength); + tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(hXShapeInfo, dimension, dimensionLength); } else if(yLen > xLen) { - tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(hYShapeInfo, dimension, dimensionLength); + tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(hYShapeInfo, dimension, dimensionLength); } else { - tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, dimensionLength); + tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(hXShapeInfo, dimension, dimensionLength); } auto func = PRAGMA_THREADS_FOR { @@ -969,7 +969,7 @@ void NativeOpExecutioner::execScalar(sd::LaunchContext *lc, }; auto zLen = shape::length(hZShapeInfo); - samediff::Threads::parallel_for(func, 0, zLen, 1, !allowParallelism ? 1 : sd::math::nd4j_max(1, sd::math::nd4j_min(zLen / 1024, sd::Environment::getInstance()->maxMasterThreads()))); + samediff::Threads::parallel_for(func, 0, zLen, 1, !allowParallelism ? 1 : sd::math::nd4j_max(1, sd::math::nd4j_min(zLen / 1024, sd::Environment::getInstance().maxMasterThreads()))); #endif } @@ -1006,7 +1006,7 @@ void NativeOpExecutioner::execScalar(sd::LaunchContext *lc, }; auto yLen = shape::length(hScalarShapeInfo); - samediff::Threads::parallel_tad(func, 0, yLen, 1, sd::math::nd4j_min(yLen, sd::Environment::getInstance()->maxMasterThreads())); + samediff::Threads::parallel_tad(func, 0, yLen, 1, sd::math::nd4j_min(yLen, sd::Environment::getInstance().maxMasterThreads())); #endif } @@ -1041,7 +1041,7 @@ void NativeOpExecutioner::execScalarBool(sd::LaunchContext *lc, }; auto zLen = shape::length(hZShapeInfo); - samediff::Threads::parallel_for(func, 0, zLen, 1, !allowParallelism ? 
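execReduce3 and execReduce3TAD above choose which operand to split into TADs by comparing lengths, tiling over Y only when it is strictly longer. Reduced to the decision itself:

enum class TadSource { X, Y };

TadSource pickTadSource(long long xLen, long long yLen) {
    // equal lengths and a longer X both tile over X; only a strictly longer Y wins
    return (yLen > xLen) ? TadSource::Y : TadSource::X;
}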
1 : sd::math::nd4j_max(1, sd::math::nd4j_min(zLen / 1024, sd::Environment::getInstance()->maxMasterThreads()))); + samediff::Threads::parallel_for(func, 0, zLen, 1, !allowParallelism ? 1 : sd::math::nd4j_max(1, sd::math::nd4j_min(zLen / 1024, sd::Environment::getInstance().maxMasterThreads()))); } @@ -1077,7 +1077,7 @@ void NativeOpExecutioner::execScalarBool(sd::LaunchContext *lc, }; auto yLen = shape::length(hScalarShapeInfo); - samediff::Threads::parallel_tad(func, 0, yLen, 1, sd::math::nd4j_min(yLen, sd::Environment::getInstance()->maxMasterThreads())); + samediff::Threads::parallel_tad(func, 0, yLen, 1, sd::math::nd4j_min(yLen, sd::Environment::getInstance().maxMasterThreads())); } //////////////////////////////////////////////////////////////////////// @@ -1110,7 +1110,7 @@ void NativeOpExecutioner::execScalarInt(sd::LaunchContext *lc, }; auto zLen = shape::length(hZShapeInfo); - samediff::Threads::parallel_for(func, 0, zLen, 1, !allowParallelism ? 1 : sd::math::nd4j_max(1, sd::math::nd4j_min(zLen / 1024, sd::Environment::getInstance()->maxMasterThreads()))); + samediff::Threads::parallel_for(func, 0, zLen, 1, !allowParallelism ? 1 : sd::math::nd4j_max(1, sd::math::nd4j_min(zLen / 1024, sd::Environment::getInstance().maxMasterThreads()))); } @@ -1146,7 +1146,7 @@ void NativeOpExecutioner::execScalarInt(sd::LaunchContext *lc, }; auto yLen = shape::length(hScalarShapeInfo); - samediff::Threads::parallel_tad(func, 0, yLen, 1, sd::math::nd4j_min(yLen, sd::Environment::getInstance()->maxMasterThreads())); + samediff::Threads::parallel_tad(func, 0, yLen, 1, sd::math::nd4j_min(yLen, sd::Environment::getInstance().maxMasterThreads())); } //////////////////////////////////////////////////////////////////////// @@ -1259,7 +1259,7 @@ void NativeOpExecutioner::execTransformFloat(sd::LaunchContext *lc, BUILD_DOUBLE_SELECTOR(xType, zType, functions::transform::TransformFloat, ::exec(opNum, hX, hXShapeInfo, hZ, hZShapeInfo, extraParams, thread_id, numThreads), LIBND4J_TYPES, FLOAT_TYPES); }; - samediff::Threads::parallel_do(func, sd::math::nd4j_max(1, sd::math::nd4j_min(shape::length(hZShapeInfo) / 1024, sd::Environment::getInstance()->maxMasterThreads()))); + samediff::Threads::parallel_do(func, sd::math::nd4j_max(1, sd::math::nd4j_min(shape::length(hZShapeInfo) / 1024, sd::Environment::getInstance().maxMasterThreads()))); } //////////////////////////////////////////////////////////////////////// @@ -1281,7 +1281,7 @@ void NativeOpExecutioner::execTransformBool(sd::LaunchContext *lc, BUILD_DOUBLE_SELECTOR(xType, zType, functions::transform::TransformBool, ::exec(opNum, hX, hXShapeInfo, hZ, hZShapeInfo, extraParams, thread_id, numThreads), LIBND4J_TYPES, BOOL_TYPES); }; - samediff::Threads::parallel_do(func, sd::math::nd4j_max(1, sd::math::nd4j_min(shape::length(hZShapeInfo) / 1024, sd::Environment::getInstance()->maxMasterThreads()))); + samediff::Threads::parallel_do(func, sd::math::nd4j_max(1, sd::math::nd4j_min(shape::length(hZShapeInfo) / 1024, sd::Environment::getInstance().maxMasterThreads()))); } //////////////////////////////////////////////////////////////////////// @@ -1310,7 +1310,7 @@ void NativeOpExecutioner::execTransformAny(sd::LaunchContext *lc, BUILD_DOUBLE_SELECTOR(xType, zType, functions::transform::TransformAny, ::exec(opNum, hX, hXShapeInfo, hZ, hZShapeInfo, extraParams, thread_id, numThreads), LIBND4J_TYPES, LIBND4J_TYPES); }; - samediff::Threads::parallel_do(func, sd::math::nd4j_max(1, sd::math::nd4j_min(shape::length(hZShapeInfo) / 1024, 
sd::Environment::getInstance()->maxMasterThreads()))); + samediff::Threads::parallel_do(func, sd::math::nd4j_max(1, sd::math::nd4j_min(shape::length(hZShapeInfo) / 1024, sd::Environment::getInstance().maxMasterThreads()))); } } @@ -1333,7 +1333,7 @@ void NativeOpExecutioner::execTransformSame(sd::LaunchContext *lc, BUILD_SINGLE_SELECTOR(xType, functions::transform::TransformSame, ::exec(opNum, hX, hXShapeInfo, hZ, hZShapeInfo, extraParams, thread_id, numThreads), LIBND4J_TYPES); }; - samediff::Threads::parallel_do(func, sd::math::nd4j_max(1, sd::math::nd4j_min(shape::length(hZShapeInfo) / 1024, sd::Environment::getInstance()->maxMasterThreads()))); + samediff::Threads::parallel_do(func, sd::math::nd4j_max(1, sd::math::nd4j_min(shape::length(hZShapeInfo) / 1024, sd::Environment::getInstance().maxMasterThreads()))); } //////////////////////////////////////////////////////////////////////// @@ -1355,7 +1355,7 @@ void NativeOpExecutioner::execTransformStrict(sd::LaunchContext *lc, BUILD_SINGLE_SELECTOR(xType, functions::transform::TransformStrict, ::exec(opNum, hX, hXShapeInfo, hZ, hZShapeInfo, extraParams, thread_id, numThreads), FLOAT_TYPES); }; - samediff::Threads::parallel_do(func, sd::math::nd4j_max(1, sd::math::nd4j_min(shape::length(hZShapeInfo) / 1024, sd::Environment::getInstance()->maxMasterThreads()))); + samediff::Threads::parallel_do(func, sd::math::nd4j_max(1, sd::math::nd4j_min(shape::length(hZShapeInfo) / 1024, sd::Environment::getInstance().maxMasterThreads()))); } //////////////////////////////////////////////////////////////////////// diff --git a/libnd4j/include/legacy/cpu/NativeOps.cpp b/libnd4j/include/legacy/cpu/NativeOps.cpp index ae8a22a6a..f9e3f669c 100644 --- a/libnd4j/include/legacy/cpu/NativeOps.cpp +++ b/libnd4j/include/legacy/cpu/NativeOps.cpp @@ -85,12 +85,12 @@ using namespace sd; void setElementThreshold(int num) { if (num > 0) - sd::Environment::getInstance()->setElementwiseThreshold(num); + sd::Environment::getInstance().setElementwiseThreshold(num); } void setTADThreshold(int num) { if (num > 0) - sd::Environment::getInstance()->setTadThreshold(num); + sd::Environment::getInstance().setTadThreshold(num); } /** @@ -133,7 +133,7 @@ void execIndexReduce(Nd4jPointer *extraPointers,int opNum, auto dimension = reinterpret_cast(dbDimension->primary()); int dimensionLength = static_cast(shape::length(hDimensionShape)); - auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, + auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(hXShapeInfo, dimension, dimensionLength); auto hTADShapeInfo = tadPack.primaryShapeInfo(); @@ -184,8 +184,8 @@ void execBroadcast(Nd4jPointer *extraPointers, auto dimension = reinterpret_cast(dbDimension->primary()); auto dimensionLength = static_cast(shape::length(hDimensionShape)); - auto tadPackX = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, dimensionLength); - auto tadPackZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(hZShapeInfo, dimension, dimensionLength); + auto tadPackX = sd::ConstantTadHelper::getInstance().tadForDimensions(hXShapeInfo, dimension, dimensionLength); + auto tadPackZ = sd::ConstantTadHelper::getInstance().tadForDimensions(hZShapeInfo, dimension, dimensionLength); auto hTADShapeInfo = tadPackX.primaryShapeInfo(); auto hTADOffsets = tadPackX.primaryOffsets(); @@ -223,8 +223,8 @@ void execBroadcastBool(Nd4jPointer *extraPointers, auto dimension = reinterpret_cast(dbDimension->primary()); auto dimensionLength = 
static_cast<int>(shape::length(hDimensionShape));

-    auto tadPackX = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, dimensionLength);
-    auto tadPackZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(hZShapeInfo, dimension, dimensionLength);
+    auto tadPackX = sd::ConstantTadHelper::getInstance().tadForDimensions(hXShapeInfo, dimension, dimensionLength);
+    auto tadPackZ = sd::ConstantTadHelper::getInstance().tadForDimensions(hZShapeInfo, dimension, dimensionLength);

     auto hTADShapeInfo = tadPackX.primaryShapeInfo();
     auto hTADOffsets = tadPackX.primaryOffsets();
@@ -450,7 +450,7 @@ void execReduceFloat2(Nd4jPointer *extraPointers,
     auto dimension = reinterpret_cast<int *>(dbDimension->primary());
     auto dimensionLength = static_cast<int>(shape::length(hDimensionShape));

-    auto tadPackX = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, dimensionLength);
+    auto tadPackX = sd::ConstantTadHelper::getInstance().tadForDimensions(hXShapeInfo, dimension, dimensionLength);

     auto hTADShapeInfo = tadPackX.primaryShapeInfo();
     auto hTADOffsets = tadPackX.primaryOffsets();
@@ -485,7 +485,7 @@ void execReduceBool2(Nd4jPointer *extraPointers,
     auto dimension = reinterpret_cast<int *>(dbDimension->primary());
     auto dimensionLength = static_cast<int>(shape::length(hDimensionShape));

-    auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension,
+    auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(hXShapeInfo, dimension,
                                                                           dimensionLength);

     auto hTADShapeInfo = tadPack.primaryShapeInfo();
@@ -521,7 +521,7 @@ void execReduceSame2(Nd4jPointer *extraPointers,
     auto dimension = reinterpret_cast<int *>(dbDimension->primary());
     int dimensionLength = static_cast<int>(shape::length(hDimensionShape));

-    auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension,
+    auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(hXShapeInfo, dimension,
                                                                           dimensionLength);

     auto hTADShapeInfo = tadPack.primaryShapeInfo();
@@ -557,7 +557,7 @@ void execReduceLong2(Nd4jPointer *extraPointers,
     auto dimension = reinterpret_cast<int *>(dbDimension->primary());
     int dimensionLength = static_cast<int>(shape::length(hDimensionShape));

-    auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, dimensionLength);
+    auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(hXShapeInfo, dimension, dimensionLength);

     auto hTADShapeInfo = tadPack.primaryShapeInfo();
     auto hTADOffsets = tadPack.primaryOffsets();
@@ -663,7 +663,7 @@ void execReduce3Tad(Nd4jPointer *extraPointers,
                                        yTadOnlyShapeInfo, yTadOffsets);
     } else {
         // going tad-way
-        auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension,
+        auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(hXShapeInfo, dimension,
                                                                               dimensionLength);

         auto hTADShapeInfo = tadPack.primaryShapeInfo();
@@ -1060,7 +1060,7 @@ void initializeDevicesAndFunctions() {
 }

 void initializeFunctions(Nd4jPointer *functions) {
-    sd::BlasHelper::getInstance()->initializeFunctions(functions);
+    sd::BlasHelper::getInstance().initializeFunctions(functions);
 }

 /**
@@ -1208,11 +1208,11 @@ int getAvailableDevices() {
 }

 void enableDebugMode(bool reallyEnable) {
-    sd::Environment::getInstance()->setDebug(reallyEnable);
+    sd::Environment::getInstance().setDebug(reallyEnable);
 }

 void enableVerboseMode(bool reallyEnable) {
-    sd::Environment::getInstance()->setVerbose(reallyEnable);
+    sd::Environment::getInstance().setVerbose(reallyEnable);
 }
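
All of these hunks apply the same mechanical migration: singleton accessors such as sd::Environment::getInstance() now return a reference to a function-local static instead of a pointer to a lazily new-ed instance, so every call site switches from -> to . member access. A minimal sketch of the idiom, with a hypothetical Registry class standing in for the real singletons (assumes C++11 or later, where initialization of a function-local static is thread-safe):

// Minimal sketch of the accessor idiom adopted by this patch; 'Registry' is a
// hypothetical stand-in, not one of the real libnd4j singletons.
class Registry {
 public:
    static Registry& getInstance() {
        static Registry instance;   // constructed once, on first use; thread-safe since C++11
        return instance;
    }

    void setVerbose(bool value) { _verbose = value; }
    bool isVerbose() const      { return _verbose; }

    Registry(const Registry&) = delete;             // a singleton is non-copyable
    Registry& operator=(const Registry&) = delete;

 private:
    Registry() = default;
    bool _verbose = false;
};

// Call sites change exactly as in the hunks above:
//   before: Registry::getInstance()->setVerbose(true);
//   after:  Registry::getInstance().setVerbose(true);

Besides thread-safe initialization, the function-local static removes the class-level _instance pointer and its never-freed heap allocation; the Environment.cpp hunk further down deletes exactly that machinery.
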
 void setGridLimit(int gridSize) {
@@ -1222,7 +1222,7 @@ void setGridLimit(int gridSize) {
 sd::TadPack* tadOnlyShapeInfo(Nd4jLong const* hXShapeInfo, int *dimension, int dimensionLength) {
     auto pack = new TadPack();
     try {
-        *pack = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, dimensionLength);
+        *pack = sd::ConstantTadHelper::getInstance().tadForDimensions(hXShapeInfo, dimension, dimensionLength);
     } catch (std::exception &e) {
         sd::LaunchContext::defaultContext()->errorReference()->setErrorCode(1);
         sd::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what());
@@ -1285,7 +1285,7 @@ void pullRowsGeneric(void *vx,
     int elementsPerThread = n / TAD_THRESHOLD;
     int _threads = sd::math::nd4j_max(1, elementsPerThread);
-    _threads = sd::math::nd4j_min(_threads, sd::Environment::getInstance()->maxThreads());
+    _threads = sd::math::nd4j_min(_threads, sd::Environment::getInstance().maxThreads());

     auto func = PRAGMA_THREADS_FOR {
         for (auto idx = start; idx < stop; idx++) {
@@ -1557,7 +1557,7 @@ void shuffle(Nd4jPointer *extras,

 bool isExperimentalEnabled() {
-    return sd::Environment::getInstance()->isExperimentalBuild();
+    return sd::Environment::getInstance().isExperimentalBuild();
 }
@@ -1920,7 +1920,7 @@ Nd4jPointer getResultWrapperPointer(sd::graph::ResultWrapper* ptr) {
 }

 const char* getAllCustomOps() {
-    return sd::ops::OpRegistrator::getInstance()->getAllCustomOperations();
+    return sd::ops::OpRegistrator::getInstance().getAllCustomOperations();
 }

 template
@@ -2016,7 +2016,7 @@ sd::ShapeList* _calculateOutputShapes(Nd4jPointer* extraPointers, sd::ops::Decla
 sd::ShapeList* calculateOutputShapes2(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer* inputBuffers, Nd4jPointer* inputShapes, int numInputShapes, double* tArgs, int numTArgs, Nd4jLong *iArgs, int numIArgs, bool *bArgs, int numBArgs, int *dArgs, int numDArgs) {
     try {
-        auto op = sd::ops::OpRegistrator::getInstance()->getOperation(hash);
+        auto op = sd::ops::OpRegistrator::getInstance().getOperation(hash);

         return _calculateOutputShapes(extraPointers, op, inputBuffers, inputShapes, numInputShapes, tArgs, numTArgs, iArgs, numIArgs, bArgs, numBArgs, dArgs, numDArgs);
     } catch (std::exception &e) {
@@ -2047,7 +2047,7 @@ sd::ShapeList* _calculateOutputShapes(Nd4jPointer* extraPointers, sd::ops::Decla
 sd::ShapeList* calculateOutputShapes(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer* inputShapes, int numInputShapes, double* tArgs, int numTArgs, Nd4jLong *iArgs, int numIArgs) {
     try {
-        auto op = sd::ops::OpRegistrator::getInstance()->getOperation(hash);
+        auto op = sd::ops::OpRegistrator::getInstance().getOperation(hash);

         return _calculateOutputShapes(extraPointers, op, inputShapes, numInputShapes, tArgs, numTArgs, iArgs, numIArgs);
     } catch (std::exception &e) {
@@ -2059,7 +2059,7 @@ sd::ShapeList* calculateOutputShapes(Nd4jPointer* extraPointers, Nd4jLong hash,
 int execCustomOp2(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer opContext) {
     try {
-        auto op = sd::ops::OpRegistrator::getInstance()->getOperation(hash);
+        auto op = sd::ops::OpRegistrator::getInstance().getOperation(hash);
         auto context = reinterpret_cast(opContext);

         return op->execute(context);
@@ -2157,7 +2157,7 @@ Nd4jStatus realExec(sd::ops::DeclarableOp* op, Nd4jPointer* extraPointers, Nd4jL
 int execCustomOp(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer* inputBuffers, Nd4jPointer* inputShapes, int numInputs, Nd4jPointer* outputBuffers, Nd4jPointer* outputShapes, int numOutputs, double* tArgs, int
numTArgs, Nd4jLong *iArgs, int numIArgs, bool* bArgs, int numBArgs, bool isInplace) {
     try {
-        auto op = sd::ops::OpRegistrator::getInstance()->getOperation(hash);
+        auto op = sd::ops::OpRegistrator::getInstance().getOperation(hash);

         return realExec(op, extraPointers, hash, inputBuffers, inputShapes, numInputs, outputBuffers, outputShapes, numOutputs, tArgs, numTArgs, iArgs, numIArgs, bArgs, numBArgs, isInplace);
     } catch (std::exception &e) {
         sd::LaunchContext::defaultContext()->errorReference()->setErrorCode(1);
@@ -2170,7 +2170,7 @@ int registerGraph(Nd4jPointer *extraPointers, Nd4jLong graphId, Nd4jPointer flat
     try {
         auto graph = sd::graph::GraphExecutioner::importFromFlatPointer(flatBufferPointer);

-        sd::graph::GraphHolder::getInstance()->registerGraph(graphId, graph);
+        sd::graph::GraphHolder::getInstance().registerGraph(graphId, graph);

         return ND4J_STATUS_OK;
     } catch (std::exception &e) {
@@ -2181,7 +2181,7 @@ int registerGraph(Nd4jPointer *extraPointers, Nd4jLong graphId, Nd4jPointer flat
 }

 static VariablesSet* executeStoredGraphT(Nd4jPointer *extraPointers, Nd4jLong graphId, Nd4jPointer *inputBuffers, Nd4jPointer *inputShapes, int* inputIndices, int numInputs) {
-    auto graph = sd::graph::GraphHolder::getInstance()->cloneGraph(graphId);
+    auto graph = sd::graph::GraphHolder::getInstance().cloneGraph(graphId);
     auto varSpace = graph->getVariableSpace();

     std::vector handles;
@@ -2264,7 +2264,7 @@ void* getVariableBuffer(sd::graph::Variable* variable) {

 int unregisterGraph(Nd4jPointer *extraPointers, Nd4jLong graphId) {

-    sd::graph::GraphHolder::getInstance()->dropGraphAny(graphId);
+    sd::graph::GraphHolder::getInstance().dropGraphAny(graphId);

     return sd::Status::OK();
 }
@@ -2294,7 +2294,7 @@ void deleteVariablesSet(sd::graph::VariablesSet* pointer) {
 }

 const char* getAllOperations() {
-    return sd::OpTracker::getInstance()->exportOperations();
+    return sd::OpTracker::getInstance().exportOperations();
 }
@@ -2694,10 +2694,10 @@ void tryPointer(Nd4jPointer extra, Nd4jPointer p, int len) {
     }
 }

-sd::ConstantDataBuffer* shapeBuffer(int rank, Nd4jLong *shape, Nd4jLong *strides, sd::DataType dtype, char order, Nd4jLong ews, bool empty) {
+sd::ConstantShapeBuffer* shapeBuffer(int rank, Nd4jLong *shape, Nd4jLong *strides, sd::DataType dtype, char order, Nd4jLong ews, bool empty) {
     try {
-        auto buffer = new ConstantDataBuffer();
-        *buffer = sd::ConstantShapeHelper::getInstance()->bufferForShapeInfo(
+        auto buffer = new ConstantShapeBuffer();
+        *buffer = sd::ConstantShapeHelper::getInstance().bufferForShapeInfo(
             ShapeDescriptor(dtype, order, shape, strides, rank, ews, empty));
         return buffer;
     } catch (std::exception &e) {
@@ -2707,10 +2707,14 @@ sd::ConstantDataBuffer* shapeBuffer(int rank, Nd4jLong *shape, Nd4jLong *strides
     }
 }

-void deleteShapeBuffer(sd::ConstantDataBuffer* ptr) {
+void deleteConstantShapeBuffer(sd::ConstantShapeBuffer* ptr) {
     delete ptr;
 }

+void deleteConstantDataBuffer(sd::ConstantDataBuffer* ptr) {
+    delete ptr;
+}
+
 void deleteTadPack(sd::TadPack* ptr) {
     delete ptr;
 }
@@ -2725,7 +2729,7 @@ sd::ConstantDataBuffer* constantBufferDouble(sd::DataType dtype, double *data, i

 sd::ConstantDataBuffer* constantBuffer(sd::DataType dtype, sd::ConstantDescriptor *descriptor) {
     try {
-        return sd::ConstantHelper::getInstance()->constantBuffer(*descriptor, dtype);
+        return sd::ConstantHelper::getInstance().constantBuffer(*descriptor, dtype);
     } catch (std::exception &e) {
         sd::LaunchContext::defaultContext()->errorReference()->setErrorCode(1);
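
The surrounding entry points all share one convention: look the op up through the (now reference-returning) OpRegistrator, and translate any C++ exception into an error code and message on the default LaunchContext, since exceptions must not propagate across the JNI boundary. A condensed sketch of that shape follows; execByHash is a hypothetical simplification, the real functions also carry buffer, shape and argument arrays:

// Condensed sketch of the entry-point convention used throughout this file;
// execByHash is hypothetical, real signatures take buffer/shape/arg arrays.
int execByHash(Nd4jLong hash, sd::graph::Context *context) {
    try {
        // reference-returning singleton, hence '.' rather than '->'
        auto op = sd::ops::OpRegistrator::getInstance().getOperation(hash);
        return op->execute(context);
    } catch (std::exception &e) {
        // never let an exception escape to the JVM: report via the error reference
        sd::LaunchContext::defaultContext()->errorReference()->setErrorCode(1);
        sd::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what());
        return 1;
    }
}
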
sd::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); @@ -2733,6 +2737,14 @@ sd::ConstantDataBuffer* constantBuffer(sd::DataType dtype, sd::ConstantDescripto } } +Nd4jPointer getConstantShapeBufferPrimary(sd::ConstantShapeBuffer* dbf) { + return const_cast(dbf->primary()); +} + +Nd4jPointer getConstantShapeBufferSpecial(sd::ConstantShapeBuffer* dbf) { + return const_cast(dbf->special()); +} + Nd4jPointer getConstantDataBufferPrimary(sd::ConstantDataBuffer* dbf) { return dbf->primary(); } @@ -2884,7 +2896,7 @@ Nd4jPointer shapeBufferForNumpy(Nd4jPointer npyArray) { } else { shapeBuffer = sd::ShapeBuilders::createShapeInfo(dtype, arr.fortranOrder ? 'f' : 'c', shape); } - return const_cast(sd::ConstantShapeHelper::getInstance()->createFromExisting(shapeBuffer, true)); + return const_cast(sd::ConstantShapeHelper::getInstance().createFromExisting(shapeBuffer, true)); } catch (std::exception &e) { sd::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); sd::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); @@ -2983,7 +2995,7 @@ const char* runLightBenchmarkSuit(bool printOut) { } Nd4jLong getCachedMemory(int deviceId) { - return sd::ConstantHelper::getInstance()->getCachedAmount(deviceId); + return sd::ConstantHelper::getInstance().getCachedAmount(deviceId); } const char* runFullBenchmarkSuit(bool printOut) { diff --git a/libnd4j/include/legacy/cuda/NativeOpExecutioner.cu b/libnd4j/include/legacy/cuda/NativeOpExecutioner.cu index f01daffd7..14cbf306a 100644 --- a/libnd4j/include/legacy/cuda/NativeOpExecutioner.cu +++ b/libnd4j/include/legacy/cuda/NativeOpExecutioner.cu @@ -252,7 +252,7 @@ void NativeOpExecutioner::execBroadcastBool(sd::LaunchContext *lc, if (yType != xType) throw std::runtime_error("NativeOpExecutioner::execBroadcastBool requires both X & Y operands to have same type"); - if (sd::Environment::getInstance()->isDebugAndVerbose()) + if (sd::Environment::getInstance().isDebugAndVerbose()) printf("F3B opNum:[%i]\n", opNum); dim3 launchDims(256, 256, 1024); @@ -437,7 +437,7 @@ void NativeOpExecutioner::execInverseBroadcastInt(sd::LaunchContext *lc, if (yType != xType || zType != xType) throw std::runtime_error("NativeOpExecutioner::execBroadcastInt requires both X & Y operands to have same type"); - if (sd::Environment::getInstance()->isDebugAndVerbose()) + if (sd::Environment::getInstance().isDebugAndVerbose()) printf("F3BI opNum:[%i]\n", opNum); dim3 launchDims(256, 256, 1024); @@ -583,7 +583,7 @@ void NativeOpExecutioner::execReduceSame(sd::LaunchContext *lc, auto stream = lc->getCudaStream(); auto reductionPointer = lc->getReductionPointer(); - if (sd::Environment::getInstance()->isDebugAndVerbose()) + if (sd::Environment::getInstance().isDebugAndVerbose()) printf("SF7 opNum:[%i]\n", opNum); auto xType = sd::ArrayOptions::dataType(hXShapeInfo); @@ -618,7 +618,7 @@ void NativeOpExecutioner::execReduceLong(sd::LaunchContext *lc, auto stream = lc->getCudaStream(); auto reductionPointer = lc->getReductionPointer(); - if (sd::Environment::getInstance()->isDebugAndVerbose()) + if (sd::Environment::getInstance().isDebugAndVerbose()) printf("LF7 opNum:[%i]\n", opNum); auto xType = sd::ArrayOptions::dataType(hXShapeInfo); @@ -654,7 +654,7 @@ void NativeOpExecutioner::execReduceBool(sd::LaunchContext *lc, auto stream = lc->getCudaStream(); auto reductionPointer = lc->getReductionPointer(); - if (sd::Environment::getInstance()->isDebugAndVerbose()) + if (sd::Environment::getInstance().isDebugAndVerbose()) printf("BF7 
opNum:[%i]\n", opNum); auto xType = sd::ArrayOptions::dataType(hXShapeInfo); @@ -701,7 +701,7 @@ void NativeOpExecutioner::execIndexReduce(sd::LaunchContext *lc, auto reductionPointer = lc->getReductionPointer(); auto allocationPointer = lc->getAllocationPointer(); - if (sd::Environment::getInstance()->isDebugAndVerbose()) + if (sd::Environment::getInstance().isDebugAndVerbose()) printf("F2 opNum:[%i]\n", opNum); auto xType = sd::ArrayOptions::dataType(hXShapeInfo); @@ -745,7 +745,7 @@ void NativeOpExecutioner::execReduceFloat(sd::LaunchContext *lc, auto stream = lc->getCudaStream(); auto reductionPointer = lc->getReductionPointer(); - if (sd::Environment::getInstance()->isDebugAndVerbose()) + if (sd::Environment::getInstance().isDebugAndVerbose()) printf("F8 opNum:[%i]\n", opNum); auto xType = sd::ArrayOptions::dataType(hXShapeInfo); @@ -780,7 +780,7 @@ void NativeOpExecutioner::execIndexReduceScalar(sd::LaunchContext *lc, void *hZ, Nd4jLong const* hZShapeInfo, void *dZ, Nd4jLong const* dZShapeInfo){ - if (sd::Environment::getInstance()->isDebug()) + if (sd::Environment::getInstance().isDebug()) printf("F1 opNum:[%i]\n", opNum); auto stream = lc->getCudaStream(); @@ -792,7 +792,7 @@ void NativeOpExecutioner::execIndexReduceScalar(sd::LaunchContext *lc, auto numBlocks = CudaLaunchHelper::getReductionBlocks(xLength, blockWidth); dim3 launchDims(numBlocks == 0 ? 1 : numBlocks, blockWidth, 32768); - if (sd::Environment::getInstance()->isDebugAndVerbose() && launchDims.x == 1) + if (sd::Environment::getInstance().isDebugAndVerbose() && launchDims.x == 1) printf("AF1 opNum:[%i]\n", opNum); auto xType = sd::ArrayOptions::dataType(hXShapeInfo); @@ -1649,12 +1649,12 @@ void NativeOpExecutioner::execReduce3All(sd::LaunchContext *lc, auto allocationPointer = lc->getAllocationPointer(); auto reductionPointer = lc->getReductionPointer(); - if (sd::Environment::getInstance()->isDebugAndVerbose()) + if (sd::Environment::getInstance().isDebugAndVerbose()) printf("D119 opNum:[%i]\n", opNum); dim3 launchDims(shape::length(hZShapeInfo), 256, 32768); - if (sd::Environment::getInstance()->isVerbose() && launchDims.x == 1) + if (sd::Environment::getInstance().isVerbose() && launchDims.x == 1) printf("AD119 opNum:[%i]\n", opNum); auto xType = sd::ArrayOptions::dataType(hXShapeInfo); diff --git a/libnd4j/include/legacy/cuda/NativeOps.cu b/libnd4j/include/legacy/cuda/NativeOps.cu index 465029207..1ccc2c7d5 100755 --- a/libnd4j/include/legacy/cuda/NativeOps.cu +++ b/libnd4j/include/legacy/cuda/NativeOps.cu @@ -237,9 +237,9 @@ void execPairwiseTransform( Nd4jPointer *extraPointers, InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX, dbY}); LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - NativeOpExecutioner::execPairwiseTransform(&lc, opNum, dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), - dbY->primary(), hYShapeInfo, dbY->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hYShapeInfo).specialAsT(), - dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT(), extraParams); + NativeOpExecutioner::execPairwiseTransform(&lc, opNum, dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), + dbY->primary(), hYShapeInfo, dbY->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hYShapeInfo).special(), + dbZ->primary(), hZShapeInfo, 
dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special(), extraParams); InteropDataBuffer::registerSpecialUse({dbZ}, {dbX, dbY}); } catch (std::exception &e) { @@ -260,9 +260,9 @@ void execPairwiseTransformBool(Nd4jPointer *extraPointers, LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); NativeOpExecutioner::execPairwiseBoolTransform(&lc, opNum, - dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), - dbY->primary(), hYShapeInfo, dbY->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hYShapeInfo).specialAsT(), - dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT(), + dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), + dbY->primary(), hYShapeInfo, dbY->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hYShapeInfo).special(), + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special(), extraParams); InteropDataBuffer::registerSpecialUse({dbZ}, {dbX, dbY}); @@ -284,9 +284,9 @@ void execSummaryStatsScalar(Nd4jPointer *extraPointers, LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); NativeOpExecutioner::execSummaryStatsScalar(&lc, opNum, - dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), + dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), extraParams, - dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT(), + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special(), biasCorrected); InteropDataBuffer::registerSpecialUse({dbZ}, {dbX}); @@ -319,9 +319,9 @@ void execBroadcastBool(Nd4jPointer *extraPointers, LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); NativeOpExecutioner::execBroadcastBool(&lc, opNum, - dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), - dbY->primary(), hYShapeInfo, dbY->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hYShapeInfo).specialAsT(), - dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT(), + dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), + dbY->primary(), hYShapeInfo, dbY->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hYShapeInfo).special(), + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special(), extraParams, dimension, dimensionLength, tadOnlyShapeInfo, tadOffsets, tadOnlyShapeInfoZ, tadOffsetsZ); @@ -373,9 +373,9 @@ void execBroadcast( LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); NativeOpExecutioner::execBroadcast(&lc, opNum, - dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), - dbY->primary(), hYShapeInfo, dbY->special(), 
ConstantShapeHelper::getInstance()->bufferForShapeInfo(hYShapeInfo).specialAsT(), - dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT(), + dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), + dbY->primary(), hYShapeInfo, dbY->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hYShapeInfo).special(), + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special(), dimension, dimensionLength, tadOnlyShapeInfo, tadOffsets, tadOnlyShapeInfoZ, tadOffsetsZ); @@ -407,9 +407,9 @@ void execReduceFloat(Nd4jPointer *extraPointers, LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); NativeOpExecutioner::execReduceFloatScalar(&lc, opNum, - dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), + dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), extraParams, - dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT()); + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special()); InteropDataBuffer::registerSpecialUse({dbZ}, {dbX}); } catch (std::exception &e) { @@ -429,9 +429,9 @@ void execReduceSame(Nd4jPointer *extraPointers, LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); NativeOpExecutioner::execReduceSameScalar(&lc, opNum, - dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), + dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), extraParams, - dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT()); + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special()); InteropDataBuffer::registerSpecialUse({dbZ}, {dbX}); } catch (std::exception &e) { @@ -454,15 +454,15 @@ void execReduceSame2(Nd4jPointer *extraPointers, auto dimension = reinterpret_cast(dbDimension->primary()); int dimensionLength = static_cast(shape::length(hDimensionShape)); - auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, + auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(hXShapeInfo, dimension, shape::length(hDimensionShape)); LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); NativeOpExecutioner::execReduceSame(&lc, opNum, - dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), + dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), extraParams, - dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT(), + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special(), dimension, dimensionLength, tadPack.specialShapeInfo(), tadPack.specialOffsets()); @@ -487,15 +487,15 @@ void execReduceLong2(Nd4jPointer *extraPointers, auto dimension = 
reinterpret_cast(dbDimension->primary()); int dimensionLength = static_cast(shape::length(hDimensionShape)); - auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, + auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(hXShapeInfo, dimension, shape::length(hDimensionShape)); LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); NativeOpExecutioner::execReduceLong(&lc, opNum, - dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), + dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), extraParams, - dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT(), + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special(), dimension, dimensionLength, tadPack.specialShapeInfo(), tadPack.specialOffsets()); @@ -534,9 +534,9 @@ void execReduceLong(Nd4jPointer *extraPointers, BUILD_DOUBLE_SELECTOR(xType, zType, functions::reduce::ReduceLongFunction, ::execReduceScalar(launchDims, stream, opNum, - dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), hXShapeInfo, + dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), hXShapeInfo, extraParams, - dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT(), hXShapeInfo, + dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special(), hXShapeInfo, nullptr, 0, reductionPointer, dTADShapeInfo), LIBND4J_TYPES, LONG_TYPES); sd::DebugHelper::checkErrorCode(stream, "execReduceLong(...) 
failed"); @@ -562,15 +562,15 @@ void execReduceBool2(Nd4jPointer *extraPointers, auto dimension = reinterpret_cast(dbDimension->primary()); int dimensionLength = static_cast(shape::length(hDimensionShape)); - auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, + auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(hXShapeInfo, dimension, shape::length(hDimensionShape)); LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); NativeOpExecutioner::execReduceBool(&lc, opNum, - dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), + dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), extraParams, - dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT(), + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special(), dimension, dimensionLength, tadPack.specialShapeInfo(), tadPack.specialOffsets()); @@ -609,9 +609,9 @@ void execReduceBool(Nd4jPointer *extraPointers, BUILD_DOUBLE_SELECTOR(xType, zType, functions::reduce::ReduceBoolFunction, ::execReduceScalar(launchDims, stream, opNum, - dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), hXShapeInfo, + dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), hXShapeInfo, extraParams, - dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT(), hZShapeInfo, + dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special(), hZShapeInfo, nullptr, 0, reductionPointer, dTADShapeInfo), LIBND4J_TYPES, BOOL_TYPES); sd::DebugHelper::checkErrorCode(stream, "execReduceBool(...) 
failed"); @@ -648,15 +648,15 @@ void execIndexReduce(Nd4jPointer *extraPointers, auto dimension = reinterpret_cast(dbDimension->primary()); int dimensionLength = static_cast(shape::length(hDimensionShape)); - auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, + auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(hXShapeInfo, dimension, shape::length(hDimensionShape)); LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); NativeOpExecutioner::execIndexReduce(&lc, opNum, - dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), + dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), extraParams, - dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT(), + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special(), (int *) dbDimension->special(), dimensionLength, tadPack.specialShapeInfo(), tadPack.specialOffsets()); @@ -690,15 +690,15 @@ void execReduceFloat2(Nd4jPointer *extraPointers, auto dimension = reinterpret_cast(dbDimension->primary()); int dimensionLength = static_cast(shape::length(hDimensionShape)); - auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, + auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(hXShapeInfo, dimension, shape::length(hDimensionShape)); LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); NativeOpExecutioner::execReduceFloat(&lc, opNum, - dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), + dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), extraParams, - dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT(), + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special(), dimension, dimensionLength, tadPack.specialShapeInfo(), tadPack.specialOffsets()); @@ -728,9 +728,9 @@ void execIndexReduceScalar( LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); NativeOpExecutioner::execIndexReduceScalar(&lc, opNum, - dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), + dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), extraParams, - dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT()); + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special()); InteropDataBuffer::registerSpecialUse({dbZ}, {dbX}); } catch (std::exception &e) { @@ -752,8 +752,8 @@ void execTransformSame(Nd4jPointer *extraPointers,int opNum, LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); NativeOpExecutioner::execTransformSame(&lc, opNum, - dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), - dbZ->primary(), hZShapeInfo, dbZ->special(), 
ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT(), + dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special(), extraParams, tadShapeInfo, tadOffsets); @@ -777,8 +777,8 @@ void execTransformBool(Nd4jPointer *extraPointers,int opNum, LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); NativeOpExecutioner::execTransformBool(&lc, opNum, - dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), - dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT(), + dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special(), extraParams, tadShapeInfo, tadOffsets); @@ -803,8 +803,8 @@ void execTransformAny(Nd4jPointer *extraPointers,int opNum, reinterpret_cast(extraPointers[6])); NativeOpExecutioner::execTransformAny(&lc, opNum, - dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), - dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT(), + dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special(), extraParams, nullptr, nullptr); @@ -828,8 +828,8 @@ void execTransformStrict(Nd4jPointer *extraPointers,int opNum, LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); NativeOpExecutioner::execTransformStrict(&lc, opNum, - dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), - dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT(), + dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special(), extraParams, tadShapeInfo, tadOffsets); @@ -853,8 +853,8 @@ void execTransformFloat(Nd4jPointer *extraPointers,int opNum, LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); NativeOpExecutioner::execTransformFloat(&lc, opNum, - dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), - dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT(), + dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special(), extraParams, tadShapeInfo, tadOffsets); @@ -939,7 +939,7 @@ void enableP2P(bool enable) { cudaDeviceDisablePeerAccess(dY); } } else { - if (sd::Environment::getInstance()->isVerbose()) printf("Peer 
access [%i] -> [%i] isn't possible\n", dX, dY); + if (sd::Environment::getInstance().isVerbose()) printf("Peer access [%i] -> [%i] isn't possible\n", dX, dY); } } } @@ -983,7 +983,7 @@ void initializeDevicesAndFunctions() { } void initializeFunctions(Nd4jPointer *functions) { - sd::BlasHelper::getInstance()->initializeDeviceFunctions(functions); + sd::BlasHelper::getInstance().initializeDeviceFunctions(functions); /* cublasSgemv = (CublasSgemv)functions[0]; cublasDgemv = (CublasDgemv)functions[1]; @@ -1317,7 +1317,7 @@ int getAvailableDevices() { } void enableDebugMode(bool reallyEnable) { - sd::Environment::getInstance()->setDebug(reallyEnable); + sd::Environment::getInstance().setDebug(reallyEnable); } void setGridLimit(int gridSize) { @@ -1345,7 +1345,7 @@ void setOmpNumThreads(int threads) { } void enableVerboseMode(bool reallyEnable) { - sd::Environment::getInstance()->setVerbose(reallyEnable); + sd::Environment::getInstance().setVerbose(reallyEnable); } int getDeviceMajor(int device) { @@ -1386,7 +1386,7 @@ void specialConcat( sd::TadPack* tadOnlyShapeInfo(Nd4jLong const* dXShapeInfo, int *dimension, int dimensionLength) { try { auto pack = new TadPack(); - *pack = sd::ConstantTadHelper::getInstance()->tadForDimensions(dXShapeInfo, dimension, dimensionLength); + *pack = sd::ConstantTadHelper::getInstance().tadForDimensions(dXShapeInfo, dimension, dimensionLength); return pack; } catch (std::exception &e) { sd::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); @@ -1502,7 +1502,7 @@ void average(Nd4jPointer *extras, auto dX = reinterpret_cast(dx); - if (sd::Environment::getInstance()->isDebugAndVerbose()) + if (sd::Environment::getInstance().isDebugAndVerbose()) printf("averageFloat called\n"); auto xType = sd::ArrayOptions::dataType(xShapeInfo); @@ -1536,7 +1536,7 @@ void accumulate(Nd4jPointer *extras, auto dX = reinterpret_cast(dx); - if (sd::Environment::getInstance()->isDebugAndVerbose()) + if (sd::Environment::getInstance().isDebugAndVerbose()) printf("accumulateFloat called\n"); auto xType = sd::ArrayOptions::dataType(xShapeInfo); @@ -1591,7 +1591,7 @@ void shuffle(Nd4jPointer *extras, } bool isExperimentalEnabled() { - return sd::Environment::getInstance()->isExperimentalBuild(); + return sd::Environment::getInstance().isExperimentalBuild(); } void setOmpMinThreads(int threads) { @@ -1623,9 +1623,9 @@ void execSummaryStats(Nd4jPointer *extraPointers, LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); NativeOpExecutioner::execSummaryStats(&lc, opNum, - dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), + dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), extraParams, - dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT(), + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special(), biasCorrected); InteropDataBuffer::registerSpecialUse({dbZ}, {dbX}); @@ -1653,9 +1653,9 @@ void execSummaryStatsTad(Nd4jPointer *extraPointers, LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); NativeOpExecutioner::execSummaryStats(&lc, opNum, - dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), + dbX->primary(), hXShapeInfo, dbX->special(), 
ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), extraParams, - dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT(), + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special(), reinterpret_cast(dbDimension->special()), dimensionLength, tadShapeInfo, tadOffsets, biasCorrected); @@ -1679,10 +1679,10 @@ void execReduce3(Nd4jPointer *extraPointers, LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); NativeOpExecutioner::execReduce3(&lc, opNum, - dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), + dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), extraParams, - dbY->primary(), hYShapeInfo, dbY->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hYShapeInfo).specialAsT(), - dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT()); + dbY->primary(), hYShapeInfo, dbY->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hYShapeInfo).special(), + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special()); InteropDataBuffer::registerSpecialUse({dbZ}, {dbX, dbY}); } catch (std::exception &e) { @@ -1708,7 +1708,7 @@ void execReduce3Tad(Nd4jPointer *extraPointers, auto dimension = reinterpret_cast(dbDimension->primary()); int dimensionLength = static_cast(shape::length(hDimensionShape)); - auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, + auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(hXShapeInfo, dimension, shape::length(hDimensionShape)); auto tadLength = shape::length(tadPack.primaryShapeInfo()); @@ -1720,18 +1720,18 @@ void execReduce3Tad(Nd4jPointer *extraPointers, if (tadLength == yLength || tadLength == xLength) { // nd4j_printf("== way\n",""); NativeOpExecutioner::execReduce3(&lc, opNum, - dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), + dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), extraParams, - dbY->primary(), hYShapeInfo, dbY->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hYShapeInfo).specialAsT(), - dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT(), + dbY->primary(), hYShapeInfo, dbY->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hYShapeInfo).special(), + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special(), dimension, dimensionLength, tadOnlyShapeInfo, tadOffsets, yTadOnlyShapeInfo, yTadOffsets); } else NativeOpExecutioner::execReduce3TAD(&lc, opNum, - dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), + dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), extraParams, - dbY->primary(), hYShapeInfo, dbY->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hYShapeInfo).specialAsT(), - dbZ->primary(), hZShapeInfo, dbZ->special(), 
ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT(), + dbY->primary(), hYShapeInfo, dbY->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hYShapeInfo).special(), + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special(), dimension, dimensionLength, tadOnlyShapeInfo, yTadOffsets, yTadOnlyShapeInfo, yTadOffsets); @@ -1753,10 +1753,10 @@ void execReduce3Scalar(Nd4jPointer *extraPointers,int opNum, LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); NativeOpExecutioner::execReduce3Scalar(&lc, opNum, - dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), + dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), extraParams, - dbY->primary(), hYShapeInfo, dbY->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hYShapeInfo).specialAsT(), - dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT()); + dbY->primary(), hYShapeInfo, dbY->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hYShapeInfo).special(), + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special()); InteropDataBuffer::registerSpecialUse({dbZ}, {dbX, dbY}); } catch (std::exception &e) { @@ -1777,9 +1777,9 @@ void execScalarBool(Nd4jPointer *extraPointers, LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); NativeOpExecutioner::execScalarBool(&lc, opNum, - dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), - dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT(), - dbScalar->primary(), hScalarShapeInfo, dbScalar->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hScalarShapeInfo).specialAsT(), + dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special(), + dbScalar->primary(), hScalarShapeInfo, dbScalar->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hScalarShapeInfo).special(), extraParams); InteropDataBuffer::registerSpecialUse({dbZ}, {dbX, dbScalar}); @@ -1808,10 +1808,10 @@ void execScalarBoolTad(Nd4jPointer *extraPointers, LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); NativeOpExecutioner::execScalarBool(&lc, opNum, - dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), + dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), extraParams, - dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT(), - dbScalars->primary(), hScalarShapeInfo, dbScalars->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hScalarShapeInfo).specialAsT(), + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special(), + dbScalars->primary(), hScalarShapeInfo, dbScalars->special(), 
ConstantShapeHelper::getInstance().bufferForShapeInfo(hScalarShapeInfo).special(), dimension, dimensionLength, tadShapeInfo, tadOffsets, tadShapeInfoZ, tadOffsetsZ); @@ -1834,9 +1834,9 @@ void execScalar(Nd4jPointer *extraPointers, LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); NativeOpExecutioner::execScalar(&lc, opNum, - dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), - dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT(), - dbScalar->primary(), hScalarShapeInfo, dbScalar->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hScalarShapeInfo).specialAsT(), + dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special(), + dbScalar->primary(), hScalarShapeInfo, dbScalar->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hScalarShapeInfo).special(), extraParams); InteropDataBuffer::registerSpecialUse({dbZ}, {dbX, dbScalar}); @@ -1877,7 +1877,7 @@ void execScalarTad(Nd4jPointer *extraPointers, #ifdef __ND4J_EXPERIMENTAL__ BUILD_PAIRWISE_SELECTOR(xType, yType, zType, functions::scalar::ScalarTransform, ::executeCudaAlongDimension(launchDims, stream, opNum, dX, dXShapeInfo, dZ, dZShapeInfo, dScalars, extraParams, dimension, dimensionLength, tadShapeInfo, tadOffsets, tadShapeInfoZ, tadOffsetsZ), LIBND4J_TYPES, LIBND4J_TYPES); #else - BUILD_SINGLE_SELECTOR_THRICE(xType, functions::scalar::ScalarTransform, ::executeCudaAlongDimension(launchDims, stream, opNum, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT(), dbScalars->special(), extraParams, dimension, dimensionLength, tadShapeInfo, tadOffsets, tadShapeInfoZ, tadOffsetsZ), LIBND4J_TYPES); + BUILD_SINGLE_SELECTOR_THRICE(xType, functions::scalar::ScalarTransform, ::executeCudaAlongDimension(launchDims, stream, opNum, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special(), dbScalars->special(), extraParams, dimension, dimensionLength, tadShapeInfo, tadOffsets, tadShapeInfoZ, tadOffsetsZ), LIBND4J_TYPES); #endif DEBUG_KERNEL(stream, opNum); @@ -1938,7 +1938,7 @@ void execRandom(Nd4jPointer *extraPointers, LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); NativeOpExecutioner::execRandom(&lc, opNum, stateHost, - dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT(), + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special(), extraArguments); InteropDataBuffer::registerSpecialUse({dbZ}, {}); @@ -1958,8 +1958,8 @@ void execRandom2(Nd4jPointer *extraPointers, int opNum, Nd4jPointer stateHost, LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); NativeOpExecutioner::execRandom(&lc, opNum, stateHost, - dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), - dbZ->primary(), hZShapeInfo, 
dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT(), + dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special(), extraArguments); InteropDataBuffer::registerSpecialUse({dbZ}, {dbX}); @@ -1980,9 +1980,9 @@ void execRandom3(Nd4jPointer *extraPointers, int opNum, Nd4jPointer stateHost, LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); NativeOpExecutioner::execRandom(&lc, opNum, stateHost, - dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), - dbY->primary(), hYShapeInfo, dbY->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hYShapeInfo).specialAsT(), - dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT(), + dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), + dbY->primary(), hYShapeInfo, dbY->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hYShapeInfo).special(), + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special(), extraArguments); InteropDataBuffer::registerSpecialUse({dbZ}, {dbX, dbY}); @@ -2216,10 +2216,10 @@ void execReduce3All(Nd4jPointer *extraPointers, LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); NativeOpExecutioner::execReduce3All(&lc, opNum, - dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hXShapeInfo).specialAsT(), + dbX->primary(), hXShapeInfo, dbX->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hXShapeInfo).special(), extraParamsVals, - dbY->primary(), hYShapeInfo, dbY->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hYShapeInfo).specialAsT(), - dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance()->bufferForShapeInfo(hZShapeInfo).specialAsT(), + dbY->primary(), hYShapeInfo, dbY->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hYShapeInfo).special(), + dbZ->primary(), hZShapeInfo, dbZ->special(), ConstantShapeHelper::getInstance().bufferForShapeInfo(hZShapeInfo).special(), reinterpret_cast(dbDimension->special()), dimensionLength, xTadShapeInfo, xOffsets, yTadShapeInfo, yOffsets); @@ -2458,7 +2458,7 @@ void sortTadByKey(Nd4jPointer *extraPointers, auto stream = reinterpret_cast(extraPointers[1]); auto context = extraPointers[0] == 0 ? LaunchContext::defaultContext() : reinterpret_cast(extraPointers[0]); - auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(xShapeInfo, dimension, dimensionLength); + auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(xShapeInfo, dimension, dimensionLength); dim3 launchDims((int) tadPack.numberOfTads(), 256, 2048); auto xType = sd::ArrayOptions::dataType(xShapeInfo); auto yType = sd::ArrayOptions::dataType(yShapeInfo); @@ -2485,7 +2485,7 @@ void sortTadByValue(Nd4jPointer *extraPointers, auto stream = reinterpret_cast(extraPointers[1]); auto context = extraPointers[0] == 0 ? 
LaunchContext::defaultContext() : reinterpret_cast(extraPointers[0]); - auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(xShapeInfo, dimension, dimensionLength); + auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(xShapeInfo, dimension, dimensionLength); dim3 launchDims((int) tadPack.numberOfTads(), 256, 2048); auto xType = sd::ArrayOptions::dataType(yShapeInfo); auto yType = sd::ArrayOptions::dataType(xShapeInfo); @@ -2515,7 +2515,7 @@ void sortTad(Nd4jPointer *extraPointers, auto stream = reinterpret_cast(extraPointers[1]); auto context = extraPointers[0] == 0 ? LaunchContext::defaultContext() : reinterpret_cast(extraPointers[0]); - auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(xShapeInfo, dimension, dimensionLength); + auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(xShapeInfo, dimension, dimensionLength); dim3 launchDims((int) tadPack.numberOfTads(), 512, 33768); auto xType = sd::ArrayOptions::dataType(xShapeInfo); BUILD_SINGLE_SELECTOR(xType, oesTadGeneric, @@ -2561,7 +2561,7 @@ Nd4jPointer getResultWrapperPointer(sd::graph::ResultWrapper* ptr) { const char* getAllCustomOps() { - return sd::ops::OpRegistrator::getInstance()->getAllCustomOperations(); + return sd::ops::OpRegistrator::getInstance().getAllCustomOperations(); } @@ -2608,7 +2608,7 @@ sd::ShapeList* _calculateOutputShapes(Nd4jPointer* extraPointers, sd::ops::Decla sd::ShapeList* calculateOutputShapes2(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer* inputBuffers, Nd4jPointer* inputShapes, int numInputShapes, double* tArgs, int numTArgs, Nd4jLong *iArgs, int numIArgs, bool *bArgs, int numBArgs, int *dArgs, int numDArgs) { try { - auto op = sd::ops::OpRegistrator::getInstance()->getOperation(hash); + auto op = sd::ops::OpRegistrator::getInstance().getOperation(hash); return _calculateOutputShapes(extraPointers, op, inputBuffers, inputShapes, numInputShapes, tArgs, numTArgs, iArgs, numIArgs, bArgs, numBArgs, dArgs, numDArgs); @@ -2639,7 +2639,7 @@ sd::ShapeList* _calculateOutputShapes(Nd4jPointer* extraPointers, sd::ops::Decla sd::ShapeList* calculateOutputShapes(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer* inputShapes, int numInputShapes, double* tArgs, int numTArgs, Nd4jLong *iArgs, int numIArgs) { try { - auto op = sd::ops::OpRegistrator::getInstance()->getOperation(hash); + auto op = sd::ops::OpRegistrator::getInstance().getOperation(hash); return _calculateOutputShapes(extraPointers, op, inputShapes, numInputShapes, tArgs, numTArgs, iArgs, numIArgs); } catch (std::exception &e) { @@ -2742,7 +2742,7 @@ static FORCEINLINE Nd4jStatus realExec(sd::ops::DeclarableOp* op, Nd4jPointer* e int execCustomOp(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer* inputBuffers, Nd4jPointer* inputShapes, int numInputs, Nd4jPointer* outputBuffers, Nd4jPointer* outputShapes, int numOutputs, double* tArgs, int numTArgs, Nd4jLong *iArgs, int numIArgs, bool* bArgs, int numBArgs, bool isInplace) { try { - auto op = sd::ops::OpRegistrator::getInstance()->getOperation(hash); + auto op = sd::ops::OpRegistrator::getInstance().getOperation(hash); return realExec(op, extraPointers, hash, inputBuffers, inputShapes, numInputs, outputBuffers, outputShapes, numOutputs, tArgs, numTArgs, iArgs, numIArgs, bArgs, numBArgs, isInplace); @@ -2755,7 +2755,7 @@ int execCustomOp(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer* inputBu int execCustomOp2(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer opContext) { try { - auto op = 
sd::ops::OpRegistrator::getInstance()->getOperation(hash); + auto op = sd::ops::OpRegistrator::getInstance().getOperation(hash); auto context = reinterpret_cast(opContext); auto result = op->execute(context); @@ -2786,7 +2786,7 @@ int registerGraph(Nd4jPointer *extraPointers, Nd4jLong graphId, Nd4jPointer flat try { auto graph = sd::graph::GraphExecutioner::importFromFlatPointer(flatBufferPointer); - sd::graph::GraphHolder::getInstance()->registerGraph(graphId, graph); + sd::graph::GraphHolder::getInstance().registerGraph(graphId, graph); return ND4J_STATUS_OK; } catch (std::exception &e) { @@ -2798,7 +2798,7 @@ int registerGraph(Nd4jPointer *extraPointers, Nd4jLong graphId, Nd4jPointer flat static VariablesSet* executeStoredGraphT(Nd4jPointer *extraPointers, Nd4jLong graphId, Nd4jPointer *inputBuffers, Nd4jPointer *inputShapes, int* inputIndices, int numInputs) { - auto graph = sd::graph::GraphHolder::getInstance()->pullGraph(graphId); + auto graph = sd::graph::GraphHolder::getInstance().pullGraph(graphId); auto varSpace = graph->getVariableSpace()->clone(); std::vector handles; @@ -2887,7 +2887,7 @@ void* getVariableBuffer(sd::graph::Variable* variable) { int unregisterGraph(Nd4jPointer *extraPointers, Nd4jLong graphId) { try { - sd::graph::GraphHolder::getInstance()->dropGraphAny(graphId); + sd::graph::GraphHolder::getInstance().dropGraphAny(graphId); return ND4J_STATUS_OK; } catch (std::exception &e) { @@ -2929,7 +2929,7 @@ void deleteShapeList(Nd4jPointer shapeList) { } const char* getAllOperations() { - return sd::OpTracker::getInstance()->exportOperations(); + return sd::OpTracker::getInstance().exportOperations(); } Nd4jPointer getGraphState(Nd4jLong id) { @@ -3360,7 +3360,7 @@ void tryPointer(Nd4jPointer extra, Nd4jPointer p, int len) { cudaStream_t stream; cudaStreamCreate(&stream); - tryPointerKernel << < 256, 512, len + 64, stream >> > (p, len); + tryPointerKernel <<< 256, 512, len + 64, stream>>> (p, len); auto e = cudaStreamSynchronize(stream); if (e != 0) @@ -3376,10 +3376,11 @@ void tryPointer(Nd4jPointer extra, Nd4jPointer p, int len) { int dataTypeFromNpyHeader(void *header) { return (int) cnpy::dataTypeFromHeader(reinterpret_cast(header)); } -sd::ConstantDataBuffer* shapeBuffer(int rank, Nd4jLong *shape, Nd4jLong *strides, sd::DataType dtype, char order, Nd4jLong ews, bool empty) { + +OpaqueConstantShapeBuffer* shapeBuffer(int rank, Nd4jLong *shape, Nd4jLong *strides, sd::DataType dtype, char order, Nd4jLong ews, bool empty) { try { - auto buffer = new ConstantDataBuffer(); - *buffer = sd::ConstantShapeHelper::getInstance()->bufferForShapeInfo( + auto buffer = new ConstantShapeBuffer(); + *buffer = sd::ConstantShapeHelper::getInstance().bufferForShapeInfo( ShapeDescriptor(dtype, order, shape, strides, rank, ews, empty)); return buffer; } catch (std::exception &e) { @@ -3389,19 +3390,23 @@ sd::ConstantDataBuffer* shapeBuffer(int rank, Nd4jLong *shape, Nd4jLong *strides } } -void deleteShapeBuffer(sd::ConstantDataBuffer* ptr) { +void deleteConstantShapeBuffer(OpaqueConstantShapeBuffer* ptr) { delete ptr; } +void deleteConstantDataBuffer(OpaqueConstantDataBuffer* ptr) { + delete ptr; +} + void deleteTadPack(sd::TadPack* ptr) { delete ptr; } bool isBlasVersionMatches(int major, int minor, int build) { - auto result = major == Environment::getInstance()->_blasMajorVersion && minor == Environment::getInstance()->_blasMinorVersion && build == Environment::getInstance()->_blasPatchVersion; + auto result = major == Environment::getInstance()._blasMajorVersion && minor == 
Environment::getInstance()._blasMinorVersion && build == Environment::getInstance()._blasPatchVersion; if (!result) { - nd4j_printf("CUDA/cuBLAS version mismatch. Expected: %i.%i.%i but got %i.%i.%i instead\n", Environment::getInstance()->_blasMajorVersion, Environment::getInstance()->_blasMinorVersion, Environment::getInstance()->_blasPatchVersion, major, minor, build); + nd4j_printf("CUDA/cuBLAS version mismatch. Expected: %i.%i.%i but got %i.%i.%i instead\n", Environment::getInstance()._blasMajorVersion, Environment::getInstance()._blasMinorVersion, Environment::getInstance()._blasPatchVersion, major, minor, build); sd::LaunchContext::defaultContext()->errorReference()->setErrorCode(152); sd::LaunchContext::defaultContext()->errorReference()->setErrorMessage("CUDA/cuBLAS version mismatch"); } @@ -3410,15 +3415,15 @@ bool isBlasVersionMatches(int major, int minor, int build) { } sd::ConstantDataBuffer* constantBufferLong(sd::DataType dtype, Nd4jLong const* data, int length) { - return sd::ConstantHelper::getInstance()->constantBuffer(ConstantDescriptor(data, length), dtype); + return sd::ConstantHelper::getInstance().constantBuffer(ConstantDescriptor(data, length), dtype); } sd::ConstantDataBuffer* constantBufferDouble(sd::DataType dtype, double *data, int length) { - return sd::ConstantHelper::getInstance()->constantBuffer(ConstantDescriptor(data, length), dtype); + return sd::ConstantHelper::getInstance().constantBuffer(ConstantDescriptor(data, length), dtype); } sd::ConstantDataBuffer* constantBuffer(sd::DataType dtype, sd::ConstantDescriptor *descriptor) { - return sd::ConstantHelper::getInstance()->constantBuffer(*descriptor, dtype); + return sd::ConstantHelper::getInstance().constantBuffer(*descriptor, dtype); } @@ -3435,6 +3440,13 @@ Nd4jLong getConstantDataBufferSizeOf(sd::ConstantDataBuffer* dbf) { return dbf->sizeOf(); } +Nd4jPointer getConstantShapeBufferPrimary(sd::ConstantShapeBuffer* dbf) { + return const_cast(dbf->primary()); +} + +Nd4jPointer getConstantShapeBufferSpecial(sd::ConstantShapeBuffer* dbf) { + return const_cast(dbf->special()); +} sd::graph::Context* createGraphContext(int nodeId) { return new sd::graph::Context(nodeId); @@ -3563,7 +3575,7 @@ Nd4jPointer shapeBufferForNumpy(Nd4jPointer npyArray) { } else { shapeBuffer = sd::ShapeBuilders::createShapeInfo(dtype, arr.fortranOrder ? 
'f' : 'c', shape);
        }
-        return (Nd4jPointer)(sd::ConstantShapeHelper::getInstance()->createFromExisting(shapeBuffer, true)); // TO DO: this can lead to unpleasant crash sometimes
+        return (Nd4jPointer)(sd::ConstantShapeHelper::getInstance().createFromExisting(shapeBuffer, true)); // TO DO: this can lead to unpleasant crash sometimes
     } catch (std::exception &e) {
         sd::LaunchContext::defaultContext()->errorReference()->setErrorCode(1);
         sd::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what());
@@ -3612,7 +3624,7 @@ const char* runFullBenchmarkSuit(bool printOut) {
 }

 Nd4jLong getCachedMemory(int deviceId) {
-    return sd::ConstantHelper::getInstance()->getCachedAmount(deviceId);
+    return sd::ConstantHelper::getInstance().getCachedAmount(deviceId);
 }

 sd::LaunchContext* defaultLaunchContext() {
diff --git a/libnd4j/include/legacy/impl/Environment.cpp b/libnd4j/include/legacy/impl/Environment.cpp
index b19a7147b..38d7e82ed 100644
--- a/libnd4j/include/legacy/impl/Environment.cpp
+++ b/libnd4j/include/legacy/impl/Environment.cpp
@@ -214,11 +214,9 @@ namespace sd {
         _maxDeviceMemory = maxBytes;
     }

-    Environment *Environment::getInstance() {
-        if (_instance == 0)
-            _instance = new Environment();
-
-        return _instance;
+    Environment& Environment::getInstance() {
+        static Environment instance;
+        return instance;
     }

     bool Environment::isVerbose() {
@@ -353,27 +351,27 @@ namespace sd {
     }

     void Environment::setGroupLimit(int group, Nd4jLong numBytes) {
-        sd::memory::MemoryCounter::getInstance()->setGroupLimit((sd::memory::MemoryType) group, numBytes);
+        sd::memory::MemoryCounter::getInstance().setGroupLimit((sd::memory::MemoryType) group, numBytes);
     }

     void Environment::setDeviceLimit(int deviceId, Nd4jLong numBytes) {
-        sd::memory::MemoryCounter::getInstance()->setDeviceLimit(deviceId, numBytes);
+        sd::memory::MemoryCounter::getInstance().setDeviceLimit(deviceId, numBytes);
     }

     Nd4jLong Environment::getGroupLimit(int group) {
-        return sd::memory::MemoryCounter::getInstance()->groupLimit((sd::memory::MemoryType) group);
+        return sd::memory::MemoryCounter::getInstance().groupLimit((sd::memory::MemoryType) group);
     }

     Nd4jLong Environment::getDeviceLimit(int deviceId) {
-        return sd::memory::MemoryCounter::getInstance()->deviceLimit(deviceId);
+        return sd::memory::MemoryCounter::getInstance().deviceLimit(deviceId);
     }

     Nd4jLong Environment::getGroupCounter(int group) {
-        return sd::memory::MemoryCounter::getInstance()->allocatedGroup((sd::memory::MemoryType) group);
+        return sd::memory::MemoryCounter::getInstance().allocatedGroup((sd::memory::MemoryType) group);
     }

     Nd4jLong Environment::getDeviceCounter(int deviceId) {
-        return sd::memory::MemoryCounter::getInstance()->allocatedDevice(deviceId);
+        return sd::memory::MemoryCounter::getInstance().allocatedDevice(deviceId);
     }

     uint64_t Environment::maxPrimaryMemory() {
@@ -383,7 +381,4 @@ namespace sd {
     uint64_t Environment::maxSpecialMemory() {
         return _maxTotalSpecialMemory.load();
     }
-
-    sd::Environment *sd::Environment::_instance = 0;
-
 }
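The Environment hunk above is the template for every `getInstance()` change in this patch: a hand-rolled pointer singleton becomes a function-local static returned by reference (the Meyers singleton idiom), which is why call sites everywhere switch from `->` to `.` and the out-of-line `_instance` definitions disappear. A minimal sketch of the before/after shape, using an illustrative class name rather than the real `sd::Environment`:

```cpp
#include <cstdio>

// Before: lazy heap allocation behind a raw pointer. The check-then-new
// sequence is not thread-safe, and the instance is never destroyed.
class SingletonOld {
    static SingletonOld* _instance;
public:
    static SingletonOld* getInstance() {
        if (_instance == nullptr)
            _instance = new SingletonOld();
        return _instance;
    }
};
SingletonOld* SingletonOld::_instance = nullptr;

// After: a function-local static. C++11 guarantees exactly-once,
// thread-safe construction, and the object is destroyed at process exit.
class SingletonNew {
public:
    static SingletonNew& getInstance() {
        static SingletonNew instance;
        return instance;
    }
};

int main() {
    SingletonOld::getInstance();                    // old call sites use ->
    SingletonNew& s = SingletonNew::getInstance();  // new call sites use .
    (void) s;
    std::printf("both singletons constructed\n");
    return 0;
}
```

Beyond the thread-safety guarantee, returning a reference makes the "never null" contract explicit, which is what lets the headers later in this patch drop their `static X* _INSTANCE` members entirely.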
diff --git a/libnd4j/include/loops/cpu/broadcasting.hpp b/libnd4j/include/loops/cpu/broadcasting.hpp
index c0f22313b..4c59de0ec 100644
--- a/libnd4j/include/loops/cpu/broadcasting.hpp
+++ b/libnd4j/include/loops/cpu/broadcasting.hpp
@@ -103,7 +103,7 @@ namespace broadcast {
         auto tadOffsets = xTadOffset;

         if (xTadShapeInfo == nullptr || tadOffsets == nullptr) {
-            auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(xShapeInfo, dimension, dimensionLength);
+            auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(xShapeInfo, dimension, dimensionLength);

             xTadShapeShapeInfo = tadPack.primaryShapeInfo();
             tadOffsets = tadPack.primaryOffsets();
@@ -396,7 +396,7 @@ namespace broadcast {
         auto tadOffsets = yTadOffset;

         if (yTadShapeInfo == nullptr || tadOffsets == nullptr) {
-            auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(yShapeInfo, dimension, dimensionLength);
+            auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(yShapeInfo, dimension, dimensionLength);

             yTadShapeShapeInfo = tadPack.primaryShapeInfo();
             tadOffsets = tadPack.primaryOffsets();
@@ -416,7 +416,7 @@ namespace broadcast {
         int tadsPerThread = tads / TAD_THRESHOLD;
         int threads = sd::math::nd4j_max(1, tadsPerThread);
-        threads = sd::math::nd4j_min(threads, sd::Environment::getInstance()->maxThreads());
+        threads = sd::math::nd4j_min(threads, sd::Environment::getInstance().maxThreads());

         auto yEws = shape::elementWiseStride(yTadShapeShapeInfo);
         auto xEws = shape::elementWiseStride(xShapeInfo);
diff --git a/libnd4j/include/loops/cpu/broadcasting_bool.hpp b/libnd4j/include/loops/cpu/broadcasting_bool.hpp
index 18c8705e2..a15935124 100644
--- a/libnd4j/include/loops/cpu/broadcasting_bool.hpp
+++ b/libnd4j/include/loops/cpu/broadcasting_bool.hpp
@@ -115,7 +115,7 @@ namespace broadcast {
         auto tadOffsets = xTadOffset;

         if (xTadShapeInfo == nullptr || tadOffsets == nullptr) {
-            auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(xShapeInfo, dimension, dimensionLength);
+            auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(xShapeInfo, dimension, dimensionLength);

             xTadShapeShapeInfo = const_cast(tadPack.primaryShapeInfo());
             tadOffsets = const_cast(tadPack.primaryOffsets());
@@ -135,7 +135,7 @@ namespace broadcast {
         int tadsPerThread = tads / TAD_THRESHOLD;
         int threads = sd::math::nd4j_max(1, tadsPerThread);
-        threads = sd::math::nd4j_min(threads, sd::Environment::getInstance()->maxThreads());
+        threads = sd::math::nd4j_min(threads, sd::Environment::getInstance().maxThreads());

         auto xEws = shape::elementWiseStride(xTadShapeShapeInfo);
         auto yEws = shape::elementWiseStride(yShapeInfo);
@@ -280,7 +280,7 @@ namespace broadcast {
         auto tadOffsets = yTadOffset;

         if (yTadShapeInfo == nullptr || tadOffsets == nullptr) {
-            auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(yShapeInfo, dimension, dimensionLength);
+            auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(yShapeInfo, dimension, dimensionLength);

             yTadShapeShapeInfo = const_cast(tadPack.primaryShapeInfo());
             tadOffsets = const_cast(tadPack.primaryOffsets());
@@ -300,7 +300,7 @@ namespace broadcast {
         int tadsPerThread = tads / TAD_THRESHOLD;
         int threads = sd::math::nd4j_max(1, tadsPerThread);
-        threads = sd::math::nd4j_min(threads, sd::Environment::getInstance()->maxThreads());
+        threads = sd::math::nd4j_min(threads, sd::Environment::getInstance().maxThreads());

         auto yEws = shape::elementWiseStride(yTadShapeShapeInfo);
         auto xEws = shape::elementWiseStride(xShapeInfo);
diff --git a/libnd4j/include/loops/cpu/broadcasting_int.hpp b/libnd4j/include/loops/cpu/broadcasting_int.hpp
index 7d0a995d6..39b251594 100644
--- a/libnd4j/include/loops/cpu/broadcasting_int.hpp
+++ b/libnd4j/include/loops/cpu/broadcasting_int.hpp
@@ -108,7 +108,7 @@ namespace functions {
         auto tadOffsets = xTadOffset;

         if (xTadShapeInfo == nullptr || tadOffsets == nullptr) {
-            auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(xShapeInfo, dimension, dimensionLength);
+            auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(xShapeInfo, dimension, dimensionLength);

             xTadShapeShapeInfo = const_cast(tadPack.primaryShapeInfo());
             tadOffsets = const_cast(tadPack.primaryOffsets());
@@ -128,7 +128,7 @@ namespace functions {
         int tadsPerThread = tads / TAD_THRESHOLD;
         int threads = sd::math::nd4j_max(1, tadsPerThread);
-        threads = sd::math::nd4j_min(threads, sd::Environment::getInstance()->maxThreads());
+        threads = sd::math::nd4j_min(threads, sd::Environment::getInstance().maxThreads());

         auto xEws = shape::elementWiseStride(xTadShapeShapeInfo);
         auto yEws = shape::elementWiseStride(yShapeInfo);
@@ -271,7 +271,7 @@ namespace functions {
         auto tadOffsets = yTadOffset;

         if (yTadShapeInfo == nullptr || tadOffsets == nullptr) {
-            auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(yShapeInfo, dimension, dimensionLength);
+            auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(yShapeInfo, dimension, dimensionLength);

             yTadShapeShapeInfo = const_cast(tadPack.primaryShapeInfo());
             tadOffsets = const_cast(tadPack.primaryOffsets());
@@ -291,7 +291,7 @@ namespace functions {
         int tadsPerThread = tads / TAD_THRESHOLD;
         int threads = sd::math::nd4j_max(1, tadsPerThread);
-        threads = sd::math::nd4j_min(threads, sd::Environment::getInstance()->maxThreads());
+        threads = sd::math::nd4j_min(threads, sd::Environment::getInstance().maxThreads());

         auto yEws = shape::elementWiseStride(yTadShapeShapeInfo);
         auto xEws = shape::elementWiseStride(xShapeInfo);
diff --git a/libnd4j/include/loops/cpu/indexreduce.hpp b/libnd4j/include/loops/cpu/indexreduce.hpp
index 9373e3feb..d46dd89d7 100644
--- a/libnd4j/include/loops/cpu/indexreduce.hpp
+++ b/libnd4j/include/loops/cpu/indexreduce.hpp
@@ -64,7 +64,7 @@ Nd4jLong IndexReduce::execScalar(const void *vx, const Nd4jLong *xShapeInf
     uint xShapeInfoCast[MAX_RANK];
     bool canCastX = sd::DataTypeUtils::castShapeInfo(xShapeInfo, xShapeInfoCast);

-    int maxThreads = sd::math::nd4j_min(64, sd::Environment::getInstance()->maxThreads());
+    int maxThreads = sd::math::nd4j_min(64, sd::Environment::getInstance().maxThreads());
     IndexValue intermediatery[64];
     for (int e = 0; e < maxThreads; e++)
         intermediatery[e].index = -1;
@@ -142,7 +142,7 @@ void IndexReduce::exec(const void *vx, const Nd4jLong *xShapeInfo,
         if (dimensionLength < 1)
             return;

-        auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(xShapeInfo, dimension, dimensionLength);
+        auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(xShapeInfo, dimension, dimensionLength);

         tadOnlyShapeInfo = tadPack.primaryShapeInfo();
         tadOffsets = tadPack.primaryOffsets();
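The `execScalar` hunk above also shows the threading pattern these legacy loops share: cap the worker count at 64, give each worker a private slot in a fixed-size scratch array (`intermediatery[64]` here, `Z intermediate[64]` in the reduce loops that follow), and merge the slots sequentially at the end. A runnable sketch of that pattern for a scalar argmax; plain loops stand in for libnd4j's thread pool, and the hypothetical `maxThreadsHint` parameter stands in for `sd::Environment::getInstance().maxThreads()`:

```cpp
#include <algorithm>
#include <cstdint>

// Each "thread" t folds elements t, t + maxThreads, ... into its own slot;
// slots are merged once at the end, so the hot loop needs no synchronization.
int64_t argMaxScalar(const float* x, int64_t length, int maxThreadsHint) {
    const int maxThreads = std::min(64, maxThreadsHint);
    int64_t bestIndex[64];
    float bestValue[64] = {0.0f};
    for (int t = 0; t < maxThreads; t++)
        bestIndex[t] = -1;  // -1 marks "no candidate yet", as in the hunk above

    for (int t = 0; t < maxThreads; t++) {            // parallel in the real code
        for (int64_t i = t; i < length; i += maxThreads) {
            if (bestIndex[t] < 0 || x[i] > bestValue[t]) {
                bestValue[t] = x[i];
                bestIndex[t] = i;
            }
        }
    }

    // sequential merge of at most 64 per-thread candidates
    int64_t winner = -1;
    float winnerValue = 0.0f;
    for (int t = 0; t < maxThreads; t++) {
        if (bestIndex[t] >= 0 && (winner < 0 || bestValue[t] > winnerValue)) {
            winner = bestIndex[t];
            winnerValue = bestValue[t];
        }
    }
    return winner;  // -1 for empty input
}
```

Keeping one slot per worker instead of a shared accumulator keeps the hot path lock-free, and the merge is O(maxThreads) and runs once. Tie-breaking in this sketch is first-candidate-wins and may not match libnd4j's exact index-reduction semantics.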
diff --git a/libnd4j/include/loops/cpu/reduce/reduce_bool.cpp b/libnd4j/include/loops/cpu/reduce/reduce_bool.cpp
index 708f3c0d7..94e156705 100644
--- a/libnd4j/include/loops/cpu/reduce/reduce_bool.cpp
+++ b/libnd4j/include/loops/cpu/reduce/reduce_bool.cpp
@@ -166,7 +166,7 @@ namespace functions {
             if (dimensionLength < 1)
                 return;

-            auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(xShapeInfo, dimension, dimensionLength);
+            auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(xShapeInfo, dimension, dimensionLength);
             tadOnlyShapeInfo = tadPack.primaryShapeInfo();
             tadOffsets = tadPack.primaryOffsets();
         }
@@ -193,7 +193,7 @@ namespace functions {
         Z _CUDA_H ReduceBoolFunction::execScalar(const void *vx, Nd4jLong xEws, Nd4jLong length, void *vextraParams) {
             auto x = reinterpret_cast(vx);
             auto extraParams = reinterpret_cast(vextraParams);
-            int maxThreads = sd::math::nd4j_min(64, sd::Environment::getInstance()->maxThreads());
+            int maxThreads = sd::math::nd4j_min(64, sd::Environment::getInstance().maxThreads());
             Z intermediate[64];

             PRAGMA_OMP_SIMD
diff --git a/libnd4j/include/loops/cpu/reduce/reduce_float.hpp b/libnd4j/include/loops/cpu/reduce/reduce_float.hpp
index 1795dbc3d..6be93b1c4 100644
--- a/libnd4j/include/loops/cpu/reduce/reduce_float.hpp
+++ b/libnd4j/include/loops/cpu/reduce/reduce_float.hpp
@@ -70,7 +70,7 @@ namespace functions {
             auto startingValue = OpType::startingValue(x);
             uint xShapeInfoCast[MAX_RANK];
             const bool canCastX = sd::DataTypeUtils::castShapeInfo(xShapeInfo, xShapeInfoCast);
-            int maxThreads = sd::math::nd4j_min(64, sd::Environment::getInstance()->maxThreads());
+            int maxThreads = sd::math::nd4j_min(64, sd::Environment::getInstance().maxThreads());
             Z intermediate[64];

             PRAGMA_OMP_SIMD
@@ -200,7 +200,7 @@ namespace functions {
             if (dimensionLength < 0)
                 return;

-            auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(xShapeInfo, dimension, dimensionLength);
+            auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(xShapeInfo, dimension, dimensionLength);
             tadOnlyShapeInfo = tadPack.primaryShapeInfo();
             tadOffsets = tadPack.primaryOffsets();
         }
@@ -229,7 +229,7 @@ namespace functions {
             auto x = reinterpret_cast(vx);
             auto extraParams = reinterpret_cast(vextraParams);

-            int maxThreads = sd::math::nd4j_min(64, sd::Environment::getInstance()->maxThreads());
+            int maxThreads = sd::math::nd4j_min(64, sd::Environment::getInstance().maxThreads());
             Z intermediate[64];

             PRAGMA_OMP_SIMD
diff --git a/libnd4j/include/loops/cpu/reduce/reduce_long.cpp b/libnd4j/include/loops/cpu/reduce/reduce_long.cpp
index c1fd4385c..a4fae3228 100644
--- a/libnd4j/include/loops/cpu/reduce/reduce_long.cpp
+++ b/libnd4j/include/loops/cpu/reduce/reduce_long.cpp
@@ -65,7 +65,7 @@ namespace functions {
             auto startingValue = OpType::startingValue(x);
             uint xShapeInfoCast[MAX_RANK];
             const bool canCastX = sd::DataTypeUtils::castShapeInfo(xShapeInfo, xShapeInfoCast);
-            int maxThreads = sd::math::nd4j_min(64, sd::Environment::getInstance()->maxThreads());
+            int maxThreads = sd::math::nd4j_min(64, sd::Environment::getInstance().maxThreads());
             Z intermediate[64];

             PRAGMA_OMP_SIMD
@@ -187,7 +187,7 @@ namespace functions {
             if (dimensionLength < 1)
                 return;

-            auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(xShapeInfo, dimension, dimensionLength);
+            auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(xShapeInfo, dimension, dimensionLength);
             tadOnlyShapeInfo = tadPack.primaryShapeInfo();
             tadOffsets = tadPack.primaryOffsets();
         }
@@ -215,7 +215,7 @@ namespace functions {
             auto x = reinterpret_cast(vx);
             auto extraParams = reinterpret_cast(vextraParams);

-            int maxThreads = sd::math::nd4j_min(64, sd::Environment::getInstance()->maxThreads());
+            int maxThreads = sd::math::nd4j_min(64, sd::Environment::getInstance().maxThreads());
             Z intermediate[64];

             PRAGMA_OMP_SIMD
diff --git a/libnd4j/include/loops/cpu/reduce/reduce_same.cpp b/libnd4j/include/loops/cpu/reduce/reduce_same.cpp
index 2516767b6..10607fb6d 100644
--- a/libnd4j/include/loops/cpu/reduce/reduce_same.cpp
+++ b/libnd4j/include/loops/cpu/reduce/reduce_same.cpp
@@ -67,7 +67,7 @@ namespace functions {
             auto startingValue = OpType::startingValue(x);
             uint xShapeInfoCast[MAX_RANK];
             const bool canCastX = sd::DataTypeUtils::castShapeInfo(xShapeInfo, xShapeInfoCast);
-            int maxThreads = sd::math::nd4j_min(64, sd::Environment::getInstance()->maxThreads());
+
int maxThreads = sd::math::nd4j_min(64, sd::Environment::getInstance().maxThreads()); X intermediate[64]; PRAGMA_OMP_SIMD @@ -196,7 +196,7 @@ namespace functions { if (dimensionLength < 1) return; - auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(xShapeInfo, dimension, dimensionLength); + auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(xShapeInfo, dimension, dimensionLength); tadOnlyShapeInfo = tadPack.primaryShapeInfo(); tadOffsets = tadPack.primaryOffsets(); } @@ -224,7 +224,7 @@ namespace functions { auto x = reinterpret_cast(vx); auto extraParams = reinterpret_cast(vextraParams); - int maxThreads = sd::math::nd4j_min(64, sd::Environment::getInstance()->maxThreads()); + int maxThreads = sd::math::nd4j_min(64, sd::Environment::getInstance().maxThreads()); X intermediate[64]; PRAGMA_OMP_SIMD diff --git a/libnd4j/include/loops/cpu/reduce3.hpp b/libnd4j/include/loops/cpu/reduce3.hpp index 3a830377e..a19c7c1a1 100644 --- a/libnd4j/include/loops/cpu/reduce3.hpp +++ b/libnd4j/include/loops/cpu/reduce3.hpp @@ -65,7 +65,7 @@ void Reduce3::execScalar(const void *vx, const Nd4jLong *xShapeInfo, const bool canCastX = sd::DataTypeUtils::castShapeInfo(xShapeInfo, xShapeInfoCast); Z startingVal = OpType::startingValue(x); - int maxThreads = sd::math::nd4j_min(64, sd::Environment::getInstance()->maxThreads()); + int maxThreads = sd::math::nd4j_min(64, sd::Environment::getInstance().maxThreads()); Z intermediate[64]; Z extraParamsLocal[3 * 64]; diff --git a/libnd4j/include/loops/cpu/scalar.hpp b/libnd4j/include/loops/cpu/scalar.hpp index 236ba7e25..f539f387f 100644 --- a/libnd4j/include/loops/cpu/scalar.hpp +++ b/libnd4j/include/loops/cpu/scalar.hpp @@ -65,7 +65,7 @@ void ScalarTransform::transform(const void *vx, const Nd4jLong *xShapeI return; } - int num_threads = sd::math::nd4j_min(numTads, sd::Environment::getInstance()->maxThreads()); + int num_threads = sd::math::nd4j_min(numTads, sd::Environment::getInstance().maxThreads()); if (kindOfLoop == sd::LoopKind::EWS1) { for (auto r = start; r < stop; r++) { diff --git a/libnd4j/include/loops/cpu/scalar_bool.cpp b/libnd4j/include/loops/cpu/scalar_bool.cpp index 72513c10d..63182bdc3 100644 --- a/libnd4j/include/loops/cpu/scalar_bool.cpp +++ b/libnd4j/include/loops/cpu/scalar_bool.cpp @@ -66,7 +66,7 @@ namespace functions { return; } - int num_threads = sd::math::nd4j_min(numTads, sd::Environment::getInstance()->maxThreads()); + int num_threads = sd::math::nd4j_min(numTads, sd::Environment::getInstance().maxThreads()); if (kindOfLoop == sd::LoopKind::EWS1) { for (auto r = start; r < stop; r++) { diff --git a/libnd4j/include/loops/cpu/scalar_int.cpp b/libnd4j/include/loops/cpu/scalar_int.cpp index 1a8f5bcca..adf53e7f6 100644 --- a/libnd4j/include/loops/cpu/scalar_int.cpp +++ b/libnd4j/include/loops/cpu/scalar_int.cpp @@ -66,7 +66,7 @@ namespace functions { return; } - int num_threads = sd::math::nd4j_min(numTads, sd::Environment::getInstance()->maxThreads()); + int num_threads = sd::math::nd4j_min(numTads, sd::Environment::getInstance().maxThreads()); if (kindOfLoop == sd::LoopKind::EWS1) { for (auto r = start; r < stop; r++) { diff --git a/libnd4j/include/loops/cpu/summarystatsreduce.cpp b/libnd4j/include/loops/cpu/summarystatsreduce.cpp index 2d53671d2..63993d853 100644 --- a/libnd4j/include/loops/cpu/summarystatsreduce.cpp +++ b/libnd4j/include/loops/cpu/summarystatsreduce.cpp @@ -127,7 +127,7 @@ namespace functions { if (dimensionLength < 1) return; - auto tadPack = 
sd::ConstantTadHelper::getInstance()->tadForDimensions(xShapeInfo, dimension, dimensionLength); + auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(xShapeInfo, dimension, dimensionLength); //pre squeezed: this is for keeping the pointer to the original //shape information for tad offset diff --git a/libnd4j/include/loops/cuda/legacy/transform.legacy b/libnd4j/include/loops/cuda/legacy/transform.legacy index e7f76751a..88a4ceb16 100644 --- a/libnd4j/include/loops/cuda/legacy/transform.legacy +++ b/libnd4j/include/loops/cuda/legacy/transform.legacy @@ -173,7 +173,7 @@ namespace functions { DISPATCH_SIMPLE(transformShaped, float16, PARAMS(x, xShape, xRank, extraParams, z, zShape, zRank, allocationPointer, reductionPointer, tadShapeInfo, tadOffsets), OPS_A(TRANSFORM_OPS)) - if (sd::Environment::getInstance()->isDebug()) + if (sd::Environment::getInstance().isDebug()) checkCudaErrors(cudaStreamSynchronize(*stream)); } diff --git a/libnd4j/include/loops/cuda/scalar.chpp b/libnd4j/include/loops/cuda/scalar.chpp index b412e4957..93b76f910 100644 --- a/libnd4j/include/loops/cuda/scalar.chpp +++ b/libnd4j/include/loops/cuda/scalar.chpp @@ -152,7 +152,7 @@ void _CUDA_H ScalarTransform::intermediateAlongDimension(dim3& launchDims template void ScalarTransform::executeCudaShaped(dim3& launchDims, cudaStream_t *stream, int opNum, void const* vx, Nd4jLong const* xShapeInfo, Nd4jLong const* hxShapeInfo, void *vz, Nd4jLong const* zShapeInfo, Nd4jLong const* hzShapeInfo, void const* vscalar, void *vextraParams) { - if (sd::Environment::getInstance()->isDebugAndVerbose()) + if (sd::Environment::getInstance().isDebugAndVerbose()) printf("H14 opNum:[%i]\n", opNum); DISPATCH_BY_OPNUM_TTT(intermediateShaped, PARAMS(launchDims, stream, vx, xShapeInfo, hxShapeInfo, vz, zShapeInfo, hzShapeInfo, vscalar, vextraParams, nullptr), SCALAR_OPS); diff --git a/libnd4j/include/loops/cuda/scalar_bool.cu b/libnd4j/include/loops/cuda/scalar_bool.cu index e23560778..0976e60ad 100644 --- a/libnd4j/include/loops/cuda/scalar_bool.cu +++ b/libnd4j/include/loops/cuda/scalar_bool.cu @@ -218,7 +218,7 @@ void ScalarBoolTransform::executeCudaShaped(dim3& launchDims, cudaStream_t void const* vscalar, void const* vextraParams) { - if (sd::Environment::getInstance()->isDebugAndVerbose()) + if (sd::Environment::getInstance().isDebugAndVerbose()) printf("H14 opNum:[%i]\n", opNum); DISPATCH_BY_OPNUM_TT(intermediateShaped, PARAMS(launchDims, stream, vx, xShapeInfo, vz, zShapeInfo, vscalar, const_cast(vextraParams), nullptr), SCALAR_BOOL_OPS); diff --git a/libnd4j/include/loops/cuda/scalar_int.cu b/libnd4j/include/loops/cuda/scalar_int.cu index 2ca0ade26..b8cac0846 100644 --- a/libnd4j/include/loops/cuda/scalar_int.cu +++ b/libnd4j/include/loops/cuda/scalar_int.cu @@ -216,7 +216,7 @@ void ScalarIntTransform::executeCudaShaped(dim3& launchDims, cudaStream_t *st void const* vscalar, void* vextraParams) { - if (sd::Environment::getInstance()->isDebugAndVerbose()) + if (sd::Environment::getInstance().isDebugAndVerbose()) printf("H14 opNum:[%i]\n", opNum); DISPATCH_BY_OPNUM_T(intermediateShaped, PARAMS(launchDims, stream, vx, xShapeInfo, vz, zShapeInfo, vscalar, vextraParams, nullptr), SCALAR_INT_OPS); diff --git a/libnd4j/include/loops/cuda/summarystatsreduce.cu b/libnd4j/include/loops/cuda/summarystatsreduce.cu index 3d94b9097..521ac5b06 100644 --- a/libnd4j/include/loops/cuda/summarystatsreduce.cu +++ b/libnd4j/include/loops/cuda/summarystatsreduce.cu @@ -344,7 +344,7 @@ void _CUDA_G summaryStatsReduceT(int op, void const* dx, 
Nd4jLong const* xShapeI
     auto z = reinterpret_cast(vz);
     auto reductionPointerA = reinterpret_cast(reductionBuffer);

-    if (sd::Environment::getInstance()->isDebugAndVerbose())
+    if (sd::Environment::getInstance().isDebugAndVerbose())
         printf("D16 opNum:[%i]\n", opNum);

     summaryStatsReduceT<<>>(
@@ -369,7 +369,7 @@ void _CUDA_G summaryStatsReduceT(int op, void const* dx, Nd4jLong const* xShapeI
         auto z = static_cast(vz);
         auto extraParams = static_cast(vextraParams);

-        if (sd::Environment::getInstance()->isDebugAndVerbose())
+        if (sd::Environment::getInstance().isDebugAndVerbose())
             printf("F17 opNum:[%i]\n", opNum);

         auto reductionPointerA = reinterpret_cast(reductionBuffer);
@@ -396,7 +396,7 @@ void _CUDA_G summaryStatsReduceT(int op, void const* dx, Nd4jLong const* xShapeI
         auto z = static_cast(vz);
         auto extraParams = static_cast(vextraParams);

-        if (sd::Environment::getInstance()->isDebugAndVerbose())
+        if (sd::Environment::getInstance().isDebugAndVerbose())
             printf("D18 opNum:[%i]\n", opNum);

         summaryStatsReduceT<<>>(
diff --git a/libnd4j/include/memory/MemoryCounter.h b/libnd4j/include/memory/MemoryCounter.h
index 91aaeecff..160c24379 100644
--- a/libnd4j/include/memory/MemoryCounter.h
+++ b/libnd4j/include/memory/MemoryCounter.h
@@ -34,8 +34,6 @@ namespace sd {
          */
         class ND4J_EXPORT MemoryCounter {
         private:
-            static MemoryCounter* _INSTANCE;
-
             // used for synchronization
             std::mutex _locker;
@@ -56,7 +54,7 @@ namespace sd {
             ~MemoryCounter() = default;
         public:
-            static MemoryCounter *getInstance();
+            static MemoryCounter & getInstance();

             /**
              * This method checks if allocation of numBytes won't break through per-group or per-device limit
diff --git a/libnd4j/include/memory/MemoryRegistrator.h b/libnd4j/include/memory/MemoryRegistrator.h
index ad1b0333a..70afafb42 100644
--- a/libnd4j/include/memory/MemoryRegistrator.h
+++ b/libnd4j/include/memory/MemoryRegistrator.h
@@ -32,7 +32,6 @@ namespace sd {
     namespace memory {
         class ND4J_EXPORT MemoryRegistrator {
         protected:
-            static MemoryRegistrator* _INSTANCE;
             Workspace* _workspace;
             MAP_IMPL _footprint;
             std::mutex _lock;
@@ -40,7 +39,7 @@ namespace sd {
             MemoryRegistrator();
             ~MemoryRegistrator() = default;
         public:
-            static MemoryRegistrator* getInstance();
+            static MemoryRegistrator& getInstance();
             bool hasWorkspaceAttached();
             Workspace* getWorkspace();
             void attachWorkspace(Workspace* workspace);
diff --git a/libnd4j/include/memory/MemoryTracker.h b/libnd4j/include/memory/MemoryTracker.h
index 38bb926ca..dd99905bd 100644
--- a/libnd4j/include/memory/MemoryTracker.h
+++ b/libnd4j/include/memory/MemoryTracker.h
@@ -35,7 +35,6 @@ namespace sd {
          */
         class ND4J_EXPORT MemoryTracker {
         private:
-            static MemoryTracker* _INSTANCE;
             std::map _allocations;
             std::map _released;
             std::mutex _locker;
@@ -43,7 +42,7 @@ namespace sd {
             MemoryTracker();
             ~MemoryTracker() = default;
         public:
-            static MemoryTracker* getInstance();
+            static MemoryTracker& getInstance();

             void countIn(MemoryType type, Nd4jPointer ptr, Nd4jLong numBytes);
             void countOut(Nd4jPointer ptr);
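The three memory headers above all get the same treatment: the `_INSTANCE` member goes away and `getInstance()` returns a reference. `MemoryCounter` is the interesting one, since it layers per-group limits over running counters (initialized from `Environment` in the implementation hunk that follows). A self-contained sketch of that counter-plus-limit idea; the class and method names are illustrative, not the real `sd::memory::MemoryCounter` API:

```cpp
#include <cstdint>
#include <map>
#include <mutex>

enum class MemoryKind { HOST, DEVICE };

// Mutex-guarded running counters with a per-group ceiling, in the spirit
// of the MemoryCounter declaration above (illustrative, not the real class).
class GroupMemoryCounter {
    std::mutex _locker;
    std::map<MemoryKind, int64_t> _counters;
    std::map<MemoryKind, int64_t> _limits;
public:
    GroupMemoryCounter(int64_t hostLimit, int64_t deviceLimit) {
        _limits[MemoryKind::HOST] = hostLimit;
        _limits[MemoryKind::DEVICE] = deviceLimit;
        _counters[MemoryKind::HOST] = 0;
        _counters[MemoryKind::DEVICE] = 0;
    }

    // true if numBytes still fits under the group limit; false means the
    // caller should refuse (or spill) the allocation
    bool tryCountIn(MemoryKind group, int64_t numBytes) {
        std::lock_guard<std::mutex> lock(_locker);
        if (_counters[group] + numBytes > _limits[group])
            return false;
        _counters[group] += numBytes;
        return true;
    }

    void countOut(MemoryKind group, int64_t numBytes) {
        std::lock_guard<std::mutex> lock(_locker);
        _counters[group] -= numBytes;
    }
};
```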
diff --git a/libnd4j/include/memory/impl/MemoryCounter.cpp b/libnd4j/include/memory/impl/MemoryCounter.cpp
index 96be34681..287b19897 100644
--- a/libnd4j/include/memory/impl/MemoryCounter.cpp
+++ b/libnd4j/include/memory/impl/MemoryCounter.cpp
@@ -36,19 +36,17 @@ namespace sd {
            }

            // setting initial values for limits
-           _groupLimits[sd::memory::MemoryType::HOST] = sd::Environment::getInstance()->maxPrimaryMemory();
-           _groupLimits[sd::memory::MemoryType::DEVICE] = sd::Environment::getInstance()->maxSpecialMemory();
+           _groupLimits[sd::memory::MemoryType::HOST] = sd::Environment::getInstance().maxPrimaryMemory();
+           _groupLimits[sd::memory::MemoryType::DEVICE] = sd::Environment::getInstance().maxSpecialMemory();

            // setting initial counter values
            _groupCounters[sd::memory::MemoryType::HOST] = 0;
            _groupCounters[sd::memory::MemoryType::DEVICE] = 0;
        }

-       MemoryCounter* MemoryCounter::getInstance() {
-           if (_INSTANCE == 0)
-               _INSTANCE = new MemoryCounter();
-
-           return _INSTANCE;
+       MemoryCounter& MemoryCounter::getInstance() {
+           static MemoryCounter instance;
+           return instance;
        }

        void MemoryCounter::countIn(int deviceId, Nd4jLong numBytes) {
@@ -127,7 +125,5 @@ namespace sd {
            std::lock_guard lock(_locker);
            return _groupLimits[group];
        }
-
-       MemoryCounter* MemoryCounter::_INSTANCE = 0;
    }
}
\ No newline at end of file
diff --git a/libnd4j/include/memory/impl/MemoryRegistrator.cpp b/libnd4j/include/memory/impl/MemoryRegistrator.cpp
index 31b4b0eae..0ac2bf0cb 100644
--- a/libnd4j/include/memory/impl/MemoryRegistrator.cpp
+++ b/libnd4j/include/memory/impl/MemoryRegistrator.cpp
@@ -27,11 +27,9 @@ namespace sd {
            _workspace = nullptr;
        };

-       MemoryRegistrator* MemoryRegistrator::getInstance() {
-           if (_INSTANCE == 0)
-               _INSTANCE = new MemoryRegistrator();
-
-           return _INSTANCE;
+       MemoryRegistrator& MemoryRegistrator::getInstance() {
+           static MemoryRegistrator instance;
+           return instance;
        }

        bool MemoryRegistrator::hasWorkspaceAttached() {
@@ -83,8 +81,5 @@ namespace sd {

            return result;
        }
-
-       MemoryRegistrator* MemoryRegistrator::_INSTANCE = 0;
-
    }
}
\ No newline at end of file
diff --git a/libnd4j/include/memory/impl/MemoryTracker.cpp b/libnd4j/include/memory/impl/MemoryTracker.cpp
index 5ebb4fd16..cf2b975cf 100644
--- a/libnd4j/include/memory/impl/MemoryTracker.cpp
+++ b/libnd4j/include/memory/impl/MemoryTracker.cpp
@@ -40,11 +40,9 @@ namespace sd {
        //
        }

-       MemoryTracker* MemoryTracker::getInstance() {
-           if (_INSTANCE == 0)
-               _INSTANCE = new MemoryTracker();
-
-           return _INSTANCE;
+       MemoryTracker& MemoryTracker::getInstance() {
+           static MemoryTracker instance;
+           return instance;
        }

#if defined(__GNUC__) && !defined(__MINGW64__) && !defined(SD_ANDROID_BUILD) && !defined(SD_IOS_BUILD) && !defined(SD_APPLE_BUILD)
@@ -99,7 +97,7 @@ namespace sd {

    void MemoryTracker::countIn(MemoryType type, Nd4jPointer ptr, Nd4jLong numBytes) {
#if defined(__GNUC__) && !defined(__MINGW64__) && !defined(SD_ANDROID_BUILD) && !defined(SD_IOS_BUILD) && !defined(SD_APPLE_BUILD)
-       if (Environment::getInstance()->isDetectingLeaks()) {
+       if (Environment::getInstance().isDetectingLeaks()) {
           auto lptr = reinterpret_cast(ptr);

           _locker.lock();
@@ -133,7 +131,7 @@ namespace sd {

    void MemoryTracker::countOut(Nd4jPointer ptr) {
#if defined(__GNUC__) && !defined(__MINGW64__) && !defined(SD_ANDROID_BUILD) && !defined(SD_IOS_BUILD) && !defined(SD_APPLE_BUILD)
-       if (Environment::getInstance()->isDetectingLeaks()) {
+       if (Environment::getInstance().isDetectingLeaks()) {
           auto lptr = reinterpret_cast(ptr);

           _locker.lock();
@@ -172,7 +170,5 @@ namespace sd {
           _allocations.clear();
           _released.clear();
        }
-
-       MemoryTracker* MemoryTracker::_INSTANCE = 0;
    }
}
diff --git a/libnd4j/include/ops/declarable/OpRegistrator.h b/libnd4j/include/ops/declarable/OpRegistrator.h
index 3a9fb3df6..a4967d877 100644
--- a/libnd4j/include/ops/declarable/OpRegistrator.h
+++ b/libnd4j/include/ops/declarable/OpRegistrator.h
@@ -97,7 +97,7 @@ namespace sd {
        public:
            ~OpRegistrator();

-           static OpRegistrator* getInstance();
+           static OpRegistrator& getInstance();

            static void exitHandler();
            static void sigIntHandler(int sig);
diff --git a/libnd4j/include/ops/declarable/generic/bitwise/bits_hamming_distance.cpp b/libnd4j/include/ops/declarable/generic/bitwise/bits_hamming_distance.cpp
index 65f81b428..693ebf7c6 100644
--- a/libnd4j/include/ops/declarable/generic/bitwise/bits_hamming_distance.cpp
+++ b/libnd4j/include/ops/declarable/generic/bitwise/bits_hamming_distance.cpp
@@ -41,7 +41,7 @@ namespace sd {
        }

        DECLARE_SHAPE_FN(bits_hamming_distance) {
-           return SHAPELIST(ConstantShapeHelper::getInstance()->scalarShapeInfo(sd::DataType::INT64));
+           return SHAPELIST(ConstantShapeHelper::getInstance().scalarShapeInfo(sd::DataType::INT64));
        }

        DECLARE_TYPES(bits_hamming_distance) {
diff --git a/libnd4j/include/ops/declarable/generic/blas/batched_gemm.cpp b/libnd4j/include/ops/declarable/generic/blas/batched_gemm.cpp
index 194af35b8..79227e2ba 100644
--- a/libnd4j/include/ops/declarable/generic/blas/batched_gemm.cpp
+++ b/libnd4j/include/ops/declarable/generic/blas/batched_gemm.cpp
@@ -110,7 +110,7 @@ DECLARE_SHAPE_FN(batched_gemm) {
    auto shapeList = SHAPELIST();

    if (!(M > 0 && N > 0 && K > 0 && ldA > 0 && ldB > 0 && ldC > 0 && batchSize > 0)) {
-       shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inputShape->at(0)), 'c', {1, 1}));
+       shapeList->push_back(ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inputShape->at(0)), 'c', {1, 1}));
        return shapeList;
    }
@@ -118,7 +118,7 @@ DECLARE_SHAPE_FN(batched_gemm) {
    std::vector shape({M, N});

    for (int e = 0; e < batchSize; e++) {
-       auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inputShape->at(0)), 'f', shape);
+       auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inputShape->at(0)), 'f', shape);
        shapeList->push_back(newShape);
    }
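Both shape functions above illustrate the other constant running through this patch: `DECLARE_SHAPE_FN` bodies never build shape-info buffers by hand. They ask `ConstantShapeHelper` (now a reference-returning singleton) for a cached buffer and push the result into a `SHAPELIST` without any matching free. A sketch of why such a cache can hand out raw pointers safely; `ShapeCache` is an illustrative stand-in, not the real `ConstantShapeHelper`:

```cpp
#include <cstdint>
#include <map>
#include <mutex>
#include <vector>

// Deduplicating cache keyed by the shape itself. Entries are never evicted,
// so the pointers it returns stay valid for the life of the process and
// callers can share them freely without ownership bookkeeping.
class ShapeCache {
    std::mutex _lock;
    std::map<std::vector<int64_t>, std::vector<int64_t>> _cache;
    ShapeCache() = default;
public:
    static ShapeCache& getInstance() {
        static ShapeCache instance;  // same Meyers-singleton shape as above
        return instance;
    }

    const int64_t* bufferForShape(const std::vector<int64_t>& shape) {
        std::lock_guard<std::mutex> guard(_lock);
        auto it = _cache.find(shape);
        if (it == _cache.end())
            it = _cache.emplace(shape, shape).first;  // first use: copy in
        return it->second.data();
    }
};
```

That ownership model is also why hunks later in this section (eye, trace, svd) can `RELEASE` their temporary shape buffers immediately after handing a descriptor to the helper: the pointer that comes back belongs to the cache, not to the temporary.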
diff --git a/libnd4j/include/ops/declarable/generic/blas/matmul.cpp b/libnd4j/include/ops/declarable/generic/blas/matmul.cpp
index c9d8c9476..f8ee952a8 100644
--- a/libnd4j/include/ops/declarable/generic/blas/matmul.cpp
+++ b/libnd4j/include/ops/declarable/generic/blas/matmul.cpp
@@ -131,7 +131,7 @@ DECLARE_SHAPE_FN(matmul) {
    // we just pick the higher data type out of X and Y
    auto dtypeZ = dtypeX > dtypeY ? dtypeX : dtypeY;

-   auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(dtypeZ, zOrder, zShapeOnly);
+   auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(dtypeZ, zOrder, zShapeOnly);

    return SHAPELIST(newShape);
 }
diff --git a/libnd4j/include/ops/declarable/generic/blas/tensormmul.cpp b/libnd4j/include/ops/declarable/generic/blas/tensormmul.cpp
index 889bd4957..0ae64b8cd 100644
--- a/libnd4j/include/ops/declarable/generic/blas/tensormmul.cpp
+++ b/libnd4j/include/ops/declarable/generic/blas/tensormmul.cpp
@@ -80,7 +80,7 @@ DECLARE_SHAPE_FN(tensormmul) {
    std::vector shapeAt, shapeBt;

    auto outShape = sd::ShapeUtils::evalShapeForTensorDot(aShapeInfo, bShapeInfo, axes_0, axes_1, permutAt, permutBt, shapeAt, shapeBt);

-   return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(ArrayOptions::dataType(aShapeInfo), 'c', outShape)));
+   return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(ArrayOptions::dataType(aShapeInfo), 'c', outShape)));
 }

////////////////////////////////////////////////////////////////////////
diff --git a/libnd4j/include/ops/declarable/generic/boolean/choose.cpp b/libnd4j/include/ops/declarable/generic/boolean/choose.cpp
index e5d67baf1..a28d8230b 100644
--- a/libnd4j/include/ops/declarable/generic/boolean/choose.cpp
+++ b/libnd4j/include/ops/declarable/generic/boolean/choose.cpp
@@ -86,9 +86,9 @@ namespace sd {
                helpers::chooseFunctorScalar(block.launchContext(), first, scalar, mode, nullptr, &numResults);
            }

-           auto newShape = ConstantShapeHelper::getInstance()->vectorShapeInfo(numResults.e(0), ArrayOptions::dataType(inputShape->at(0)));
+           auto newShape = ConstantShapeHelper::getInstance().vectorShapeInfo(numResults.e(0), ArrayOptions::dataType(inputShape->at(0)));

-           auto shapeScalar = ConstantShapeHelper::getInstance()->scalarShapeInfo(sd::DataType::INT64);
+           auto shapeScalar = ConstantShapeHelper::getInstance().scalarShapeInfo(sd::DataType::INT64);

            return SHAPELIST(newShape, shapeScalar);
        }
diff --git a/libnd4j/include/ops/declarable/generic/boolean/where.cpp b/libnd4j/include/ops/declarable/generic/boolean/where.cpp
index c26179179..a72de2ee0 100644
--- a/libnd4j/include/ops/declarable/generic/boolean/where.cpp
+++ b/libnd4j/include/ops/declarable/generic/boolean/where.cpp
@@ -117,7 +117,7 @@ namespace sd {
                theNewShape = CONSTANT(newShape);
            }
            else {
-               theNewShape = ConstantShapeHelper::getInstance()->emptyShapeInfo(sd::DataType::INT64);
+               theNewShape = ConstantShapeHelper::getInstance().emptyShapeInfo(sd::DataType::INT64);
            }

            return SHAPELIST(theNewShape);
diff --git a/libnd4j/include/ops/declarable/generic/boolean/where_np.cpp b/libnd4j/include/ops/declarable/generic/boolean/where_np.cpp
index 65cb52cdd..23284b2f9 100644
--- a/libnd4j/include/ops/declarable/generic/boolean/where_np.cpp
+++ b/libnd4j/include/ops/declarable/generic/boolean/where_np.cpp
@@ -139,11 +139,11 @@ namespace sd {
                // output shape - a tuple of rank(inShape) 1D tensors with numOfTrue len
                if (numOfTrue) {
                    for (Nd4jLong e = 0; e < condition->rankOf(); ++e) {
-                       shapes->push_back(ConstantShapeHelper::getInstance()->vectorShapeInfo(numOfTrue, sd::DataType::INT64));
+                       shapes->push_back(ConstantShapeHelper::getInstance().vectorShapeInfo(numOfTrue, sd::DataType::INT64));
                    }
                }
                else {
-                   shapes->push_back(ConstantShapeHelper::getInstance()->emptyShapeInfo(sd::DataType::INT64));
+                   shapes->push_back(ConstantShapeHelper::getInstance().emptyShapeInfo(sd::DataType::INT64));
                }
            }
            return shapes;
diff --git
a/libnd4j/include/ops/declarable/generic/compat/compat_sparse_to_dense.cpp b/libnd4j/include/ops/declarable/generic/compat/compat_sparse_to_dense.cpp index 95dbdfcea..a2dcd6b14 100644 --- a/libnd4j/include/ops/declarable/generic/compat/compat_sparse_to_dense.cpp +++ b/libnd4j/include/ops/declarable/generic/compat/compat_sparse_to_dense.cpp @@ -56,7 +56,7 @@ namespace sd { auto dtype = values->dataType(); // basically output shape is defined by the type of input, and desired shape input - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(dtype, 'c', shape->getBufferAsVector())); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(dtype, 'c', shape->getBufferAsVector())); } DECLARE_TYPES(compat_sparse_to_dense) { diff --git a/libnd4j/include/ops/declarable/generic/compat/compat_string_split.cpp b/libnd4j/include/ops/declarable/generic/compat/compat_string_split.cpp index 40e080a8f..009652178 100644 --- a/libnd4j/include/ops/declarable/generic/compat/compat_string_split.cpp +++ b/libnd4j/include/ops/declarable/generic/compat/compat_string_split.cpp @@ -121,8 +121,8 @@ namespace sd { // values tensor is going to be vector always // indices tensor is going to be vector with length equal to values.length * output rank - auto valuesShape = ConstantShapeHelper::getInstance()->vectorShapeInfo(cnt, sd::DataType::UTF8); - auto indicesShape = ConstantShapeHelper::getInstance()->vectorShapeInfo(cnt * (input->rankOf() + 1), sd::DataType::INT64); + auto valuesShape = ConstantShapeHelper::getInstance().vectorShapeInfo(cnt, sd::DataType::UTF8); + auto indicesShape = ConstantShapeHelper::getInstance().vectorShapeInfo(cnt * (input->rankOf() + 1), sd::DataType::INT64); return SHAPELIST(indicesShape, valuesShape); } diff --git a/libnd4j/include/ops/declarable/generic/compression/bitmap.cpp b/libnd4j/include/ops/declarable/generic/compression/bitmap.cpp index 4b77e2a45..7e89ce2c0 100644 --- a/libnd4j/include/ops/declarable/generic/compression/bitmap.cpp +++ b/libnd4j/include/ops/declarable/generic/compression/bitmap.cpp @@ -75,8 +75,8 @@ namespace sd { auto input = inputShape->at(0); auto outputLength = shape::length(input) / 16 + 5; - auto encodedShape = ConstantShapeHelper::getInstance()->vectorShapeInfo(outputLength, DataType::INT32); - auto encodedCounter = ConstantShapeHelper::getInstance()->scalarShapeInfo(DataType::INT32); + auto encodedShape = ConstantShapeHelper::getInstance().vectorShapeInfo(outputLength, DataType::INT32); + auto encodedCounter = ConstantShapeHelper::getInstance().scalarShapeInfo(DataType::INT32); return SHAPELIST(input, encodedShape, encodedCounter); } diff --git a/libnd4j/include/ops/declarable/generic/compression/threshold.cpp b/libnd4j/include/ops/declarable/generic/compression/threshold.cpp index 9512621e8..83836bb8f 100644 --- a/libnd4j/include/ops/declarable/generic/compression/threshold.cpp +++ b/libnd4j/include/ops/declarable/generic/compression/threshold.cpp @@ -65,7 +65,7 @@ namespace sd { elements = 0; // result array must have 4 additional int elements for header - return SHAPELIST(x->shapeInfo(), sd::ConstantShapeHelper::getInstance()->vectorShapeInfo(elements + 4, DataType::INT32)); + return SHAPELIST(x->shapeInfo(), sd::ConstantShapeHelper::getInstance().vectorShapeInfo(elements + 4, DataType::INT32)); } DECLARE_TYPES(encode_threshold) { diff --git a/libnd4j/include/ops/declarable/generic/datatypes/bitcast.cpp b/libnd4j/include/ops/declarable/generic/datatypes/bitcast.cpp index fe42d7057..294406cb8 100644 --- 
a/libnd4j/include/ops/declarable/generic/datatypes/bitcast.cpp +++ b/libnd4j/include/ops/declarable/generic/datatypes/bitcast.cpp @@ -64,11 +64,11 @@ namespace sd { auto outputSize = DataTypeUtils::sizeOf(newType); if (shape::length(inShape) == 0) - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(inShape, newType))); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(inShape, newType))); if (inputSize == outputSize) { // only type should be changed - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(inShape, newType))); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(inShape, newType))); } else if (inputSize > outputSize) { // range of output increased by 1 with inputSize / outputSize as last dimension @@ -78,7 +78,7 @@ namespace sd { shapeOf[i] = inShape[i + 1]; } shapeOf[i] = inputSize / outputSize; - auto outputShape = ConstantShapeHelper::getInstance()->createShapeInfo(newType, shape::order(inShape), shapeOf); + auto outputShape = ConstantShapeHelper::getInstance().createShapeInfo(newType, shape::order(inShape), shapeOf); return SHAPELIST(outputShape); } REQUIRE_TRUE(shape::sizeAt(inShape, -1) == outputSize / inputSize, 0, "BITCAST: %llu > %llu. So last dimension should be %i, but %i given.", inputSize, outputSize, outputSize / inputSize, shape::sizeAt(inShape, -1)); @@ -88,7 +88,7 @@ namespace sd { shapeOf[i] = inShape[i + 1]; } - auto outputShape = ConstantShapeHelper::getInstance()->createShapeInfo(newType, shape::order(inShape), shapeOf); + auto outputShape = ConstantShapeHelper::getInstance().createShapeInfo(newType, shape::order(inShape), shapeOf); return SHAPELIST(outputShape); } diff --git a/libnd4j/include/ops/declarable/generic/datatypes/cast.cpp b/libnd4j/include/ops/declarable/generic/datatypes/cast.cpp index cf8729d2f..ff071f7a9 100644 --- a/libnd4j/include/ops/declarable/generic/datatypes/cast.cpp +++ b/libnd4j/include/ops/declarable/generic/datatypes/cast.cpp @@ -49,7 +49,7 @@ namespace sd { auto it = INT_ARG(0); DataType newType = DataTypeUtils::fromInt(it); - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(inShape, newType))); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(inShape, newType))); } DECLARE_TYPES(cast) { diff --git a/libnd4j/include/ops/declarable/generic/helpers/BroadcastHelper.h b/libnd4j/include/ops/declarable/generic/helpers/BroadcastHelper.h index 7df331c4d..af7f2d8d7 100644 --- a/libnd4j/include/ops/declarable/generic/helpers/BroadcastHelper.h +++ b/libnd4j/include/ops/declarable/generic/helpers/BroadcastHelper.h @@ -40,7 +40,7 @@ namespace sd { } std::unique_ptr ptr; - if (!Environment::getInstance()->isExperimentalBuild()) { + if (!Environment::getInstance().isExperimentalBuild()) { if (y->dataType() != x->dataType()) { y = new NDArray(y->cast(x->dataType())); std::unique_ptr ptr2(y); diff --git a/libnd4j/include/ops/declarable/generic/images/crop_and_resize.cpp b/libnd4j/include/ops/declarable/generic/images/crop_and_resize.cpp index b8ce12d64..3c101070d 100644 --- a/libnd4j/include/ops/declarable/generic/images/crop_and_resize.cpp +++ b/libnd4j/include/ops/declarable/generic/images/crop_and_resize.cpp @@ -76,7 +76,7 @@ namespace sd { outputShape[2] = height; outputShape[3] = in[4]; - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(ArrayOptions::dataType(in), shape::order(in), outputShape, 
4))); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(ArrayOptions::dataType(in), shape::order(in), outputShape, 4))); } DECLARE_TYPES(crop_and_resize) { diff --git a/libnd4j/include/ops/declarable/generic/images/image_resize.cpp b/libnd4j/include/ops/declarable/generic/images/image_resize.cpp index 4e680b337..8e6e29d3a 100644 --- a/libnd4j/include/ops/declarable/generic/images/image_resize.cpp +++ b/libnd4j/include/ops/declarable/generic/images/image_resize.cpp @@ -82,7 +82,7 @@ namespace sd { auto dtype = DataType::FLOAT32; if (method == helpers::ImageResizeMethods::kResizeNearest) dtype = ArrayOptions::dataType(in); - auto shape = ConstantShapeHelper::getInstance()->createShapeInfo(dtype, 'c', shape::rank(in) == 4?std::vector{in[1], height, width, in[4]}:std::vector{ height, width, in[4]}); + auto shape = ConstantShapeHelper::getInstance().createShapeInfo(dtype, 'c', shape::rank(in) == 4?std::vector{in[1], height, width, in[4]}:std::vector{ height, width, in[4]}); return SHAPELIST(shape); } diff --git a/libnd4j/include/ops/declarable/generic/images/resize_images.cpp b/libnd4j/include/ops/declarable/generic/images/resize_images.cpp index 18d048450..a26e47746 100644 --- a/libnd4j/include/ops/declarable/generic/images/resize_images.cpp +++ b/libnd4j/include/ops/declarable/generic/images/resize_images.cpp @@ -118,7 +118,7 @@ namespace sd { else if (shape::rank(in) == 3) shape = {height, width, in[3]}; - auto outShape = ConstantShapeHelper::getInstance()->createShapeInfo(DataType::FLOAT32, shape::order(in), shape); + auto outShape = ConstantShapeHelper::getInstance().createShapeInfo(DataType::FLOAT32, shape::order(in), shape); return SHAPELIST(outShape); } DECLARE_TYPES(resize_images) { diff --git a/libnd4j/include/ops/declarable/generic/images/rgbToGrs.cpp b/libnd4j/include/ops/declarable/generic/images/rgbToGrs.cpp index f7378d333..a6d80365c 100644 --- a/libnd4j/include/ops/declarable/generic/images/rgbToGrs.cpp +++ b/libnd4j/include/ops/declarable/generic/images/rgbToGrs.cpp @@ -67,7 +67,7 @@ DECLARE_SHAPE_FN(rgb_to_grs) { auto nShape = input->getShapeAsVector(); nShape[dimC] = 1; - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(input->dataType(), input->ordering(), nShape)); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(input->dataType(), input->ordering(), nShape)); } } diff --git a/libnd4j/include/ops/declarable/generic/kernels/knn_mindistance.cpp b/libnd4j/include/ops/declarable/generic/kernels/knn_mindistance.cpp index 8ef699aa2..334014ee7 100644 --- a/libnd4j/include/ops/declarable/generic/kernels/knn_mindistance.cpp +++ b/libnd4j/include/ops/declarable/generic/kernels/knn_mindistance.cpp @@ -45,7 +45,7 @@ namespace sd { auto input = inputShape->at(0); // always return scalar here - return SHAPELIST(ConstantShapeHelper::getInstance()->scalarShapeInfo(ArrayOptions::dataType(input))); + return SHAPELIST(ConstantShapeHelper::getInstance().scalarShapeInfo(ArrayOptions::dataType(input))); } DECLARE_TYPES(knn_mindistance) { diff --git a/libnd4j/include/ops/declarable/generic/linalg/diagPart.cpp b/libnd4j/include/ops/declarable/generic/linalg/diagPart.cpp index 925c4b6c1..6562a02a8 100644 --- a/libnd4j/include/ops/declarable/generic/linalg/diagPart.cpp +++ b/libnd4j/include/ops/declarable/generic/linalg/diagPart.cpp @@ -72,7 +72,7 @@ namespace ops { ShapeUtils::updateStridesAndType(outShapeInfo, inputShapeInfo, shape::order(inputShapeInfo)); - return 
SHAPELIST(ConstantShapeHelper::getInstance()->createFromExisting(outShapeInfo, block.workspace())); + return SHAPELIST(ConstantShapeHelper::getInstance().createFromExisting(outShapeInfo, block.workspace())); } diff --git a/libnd4j/include/ops/declarable/generic/linalg/eye.cpp b/libnd4j/include/ops/declarable/generic/linalg/eye.cpp index 41469468c..4bf339614 100644 --- a/libnd4j/include/ops/declarable/generic/linalg/eye.cpp +++ b/libnd4j/include/ops/declarable/generic/linalg/eye.cpp @@ -100,7 +100,7 @@ namespace ops { } shape::updateStrides(outShapeInfo, static_cast(-params[0])); - auto result = ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(outShapeInfo, dtype)); + auto result = ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(outShapeInfo, dtype)); RELEASE(outShapeInfo, block.getWorkspace()); return SHAPELIST(result); } diff --git a/libnd4j/include/ops/declarable/generic/linalg/lstsq.cpp b/libnd4j/include/ops/declarable/generic/linalg/lstsq.cpp index 81831e3fc..5078ff6f1 100644 --- a/libnd4j/include/ops/declarable/generic/linalg/lstsq.cpp +++ b/libnd4j/include/ops/declarable/generic/linalg/lstsq.cpp @@ -92,10 +92,10 @@ namespace sd { if (shape::isEmpty(in0) || shape::isEmpty(in1)) { shapeOf[rank - 1] = 0; // set output shape to empty } - auto resShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(in0), shape::order(in1), shapeOf);//ShapeBuilders::copyShapeInfoAndType(in1, in0, true, block.workspace()); + auto resShape = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(in0), shape::order(in1), shapeOf);//ShapeBuilders::copyShapeInfoAndType(in1, in0, true, block.workspace()); if (shapeOf[rank - 1] == 0) { // ArrayOptions::setPropertyBit(resShape, ARRAY_EMPTY); - resShape = ConstantShapeHelper::getInstance()->emptyShapeInfo(ArrayOptions::dataType(in0)); + resShape = ConstantShapeHelper::getInstance().emptyShapeInfo(ArrayOptions::dataType(in0)); } return SHAPELIST(resShape); } @@ -116,9 +116,9 @@ namespace sd { if (shape::isEmpty(in0) || shape::isEmpty(in1)) { shapeOf[rank - 1] = 0; // set output shape to empty } - auto resShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(in0), shape::order(in1), shapeOf);//ShapeBuilders::copyShapeInfoAndType(in1, in0, true, block.workspace()); + auto resShape = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(in0), shape::order(in1), shapeOf);//ShapeBuilders::copyShapeInfoAndType(in1, in0, true, block.workspace()); if (shapeOf[rank - 1] == 0) { - resShape = ConstantShapeHelper::getInstance()->emptyShapeInfo(ArrayOptions::dataType(in1)); + resShape = ConstantShapeHelper::getInstance().emptyShapeInfo(ArrayOptions::dataType(in1)); // ArrayOptions::setPropertyBit(resShape, ARRAY_EMPTY); } return SHAPELIST(resShape); diff --git a/libnd4j/include/ops/declarable/generic/linalg/matrixDiagPart.cpp b/libnd4j/include/ops/declarable/generic/linalg/matrixDiagPart.cpp index deabe8443..db73fac75 100644 --- a/libnd4j/include/ops/declarable/generic/linalg/matrixDiagPart.cpp +++ b/libnd4j/include/ops/declarable/generic/linalg/matrixDiagPart.cpp @@ -46,7 +46,7 @@ namespace sd { int lastDimension = sd::math::nd4j_min(shape::sizeAt(in, -1), shape::sizeAt(in, -2)); if(outRank == 1) { //output shape is a vector with size min(sizeAt(0), sizeAt(1)) - outShapeInfo = ConstantShapeHelper::getInstance()->vectorShapeInfo(lastDimension, ArrayOptions::dataType(in)); + outShapeInfo = 
ConstantShapeHelper::getInstance().vectorShapeInfo(lastDimension, ArrayOptions::dataType(in)); } else { Nd4jLong* anShapeInfo; diff --git a/libnd4j/include/ops/declarable/generic/linalg/matrix_determinant.cpp b/libnd4j/include/ops/declarable/generic/linalg/matrix_determinant.cpp index edd10e6ea..7046b69f9 100644 --- a/libnd4j/include/ops/declarable/generic/linalg/matrix_determinant.cpp +++ b/libnd4j/include/ops/declarable/generic/linalg/matrix_determinant.cpp @@ -42,13 +42,13 @@ namespace sd { int targetRank = shape::rank(inShape) - 2; // last two dimensions will be reduced to scalar if (targetRank == 0) { // scalar only - determinantShape = ConstantShapeHelper::getInstance()->scalarShapeInfo(ArrayOptions::dataType(inShape)); + determinantShape = ConstantShapeHelper::getInstance().scalarShapeInfo(ArrayOptions::dataType(inShape)); } else if (targetRank == 1) { // vector - determinantShape = ConstantShapeHelper::getInstance()->vectorShapeInfo(shape::sizeAt(inShape, 0), ArrayOptions::dataType(inShape)); + determinantShape = ConstantShapeHelper::getInstance().vectorShapeInfo(shape::sizeAt(inShape, 0), ArrayOptions::dataType(inShape)); } else { // only two last dimensions are excluded - determinantShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inShape), shape::order(inShape), targetRank, shape::shapeOf(inShape)); + determinantShape = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inShape), shape::order(inShape), targetRank, shape::shapeOf(inShape)); } return SHAPELIST(determinantShape); } @@ -89,13 +89,13 @@ namespace sd { int targetRank = shape::rank(inShape) - 2; // last two dimensions will be reduced to scalar if (targetRank == 0) { // scalar only - determinantShape = ConstantShapeHelper::getInstance()->scalarShapeInfo(ArrayOptions::dataType(inShape)); + determinantShape = ConstantShapeHelper::getInstance().scalarShapeInfo(ArrayOptions::dataType(inShape)); } else if (targetRank == 1) { // vector - determinantShape = ConstantShapeHelper::getInstance()->vectorShapeInfo(shape::sizeAt(inShape, 0), ArrayOptions::dataType(inShape)); + determinantShape = ConstantShapeHelper::getInstance().vectorShapeInfo(shape::sizeAt(inShape, 0), ArrayOptions::dataType(inShape)); } else { // only two last dimensions are excluded - determinantShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inShape), shape::order(inShape), targetRank, shape::shapeOf(inShape)); + determinantShape = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inShape), shape::order(inShape), targetRank, shape::shapeOf(inShape)); } return SHAPELIST(determinantShape); } @@ -130,13 +130,13 @@ namespace sd { int targetRank = shape::rank(inShape) - 2; // last two dimensions will be reduced to scalar if (targetRank == 0) { // scalar only - determinantShape = ConstantShapeHelper::getInstance()->scalarShapeInfo(ArrayOptions::dataType(inShape)); + determinantShape = ConstantShapeHelper::getInstance().scalarShapeInfo(ArrayOptions::dataType(inShape)); } else if (targetRank == 1) { // vector - determinantShape = ConstantShapeHelper::getInstance()->vectorShapeInfo(shape::sizeAt(inShape, 0), ArrayOptions::dataType(inShape)); + determinantShape = ConstantShapeHelper::getInstance().vectorShapeInfo(shape::sizeAt(inShape, 0), ArrayOptions::dataType(inShape)); } else { // only two last dimensions are excluded - determinantShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inShape), shape::order(inShape), 
targetRank, shape::shapeOf(inShape)); + determinantShape = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inShape), shape::order(inShape), targetRank, shape::shapeOf(inShape)); } return SHAPELIST(determinantShape); } diff --git a/libnd4j/include/ops/declarable/generic/linalg/qr.cpp b/libnd4j/include/ops/declarable/generic/linalg/qr.cpp index 9a351a13f..1cdfc6884 100644 --- a/libnd4j/include/ops/declarable/generic/linalg/qr.cpp +++ b/libnd4j/include/ops/declarable/generic/linalg/qr.cpp @@ -57,20 +57,20 @@ namespace sd { if (!fullMatricies) { // outputs are: Q is MxN and R is NxN shape[targetRank - 1] = shape::sizeAt(inShape, -1); shape[targetRank - 2] = shape[targetRank - 1]; - shapeQ = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inShape), + shapeQ = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inShape), shape::order(inShape), targetRank, shape::shapeOf(inShape)); - shapeR = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inShape), + shapeR = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inShape), shape::order(inShape), shape); } else {// otherwise outputs are Q is MxM and R is MxN with zero filled rows shape[targetRank - 1] = shape::sizeAt(inShape, -2); shape[targetRank - 2] = shape[targetRank - 1]; - shapeR = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inShape), + shapeR = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inShape), shape::order(inShape), targetRank, shape::shapeOf(inShape)); - shapeQ = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inShape), + shapeQ = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inShape), shape::order(inShape), shape); } diff --git a/libnd4j/include/ops/declarable/generic/linalg/sufficient_statistics.cpp b/libnd4j/include/ops/declarable/generic/linalg/sufficient_statistics.cpp index 9a9fb730b..915ba5fb9 100644 --- a/libnd4j/include/ops/declarable/generic/linalg/sufficient_statistics.cpp +++ b/libnd4j/include/ops/declarable/generic/linalg/sufficient_statistics.cpp @@ -71,14 +71,14 @@ namespace sd { helpers::adjustAxis(input->rankOf(), axisVector, axis); //std::vector dims = ShapeUtils::evalDimsToExclude(input->rankOf(), {axis}); - auto scalarShape = ConstantShapeHelper::getInstance()->scalarShapeInfo(ArrayOptions::dataType(inputShape->at(0))); + auto scalarShape = ConstantShapeHelper::getInstance().scalarShapeInfo(ArrayOptions::dataType(inputShape->at(0))); auto sumShape = ShapeUtils::evalReduceShapeInfo('c', axis, *input, false, false, block.workspace()); auto squareShape = ShapeUtils::evalReduceShapeInfo('c', axis, *input, false, false, block.workspace()); auto shapeList = SHAPELIST(scalarShape, sumShape, squareShape); if (block.numT() > 0) - shapeList->push_back(ConstantShapeHelper::getInstance()->scalarShapeInfo(ArrayOptions::dataType(inputShape->at(0)))); + shapeList->push_back(ConstantShapeHelper::getInstance().scalarShapeInfo(ArrayOptions::dataType(inputShape->at(0)))); return shapeList; } diff --git a/libnd4j/include/ops/declarable/generic/linalg/svd.cpp b/libnd4j/include/ops/declarable/generic/linalg/svd.cpp index ca5fd52c2..3331dcdd8 100644 --- a/libnd4j/include/ops/declarable/generic/linalg/svd.cpp +++ b/libnd4j/include/ops/declarable/generic/linalg/svd.cpp @@ -101,14 +101,14 @@ DECLARE_SHAPE_FN(svd) { shape::updateStrides(uShapeInfo, shape::order(inShapeInfo)); shape::updateStrides(vShapeInfo, 
shape::order(inShapeInfo)); - auto result = SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(sShapeInfo)), ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(uShapeInfo)), ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(vShapeInfo))); + auto result = SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(sShapeInfo)), ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(uShapeInfo)), ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(vShapeInfo))); RELEASE(sShapeInfo, block.workspace()); RELEASE(uShapeInfo, block.workspace()); RELEASE(vShapeInfo, block.workspace()); return result; } - return SHAPELIST(ConstantShapeHelper::getInstance()->createFromExisting(sShapeInfo, block.workspace())); + return SHAPELIST(ConstantShapeHelper::getInstance().createFromExisting(sShapeInfo, block.workspace())); } diff --git a/libnd4j/include/ops/declarable/generic/linalg/trace.cpp b/libnd4j/include/ops/declarable/generic/linalg/trace.cpp index fa9fd5f56..1a67ec754 100644 --- a/libnd4j/include/ops/declarable/generic/linalg/trace.cpp +++ b/libnd4j/include/ops/declarable/generic/linalg/trace.cpp @@ -58,7 +58,7 @@ DECLARE_SHAPE_FN(trace) { outShapeInfo[i] = inShapeInfo[i]; shape::updateStrides(outShapeInfo, shape::order(inShapeInfo)); - auto result = ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(outShapeInfo, ArrayOptions::dataType(inShapeInfo))); + auto result = ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(outShapeInfo, ArrayOptions::dataType(inShapeInfo))); RELEASE(outShapeInfo, block.getWorkspace()); return SHAPELIST(result); } diff --git a/libnd4j/include/ops/declarable/generic/linalg/tri.cpp b/libnd4j/include/ops/declarable/generic/linalg/tri.cpp index c7e1a125b..d0c1f7a6f 100644 --- a/libnd4j/include/ops/declarable/generic/linalg/tri.cpp +++ b/libnd4j/include/ops/declarable/generic/linalg/tri.cpp @@ -53,7 +53,7 @@ DECLARE_SHAPE_FN(tri) { auto dtype = block.numD() ? 
D_ARG(0) : DataType::FLOAT32; - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(dtype, 'c', {rows, cols})); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(dtype, 'c', {rows, cols})); } diff --git a/libnd4j/include/ops/declarable/generic/loss/absoluteDifference.cpp b/libnd4j/include/ops/declarable/generic/loss/absoluteDifference.cpp index d745b0209..0d5d1d011 100644 --- a/libnd4j/include/ops/declarable/generic/loss/absoluteDifference.cpp +++ b/libnd4j/include/ops/declarable/generic/loss/absoluteDifference.cpp @@ -121,9 +121,9 @@ DECLARE_SHAPE_FN(absolute_difference_loss) { Nd4jLong const* outShapeInfo = nullptr; if(INT_ARG(0) != 0) // in this case output is scalar - outShapeInfo = ConstantShapeHelper::getInstance()->scalarShapeInfo(outType); + outShapeInfo = ConstantShapeHelper::getInstance().scalarShapeInfo(outType); else // in this case output has the same shape as labels and predictions - outShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(outType, shape::order(labelsShapeInfo), shape::shapeOf(labelsShapeInfo), shape::rank(labelsShapeInfo))); + outShapeInfo = ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(outType, shape::order(labelsShapeInfo), shape::shapeOf(labelsShapeInfo), shape::rank(labelsShapeInfo))); return SHAPELIST(outShapeInfo); } diff --git a/libnd4j/include/ops/declarable/generic/loss/cosineDistance.cpp b/libnd4j/include/ops/declarable/generic/loss/cosineDistance.cpp index 4d134f6b1..99cf2e3c1 100644 --- a/libnd4j/include/ops/declarable/generic/loss/cosineDistance.cpp +++ b/libnd4j/include/ops/declarable/generic/loss/cosineDistance.cpp @@ -143,7 +143,7 @@ DECLARE_SHAPE_FN(cosine_distance_loss) { // evaluate output shapeInfo Nd4jLong const* outShapeInfo = nullptr; if(INT_ARG(0) != 0) // in this case output is scalar - outShapeInfo = ConstantShapeHelper::getInstance()->scalarShapeInfo(outType); + outShapeInfo = ConstantShapeHelper::getInstance().scalarShapeInfo(outType); else { // in this case output has the same shape as labels reduced by dim axis std::vector<int> dimensions = {dim}; diff --git a/libnd4j/include/ops/declarable/generic/loss/hingeLoss.cpp b/libnd4j/include/ops/declarable/generic/loss/hingeLoss.cpp index fe66387a8..71e7489ea 100644 --- a/libnd4j/include/ops/declarable/generic/loss/hingeLoss.cpp +++ b/libnd4j/include/ops/declarable/generic/loss/hingeLoss.cpp @@ -128,9 +128,9 @@ namespace sd { Nd4jLong const* outShapeInfo = nullptr; if(INT_ARG(0) != 0) // in this case output is scalar - outShapeInfo = ConstantShapeHelper::getInstance()->scalarShapeInfo(outType); + outShapeInfo = ConstantShapeHelper::getInstance().scalarShapeInfo(outType); else // in this case output has the same shape as labels and predictions - outShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(outType, shape::order(labelsShapeInfo), shape::shapeOf(labelsShapeInfo), shape::rank(labelsShapeInfo))); + outShapeInfo = ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(outType, shape::order(labelsShapeInfo), shape::shapeOf(labelsShapeInfo), shape::rank(labelsShapeInfo))); return SHAPELIST(outShapeInfo); diff --git a/libnd4j/include/ops/declarable/generic/loss/huberLoss.cpp b/libnd4j/include/ops/declarable/generic/loss/huberLoss.cpp index df57092e1..2d0b44b3c 100644 --- a/libnd4j/include/ops/declarable/generic/loss/huberLoss.cpp +++ b/libnd4j/include/ops/declarable/generic/loss/huberLoss.cpp @@ -133,9 +133,9 @@ DECLARE_SHAPE_FN(huber_loss) { Nd4jLong const*
outShapeInfo = nullptr; if(INT_ARG(0) != 0) // in this case output is scalar - outShapeInfo = ConstantShapeHelper::getInstance()->scalarShapeInfo(outType); + outShapeInfo = ConstantShapeHelper::getInstance().scalarShapeInfo(outType); else // in this case output has the same shape as labels and predictions - outShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(outType, shape::order(labelsShapeInfo), shape::shapeOf(labelsShapeInfo), shape::rank(labelsShapeInfo))); + outShapeInfo = ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(outType, shape::order(labelsShapeInfo), shape::shapeOf(labelsShapeInfo), shape::rank(labelsShapeInfo))); return SHAPELIST(outShapeInfo); } diff --git a/libnd4j/include/ops/declarable/generic/loss/l2_loss.cpp b/libnd4j/include/ops/declarable/generic/loss/l2_loss.cpp index 3afeea2ba..48f3a64fa 100644 --- a/libnd4j/include/ops/declarable/generic/loss/l2_loss.cpp +++ b/libnd4j/include/ops/declarable/generic/loss/l2_loss.cpp @@ -38,7 +38,7 @@ namespace sd { return Status::OK(); } DECLARE_SHAPE_FN(l2_loss) { - return SHAPELIST(ConstantShapeHelper::getInstance()->scalarShapeInfo(ArrayOptions::dataType(inputShape->at(0)))); + return SHAPELIST(ConstantShapeHelper::getInstance().scalarShapeInfo(ArrayOptions::dataType(inputShape->at(0)))); } DECLARE_TYPES(l2_loss) { diff --git a/libnd4j/include/ops/declarable/generic/loss/logLoss.cpp b/libnd4j/include/ops/declarable/generic/loss/logLoss.cpp index e43e7b1d1..ab0c8923e 100644 --- a/libnd4j/include/ops/declarable/generic/loss/logLoss.cpp +++ b/libnd4j/include/ops/declarable/generic/loss/logLoss.cpp @@ -130,9 +130,9 @@ DECLARE_SHAPE_FN(log_loss) { Nd4jLong const* outShapeInfo = nullptr; if(INT_ARG(0) != 0) // in this case output is scalar - outShapeInfo = ConstantShapeHelper::getInstance()->scalarShapeInfo(outType); + outShapeInfo = ConstantShapeHelper::getInstance().scalarShapeInfo(outType); else // in this case output has the same shape as labels and predictions - outShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(outType, shape::order(labelsShapeInfo), shape::shapeOf(labelsShapeInfo), shape::rank(labelsShapeInfo))); + outShapeInfo = ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(outType, shape::order(labelsShapeInfo), shape::shapeOf(labelsShapeInfo), shape::rank(labelsShapeInfo))); return SHAPELIST(outShapeInfo); } diff --git a/libnd4j/include/ops/declarable/generic/loss/log_poisson_loss.cpp b/libnd4j/include/ops/declarable/generic/loss/log_poisson_loss.cpp index b39326071..5cc6b60ab 100644 --- a/libnd4j/include/ops/declarable/generic/loss/log_poisson_loss.cpp +++ b/libnd4j/include/ops/declarable/generic/loss/log_poisson_loss.cpp @@ -133,9 +133,9 @@ namespace ops { Nd4jLong const* outShapeInfo = nullptr; if(INT_ARG(0) != 0) // in this case output is scalar - outShapeInfo = ConstantShapeHelper::getInstance()->scalarShapeInfo(outType); + outShapeInfo = ConstantShapeHelper::getInstance().scalarShapeInfo(outType); else // in this case output has the same shape as labels and predictions - outShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(labelsShapeInfo, outType)); + outShapeInfo = ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(labelsShapeInfo, outType)); return SHAPELIST(outShapeInfo); } diff --git a/libnd4j/include/ops/declarable/generic/loss/meanPairWsSqErr.cpp b/libnd4j/include/ops/declarable/generic/loss/meanPairWsSqErr.cpp index 5a0e20807..f36fa3c62 100644 --- 
a/libnd4j/include/ops/declarable/generic/loss/meanPairWsSqErr.cpp +++ b/libnd4j/include/ops/declarable/generic/loss/meanPairWsSqErr.cpp @@ -200,7 +200,7 @@ namespace sd { Nd4jLong const* outShapeInfo = nullptr; if(INT_ARG(0) != 0) // in this case output is scalar - outShapeInfo = ConstantShapeHelper::getInstance()->scalarShapeInfo(outType); + outShapeInfo = ConstantShapeHelper::getInstance().scalarShapeInfo(outType); else { // in this case output has the shape as labels and logits minus last dimension std::vector<int> dimensions = {-1}; outShapeInfo = ShapeUtils::evalReduceShapeInfo(shape::order(predictionsShapeInfo), dimensions, predictionsShapeInfo, false, true, block.getWorkspace()); diff --git a/libnd4j/include/ops/declarable/generic/loss/meanSqErr.cpp b/libnd4j/include/ops/declarable/generic/loss/meanSqErr.cpp index fd00a0364..6c54706c4 100644 --- a/libnd4j/include/ops/declarable/generic/loss/meanSqErr.cpp +++ b/libnd4j/include/ops/declarable/generic/loss/meanSqErr.cpp @@ -129,9 +129,9 @@ DECLARE_SHAPE_FN(mean_sqerr_loss) { Nd4jLong const* outShapeInfo = nullptr; if(INT_ARG(0) != 0) // in this case output is scalar - outShapeInfo = ConstantShapeHelper::getInstance()->scalarShapeInfo(outType); + outShapeInfo = ConstantShapeHelper::getInstance().scalarShapeInfo(outType); else // in this case output has the same shape as labels and predictions - outShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(outType, shape::order(labelsShapeInfo), shape::shapeOf(labelsShapeInfo), shape::rank(labelsShapeInfo))); + outShapeInfo = ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(outType, shape::order(labelsShapeInfo), shape::shapeOf(labelsShapeInfo), shape::rank(labelsShapeInfo))); return SHAPELIST(outShapeInfo); diff --git a/libnd4j/include/ops/declarable/generic/loss/sigmCrossEntropy.cpp b/libnd4j/include/ops/declarable/generic/loss/sigmCrossEntropy.cpp index f2e665bdb..ddd28d43d 100644 --- a/libnd4j/include/ops/declarable/generic/loss/sigmCrossEntropy.cpp +++ b/libnd4j/include/ops/declarable/generic/loss/sigmCrossEntropy.cpp @@ -140,9 +140,9 @@ DECLARE_SHAPE_FN(sigm_cross_entropy_loss) { Nd4jLong const* outShapeInfo = nullptr; if(INT_ARG(0) != 0) // in this case output is scalar - outShapeInfo = ConstantShapeHelper::getInstance()->scalarShapeInfo(outType); + outShapeInfo = ConstantShapeHelper::getInstance().scalarShapeInfo(outType); else // in this case output has the same shape as labels and logits - outShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(outType, shape::order(labelsShapeInfo), shape::shapeOf(labelsShapeInfo), shape::rank(labelsShapeInfo))); + outShapeInfo = ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(outType, shape::order(labelsShapeInfo), shape::shapeOf(labelsShapeInfo), shape::rank(labelsShapeInfo))); return SHAPELIST(outShapeInfo); } diff --git a/libnd4j/include/ops/declarable/generic/loss/softmaxCrossEntropy.cpp b/libnd4j/include/ops/declarable/generic/loss/softmaxCrossEntropy.cpp index f70a58a10..79d46e448 100644 --- a/libnd4j/include/ops/declarable/generic/loss/softmaxCrossEntropy.cpp +++ b/libnd4j/include/ops/declarable/generic/loss/softmaxCrossEntropy.cpp @@ -161,7 +161,7 @@ DECLARE_SHAPE_FN(softmax_cross_entropy_loss) { Nd4jLong const* outShapeInfo = nullptr; if(INT_ARG(0) != 0) // in this case output is scalar - outShapeInfo = ConstantShapeHelper::getInstance()->scalarShapeInfo(outType); + outShapeInfo = ConstantShapeHelper::getInstance().scalarShapeInfo(outType); else { // in
this case output has the shape as labels and logits minus last dimension std::vector<int> dimensions = {-1}; outShapeInfo = ShapeUtils::evalReduceShapeInfo(shape::order(logitsShapeInfo), dimensions, logitsShapeInfo, false, true, block.getWorkspace()); @@ -384,9 +384,9 @@ DECLARE_SHAPE_FN(softmax_cross_entropy_loss_grad) { auto outType = DataTypeUtils::pickFloatingType(ArrayOptions::dataType(logitsShapeInfo)); - auto dLdpShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(outType, shape::order(logitsShapeInfo), shape::shapeOf(logitsShapeInfo), shape::rank(logitsShapeInfo))); - auto dLdwShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(outType, shape::order(weightsShapeInfo), shape::shapeOf(weightsShapeInfo), shape::rank(weightsShapeInfo))); - auto dLdlShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(outType, shape::order(labelsShapeInfo), shape::shapeOf(labelsShapeInfo), shape::rank(labelsShapeInfo))); + auto dLdpShapeInfo = ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(outType, shape::order(logitsShapeInfo), shape::shapeOf(logitsShapeInfo), shape::rank(logitsShapeInfo))); + auto dLdwShapeInfo = ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(outType, shape::order(weightsShapeInfo), shape::shapeOf(weightsShapeInfo), shape::rank(weightsShapeInfo))); + auto dLdlShapeInfo = ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(outType, shape::order(labelsShapeInfo), shape::shapeOf(labelsShapeInfo), shape::rank(labelsShapeInfo))); return SHAPELIST(dLdpShapeInfo, dLdwShapeInfo, dLdlShapeInfo); } diff --git a/libnd4j/include/ops/declarable/generic/loss/softmaxCrossEntropyWithLogits.cpp b/libnd4j/include/ops/declarable/generic/loss/softmaxCrossEntropyWithLogits.cpp index 6dab14365..0636450c7 100644 --- a/libnd4j/include/ops/declarable/generic/loss/softmaxCrossEntropyWithLogits.cpp +++ b/libnd4j/include/ops/declarable/generic/loss/softmaxCrossEntropyWithLogits.cpp @@ -127,8 +127,8 @@ DECLARE_SHAPE_FN(softmax_cross_entropy_loss_with_logits_grad) { DataType outType = DataTypeUtils::pickFloatingType(ArrayOptions::dataType(logitsShapeInfo)); - auto dLdpShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(outType, shape::order(logitsShapeInfo), shape::shapeOf(logitsShapeInfo), shape::rank(logitsShapeInfo))); - auto dLdlShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(outType, shape::order(labelsShapeInfo), shape::shapeOf(labelsShapeInfo), shape::rank(labelsShapeInfo))); + auto dLdpShapeInfo = ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(outType, shape::order(logitsShapeInfo), shape::shapeOf(logitsShapeInfo), shape::rank(logitsShapeInfo))); + auto dLdlShapeInfo = ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(outType, shape::order(labelsShapeInfo), shape::shapeOf(labelsShapeInfo), shape::rank(labelsShapeInfo))); return SHAPELIST(dLdpShapeInfo, dLdlShapeInfo); } diff --git a/libnd4j/include/ops/declarable/generic/nn/activations/crelu.cpp b/libnd4j/include/ops/declarable/generic/nn/activations/crelu.cpp index 539b21145..df107451a 100644 --- a/libnd4j/include/ops/declarable/generic/nn/activations/crelu.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/activations/crelu.cpp @@ -61,7 +61,7 @@ namespace sd { shape.emplace_back(shape::shapeOf(inShape)[e]); shape[shape.size()-1] *= 2; - auto newShape =
ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inShape), shape::order(inShape), shape); + auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inShape), shape::order(inShape), shape); return SHAPELIST(newShape); } @@ -106,7 +106,7 @@ namespace sd { DECLARE_SHAPE_FN(crelu_bp) { auto inShape = inputShape->at(0); - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(inShape))); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(inShape))); } } } diff --git a/libnd4j/include/ops/declarable/generic/nn/batchnorm.cpp b/libnd4j/include/ops/declarable/generic/nn/batchnorm.cpp index 56684c569..7018ae342 100644 --- a/libnd4j/include/ops/declarable/generic/nn/batchnorm.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/batchnorm.cpp @@ -333,10 +333,10 @@ DECLARE_SHAPE_FN(batchnorm_bp) { auto shapes = SHAPELIST(); // dLdI shapeInfo - shapes->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(outType, inShapeInfo)); + shapes->push_back(ConstantShapeHelper::getInstance().createShapeInfo(outType, inShapeInfo)); // dLdM shapeInfo - shapes->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(outType, meanShapeInfo)); + shapes->push_back(ConstantShapeHelper::getInstance().createShapeInfo(outType, meanShapeInfo)); // dLdV shapeInfo (same as dLdM) shapes->push_back(shapes->at(shapes->size()-1)); diff --git a/libnd4j/include/ops/declarable/generic/nn/bias_add.cpp b/libnd4j/include/ops/declarable/generic/nn/bias_add.cpp index eec864c5e..bc164e952 100644 --- a/libnd4j/include/ops/declarable/generic/nn/bias_add.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/bias_add.cpp @@ -58,7 +58,7 @@ DECLARE_SHAPE_FN(biasadd) { auto yShape = inputShape->at(1); auto dtype = ArrayOptions::dataType(yShape); - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(xShape, dtype))); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(xShape, dtype))); } DECLARE_TYPES(biasadd) { diff --git a/libnd4j/include/ops/declarable/generic/nn/convo/deconv2d.cpp b/libnd4j/include/ops/declarable/generic/nn/convo/deconv2d.cpp index e0440692b..d62a98d52 100644 --- a/libnd4j/include/ops/declarable/generic/nn/convo/deconv2d.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/convo/deconv2d.cpp @@ -159,7 +159,7 @@ DECLARE_SHAPE_FN(deconv2d) { outputShape[3] = oC; } - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(ArrayOptions::dataType(weightsShapeInfo), shape::order(inputShapeInfo), outputShape, 4))); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(ArrayOptions::dataType(weightsShapeInfo), shape::order(inputShapeInfo), outputShape, 4))); } DECLARE_TYPES(deconv2d_bp) { diff --git a/libnd4j/include/ops/declarable/generic/nn/convo/deconv2d_tf.cpp b/libnd4j/include/ops/declarable/generic/nn/convo/deconv2d_tf.cpp index ae97c3d65..9af389bf6 100644 --- a/libnd4j/include/ops/declarable/generic/nn/convo/deconv2d_tf.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/convo/deconv2d_tf.cpp @@ -144,7 +144,7 @@ DECLARE_SHAPE_FN(deconv2d_tf) { shape[3] = iC; } - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(weightsShapeInfo), shape::order(gradOShapeInfo), 4, shape)); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(weightsShapeInfo), shape::order(gradOShapeInfo), 4, shape)); } } 
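The change repeated throughout these hunks is mechanical: ConstantShapeHelper::getInstance() now returns a reference rather than a pointer, so every call site flips from "->" to ".". Below is a minimal sketch of the reference-returning (Meyers) singleton pattern this implies; ShapeCache, cachedShapes and _count are illustrative stand-ins, not the actual ConstantShapeHelper API:

#include <cstdio>

class ShapeCache {
public:
    // Returning a reference means callers write getInstance().foo():
    // there is no pointer to null-check, copy, or accidentally delete.
    static ShapeCache& getInstance() {
        static ShapeCache instance;   // constructed on first use; thread-safe since C++11
        return instance;
    }
    ShapeCache(const ShapeCache&) = delete;             // singletons are non-copyable
    ShapeCache& operator=(const ShapeCache&) = delete;

    int cachedShapes() const { return _count; }

private:
    ShapeCache() : _count(0) {}
    int _count;
};

int main() {
    // The patch rewrites call sites mechanically:
    //   old: ShapeCache::getInstance()->cachedShapes()
    //   new: ShapeCache::getInstance().cachedShapes()
    std::printf("cached shapes: %d\n", ShapeCache::getInstance().cachedShapes());
    return 0;
}

Under that reading, each edited call site is purely syntactic; the behavior of the shape-info cache itself is unchanged.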
diff --git a/libnd4j/include/ops/declarable/generic/nn/convo/dilation2d.cpp b/libnd4j/include/ops/declarable/generic/nn/convo/dilation2d.cpp index c3ecddf53..b3a0e1667 100644 --- a/libnd4j/include/ops/declarable/generic/nn/convo/dilation2d.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/convo/dilation2d.cpp @@ -107,7 +107,7 @@ namespace ops { rates = r->template asVectorT<int>(); } else { if (block.numI() < 9) { - auto newShape = ConstantShapeHelper::getInstance()->scalarShapeInfo(block.dataType()); + auto newShape = ConstantShapeHelper::getInstance().scalarShapeInfo(block.dataType()); return SHAPELIST(newShape); } @@ -127,7 +127,7 @@ namespace ops { helpers::dilation_hw(block.launchContext(), input, weights, strides, rates, isSameShape, &sH, &sW, &pH, &pW, &dH, &dW, &oH, &oW); std::array<Nd4jLong, 4> shape = {{bS, oH, oW, iC}}; - auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(weights), 'c', 4, shape.data()); + auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(weights), 'c', 4, shape.data()); return SHAPELIST(newShape); } } diff --git a/libnd4j/include/ops/declarable/generic/nn/dot_product_attention.cpp b/libnd4j/include/ops/declarable/generic/nn/dot_product_attention.cpp index c80608e03..49dc52a03 100644 --- a/libnd4j/include/ops/declarable/generic/nn/dot_product_attention.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/dot_product_attention.cpp @@ -113,8 +113,8 @@ namespace ops { auto keys_shape = inputShape->at(1); auto values_shape = inputShape->at(2); - auto weights_shape = ConstantShapeHelper::getInstance()->createShapeInfo(sd::ArrayOptions::dataType(values_shape), 'c', ShapeUtils::evalShapeForMatmul(keys_shape, query_shape, true, false)); - auto output_shape = ConstantShapeHelper::getInstance()->createShapeInfo(sd::ArrayOptions::dataType(values_shape), 'c', ShapeUtils::evalShapeForMatmul(values_shape, weights_shape, false, false)); + auto weights_shape = ConstantShapeHelper::getInstance().createShapeInfo(sd::ArrayOptions::dataType(values_shape), 'c', ShapeUtils::evalShapeForMatmul(keys_shape, query_shape, true, false)); + auto output_shape = ConstantShapeHelper::getInstance().createShapeInfo(sd::ArrayOptions::dataType(values_shape), 'c', ShapeUtils::evalShapeForMatmul(values_shape, weights_shape, false, false)); if(INT_ARG(1)){ return SHAPELIST(output_shape, weights_shape); diff --git a/libnd4j/include/ops/declarable/generic/nn/embedding_lookup.cpp b/libnd4j/include/ops/declarable/generic/nn/embedding_lookup.cpp index 0888854ee..0f4a01e03 100644 --- a/libnd4j/include/ops/declarable/generic/nn/embedding_lookup.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/embedding_lookup.cpp @@ -94,7 +94,7 @@ DECLARE_SHAPE_FN(embedding_lookup) { for (int e = 1; e < outRank; e++) shapeInfo[e] = shape::sizeAt(inShapeInfo, e); - auto outShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inShapeInfo), shape::order(inShapeInfo), shapeInfo); + auto outShapeInfo = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inShapeInfo), shape::order(inShapeInfo), shapeInfo); return SHAPELIST(outShapeInfo); } @@ -106,7 +106,7 @@ DECLARE_SHAPE_FN(embedding_lookup) { for (int e = 1; e < outRank; e++) shapeInfo[e] = shape::sizeAt(inShapeInfo, e); - auto outShapeInfo =
ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inShapeInfo), shape::order(inShapeInfo), shapeInfo); return SHAPELIST(outShapeInfo); } diff --git a/libnd4j/include/ops/declarable/generic/nn/multi_head_dot_product_attention.cpp b/libnd4j/include/ops/declarable/generic/nn/multi_head_dot_product_attention.cpp index f9b7284f1..7ff8eb4c5 100644 --- a/libnd4j/include/ops/declarable/generic/nn/multi_head_dot_product_attention.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/multi_head_dot_product_attention.cpp @@ -138,8 +138,8 @@ namespace ops { auto numHeads = shape::sizeAt(WkShape, 0); auto timeSteps = shape::sizeAt(keysShape, 2); - auto weightsShape = ConstantShapeHelper::getInstance()->createShapeInfo(sd::ArrayOptions::dataType(valuesShape), 'c', {batchSize, numHeads, timeSteps, queryCount}); - auto outputShape = ConstantShapeHelper::getInstance()->createShapeInfo(sd::ArrayOptions::dataType(valuesShape), 'c', {batchSize, outSize, queryCount}); + auto weightsShape = ConstantShapeHelper::getInstance().createShapeInfo(sd::ArrayOptions::dataType(valuesShape), 'c', {batchSize, numHeads, timeSteps, queryCount}); + auto outputShape = ConstantShapeHelper::getInstance().createShapeInfo(sd::ArrayOptions::dataType(valuesShape), 'c', {batchSize, outSize, queryCount}); if(INT_ARG(1)){ return SHAPELIST(outputShape, weightsShape); diff --git a/libnd4j/include/ops/declarable/generic/nn/pooling/avgpool2d.cpp b/libnd4j/include/ops/declarable/generic/nn/pooling/avgpool2d.cpp index b93cbe47f..fde075667 100644 --- a/libnd4j/include/ops/declarable/generic/nn/pooling/avgpool2d.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/pooling/avgpool2d.cpp @@ -133,7 +133,7 @@ DECLARE_SHAPE_FN(avgpool2d) { newShape[3] = iD; } - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(ArrayOptions::dataType(inShape), shape::order(inShape), newShape, 4))); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(ArrayOptions::dataType(inShape), shape::order(inShape), newShape, 4))); } DECLARE_TYPES(avgpool2d_bp) { @@ -210,7 +210,7 @@ DECLARE_SHAPE_FN(avgpool2d_bp) { REQUIRE_TRUE(inputShape->at(0)[0] == 4, 0, "AVGPOOL2D_BP op: input array must be 4D, but got %i instead!", inputShape->at(0)[0]); REQUIRE_TRUE(inputShape->at(1)[0] == 4, 0, "AVGPOOL2D_BP op: output's gradient array (next epsilon) must be 4D, but got %i instead!", inputShape->at(1)[0]); - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(inputShape->at(0), ArrayOptions::dataType(inputShape->at(1))))); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(inputShape->at(0), ArrayOptions::dataType(inputShape->at(1))))); } diff --git a/libnd4j/include/ops/declarable/generic/nn/pooling/avgpool3d.cpp b/libnd4j/include/ops/declarable/generic/nn/pooling/avgpool3d.cpp index 85b8d8833..d8df11385 100644 --- a/libnd4j/include/ops/declarable/generic/nn/pooling/avgpool3d.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/pooling/avgpool3d.cpp @@ -135,7 +135,7 @@ DECLARE_SHAPE_FN(avgpool3dnew) { outputShape[4] = iC; } // TF DOC: A Tensor. Has the same type as input. 
- return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(ArrayOptions::dataType(inputShapeInfo), shape::order(inputShapeInfo), outputShape, 5))); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(ArrayOptions::dataType(inputShapeInfo), shape::order(inputShapeInfo), outputShape, 5))); } DECLARE_TYPES(avgpool3dnew_bp) { @@ -202,7 +202,7 @@ CUSTOM_OP_IMPL(avgpool3dnew_bp, 2, 1, false, 0, 14) { DECLARE_SHAPE_FN(avgpool3dnew_bp) { - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(inputShape->at(0), ArrayOptions::dataType(inputShape->at(1))))); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(inputShape->at(0), ArrayOptions::dataType(inputShape->at(1))))); } diff --git a/libnd4j/include/ops/declarable/generic/nn/pooling/maxpool2d.cpp b/libnd4j/include/ops/declarable/generic/nn/pooling/maxpool2d.cpp index 31dd72fc3..8a37b90b0 100644 --- a/libnd4j/include/ops/declarable/generic/nn/pooling/maxpool2d.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/pooling/maxpool2d.cpp @@ -136,7 +136,7 @@ DECLARE_SHAPE_FN(maxpool2d) { newShape[3] = iC; } - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(ArrayOptions::dataType(inShape), order, newShape, 4))); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(ArrayOptions::dataType(inShape), order, newShape, 4))); } DECLARE_TYPES(maxpool2d_bp) { @@ -215,7 +215,7 @@ DECLARE_SHAPE_FN(maxpool2d_bp) { REQUIRE_TRUE(inputShape->at(0)[0] == 4, 0, "MAXPOOL2D_BP op: input array must be 4D, but got %i instead!", inputShape->at(0)[0]); REQUIRE_TRUE(inputShape->at(1)[0] == 4, 0, "MAXPOOL2D_BP op: output's gradient array (next epsilon) must be 4D, but got %i instead!", inputShape->at(1)[0]); - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(inputShape->at(0), ArrayOptions::dataType(inputShape->at(1))))); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(inputShape->at(0), ArrayOptions::dataType(inputShape->at(1))))); } diff --git a/libnd4j/include/ops/declarable/generic/nn/pooling/maxpool3d.cpp b/libnd4j/include/ops/declarable/generic/nn/pooling/maxpool3d.cpp index d1b5928b6..fd28901cc 100644 --- a/libnd4j/include/ops/declarable/generic/nn/pooling/maxpool3d.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/pooling/maxpool3d.cpp @@ -137,7 +137,7 @@ DECLARE_SHAPE_FN(maxpool3dnew) { outputShape[4] = iC; } - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(ArrayOptions::dataType(inputShapeInfo), shape::order(inputShapeInfo), outputShape, 5))); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(ArrayOptions::dataType(inputShapeInfo), shape::order(inputShapeInfo), outputShape, 5))); } DECLARE_TYPES(maxpool3dnew_bp) { @@ -217,7 +217,7 @@ CUSTOM_OP_IMPL(maxpool3dnew_bp, 2, 1, false, 0, 14) { DECLARE_SHAPE_FN(maxpool3dnew_bp) { - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(inputShape->at(0), ArrayOptions::dataType(inputShape->at(1))))); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(inputShape->at(0), ArrayOptions::dataType(inputShape->at(1))))); } diff --git a/libnd4j/include/ops/declarable/generic/nn/pooling/maxpool_with_argmax.cpp b/libnd4j/include/ops/declarable/generic/nn/pooling/maxpool_with_argmax.cpp index 111846584..eced3c2b4 100644 --- 
a/libnd4j/include/ops/declarable/generic/nn/pooling/maxpool_with_argmax.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/pooling/maxpool_with_argmax.cpp @@ -53,8 +53,8 @@ namespace sd { DECLARE_SHAPE_FN(max_pool_with_argmax) { auto in = inputShape->at(0); - auto valuesShape = ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(in)); - auto indicesShape = ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(in, DataType::INT64)); + auto valuesShape = ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(in)); + auto indicesShape = ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(in, DataType::INT64)); return SHAPELIST(valuesShape, indicesShape); } diff --git a/libnd4j/include/ops/declarable/generic/nn/pooling/pnormpool2d.cpp b/libnd4j/include/ops/declarable/generic/nn/pooling/pnormpool2d.cpp index adcd40daa..927627ff8 100644 --- a/libnd4j/include/ops/declarable/generic/nn/pooling/pnormpool2d.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/pooling/pnormpool2d.cpp @@ -130,7 +130,7 @@ namespace sd { newShape[3] = iC; } - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(ArrayOptions::dataType(inShape), order, newShape, 4))); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(ArrayOptions::dataType(inShape), order, newShape, 4))); } @@ -225,7 +225,7 @@ DECLARE_SHAPE_FN(pnormpool2d_bp) { REQUIRE_TRUE(inputShape->at(0)[0] == 4, 0, "PNORMPOOL2D_BP op: input array must be 4D, but got %i instead!", inputShape->at(0)[0]); REQUIRE_TRUE(inputShape->at(1)[0] == 4, 0, "PNORMPOOL2D_BP op: output's gradient array (next epsilon) must be 4D, but got %i instead!", inputShape->at(1)[0]); - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(inputShape->at(0), ArrayOptions::dataType(inputShape->at(1))))); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(inputShape->at(0), ArrayOptions::dataType(inputShape->at(1))))); } } diff --git a/libnd4j/include/ops/declarable/generic/nn/recurrent/gru.cpp b/libnd4j/include/ops/declarable/generic/nn/recurrent/gru.cpp index a0b1e707b..0be3c8393 100644 --- a/libnd4j/include/ops/declarable/generic/nn/recurrent/gru.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/recurrent/gru.cpp @@ -91,7 +91,7 @@ DECLARE_SHAPE_FN(gru) { REQUIRE_TRUE(Wh->isSameShape(whCorrectShape), 0, "GRU operation: wrong shape of hidden-to-hidden weights array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(whCorrectShape).c_str(), ShapeUtils::shapeAsString(Wh).c_str()); REQUIRE_TRUE(b->isSameShape(bCorrectShape), 0, "GRU operation: wrong shape of biases array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(bCorrectShape).c_str(), ShapeUtils::shapeAsString(b).c_str()); - auto hShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(hI->dataType(), hI->ordering(), {time, bS, nOut}); + auto hShapeInfo = ConstantShapeHelper::getInstance().createShapeInfo(hI->dataType(), hI->ordering(), {time, bS, nOut}); return SHAPELIST(hShapeInfo); } @@ -173,11 +173,11 @@ DECLARE_SHAPE_FN(gru_bp) { REQUIRE_TRUE(b->isSameShape(bCorrectShape), 0, "GRU_BP operation: wrong shape of biases array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(bCorrectShape).c_str(), ShapeUtils::shapeAsString(b).c_str()); REQUIRE_TRUE(dLdh->isSameShape(hCorrectShape),0, "GRU_BP operation: wrong shape of gradient vs. 
ff output, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(hCorrectShape).c_str(), ShapeUtils::shapeAsString(dLdh).c_str()); - auto dLdxShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(dLdh->dataType(), x->shapeInfo()); - auto dLdhIShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(dLdh->dataType(), hI->shapeInfo()); - auto dLdWxShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(dLdh->dataType(), Wx->shapeInfo()); - auto dLdWhShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(dLdh->dataType(), Wh->shapeInfo()); - auto dLdbShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(dLdh->dataType(), b->shapeInfo()); + auto dLdxShapeInfo = ConstantShapeHelper::getInstance().createShapeInfo(dLdh->dataType(), x->shapeInfo()); + auto dLdhIShapeInfo = ConstantShapeHelper::getInstance().createShapeInfo(dLdh->dataType(), hI->shapeInfo()); + auto dLdWxShapeInfo = ConstantShapeHelper::getInstance().createShapeInfo(dLdh->dataType(), Wx->shapeInfo()); + auto dLdWhShapeInfo = ConstantShapeHelper::getInstance().createShapeInfo(dLdh->dataType(), Wh->shapeInfo()); + auto dLdbShapeInfo = ConstantShapeHelper::getInstance().createShapeInfo(dLdh->dataType(), b->shapeInfo()); return SHAPELIST(dLdxShapeInfo, dLdhIShapeInfo, dLdWxShapeInfo, dLdWhShapeInfo, dLdbShapeInfo); } diff --git a/libnd4j/include/ops/declarable/generic/nn/recurrent/gruCell.cpp b/libnd4j/include/ops/declarable/generic/nn/recurrent/gruCell.cpp index 037f09736..25c8d3744 100644 --- a/libnd4j/include/ops/declarable/generic/nn/recurrent/gruCell.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/recurrent/gruCell.cpp @@ -111,7 +111,7 @@ DECLARE_SHAPE_FN(gruCell) { s0[2] = nU; ShapeUtils::updateStridesAndType(s0, x, shape::order(hLast)); - auto ts0 = ConstantShapeHelper::getInstance()->createFromExisting(s0, block.workspace()); + auto ts0 = ConstantShapeHelper::getInstance().createFromExisting(s0, block.workspace()); //4 output shapes, all [bs, nU] return SHAPELIST(ts0, ts0, ts0, ts0); diff --git a/libnd4j/include/ops/declarable/generic/nn/recurrent/lstmCell.cpp b/libnd4j/include/ops/declarable/generic/nn/recurrent/lstmCell.cpp index 20a9e6710..32cb481ee 100644 --- a/libnd4j/include/ops/declarable/generic/nn/recurrent/lstmCell.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/recurrent/lstmCell.cpp @@ -136,7 +136,7 @@ DECLARE_SHAPE_FN(lstmCell) { ShapeUtils::updateStridesAndType(hShapeInfo, xtShapeInfo, shape::order(ht_1ShapeInfo)); ShapeUtils::updateStridesAndType(cShapeInfo, xtShapeInfo, shape::order(ct_1ShapeInfo)); - auto result = SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(hShapeInfo), ConstantShapeHelper::getInstance()->createShapeInfo(cShapeInfo)); + auto result = SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(hShapeInfo), ConstantShapeHelper::getInstance().createShapeInfo(cShapeInfo)); RELEASE(hShapeInfo, block.workspace()); RELEASE(cShapeInfo, block.workspace()); return result; diff --git a/libnd4j/include/ops/declarable/generic/nn/recurrent/lstmLayer.cpp b/libnd4j/include/ops/declarable/generic/nn/recurrent/lstmLayer.cpp index a5c8b8d28..0a0754a8e 100644 --- a/libnd4j/include/ops/declarable/generic/nn/recurrent/lstmLayer.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/recurrent/lstmLayer.cpp @@ -362,7 +362,7 @@ DECLARE_SHAPE_FN(lstmLayer) { hShape = {sL, 2, bS, nOut}; } - shapes->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(type, x->ordering(), hShape)); + 
shapes->push_back(ConstantShapeHelper::getInstance().createShapeInfo(type, x->ordering(), hShape)); } // evaluate hL shape (output at last step) @@ -375,7 +375,7 @@ DECLARE_SHAPE_FN(lstmLayer) { else hLShape = {2, bS, nOut}; - shapes->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(type, x->ordering(), hLShape)); + shapes->push_back(ConstantShapeHelper::getInstance().createShapeInfo(type, x->ordering(), hLShape)); if(retLastC) // cL and hL have same shapes shapes->push_back(shapes->at(shapes->size() - 1)); @@ -391,7 +391,7 @@ DECLARE_SHAPE_FN(lstmLayer) { else cLShape = {2, bS, nOut}; - shapes->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(type, x->ordering(), cLShape)); + shapes->push_back(ConstantShapeHelper::getInstance().createShapeInfo(type, x->ordering(), cLShape)); } return shapes; diff --git a/libnd4j/include/ops/declarable/generic/nn/recurrent/sru.cpp b/libnd4j/include/ops/declarable/generic/nn/recurrent/sru.cpp index 84dd6356a..ba4e3d52f 100644 --- a/libnd4j/include/ops/declarable/generic/nn/recurrent/sru.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/recurrent/sru.cpp @@ -127,7 +127,7 @@ DECLARE_SHAPE_FN(sru) { ShapeUtils::updateStridesAndType(newShapeInfo1, xShapeInfo, shape::order(xShapeInfo)); ShapeDescriptor descriptor(newShapeInfo1); RELEASE(newShapeInfo1, block.getWorkspace()); - auto result = ConstantShapeHelper::getInstance()->createShapeInfo(descriptor); + auto result = ConstantShapeHelper::getInstance().createShapeInfo(descriptor); return SHAPELIST(result, result); } @@ -311,7 +311,7 @@ DECLARE_SHAPE_FN(sru_bp) { ShapeDescriptor descriptor3(ArrayOptions::dataType(inShape), order, {1, 2 * inSize}); ShapeDescriptor descriptor4(ArrayOptions::dataType(inShape), order, {bS, inSize}); - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(descriptor1), ConstantShapeHelper::getInstance()->createShapeInfo(descriptor2), ConstantShapeHelper::getInstance()->createShapeInfo(descriptor3), ConstantShapeHelper::getInstance()->createShapeInfo(descriptor4)); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(descriptor1), ConstantShapeHelper::getInstance().createShapeInfo(descriptor2), ConstantShapeHelper::getInstance().createShapeInfo(descriptor3), ConstantShapeHelper::getInstance().createShapeInfo(descriptor4)); } @@ -396,7 +396,7 @@ DECLARE_SHAPE_FN(sru_bi) { char order = shape::order(xShapeInfo); ShapeDescriptor descriptor(ArrayOptions::dataType(xShapeInfo), order, {time, bS, 2 * inSize}); - auto result = ConstantShapeHelper::getInstance()->createShapeInfo(descriptor); + auto result = ConstantShapeHelper::getInstance().createShapeInfo(descriptor); return SHAPELIST(result, result); } @@ -505,7 +505,7 @@ DECLARE_SHAPE_FN(sru_bi_bp) { ShapeDescriptor descriptor3(ArrayOptions::dataType(xShapeInfo), order, {4 * inSize}); ShapeDescriptor descriptor4(ArrayOptions::dataType(xShapeInfo), order, {bS, 2 * inSize}); - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(descriptor1), ConstantShapeHelper::getInstance()->createShapeInfo(descriptor2), ConstantShapeHelper::getInstance()->createShapeInfo(descriptor3), ConstantShapeHelper::getInstance()->createShapeInfo(descriptor4)); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(descriptor1), ConstantShapeHelper::getInstance().createShapeInfo(descriptor2), ConstantShapeHelper::getInstance().createShapeInfo(descriptor3), ConstantShapeHelper::getInstance().createShapeInfo(descriptor4)); } } @@ -771,7 +771,7 @@ DECLARE_SHAPE_FN(sru_bi_bp) { // 
ShapeUtils::updateStridesAndType(newShapeInfo1, inShape, order); -// auto result = ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(newShapeInfo1)); +// auto result = ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(newShapeInfo1)); // RELEASE(newShapeInfo1, block.getWorkspace()); // return SHAPELIST(result, result); // } @@ -935,5 +935,5 @@ DECLARE_SHAPE_FN(sru_bi_bp) { // ShapeDescriptor descriptor3(ArrayOptions::dataType(inShape), order, {1, 2 * inSize}); // ShapeDescriptor descriptor4(ArrayOptions::dataType(inShape), order, {bS, inSize}); -// return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(descriptor1), ConstantShapeHelper::getInstance()->createShapeInfo(descriptor2), ConstantShapeHelper::getInstance()->createShapeInfo(descriptor3), ConstantShapeHelper::getInstance()->createShapeInfo(descriptor4)); +// return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(descriptor1), ConstantShapeHelper::getInstance().createShapeInfo(descriptor2), ConstantShapeHelper::getInstance().createShapeInfo(descriptor3), ConstantShapeHelper::getInstance().createShapeInfo(descriptor4)); // } diff --git a/libnd4j/include/ops/declarable/generic/nn/recurrent/sruCell.cpp b/libnd4j/include/ops/declarable/generic/nn/recurrent/sruCell.cpp index ee446037c..3268da453 100644 --- a/libnd4j/include/ops/declarable/generic/nn/recurrent/sruCell.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/recurrent/sruCell.cpp @@ -97,7 +97,7 @@ DECLARE_SHAPE_FN(sruCell) { ShapeUtils::updateStridesAndType(hShapeInfo, ct_1ShapeInfo, shape::order(ct_1ShapeInfo)); ShapeUtils::updateStridesAndType(cShapeInfo, ct_1ShapeInfo, shape::order(ct_1ShapeInfo)); - return SHAPELIST(ConstantShapeHelper::getInstance()->createFromExisting(hShapeInfo, block.workspace()), ConstantShapeHelper::getInstance()->createFromExisting(cShapeInfo, block.workspace())); + return SHAPELIST(ConstantShapeHelper::getInstance().createFromExisting(hShapeInfo, block.workspace()), ConstantShapeHelper::getInstance().createFromExisting(cShapeInfo, block.workspace())); } diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/bincount.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/bincount.cpp index 3b9fc3916..45b864f26 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/bincount.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/bincount.cpp @@ -111,7 +111,7 @@ namespace sd { outLength = sd::math::nd4j_min(outLength, max->e<Nd4jLong>(0)); } - auto newshape = ConstantShapeHelper::getInstance()->vectorShapeInfo(outLength, dtype); + auto newshape = ConstantShapeHelper::getInstance().vectorShapeInfo(outLength, dtype); shapeList->push_back(newshape); return shapeList; diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/broadcast_dynamic_shape.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/broadcast_dynamic_shape.cpp index 4fc31dd51..d954a0b44 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/broadcast_dynamic_shape.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/broadcast_dynamic_shape.cpp @@ -82,7 +82,7 @@ DECLARE_SHAPE_FN(broadcast_dynamic_shape) { const int maxRank = xRank > yRank ?
xRank : yRank; - auto outputShapeInfo = ConstantShapeHelper::getInstance()->vectorShapeInfo(maxRank, ArrayOptions::dataType(inputShape->at(0))); + auto outputShapeInfo = ConstantShapeHelper::getInstance().vectorShapeInfo(maxRank, ArrayOptions::dataType(inputShape->at(0))); return SHAPELIST(outputShapeInfo); } diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/check_numerics.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/check_numerics.cpp index 561c6bb5b..3d06d4ced 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/check_numerics.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/check_numerics.cpp @@ -41,7 +41,7 @@ namespace sd { } DECLARE_SHAPE_FN(check_numerics) { - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(inputShape->at(0)))); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(inputShape->at(0)))); } DECLARE_TYPES(check_numerics) { diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/compare_and_bitpack.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/compare_and_bitpack.cpp index 1decc65f0..f694502b3 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/compare_and_bitpack.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/compare_and_bitpack.cpp @@ -53,7 +53,7 @@ namespace sd { auto inShape = inputShape->at(0); DataType newType = DataType::UINT8; - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(inShape, newType))); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(inShape, newType))); } } diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/confusion_matrix.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/confusion_matrix.cpp index f90513ca3..f5c5cbb91 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/confusion_matrix.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/confusion_matrix.cpp @@ -77,7 +77,7 @@ namespace sd { } std::array<Nd4jLong, 2> shape = {{numClasses,numClasses}}; - auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(dtype, 'c', 2, shape.data()); + auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(dtype, 'c', 2, shape.data()); return SHAPELIST(newShape); } } diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/expose.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/expose.cpp index fd3315157..d9c931f21 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/expose.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/expose.cpp @@ -61,7 +61,7 @@ namespace sd { auto var = block.getVariable(e); if (var->variableType() == VariableType::NDARRAY) { auto inShape = inputShape->at(e); - shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(inShape))); + shapeList->push_back(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(inShape))); } } diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/in_top_k.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/in_top_k.cpp index a243842d2..7618de5b1 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/in_top_k.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/in_top_k.cpp @@ -46,7 +46,7 @@ namespace sd { auto in = inputShape->at(1); int shapeRank = shape::rank(in); - auto aShape = ConstantShapeHelper::getInstance()->createShapeInfo(sd::DataType::BOOL, shape::order(in), shape::rank(in), shape::shapeOf(in)); + auto aShape =
ConstantShapeHelper::getInstance().createShapeInfo(sd::DataType::BOOL, shape::order(in), shape::rank(in), shape::shapeOf(in)); shapeList->push_back(aShape); return shapeList; } diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/listdiff.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/listdiff.cpp index 49c7a2957..86a37619e 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/listdiff.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/listdiff.cpp @@ -54,8 +54,8 @@ namespace sd { REQUIRE_TRUE(saved > 0, 0, "ListDiff: no matches found"); - auto shapeX = ConstantShapeHelper::getInstance()->vectorShapeInfo(saved, values->dataType()); - auto shapeY = ConstantShapeHelper::getInstance()->vectorShapeInfo(saved, DataType::INT64); + auto shapeX = ConstantShapeHelper::getInstance().vectorShapeInfo(saved, values->dataType()); + auto shapeY = ConstantShapeHelper::getInstance().vectorShapeInfo(saved, DataType::INT64); return SHAPELIST(shapeX, shapeY); } diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/non_max_suppression.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/non_max_suppression.cpp index ecddab3bc..91512b2f7 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/non_max_suppression.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/non_max_suppression.cpp @@ -106,7 +106,7 @@ namespace sd { if (actualIndicesCount < maxOutputSize) maxOutputSize = actualIndicesCount; } - outputShape = ConstantShapeHelper::getInstance()->vectorShapeInfo(maxOutputSize, DataType::INT32); + outputShape = ConstantShapeHelper::getInstance().vectorShapeInfo(maxOutputSize, DataType::INT32); return SHAPELIST(outputShape); } @@ -211,7 +211,7 @@ namespace sd { if (len > 0) len = helpers::nonMaxSuppressionV3(block.launchContext(), boxes, scales, maxOutputSize, overlayThreshold, scoreThreshold, nullptr); - auto outputShape = ConstantShapeHelper::getInstance()->vectorShapeInfo(len, DataType::INT32); + auto outputShape = ConstantShapeHelper::getInstance().vectorShapeInfo(len, DataType::INT32); return SHAPELIST(outputShape); } diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/non_max_suppression_overlaps.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/non_max_suppression_overlaps.cpp index 30f59ff35..1cc4addbc 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/non_max_suppression_overlaps.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/non_max_suppression_overlaps.cpp @@ -75,7 +75,7 @@ namespace sd { if (boxSize < maxOutputSize) maxOutputSize = boxSize; - auto outputShape = ConstantShapeHelper::getInstance()->vectorShapeInfo(maxOutputSize, DataType::INT32); + auto outputShape = ConstantShapeHelper::getInstance().vectorShapeInfo(maxOutputSize, DataType::INT32); return SHAPELIST(outputShape); } diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/nth_element.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/nth_element.cpp index b0a549c43..b9326a981 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/nth_element.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/nth_element.cpp @@ -61,11 +61,11 @@ namespace sd { outShape = CONSTANT(outputShape); } else if (outRank == 1) { - outShape = ConstantShapeHelper::getInstance()->vectorShapeInfo(shape::sizeAt(in, 0), ArrayOptions::dataType(in)); + outShape = ConstantShapeHelper::getInstance().vectorShapeInfo(shape::sizeAt(in, 0), ArrayOptions::dataType(in)); } else { //outputShape = shape::createScalarShapeInfo(); - outShape = 
ConstantShapeHelper::getInstance()->scalarShapeInfo(ArrayOptions::dataType(in)); + outShape = ConstantShapeHelper::getInstance().scalarShapeInfo(ArrayOptions::dataType(in)); } return SHAPELIST(outShape); } diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/onehot.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/onehot.cpp index 6349b84fe..5b25ea7e6 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/onehot.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/onehot.cpp @@ -100,7 +100,7 @@ namespace sd { shape.push_back(shape::shapeOf(inShape)[e]); shape.insert(shape.begin() + axis, depth); - auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(dtype, 'c', rank + 1, shape.data()); + auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(dtype, 'c', rank + 1, shape.data()); return SHAPELIST(newShape); } diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/top_k.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/top_k.cpp index 799572794..b042e94fe 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/top_k.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/top_k.cpp @@ -76,7 +76,7 @@ namespace sd { aShape[shapeRank] = k; shape::updateStrides(aShape, shape::order(in)); - shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(aShape, (e == 0?ArrayOptions::dataType(in):sd::DataType::INT64)))); + shapeList->push_back(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(aShape, (e == 0?ArrayOptions::dataType(in):sd::DataType::INT64)))); RELEASE(aShape, block.getWorkspace()); } diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/unique.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/unique.cpp index 9005348a1..9d234abaa 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/unique.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/unique.cpp @@ -46,14 +46,14 @@ namespace sd { int uniqueCount = helpers::uniqueCount(block.launchContext(), source); if (uniqueCount == 0) { // empty value Shape - valuesShape = ConstantShapeHelper::getInstance()->emptyShapeInfo(source->dataType()); + valuesShape = ConstantShapeHelper::getInstance().emptyShapeInfo(source->dataType()); } else { // all output shapes are 1D arrays (vectors) - valuesShape = ConstantShapeHelper::getInstance()->vectorShapeInfo(uniqueCount, ArrayOptions::dataType(in)); + valuesShape = ConstantShapeHelper::getInstance().vectorShapeInfo(uniqueCount, ArrayOptions::dataType(in)); } // second output is always LONG - indicesShape = ConstantShapeHelper::getInstance()->vectorShapeInfo(shape::length(in), sd::DataType::INT64); + indicesShape = ConstantShapeHelper::getInstance().vectorShapeInfo(shape::length(in), sd::DataType::INT64); //COPY_SHAPE_EX(in, indicesShape, block.getWorkspace()); @@ -77,13 +77,13 @@ namespace sd { int uniqueCount = helpers::uniqueCount(block.launchContext(), source); // all output shapes are 1D arrays (vectors) // all output shapes are 1D arrays (vectors) - auto valuesShape = ConstantShapeHelper::getInstance()->vectorShapeInfo(uniqueCount, source->dataType()); + auto valuesShape = ConstantShapeHelper::getInstance().vectorShapeInfo(uniqueCount, source->dataType()); // second output is always LONG - auto indicesShape = ConstantShapeHelper::getInstance()->vectorShapeInfo(source->lengthOf(), sd::DataType::INT64); + auto indicesShape = ConstantShapeHelper::getInstance().vectorShapeInfo(source->lengthOf(), sd::DataType::INT64); // third one as well - auto 
countsShape = ConstantShapeHelper::getInstance()->vectorShapeInfo(uniqueCount, sd::DataType::INT64); + auto countsShape = ConstantShapeHelper::getInstance().vectorShapeInfo(uniqueCount, sd::DataType::INT64); return SHAPELIST(valuesShape, indicesShape, countsShape); } diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/zero_fraction.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/zero_fraction.cpp index f70e92cf5..91f0a564d 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/zero_fraction.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/zero_fraction.cpp @@ -48,7 +48,7 @@ namespace sd { return Status::OK(); } DECLARE_SHAPE_FN(zero_fraction) { - return SHAPELIST(ConstantShapeHelper::getInstance()->scalarShapeInfo(sd::DataType::DOUBLE)); + return SHAPELIST(ConstantShapeHelper::getInstance().scalarShapeInfo(sd::DataType::DOUBLE)); } DECLARE_TYPES(zero_fraction) { diff --git a/libnd4j/include/ops/declarable/generic/random/bernoulli.cpp b/libnd4j/include/ops/declarable/generic/random/bernoulli.cpp index f0b2b587b..ded5bfee5 100644 --- a/libnd4j/include/ops/declarable/generic/random/bernoulli.cpp +++ b/libnd4j/include/ops/declarable/generic/random/bernoulli.cpp @@ -53,7 +53,7 @@ namespace sd { auto in = INPUT_VARIABLE(0); auto shape = in->template asVectorT<Nd4jLong>(); - auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(block.dataType(), 'c', shape); + auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(block.dataType(), 'c', shape); return SHAPELIST(newShape); } diff --git a/libnd4j/include/ops/declarable/generic/random/exponential.cpp b/libnd4j/include/ops/declarable/generic/random/exponential.cpp index cac3d1a88..735bab583 100644 --- a/libnd4j/include/ops/declarable/generic/random/exponential.cpp +++ b/libnd4j/include/ops/declarable/generic/random/exponential.cpp @@ -42,7 +42,7 @@ namespace sd { auto in = INPUT_VARIABLE(0); auto shape = in->template asVectorT<Nd4jLong>(); - auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(block.dataType(), 'c', shape); + auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(block.dataType(), 'c', shape); return SHAPELIST(newShape); } diff --git a/libnd4j/include/ops/declarable/generic/random/gamma.cpp b/libnd4j/include/ops/declarable/generic/random/gamma.cpp index e21458530..a00ce2b7e 100644 --- a/libnd4j/include/ops/declarable/generic/random/gamma.cpp +++ b/libnd4j/include/ops/declarable/generic/random/gamma.cpp @@ -68,7 +68,7 @@ namespace sd { auto dtype = ArrayOptions::dataType(alphaShape); for (auto i = 0; i < shape::rank(additionalShape); i++) shape.push_back(shape::sizeAt(additionalShape, i)); - auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(dtype, 'c', shape); + auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(dtype, 'c', shape); return SHAPELIST(newShape); } diff --git a/libnd4j/include/ops/declarable/generic/random/get_seed.cpp b/libnd4j/include/ops/declarable/generic/random/get_seed.cpp index 7042ae6dd..9f768e9f3 100644 --- a/libnd4j/include/ops/declarable/generic/random/get_seed.cpp +++ b/libnd4j/include/ops/declarable/generic/random/get_seed.cpp @@ -36,7 +36,7 @@ namespace sd { } DECLARE_SHAPE_FN(get_seed) { - auto newshape = ConstantShapeHelper::getInstance()->scalarShapeInfo(DataType::INT64); + auto newshape = ConstantShapeHelper::getInstance().scalarShapeInfo(DataType::INT64); return SHAPELIST(newshape); } diff --git a/libnd4j/include/ops/declarable/generic/random/multinomial.cpp
b/libnd4j/include/ops/declarable/generic/random/multinomial.cpp index 5361d1bbb..2e8225d2c 100644 --- a/libnd4j/include/ops/declarable/generic/random/multinomial.cpp +++ b/libnd4j/include/ops/declarable/generic/random/multinomial.cpp @@ -99,7 +99,7 @@ namespace sd { nShape[dimA] = numOfSamples; DataType nType = (argSize > 1) ? ( INT_ARG(1) >= 0 ? static_cast<DataType>(INT_ARG(1)) : sd::DataType::INT64) : sd::DataType::INT64; - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(nType, input->ordering(), nShape)); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(nType, input->ordering(), nShape)); } DECLARE_TYPES(random_multinomial) { diff --git a/libnd4j/include/ops/declarable/generic/random/normal.cpp b/libnd4j/include/ops/declarable/generic/random/normal.cpp index f81a06786..701570784 100644 --- a/libnd4j/include/ops/declarable/generic/random/normal.cpp +++ b/libnd4j/include/ops/declarable/generic/random/normal.cpp @@ -48,7 +48,7 @@ namespace sd { auto in = INPUT_VARIABLE(0); auto shape = in->template asVectorT<Nd4jLong>(); - auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(block.dataType(), 'c', shape); + auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(block.dataType(), 'c', shape); return SHAPELIST(newShape); } diff --git a/libnd4j/include/ops/declarable/generic/random/poisson.cpp b/libnd4j/include/ops/declarable/generic/random/poisson.cpp index 74f3a8570..eedfbbe1f 100644 --- a/libnd4j/include/ops/declarable/generic/random/poisson.cpp +++ b/libnd4j/include/ops/declarable/generic/random/poisson.cpp @@ -51,7 +51,7 @@ namespace sd { for (auto d = 0; d < shape::rank(lambdaShape); ++d ) { shape.emplace_back(shape::sizeAt(lambdaShape, d)); } - auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(dtype, 'c', shape); + auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(dtype, 'c', shape); return SHAPELIST(newShape); } diff --git a/libnd4j/include/ops/declarable/generic/random/random_crop.cpp b/libnd4j/include/ops/declarable/generic/random/random_crop.cpp index 2ac2495d3..1b30b2f91 100644 --- a/libnd4j/include/ops/declarable/generic/random/random_crop.cpp +++ b/libnd4j/include/ops/declarable/generic/random/random_crop.cpp @@ -59,7 +59,7 @@ DECLARE_SHAPE_FN(random_crop) { for (int e = 0; e < shape.size(); e++) shape[e] = (*in).e<Nd4jLong>(e); - auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(typeShape), 'c', shape); + auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(typeShape), 'c', shape); return SHAPELIST(newShape); } diff --git a/libnd4j/include/ops/declarable/generic/random/set_seed.cpp b/libnd4j/include/ops/declarable/generic/random/set_seed.cpp index f4c240d50..f7050f3ab 100644 --- a/libnd4j/include/ops/declarable/generic/random/set_seed.cpp +++ b/libnd4j/include/ops/declarable/generic/random/set_seed.cpp @@ -48,7 +48,7 @@ namespace sd { } DECLARE_SHAPE_FN(set_seed) { - auto newshape = ConstantShapeHelper::getInstance()->scalarShapeInfo(block.dataType()); + auto newshape = ConstantShapeHelper::getInstance().scalarShapeInfo(block.dataType()); return SHAPELIST(newshape); } diff --git a/libnd4j/include/ops/declarable/generic/random/uniform.cpp b/libnd4j/include/ops/declarable/generic/random/uniform.cpp index 94df6b32d..d4abccf78 100644 --- a/libnd4j/include/ops/declarable/generic/random/uniform.cpp +++ b/libnd4j/include/ops/declarable/generic/random/uniform.cpp @@ -80,7 +80,7 @@ namespace sd { if (block.width() > 1)
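
// Aside on the RNG shape functions above (bernoulli through uniform): input 0
// is a 1-D shape tensor, so the output shape is simply that tensor read back
// as a vector of longs via asVectorT<Nd4jLong>() and passed to createShapeInfo
// with 'c' ordering; the ops differ only in where the output data type comes
// from (block.dataType(), a D_ARG, or another input's dtype).
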
REQUIRE_TRUE(dtype == INPUT_VARIABLE(1)->dataType(), 0, "RandomUniform: data type of output and min/max args should be the same"); - auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(dtype, 'c', shape); + auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(dtype, 'c', shape); return SHAPELIST(newShape); } diff --git a/libnd4j/include/ops/declarable/generic/reduce/argamax.cpp b/libnd4j/include/ops/declarable/generic/reduce/argamax.cpp index 5fb452227..a347c398a 100644 --- a/libnd4j/include/ops/declarable/generic/reduce/argamax.cpp +++ b/libnd4j/include/ops/declarable/generic/reduce/argamax.cpp @@ -84,7 +84,7 @@ namespace sd { // special case - output is scalar if (dims.empty() || (dims.size() == 1 && dims.at(0) == sd::DataTypeUtils::max())) { - return SHAPELIST(ConstantShapeHelper::getInstance()->scalarShapeInfo(dtype)); + return SHAPELIST(ConstantShapeHelper::getInstance().scalarShapeInfo(dtype)); } return SHAPELIST(ShapeUtils::evalReduceShapeInfo('c', dims, inputShape->at(0), dtype, keepDims, false, block.getWorkspace())); diff --git a/libnd4j/include/ops/declarable/generic/reduce/argamin.cpp b/libnd4j/include/ops/declarable/generic/reduce/argamin.cpp index 4f590aae8..68ad9d2e5 100644 --- a/libnd4j/include/ops/declarable/generic/reduce/argamin.cpp +++ b/libnd4j/include/ops/declarable/generic/reduce/argamin.cpp @@ -84,7 +84,7 @@ namespace sd { // special case - output is scalar if (dims.empty() || (dims.size() == 1 && dims.at(0) == sd::DataTypeUtils::max())) { - return SHAPELIST(ConstantShapeHelper::getInstance()->scalarShapeInfo(dtype)); + return SHAPELIST(ConstantShapeHelper::getInstance().scalarShapeInfo(dtype)); } return SHAPELIST(ShapeUtils::evalReduceShapeInfo('c', dims, inputShape->at(0), dtype, keepDims, false, block.getWorkspace())); diff --git a/libnd4j/include/ops/declarable/generic/reduce/argmax.cpp b/libnd4j/include/ops/declarable/generic/reduce/argmax.cpp index 9c45b4c37..f8a2486fa 100644 --- a/libnd4j/include/ops/declarable/generic/reduce/argmax.cpp +++ b/libnd4j/include/ops/declarable/generic/reduce/argmax.cpp @@ -86,7 +86,7 @@ namespace sd { // special case - output is scalar if (dims.empty() || (dims.size() == 1 && dims.at(0) == sd::DataTypeUtils::max())) { - return SHAPELIST(ConstantShapeHelper::getInstance()->scalarShapeInfo(dtype)); + return SHAPELIST(ConstantShapeHelper::getInstance().scalarShapeInfo(dtype)); } return SHAPELIST(ShapeUtils::evalReduceShapeInfo('c', dims, inputShape->at(0), dtype, keepDims, false, block.getWorkspace())); diff --git a/libnd4j/include/ops/declarable/generic/reduce/argmin.cpp b/libnd4j/include/ops/declarable/generic/reduce/argmin.cpp index 97430a24f..40648b7f6 100644 --- a/libnd4j/include/ops/declarable/generic/reduce/argmin.cpp +++ b/libnd4j/include/ops/declarable/generic/reduce/argmin.cpp @@ -88,7 +88,7 @@ namespace sd { // special case - output is scalar if (dims.empty() || (dims.size() == 1 && dims.at(0) == sd::DataTypeUtils::max())) { - return SHAPELIST(ConstantShapeHelper::getInstance()->scalarShapeInfo(dtype)); + return SHAPELIST(ConstantShapeHelper::getInstance().scalarShapeInfo(dtype)); } return SHAPELIST(ShapeUtils::evalReduceShapeInfo('c', dims, inputShape->at(0), dtype, keepDims, false, block.getWorkspace())); diff --git a/libnd4j/include/ops/declarable/generic/shape/broadcast_to.cpp b/libnd4j/include/ops/declarable/generic/shape/broadcast_to.cpp index 49961bfe2..18d10be7b 100644 --- a/libnd4j/include/ops/declarable/generic/shape/broadcast_to.cpp +++ 
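
The argamax/argamin/argmax/argmin shape functions above share a special case worth spelling out: an empty axis list, or a single axis equal to an int-max sentinel (sd::DataTypeUtils::max() in the hunks, assumed here to be the integer maximum), means "reduce over all dimensions", so the output collapses to a scalar. A standalone sketch of that predicate, not the exact libnd4j code:

    #include <vector>
    #include <limits>

    // true when an argmax/argmin-style reduction collapses to a scalar
    bool reducesToScalar(const std::vector<int>& dims) {
        return dims.empty() ||
               (dims.size() == 1 && dims[0] == std::numeric_limits<int>::max());
    }
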
b/libnd4j/include/ops/declarable/generic/shape/broadcast_to.cpp @@ -76,7 +76,7 @@ DECLARE_SHAPE_FN(broadcast_to) { for(int i = 1; i <= inputRank; ++i) REQUIRE_TRUE(inputShapeInfo[inputRank+1-i] == outShape[shapeLen-i] || inputShapeInfo[inputRank+1-i] == 1, 0, "BROADCAST_TO op: shape of input array %s can't be broadcasted to the shape %s !", ShapeUtils::shapeAsString(inputShapeInfo).c_str(), ShapeUtils::shapeAsString(outShape).c_str()); - auto outShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inputShapeInfo), shape::order(inputShapeInfo), outShape); + auto outShapeInfo = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inputShapeInfo), shape::order(inputShapeInfo), outShape); return SHAPELIST(outShapeInfo); } diff --git a/libnd4j/include/ops/declarable/generic/shape/evaluate_reduction_shape.cpp b/libnd4j/include/ops/declarable/generic/shape/evaluate_reduction_shape.cpp index 6a0ad187c..c35a81279 100644 --- a/libnd4j/include/ops/declarable/generic/shape/evaluate_reduction_shape.cpp +++ b/libnd4j/include/ops/declarable/generic/shape/evaluate_reduction_shape.cpp @@ -34,7 +34,7 @@ namespace sd { auto shape = inputShape->asVectorT(); - auto tempShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(sd::DataType::INT64, 'c', shape); + auto tempShapeInfo = ConstantShapeHelper::getInstance().createShapeInfo(sd::DataType::INT64, 'c', shape); auto tempReductionShapeInfo = ShapeUtils::evalReduceShapeInfo('c', axis, tempShapeInfo, keepDims, oldFormat, block.workspace()); REQUIRE_TRUE(output->lengthOf() == shape::rank(tempReductionShapeInfo), 0, "evaluate_reduction_shape: output length should be %i, but got %i instead", shape::rank(tempReductionShapeInfo), output->lengthOf()); @@ -73,7 +73,7 @@ namespace sd { } } - return SHAPELIST(ConstantShapeHelper::getInstance()->vectorShapeInfo(length, sd::DataType::INT64)); + return SHAPELIST(ConstantShapeHelper::getInstance().vectorShapeInfo(length, sd::DataType::INT64)); } } } diff --git a/libnd4j/include/ops/declarable/generic/shape/expand_dims.cpp b/libnd4j/include/ops/declarable/generic/shape/expand_dims.cpp index 86900c264..df31f5109 100644 --- a/libnd4j/include/ops/declarable/generic/shape/expand_dims.cpp +++ b/libnd4j/include/ops/declarable/generic/shape/expand_dims.cpp @@ -70,13 +70,13 @@ namespace sd { if (shape::rank(inShape) == 0) { Nd4jLong x = 1; - auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inShape), 'c', 1, &x); + auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inShape), 'c', 1, &x); return SHAPELIST(newShape); } // FIXME: temp workaround for TF if (shape::isScalar(inShape)) { - auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inShape), 'c', 2, shape::shapeOf(inShape)); + auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inShape), 'c', 2, shape::shapeOf(inShape)); return SHAPELIST(newShape); } @@ -94,7 +94,7 @@ namespace sd { shape.insert(shape.begin() + axis, 1); - auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inShape), order, shape); + auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inShape), order, shape); return SHAPELIST(newShape); } } diff --git a/libnd4j/include/ops/declarable/generic/shape/flatten.cpp b/libnd4j/include/ops/declarable/generic/shape/flatten.cpp index 19cc4f469..8327ca1a1 100644 --- 
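
The expand_dims hunk above inserts a unit dimension at the requested axis. A standalone sketch of the shape arithmetic; the negative-axis normalization is assumed from the usual convention and is not part of the hunk itself:

    #include <cstdint>
    #include <vector>

    // insert a unit dimension at `axis`; assumes 0 <= axis <= rank after
    // normalization, e.g. {3,4} with axis = 1 becomes {3,1,4}
    std::vector<int64_t> expandDims(std::vector<int64_t> shape, int axis) {
        const int rank = static_cast<int>(shape.size());
        if (axis < 0) axis += rank + 1;        // normalize negative axis
        shape.insert(shape.begin() + axis, 1); // new unit dimension
        return shape;
    }
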
a/libnd4j/include/ops/declarable/generic/shape/flatten.cpp +++ b/libnd4j/include/ops/declarable/generic/shape/flatten.cpp @@ -60,7 +60,7 @@ namespace sd { REQUIRE_TRUE(dtype == ArrayOptions::dataType(inputShape->at(e)), 0, "Flatten: all input arrays must have the same datatype"); } - return SHAPELIST(ConstantShapeHelper::getInstance()->vectorShapeInfo(length, dtype)); + return SHAPELIST(ConstantShapeHelper::getInstance().vectorShapeInfo(length, dtype)); } } } diff --git a/libnd4j/include/ops/declarable/generic/shape/order.cpp b/libnd4j/include/ops/declarable/generic/shape/order.cpp index 5b978f48f..2d7e0994c 100644 --- a/libnd4j/include/ops/declarable/generic/shape/order.cpp +++ b/libnd4j/include/ops/declarable/generic/shape/order.cpp @@ -45,7 +45,7 @@ namespace sd { auto isFOrder = INT_ARG(0) == 1; - auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(input), isFOrder ? 'f' : 'c', shape::rank(input), shape::shapeOf(input)); + auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(input), isFOrder ? 'f' : 'c', shape::rank(input), shape::shapeOf(input)); return SHAPELIST(newShape); } } diff --git a/libnd4j/include/ops/declarable/generic/shape/rank.cpp b/libnd4j/include/ops/declarable/generic/shape/rank.cpp index 8a617dc59..d12e15239 100644 --- a/libnd4j/include/ops/declarable/generic/shape/rank.cpp +++ b/libnd4j/include/ops/declarable/generic/shape/rank.cpp @@ -37,7 +37,7 @@ namespace sd { return Status::OK(); } DECLARE_SHAPE_FN(rank) { - return SHAPELIST(ConstantShapeHelper::getInstance()->scalarShapeInfo(sd::DataType::INT32)); + return SHAPELIST(ConstantShapeHelper::getInstance().scalarShapeInfo(sd::DataType::INT32)); } diff --git a/libnd4j/include/ops/declarable/generic/shape/reshape.cpp b/libnd4j/include/ops/declarable/generic/shape/reshape.cpp index 023e9bf89..38bae587e 100644 --- a/libnd4j/include/ops/declarable/generic/shape/reshape.cpp +++ b/libnd4j/include/ops/declarable/generic/shape/reshape.cpp @@ -42,7 +42,7 @@ CUSTOM_OP_IMPL(reshape, 1, 1, false, 0, -2) { REQUIRE_TRUE(x->lengthOf() == z->lengthOf(), 0, "Reshape: lengths before and after reshape should match, but got %i vs %i", x->lengthOf(), z->lengthOf()); - if (Environment::getInstance()->isDebugAndVerbose()) + if (Environment::getInstance().isDebugAndVerbose()) nd4j_printv("Reshape: new shape", z->getShapeAsVector()); z->assign(x->reshape(z->ordering(), z->getShapeAsVector())); @@ -159,7 +159,7 @@ DECLARE_SHAPE_FN(reshape) { auto len = shape::prodLong(shapeNew.data(), shapeNew.size()); REQUIRE_TRUE(x->lengthOf() == len, 0, "Reshape: lengths before and after reshape should match, but got %i vs %i", x->lengthOf(), len); - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(x->dataType(), orderNew, shapeNew)); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(x->dataType(), orderNew, shapeNew)); } diff --git a/libnd4j/include/ops/declarable/generic/shape/shape.cpp b/libnd4j/include/ops/declarable/generic/shape/shape.cpp index e2db3db3e..098825df3 100644 --- a/libnd4j/include/ops/declarable/generic/shape/shape.cpp +++ b/libnd4j/include/ops/declarable/generic/shape/shape.cpp @@ -46,7 +46,7 @@ namespace sd { if (block.numI() > 0) dtype = DataTypeUtils::fromInt(INT_ARG(0)); - return SHAPELIST(ConstantShapeHelper::getInstance()->vectorShapeInfo(shape::rank(inShape), dtype)); + return SHAPELIST(ConstantShapeHelper::getInstance().vectorShapeInfo(shape::rank(inShape), dtype)); }; DECLARE_TYPES(shape_of) { diff --git 
a/libnd4j/include/ops/declarable/generic/shape/shapes.cpp b/libnd4j/include/ops/declarable/generic/shape/shapes.cpp index 6481d1db3..3f5428122 100644 --- a/libnd4j/include/ops/declarable/generic/shape/shapes.cpp +++ b/libnd4j/include/ops/declarable/generic/shape/shapes.cpp @@ -43,7 +43,7 @@ namespace sd { for (int e = 0; e < inputShape->size(); e++) { auto inShape = inputShape->at(e); - shapeList->push_back(ConstantShapeHelper::getInstance()->vectorShapeInfo(shape::rank(inShape), sd::DataType::INT64)); + shapeList->push_back(ConstantShapeHelper::getInstance().vectorShapeInfo(shape::rank(inShape), sd::DataType::INT64)); } return shapeList; diff --git a/libnd4j/include/ops/declarable/generic/shape/size.cpp b/libnd4j/include/ops/declarable/generic/shape/size.cpp index d31e782c6..c30ed1b58 100644 --- a/libnd4j/include/ops/declarable/generic/shape/size.cpp +++ b/libnd4j/include/ops/declarable/generic/shape/size.cpp @@ -37,7 +37,7 @@ namespace sd { return Status::OK(); } DECLARE_SHAPE_FN(size) { - return SHAPELIST(ConstantShapeHelper::getInstance()->scalarShapeInfo(sd::DataType::INT64)); + return SHAPELIST(ConstantShapeHelper::getInstance().scalarShapeInfo(sd::DataType::INT64)); } DECLARE_TYPES(size) { diff --git a/libnd4j/include/ops/declarable/generic/shape/size_at.cpp b/libnd4j/include/ops/declarable/generic/shape/size_at.cpp index 2c27b018a..46491e688 100644 --- a/libnd4j/include/ops/declarable/generic/shape/size_at.cpp +++ b/libnd4j/include/ops/declarable/generic/shape/size_at.cpp @@ -42,7 +42,7 @@ namespace sd { } DECLARE_SHAPE_FN(size_at) { - return SHAPELIST(ConstantShapeHelper::getInstance()->scalarShapeInfo(sd::DataType::INT64)); + return SHAPELIST(ConstantShapeHelper::getInstance().scalarShapeInfo(sd::DataType::INT64)); } DECLARE_TYPES(size_at) { diff --git a/libnd4j/include/ops/declarable/generic/shape/squeeze.cpp b/libnd4j/include/ops/declarable/generic/shape/squeeze.cpp index 0b71dae52..5698f957f 100644 --- a/libnd4j/include/ops/declarable/generic/shape/squeeze.cpp +++ b/libnd4j/include/ops/declarable/generic/shape/squeeze.cpp @@ -99,7 +99,7 @@ namespace sd { auto length = shape::length(in); if (rank == 0 || (rank == 1 && length == 1)) { - shapeList->push_back(ConstantShapeHelper::getInstance()->scalarShapeInfo(ArrayOptions::dataType(in))); + shapeList->push_back(ConstantShapeHelper::getInstance().scalarShapeInfo(ArrayOptions::dataType(in))); return shapeList; } @@ -144,11 +144,11 @@ namespace sd { } if (shape.size() == 0) { - shapeList->push_back(ConstantShapeHelper::getInstance()->scalarShapeInfo(ArrayOptions::dataType(in))); + shapeList->push_back(ConstantShapeHelper::getInstance().scalarShapeInfo(ArrayOptions::dataType(in))); return shapeList; } - auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(in), order, shape); + auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(in), order, shape); shapeList->push_back(newShape); return shapeList; } diff --git a/libnd4j/include/ops/declarable/generic/shape/tile_to_shape.cpp b/libnd4j/include/ops/declarable/generic/shape/tile_to_shape.cpp index 687d79f25..ec0476e04 100644 --- a/libnd4j/include/ops/declarable/generic/shape/tile_to_shape.cpp +++ b/libnd4j/include/ops/declarable/generic/shape/tile_to_shape.cpp @@ -48,7 +48,7 @@ namespace ops { auto conv = ArrayUtils::toLongVector(*block.getIArguments()); - auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(in), shape::order(in), conv); + auto newShape = 
ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(in), shape::order(in), conv); return SHAPELIST(newShape); } diff --git a/libnd4j/include/ops/declarable/generic/tensor/create.cpp b/libnd4j/include/ops/declarable/generic/tensor/create.cpp index c79b55497..c692a74d8 100644 --- a/libnd4j/include/ops/declarable/generic/tensor/create.cpp +++ b/libnd4j/include/ops/declarable/generic/tensor/create.cpp @@ -44,7 +44,7 @@ namespace sd { auto shape = shapeInput->getBufferAsVector(); - return SHAPELIST(sd::ConstantShapeHelper::getInstance()->createShapeInfo(dtype, order, shape)); + return SHAPELIST(sd::ConstantShapeHelper::getInstance().createShapeInfo(dtype, order, shape)); } DECLARE_TYPES(create) { diff --git a/libnd4j/include/ops/declarable/generic/tensor/fill.cpp b/libnd4j/include/ops/declarable/generic/tensor/fill.cpp index 18b9ce2b8..81cece901 100644 --- a/libnd4j/include/ops/declarable/generic/tensor/fill.cpp +++ b/libnd4j/include/ops/declarable/generic/tensor/fill.cpp @@ -80,7 +80,7 @@ namespace sd { if (block.width() > 1) { dataType = INPUT_VARIABLE(1)->dataType(); } else if (block.numT() > 0) { - dataType = Environment::getInstance()->defaultFloatDataType(); + dataType = Environment::getInstance().defaultFloatDataType(); } else if (block.numI() > 0) { dataType = sd::DataType::INT32; } else if (block.numB() > 0) { diff --git a/libnd4j/include/ops/declarable/generic/tensor/lin_space.cpp b/libnd4j/include/ops/declarable/generic/tensor/lin_space.cpp index 374456be6..97f7b390f 100644 --- a/libnd4j/include/ops/declarable/generic/tensor/lin_space.cpp +++ b/libnd4j/include/ops/declarable/generic/tensor/lin_space.cpp @@ -58,7 +58,7 @@ namespace ops { auto dataType = (nInputs > 0) ? ArrayOptions::dataType(inputShape->at(0)) : ( block.numD() > 0 ? static_cast(D_ARG(0)) : DataType::FLOAT32) ; Nd4jLong steps = (nInputs > 0) ? INPUT_VARIABLE(2)->e(0) : static_cast(I_ARG(0)); - return SHAPELIST(ConstantShapeHelper::getInstance()->vectorShapeInfo(steps, dataType)); + return SHAPELIST(ConstantShapeHelper::getInstance().vectorShapeInfo(steps, dataType)); } diff --git a/libnd4j/include/ops/declarable/generic/tensor/ones_as.cpp b/libnd4j/include/ops/declarable/generic/tensor/ones_as.cpp index 32ce54300..0fb8fe283 100644 --- a/libnd4j/include/ops/declarable/generic/tensor/ones_as.cpp +++ b/libnd4j/include/ops/declarable/generic/tensor/ones_as.cpp @@ -36,7 +36,7 @@ namespace sd { DECLARE_SHAPE_FN(ones_as) { auto in = inputShape->at(0); auto dtype = block.numD() ? 
D_ARG(0) : ArrayOptions::dataType(in); - auto shape = sd::ConstantShapeHelper::getInstance()->createShapeInfo(dtype, in); + auto shape = sd::ConstantShapeHelper::getInstance().createShapeInfo(dtype, in); //nd4j_printf("numD: %i; dtype: %s\n", block.numD(), DataTypeUtils::asString(dtype).c_str()); diff --git a/libnd4j/include/ops/declarable/generic/tensor/range.cpp b/libnd4j/include/ops/declarable/generic/tensor/range.cpp index a39e07912..2f88b819b 100644 --- a/libnd4j/include/ops/declarable/generic/tensor/range.cpp +++ b/libnd4j/include/ops/declarable/generic/tensor/range.cpp @@ -153,7 +153,7 @@ DECLARE_SHAPE_FN(range) { if (limit == start){ //Return [0] to match TF - return SHAPELIST(ConstantShapeHelper::getInstance()->vectorShapeInfo(0, dtype)); + return SHAPELIST(ConstantShapeHelper::getInstance().vectorShapeInfo(0, dtype)); } REQUIRE_TRUE(delta != 0, 0, "CUSTOM RANGE OP: delta should not be equal to zero !"); @@ -183,7 +183,7 @@ DECLARE_SHAPE_FN(range) { if (limit == start){ //Return [0] to match TF - return SHAPELIST(ConstantShapeHelper::getInstance()->vectorShapeInfo(0, dtype)); + return SHAPELIST(ConstantShapeHelper::getInstance().vectorShapeInfo(0, dtype)); } REQUIRE_TRUE(delta != 0, 0, "CUSTOM RANGE OP: delta should not be equal to zero !"); @@ -213,7 +213,7 @@ DECLARE_SHAPE_FN(range) { if (limit == start){ //Return [0] to match TF - return SHAPELIST(ConstantShapeHelper::getInstance()->vectorShapeInfo(0, sd::DataType::INT32)); + return SHAPELIST(ConstantShapeHelper::getInstance().vectorShapeInfo(0, sd::DataType::INT32)); } REQUIRE_TRUE(delta != 0, 0, "CUSTOM RANGE OP: delta should not be equal to zero !"); @@ -247,7 +247,7 @@ DECLARE_SHAPE_FN(range) { if (limit == start){ //Return [0] to match TF - return SHAPELIST(ConstantShapeHelper::getInstance()->vectorShapeInfo(0, Environment::getInstance()->defaultFloatDataType())); + return SHAPELIST(ConstantShapeHelper::getInstance().vectorShapeInfo(0, Environment::getInstance().defaultFloatDataType())); } @@ -256,10 +256,10 @@ DECLARE_SHAPE_FN(range) { steps = static_cast((limit - start) / delta); if (!block.numD()) { - if (Environment::getInstance()->precisionBoostAllowed()) + if (Environment::getInstance().precisionBoostAllowed()) dataType = sd::DataType::DOUBLE; else - dataType = Environment::getInstance()->defaultFloatDataType(); + dataType = Environment::getInstance().defaultFloatDataType(); } if(math::nd4j_abs(start + steps * delta) < math::nd4j_abs(limit)) @@ -270,7 +270,7 @@ DECLARE_SHAPE_FN(range) { REQUIRE_TRUE(steps > 0, 0, "CUSTOM RANGE OP: value of (limit-start)/delta should be positive !"); - return SHAPELIST(ConstantShapeHelper::getInstance()->vectorShapeInfo(steps, dataType)); + return SHAPELIST(ConstantShapeHelper::getInstance().vectorShapeInfo(steps, dataType)); } diff --git a/libnd4j/include/ops/declarable/generic/tensor/strided_slice.cpp b/libnd4j/include/ops/declarable/generic/tensor/strided_slice.cpp index 88b06a631..bbdc84ce5 100644 --- a/libnd4j/include/ops/declarable/generic/tensor/strided_slice.cpp +++ b/libnd4j/include/ops/declarable/generic/tensor/strided_slice.cpp @@ -416,13 +416,13 @@ namespace sd { Nd4jLong offset; shape::calcSubArrShapeInfoAndOffset(indices.data(), x->shapeInfo(), subArrShapeInfo, offset, true, true); - auto subArrShapeInfoPack = ConstantShapeHelper::getInstance()->bufferForShapeInfo(subArrShapeInfo); + auto subArrShapeInfoPack = ConstantShapeHelper::getInstance().bufferForShapeInfo(subArrShapeInfo); NDArray::prepareSpecialUse({z}, {x}); 
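
// Aside on the change just below: the reinterpret_cast around
// subArrShapeInfoPack.primary()/special() disappears because the pack returned
// by ConstantShapeHelper::getInstance().bufferForShapeInfo() evidently now
// exposes its host and device buffers as typed shape-info pointers (presumably
// const Nd4jLong*), so they can be handed to the executioner directly.
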
NativeOpExecutioner::execTransformAny(block.launchContext(), sd::transform::Assign, - x->bufferWithOffset(offset), reinterpret_cast(subArrShapeInfoPack.primary()), - x->specialBufferWithOffset(offset), reinterpret_cast(subArrShapeInfoPack.special()), + x->bufferWithOffset(offset), subArrShapeInfoPack.primary(), + x->specialBufferWithOffset(offset), subArrShapeInfoPack.special(), z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), nullptr, nullptr, nullptr, true); @@ -518,18 +518,18 @@ namespace sd { std::vector indices; bool result = _preprocess_strided_slice(&indices, &shape, input_shape, begin, end, strides, begin_mask, ellipsis_mask, end_mask, new_axis_mask, shrink_axis_mask, &is_identity, &is_simple_slice, &is_dim0); if (indices.size()) { - auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inShape), 'c', + auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inShape), 'c', shape); // if (inputLen > 1) { -// newShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inShape), 'c', +// newShape = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inShape), 'c', // shape); // } else { -// newShape = ConstantShapeHelper::getInstance()->scalarShapeInfo(ArrayOptions::dataType(inShape)); +// newShape = ConstantShapeHelper::getInstance().scalarShapeInfo(ArrayOptions::dataType(inShape)); // } return SHAPELIST(newShape); } - return SHAPELIST(ConstantShapeHelper::getInstance()->emptyShapeInfo(ArrayOptions::dataType(inShape))); + return SHAPELIST(ConstantShapeHelper::getInstance().emptyShapeInfo(ArrayOptions::dataType(inShape))); } diff --git a/libnd4j/include/ops/declarable/generic/tensor/zeros_as.cpp b/libnd4j/include/ops/declarable/generic/tensor/zeros_as.cpp index 6d475af53..7935c567e 100644 --- a/libnd4j/include/ops/declarable/generic/tensor/zeros_as.cpp +++ b/libnd4j/include/ops/declarable/generic/tensor/zeros_as.cpp @@ -39,7 +39,7 @@ namespace sd { DECLARE_SHAPE_FN(zeros_as) { auto in = inputShape->at(0); auto dtype = block.numD() ? 
D_ARG(0) : ArrayOptions::dataType(in); - auto shape = sd::ConstantShapeHelper::getInstance()->createShapeInfo(dtype, in); + auto shape = sd::ConstantShapeHelper::getInstance().createShapeInfo(dtype, in); return SHAPELIST(shape); } diff --git a/libnd4j/include/ops/declarable/generic/tests/test_scalar.cpp b/libnd4j/include/ops/declarable/generic/tests/test_scalar.cpp index 437222052..e67122b05 100644 --- a/libnd4j/include/ops/declarable/generic/tests/test_scalar.cpp +++ b/libnd4j/include/ops/declarable/generic/tests/test_scalar.cpp @@ -50,7 +50,7 @@ namespace sd { ArrayOptions::setDataType(newShape, ArrayOptions::dataType(inputShape->at(0))); - auto shape = ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(newShape)); + auto shape = ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(newShape)); RELEASE(newShape, block.getWorkspace()); return SHAPELIST(shape); } diff --git a/libnd4j/include/ops/declarable/generic/tests/testcustom.cpp b/libnd4j/include/ops/declarable/generic/tests/testcustom.cpp index 89480e5bc..e8d7fc6c3 100644 --- a/libnd4j/include/ops/declarable/generic/tests/testcustom.cpp +++ b/libnd4j/include/ops/declarable/generic/tests/testcustom.cpp @@ -39,7 +39,7 @@ namespace sd { for (int e = 0; e < shape::rank(inputShape->at(0)); e++) shapeOf[e] = inputShape->at(0)[e+1] * 2; - auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(block.dataType(), 'c', shape::rank(inputShape->at(0)), shapeOf); + auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(block.dataType(), 'c', shape::rank(inputShape->at(0)), shapeOf); RELEASE(shapeOf, block.getWorkspace()); return SHAPELIST(newShape); } diff --git a/libnd4j/include/ops/declarable/generic/thrid_party/firas_sparse.cpp b/libnd4j/include/ops/declarable/generic/thrid_party/firas_sparse.cpp index 7860036ed..3a115b8db 100644 --- a/libnd4j/include/ops/declarable/generic/thrid_party/firas_sparse.cpp +++ b/libnd4j/include/ops/declarable/generic/thrid_party/firas_sparse.cpp @@ -93,7 +93,7 @@ namespace sd { auto inP = inputShape->at(0); std::vector shape({shape::shapeOf(inP)[0], (Nd4jLong) block.getIArguments()->size()}); - auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inP), 'c', shape); + auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inP), 'c', shape); return SHAPELIST(newShape); } diff --git a/libnd4j/include/ops/declarable/generic/transforms/batch_to_space.cpp b/libnd4j/include/ops/declarable/generic/transforms/batch_to_space.cpp index 607980f0d..0ffad12a2 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/batch_to_space.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/batch_to_space.cpp @@ -119,7 +119,7 @@ DECLARE_SHAPE_FN(batch_to_space) { REQUIRE_TRUE(oW >= 0, 0, "BatchToSpace: crop left/right values are too big and cause negative output width dimension !"); // we always give out C order here - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inputShapeInfo), 'c', {dim0 / (blockSize * blockSize), oH, oW, inputShapeInfo[4]})); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inputShapeInfo), 'c', {dim0 / (blockSize * blockSize), oH, oW, inputShapeInfo[4]})); } diff --git a/libnd4j/include/ops/declarable/generic/transforms/batch_to_space_nd.cpp b/libnd4j/include/ops/declarable/generic/transforms/batch_to_space_nd.cpp index f62921cc2..1ae1a2e61 100644 --- 
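
The batch_to_space shape hunk above returns {dim0 / (blockSize * blockSize), oH, oW, inputShapeInfo[4]}. The same arithmetic as a standalone sketch, assuming NHWC layout and with oH/oW expanded to the crop-adjusted values the real shape function computes and validates earlier:

    #include <array>
    #include <cstdint>

    // batch shrinks by blockSize^2 while H and W grow by blockSize minus crops
    std::array<int64_t, 4> batchToSpaceShape(const std::array<int64_t, 4>& in,
                                             int64_t block,
                                             int64_t cropTop, int64_t cropBottom,
                                             int64_t cropLeft, int64_t cropRight) {
        const int64_t oH = in[1] * block - cropTop - cropBottom;
        const int64_t oW = in[2] * block - cropLeft - cropRight;
        return { in[0] / (block * block), oH, oW, in[3] };
    }
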
a/libnd4j/include/ops/declarable/generic/transforms/batch_to_space_nd.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/batch_to_space_nd.cpp @@ -118,7 +118,7 @@ DECLARE_SHAPE_FN(batch_to_space_nd) { for (uint i = 0; i < numOfSpatialDims; ++i) outShape[i + 1] = outShape[i + 1] * INPUT_VARIABLE(1)->e(i) - INPUT_VARIABLE(2)->e(i,0) - INPUT_VARIABLE(2)->e(i,1); - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inputShapeInfo), 'c', outShape)); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inputShapeInfo), 'c', outShape)); } diff --git a/libnd4j/include/ops/declarable/generic/transforms/clip_by_global_norm.cpp b/libnd4j/include/ops/declarable/generic/transforms/clip_by_global_norm.cpp index 99a01d390..7758cf298 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/clip_by_global_norm.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/clip_by_global_norm.cpp @@ -55,7 +55,7 @@ DECLARE_SHAPE_FN(clip_by_global_norm) { shapeList->push_back(CONSTANT(newShape)); } - shapeList->push_back(ConstantShapeHelper::getInstance()->scalarShapeInfo(ArrayOptions::dataType(inputShape->at(0)))); + shapeList->push_back(ConstantShapeHelper::getInstance().scalarShapeInfo(ArrayOptions::dataType(inputShape->at(0)))); return shapeList; } diff --git a/libnd4j/include/ops/declarable/generic/transforms/concat.cpp b/libnd4j/include/ops/declarable/generic/transforms/concat.cpp index 1cf750e00..6c0901201 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/concat.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/concat.cpp @@ -140,9 +140,9 @@ DECLARE_SHAPE_FN(concat) { if(inputShape->at(i)[0] == 0) { if (shape::isEmpty(inputShape->at(i))) - arrShapes.push_back(ConstantShapeHelper::getInstance()->vectorShapeInfo(0, INPUT_VARIABLE(0)->dataType())); + arrShapes.push_back(ConstantShapeHelper::getInstance().vectorShapeInfo(0, INPUT_VARIABLE(0)->dataType())); else - arrShapes.push_back(ConstantShapeHelper::getInstance()->vectorShapeInfo(1, INPUT_VARIABLE(0)->dataType())); + arrShapes.push_back(ConstantShapeHelper::getInstance().vectorShapeInfo(1, INPUT_VARIABLE(0)->dataType())); } else{ arrShapes.push_back(inputShape->at(i)); @@ -191,7 +191,7 @@ DECLARE_SHAPE_FN(concat) { // for(int index : shapesToDelete) // RELEASE(arrShapes[index], block.getWorkspace()); - auto result = ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(outShapeInfo)); + auto result = ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(outShapeInfo)); RELEASE(outShapeInfo, block.getWorkspace()); return SHAPELIST(result); } @@ -244,7 +244,7 @@ DECLARE_SHAPE_FN(concat) { // if (_dimension < 0) // _dimension += first->rankOf(); - // if (sd::Environment::getInstance()->isDebugAndVerbose()) { + // if (sd::Environment::getInstance().isDebugAndVerbose()) { // printf("Shape %i: ", 0); // shape::printShapeInfoLinear((Nd4jLong *) shapes[0]); // } @@ -262,12 +262,12 @@ DECLARE_SHAPE_FN(concat) { // oldScalars &= array->rankOf() == 2 && array->isScalar(); - // if (sd::Environment::getInstance()->isDebugAndVerbose()) { + // if (sd::Environment::getInstance().isDebugAndVerbose()) { // printf("Shape %i: ", e); // shape::printShapeInfoLinear(array->shapeInfo()); // } // } - // if (sd::Environment::getInstance()->isDebugAndVerbose()) + // if (sd::Environment::getInstance().isDebugAndVerbose()) // fflush(stdout); // if (oldScalars) { @@ -279,7 +279,7 @@ DECLARE_SHAPE_FN(concat) { // STORE_RESULT(*output); - // 
if (sd::Environment::getInstance()->isDebugAndVerbose()) + // if (sd::Environment::getInstance().isDebugAndVerbose()) // output->printShapeInfo("Concat result shape"); // delete[] buffers; @@ -428,7 +428,7 @@ DECLARE_SHAPE_FN(concat_bp) { for (int e = 0; e < numOfInArrs - 1; e++) { auto inShape = inputShape->at(e); - shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(ArrayOptions::dataType(inShape), shape::order(inShape), shape::shapeOf(inShape), shape::rank(inShape)))); + shapeList->push_back(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(ArrayOptions::dataType(inShape), shape::order(inShape), shape::shapeOf(inShape), shape::rank(inShape)))); } return shapeList; diff --git a/libnd4j/include/ops/declarable/generic/transforms/depth_to_space.cpp b/libnd4j/include/ops/declarable/generic/transforms/depth_to_space.cpp index dcf827eb1..cb966472f 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/depth_to_space.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/depth_to_space.cpp @@ -82,7 +82,7 @@ namespace ops { else shape = {{bS, oD, oH, oW }}; - auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(in), 'c', 4, shape.data()); + auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(in), 'c', 4, shape.data()); return SHAPELIST(newShape); } } diff --git a/libnd4j/include/ops/declarable/generic/transforms/dynamic_stitch.cpp b/libnd4j/include/ops/declarable/generic/transforms/dynamic_stitch.cpp index ecf0e5324..d3c419b55 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/dynamic_stitch.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/dynamic_stitch.cpp @@ -79,7 +79,7 @@ namespace ops { for(int i = 1; i < outRank; ++i) outShape[i] = shape::sizeAt(restShape, i); - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(ArrayOptions::dataType(restShape), shape::order(firstShape), outShape))); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(ArrayOptions::dataType(restShape), shape::order(firstShape), outShape))); } } } diff --git a/libnd4j/include/ops/declarable/generic/transforms/gather.cpp b/libnd4j/include/ops/declarable/generic/transforms/gather.cpp index 79ce8ad29..a979c5abd 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/gather.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/gather.cpp @@ -161,7 +161,7 @@ DECLARE_SHAPE_FN(gather) { ArrayOptions::setPropertyBit(outputShapeInfo, ARRAY_EMPTY); } - auto result = ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(outputShapeInfo)); + auto result = ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(outputShapeInfo)); RELEASE(outputShapeInfo, block.getWorkspace()); return SHAPELIST(result); diff --git a/libnd4j/include/ops/declarable/generic/transforms/hashcode.cpp b/libnd4j/include/ops/declarable/generic/transforms/hashcode.cpp index 4196385c1..0ef9d71ce 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/hashcode.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/hashcode.cpp @@ -41,7 +41,7 @@ namespace sd { }; DECLARE_SHAPE_FN(hashcode) { - return SHAPELIST(ConstantShapeHelper::getInstance()->scalarShapeInfo(sd::DataType::INT64)); + return SHAPELIST(ConstantShapeHelper::getInstance().scalarShapeInfo(sd::DataType::INT64)); } diff --git a/libnd4j/include/ops/declarable/generic/transforms/histogram.cpp 
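
The concat_bp shape hunk above builds one output descriptor per forward input, copying that input's data type, ordering, and shape verbatim (the final input is the incoming gradient and gets no descriptor). A sketch of that contract with stand-in types, not the real ShapeDescriptor API:

    #include <cstdint>
    #include <vector>

    struct ShapeDescLike {            // stand-in for ShapeDescriptor
        int dtype;
        char order;
        std::vector<int64_t> shape;
    };

    // inputs[0..n-2] are the forward inputs, the last entry is grad-out;
    // each gradient output mirrors its forward input exactly
    std::vector<ShapeDescLike> concatBpShapes(const std::vector<ShapeDescLike>& in) {
        std::vector<ShapeDescLike> out;
        for (size_t e = 0; e + 1 < in.size(); ++e)
            out.push_back(in[e]);
        return out;
    }
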
b/libnd4j/include/ops/declarable/generic/transforms/histogram.cpp index 415361894..e08fcdbf5 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/histogram.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/histogram.cpp @@ -43,7 +43,7 @@ namespace sd { DECLARE_SHAPE_FN(histogram) { auto numBins = INT_ARG(0); - return SHAPELIST(ConstantShapeHelper::getInstance()->vectorShapeInfo(numBins, sd::DataType::INT64)); + return SHAPELIST(ConstantShapeHelper::getInstance().vectorShapeInfo(numBins, sd::DataType::INT64)); } diff --git a/libnd4j/include/ops/declarable/generic/transforms/histogram_fixed_width.cpp b/libnd4j/include/ops/declarable/generic/transforms/histogram_fixed_width.cpp index 36175fc01..208baa5a9 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/histogram_fixed_width.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/histogram_fixed_width.cpp @@ -57,7 +57,7 @@ DECLARE_TYPES(histogram_fixed_width) { DECLARE_SHAPE_FN(histogram_fixed_width) { const int nbins = block.width() == 3 ? INPUT_VARIABLE(2)->e(0) : block.getIArguments()->empty() ? 100 : INT_ARG(0); - auto outShapeInfo = ConstantShapeHelper::getInstance()->vectorShapeInfo(nbins, DataType::INT64); + auto outShapeInfo = ConstantShapeHelper::getInstance().vectorShapeInfo(nbins, DataType::INT64); return SHAPELIST(outShapeInfo); } diff --git a/libnd4j/include/ops/declarable/generic/transforms/merge_add.cpp b/libnd4j/include/ops/declarable/generic/transforms/merge_add.cpp index 64858001a..0fade28bf 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/merge_add.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/merge_add.cpp @@ -86,7 +86,7 @@ DECLARE_SYN(accumulate_n, mergeadd); for (int e = 0; e < numOfInArrs; e++) { auto inShape = inputShape->at(e); - shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(ArrayOptions::dataType(inShape), shape::order(inShape), shape::shapeOf(inShape), shape::rank(inShape)))); + shapeList->push_back(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(ArrayOptions::dataType(inShape), shape::order(inShape), shape::shapeOf(inShape), shape::rank(inShape)))); } return shapeList; diff --git a/libnd4j/include/ops/declarable/generic/transforms/merge_avg.cpp b/libnd4j/include/ops/declarable/generic/transforms/merge_avg.cpp index 83a448170..2ea0d501b 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/merge_avg.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/merge_avg.cpp @@ -80,7 +80,7 @@ OP_IMPL(mergeavg, -1, 1, false) { for (int e = 0; e < numOfInArrs; e++) { auto inShape = inputShape->at(e); - shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(ArrayOptions::dataType(inShape), shape::order(inShape), shape::shapeOf(inShape), shape::rank(inShape)))); + shapeList->push_back(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(ArrayOptions::dataType(inShape), shape::order(inShape), shape::shapeOf(inShape), shape::rank(inShape)))); } return shapeList; diff --git a/libnd4j/include/ops/declarable/generic/transforms/merge_max.cpp b/libnd4j/include/ops/declarable/generic/transforms/merge_max.cpp index 49ab78f7c..e95092f38 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/merge_max.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/merge_max.cpp @@ -85,7 +85,7 @@ DECLARE_SYN(MergeMax, mergemax); for (int e = 0; e < numOfInArrs; e++) { auto inShape = inputShape->at(e); - 
shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(ArrayOptions::dataType(inShape), shape::order(inShape), shape::shapeOf(inShape), shape::rank(inShape)))); + shapeList->push_back(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(ArrayOptions::dataType(inShape), shape::order(inShape), shape::shapeOf(inShape), shape::rank(inShape)))); } return shapeList; diff --git a/libnd4j/include/ops/declarable/generic/transforms/mirrorPad.cpp b/libnd4j/include/ops/declarable/generic/transforms/mirrorPad.cpp index 143e57a80..403272530 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/mirrorPad.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/mirrorPad.cpp @@ -81,7 +81,7 @@ DECLARE_SHAPE_FN(mirror_pad) { if(rank == 1) { Nd4jLong len = input->lengthOf() + paddings->e(0) + paddings->e(1); - return SHAPELIST(ConstantShapeHelper::getInstance()->vectorShapeInfo(len, input->dataType())); + return SHAPELIST(ConstantShapeHelper::getInstance().vectorShapeInfo(len, input->dataType())); } Nd4jLong* outShapeInfo(nullptr); diff --git a/libnd4j/include/ops/declarable/generic/transforms/pad.cpp b/libnd4j/include/ops/declarable/generic/transforms/pad.cpp index d5d38aaeb..d09063a95 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/pad.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/pad.cpp @@ -104,7 +104,7 @@ DECLARE_SHAPE_FN(pad) { ShapeUtils::updateStridesAndType(outShapeInfo, inputShapeInfo, shape::order(inputShapeInfo)); ShapeDescriptor descriptor(outShapeInfo); RELEASE(outShapeInfo, block.getWorkspace()); - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(descriptor)); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(descriptor)); } diff --git a/libnd4j/include/ops/declarable/generic/transforms/repeat.cpp b/libnd4j/include/ops/declarable/generic/transforms/repeat.cpp index 99ab3d635..b02f7010c 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/repeat.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/repeat.cpp @@ -66,7 +66,7 @@ DECLARE_SHAPE_FN(repeat) { auto outShape = ShapeUtils::evalRepeatShape(axis, repeats, *input); - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(input->dataType(), input->ordering(), outShape))); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(input->dataType(), input->ordering(), outShape))); } } diff --git a/libnd4j/include/ops/declarable/generic/transforms/slice.cpp b/libnd4j/include/ops/declarable/generic/transforms/slice.cpp index 96e7fe6b3..822f48681 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/slice.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/slice.cpp @@ -87,13 +87,13 @@ namespace sd { shape::calcSubArrShapeInfoAndOffset(indices.data(), input->shapeInfo(), subArrShapeInfo, offset, true); - auto subArrShapeInfoPack = ConstantShapeHelper::getInstance()->bufferForShapeInfo(subArrShapeInfo); + auto subArrShapeInfoPack = ConstantShapeHelper::getInstance().bufferForShapeInfo(subArrShapeInfo); NDArray::prepareSpecialUse({output}, {input}); NativeOpExecutioner::execTransformAny(block.launchContext(), sd::transform::Assign, - input->bufferWithOffset(offset), reinterpret_cast(subArrShapeInfoPack.primary()), - input->specialBufferWithOffset(offset), reinterpret_cast(subArrShapeInfoPack.special()), + input->bufferWithOffset(offset), subArrShapeInfoPack.primary(), + input->specialBufferWithOffset(offset), 
subArrShapeInfoPack.special(), output->buffer(), output->shapeInfo(), output->specialBuffer(), output->specialShapeInfo(), nullptr, nullptr, nullptr, true); @@ -160,7 +160,7 @@ namespace sd { shape.emplace_back(size); } - auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inShape), 'c', shape); + auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inShape), 'c', shape); return SHAPELIST(newShape); } diff --git a/libnd4j/include/ops/declarable/generic/transforms/space_to_batch.cpp b/libnd4j/include/ops/declarable/generic/transforms/space_to_batch.cpp index 9a1683818..ffffb5396 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/space_to_batch.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/space_to_batch.cpp @@ -91,7 +91,7 @@ DECLARE_SHAPE_FN(space_to_batch) { REQUIRE_TRUE((inputShapeInfo[2] + padBottom + padTop) % blockSize == 0 && (inputShapeInfo[3] + padLeft + padRight) % blockSize == 0, 0, "SpaceToBatch: after padding, second and third dimensions of input array must be divisible by blockSize !"); - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inputShapeInfo), 'c', {inputShapeInfo[1] * blockSize * blockSize, (inputShapeInfo[2] + padBottom + padTop) / blockSize, (inputShapeInfo[3] + padLeft + padRight) / blockSize, inputShapeInfo[4]})); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inputShapeInfo), 'c', {inputShapeInfo[1] * blockSize * blockSize, (inputShapeInfo[2] + padBottom + padTop) / blockSize, (inputShapeInfo[3] + padLeft + padRight) / blockSize, inputShapeInfo[4]})); } } diff --git a/libnd4j/include/ops/declarable/generic/transforms/space_to_batch_nd.cpp b/libnd4j/include/ops/declarable/generic/transforms/space_to_batch_nd.cpp index 0b8c4152d..5adc35ee6 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/space_to_batch_nd.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/space_to_batch_nd.cpp @@ -96,7 +96,7 @@ DECLARE_SHAPE_FN(space_to_batch_nd) { for (uint i = 0; i < numOfSpatialDims; ++i) outShape[i + 1] = (outShape[i + 1] + INPUT_VARIABLE(2)->e(i,0) + INPUT_VARIABLE(2)->e(i,1)) / INPUT_VARIABLE(1)->e(i); - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inputShapeInfo), 'c', outShape)); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inputShapeInfo), 'c', outShape)); } } diff --git a/libnd4j/include/ops/declarable/generic/transforms/space_to_depth.cpp b/libnd4j/include/ops/declarable/generic/transforms/space_to_depth.cpp index b831dce2f..7e108028a 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/space_to_depth.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/space_to_depth.cpp @@ -80,7 +80,7 @@ namespace ops { else shape = {{bS, oD, oH, oW }}; - auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(in), 'c', 4, shape.data()); + auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(in), 'c', 4, shape.data()); return SHAPELIST(newShape); } } diff --git a/libnd4j/include/ops/declarable/generic/transforms/split.cpp b/libnd4j/include/ops/declarable/generic/transforms/split.cpp index 462f2c77e..3fb925dfc 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/split.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/split.cpp @@ -115,7 +115,7 @@ namespace ops { //Edge case: 
splitting empty array (mainly for TF import compatibility) -> return N empty arrays // if(INPUT_VARIABLE(inputVar)->isEmpty()){ // for (int e = 0; e < num_splits; e++) { - // auto empty = ConstantShapeHelper::getInstance()->emptyShapeInfo(dataType); + // auto empty = ConstantShapeHelper::getInstance().emptyShapeInfo(dataType); // shapes->push_back(empty); // } // return shapes; @@ -136,7 +136,7 @@ namespace ops { shape[e] = shape::sizeAt(input, e); for (int e = 0; e < num_splits; e++) { - auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(dataType, shape::order(input), shape); + auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(dataType, shape::order(input), shape); shapes->push_back(newShape); } diff --git a/libnd4j/include/ops/declarable/generic/transforms/split_v.cpp b/libnd4j/include/ops/declarable/generic/transforms/split_v.cpp index 0bda3a6be..decda2e2d 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/split_v.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/split_v.cpp @@ -116,7 +116,7 @@ namespace ops { shape[d] = c_size; } - auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(input), shape::order(input), shape); + auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(input), shape::order(input), shape); shapeList->push_back(newShape); } diff --git a/libnd4j/include/ops/declarable/generic/transforms/stack.cpp b/libnd4j/include/ops/declarable/generic/transforms/stack.cpp index 65cd41a3a..af03d5ef1 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/stack.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/stack.cpp @@ -82,16 +82,16 @@ DECLARE_SHAPE_FN(stack) { case 0: { // we're going to return rank 1 here if (block.width() == 1) { - return SHAPELIST(ConstantShapeHelper::getInstance()->vectorShapeInfo(0, ArrayOptions::dataType(inShapeInfo))); + return SHAPELIST(ConstantShapeHelper::getInstance().vectorShapeInfo(0, ArrayOptions::dataType(inShapeInfo))); } else { - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inShapeInfo), 'c', {(Nd4jLong) block.width(), 0})); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inShapeInfo), 'c', {(Nd4jLong) block.width(), 0})); } } } } if(rank == 0) { - return SHAPELIST(ConstantShapeHelper::getInstance()->vectorShapeInfo(block.width(), ArrayOptions::dataType(inShapeInfo))); + return SHAPELIST(ConstantShapeHelper::getInstance().vectorShapeInfo(block.width(), ArrayOptions::dataType(inShapeInfo))); } //the rank of output ShapeInfo is larger by one compared to input ShapeInfo @@ -99,7 +99,7 @@ DECLARE_SHAPE_FN(stack) { // insert (int) block.width() at dim position of input shape to get output shape outShape.insert(outShape.begin() + Nd4jLong(dim), (Nd4jLong) block.width()); - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(ArrayOptions::dataType(inShapeInfo), shape::order(inShapeInfo), outShape))); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(ArrayOptions::dataType(inShapeInfo), shape::order(inShapeInfo), outShape))); } diff --git a/libnd4j/include/ops/declarable/generic/transforms/tear.cpp b/libnd4j/include/ops/declarable/generic/transforms/tear.cpp index 61850ab0e..b2292e2b9 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/tear.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/tear.cpp @@ -57,12 +57,12 @@ namespace 
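
// A note on the split shape function above: every one of the num_splits
// outputs is created with the same data type, the input's ordering, and a
// shape copied from the input; the split axis itself is presumably reduced to
// sizeAt(input, axis) / num_splits in the part of the function not shown in
// this hunk. split_v (next hunk) differs only in taking explicit per-output
// sizes instead of an even division.
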
sd { if (dims.size() > 1) std::sort(dims.begin(), dims.end()); - auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(inShape, dims); + auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(inShape, dims); auto numTads = tadPack.numberOfTads(); auto result = SHAPELIST(); for (Nd4jLong e = 0; e < numTads; e++) { - auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(block.dataType(), shape::order(inShape), shape::rank(tadPack.primaryShapeInfo()), shape::shapeOf(tadPack.primaryShapeInfo())); + auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(block.dataType(), shape::order(inShape), shape::rank(tadPack.primaryShapeInfo()), shape::shapeOf(tadPack.primaryShapeInfo())); result->push_back(newShape); } diff --git a/libnd4j/include/ops/declarable/generic/transforms/tile.cpp b/libnd4j/include/ops/declarable/generic/transforms/tile.cpp index 4dc259bba..e8a502e74 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/tile.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/tile.cpp @@ -93,7 +93,7 @@ DECLARE_SHAPE_FN(tile) { for (int e = 0; e < shape::rank(inShape); e++) shape[e] = shape::sizeAt(inShape, e) * reps[e]; - auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inShape), shape::order(inShape), shape); + auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inShape), shape::order(inShape), shape); return SHAPELIST(newShape); } diff --git a/libnd4j/include/ops/declarable/generic/transforms/unstack.cpp b/libnd4j/include/ops/declarable/generic/transforms/unstack.cpp index beebcad86..0dfe1e54c 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/unstack.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/unstack.cpp @@ -77,7 +77,7 @@ DECLARE_SHAPE_FN(unstack) { auto result = SHAPELIST(); for(uint i = 0; i < numTads; ++i) - result->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inShapeInfo), shape::order(inShapeInfo), outShape)); + result->push_back(ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inShapeInfo), shape::order(inShapeInfo), outShape)); return result; } @@ -88,7 +88,7 @@ DECLARE_SHAPE_FN(unstack) { auto result = SHAPELIST(); for (Nd4jLong e = 0; e < shape::length(inShapeInfo); e++) - result->push_back(ConstantShapeHelper::getInstance()->scalarShapeInfo(ArrayOptions::dataType(inShapeInfo))); + result->push_back(ConstantShapeHelper::getInstance().scalarShapeInfo(ArrayOptions::dataType(inShapeInfo))); return result; } @@ -110,7 +110,7 @@ DECLARE_SHAPE_FN(unstack) { auto result = SHAPELIST(); for (int e = 0; e < shape::shapeOf(inShapeInfo)[dim]; e++) { - auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inShapeInfo), shape::order(inShapeInfo), subArrShape); + auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inShapeInfo), shape::order(inShapeInfo), subArrShape); result->push_back(newShape); } return result; diff --git a/libnd4j/include/ops/declarable/generic/util/print_affinity.cpp b/libnd4j/include/ops/declarable/generic/util/print_affinity.cpp index 5518588e4..f7a758af6 100644 --- a/libnd4j/include/ops/declarable/generic/util/print_affinity.cpp +++ b/libnd4j/include/ops/declarable/generic/util/print_affinity.cpp @@ -44,7 +44,7 @@ namespace sd { } DECLARE_SHAPE_FN(print_affinity) { - return SHAPELIST(ConstantShapeHelper::getInstance()->scalarShapeInfo(DataType::INT32)); + 
return SHAPELIST(ConstantShapeHelper::getInstance().scalarShapeInfo(DataType::INT32)); } } } diff --git a/libnd4j/include/ops/declarable/generic/util/print_variable.cpp b/libnd4j/include/ops/declarable/generic/util/print_variable.cpp index 9d3369627..74ff99fd2 100644 --- a/libnd4j/include/ops/declarable/generic/util/print_variable.cpp +++ b/libnd4j/include/ops/declarable/generic/util/print_variable.cpp @@ -42,7 +42,7 @@ namespace sd { if (block.numB() > 0) printSpecial = B_ARG(0); - if (printSpecial && !sd::Environment::getInstance()->isCPU()) { + if (printSpecial && !sd::Environment::getInstance().isCPU()) { // only specific backends support special printout. for cpu-based backends it's the same as regular print if (block.width() == 2) @@ -69,7 +69,7 @@ namespace sd { } DECLARE_SHAPE_FN(print_variable) { - return SHAPELIST(ConstantShapeHelper::getInstance()->scalarShapeInfo(DataType::INT32)); + return SHAPELIST(ConstantShapeHelper::getInstance().scalarShapeInfo(DataType::INT32)); } } } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/addBias.cpp b/libnd4j/include/ops/declarable/helpers/cpu/addBias.cpp index a03b4504f..aa86ea041 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/addBias.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/addBias.cpp @@ -482,7 +482,7 @@ namespace sd { if (isContinuous) { //we can choose other inc and index for that case //but for now lets choose all till the last one - uint32_t req_numThreads = sd::Environment::getInstance()->maxMasterThreads(); + uint32_t req_numThreads = sd::Environment::getInstance().maxMasterThreads(); isContinuous = false; if (rank > 2) { if (req_numThreads < 2 || bases[rank - 1] >= req_numThreads) { @@ -582,7 +582,7 @@ namespace sd { if (order == 'c' && isContinuous) { //sometimes last dimension is too big and multithreading could suffer using unfair partitioning //so we will do it only when inc is smaller our value or multithreading turned off - uint32_t req_numThreads = sd::Environment::getInstance()->maxMasterThreads(); + uint32_t req_numThreads = sd::Environment::getInstance().maxMasterThreads(); if (req_numThreads < 2 || numNC >= req_numThreads || inc <= 2 * 8196 || rank == 3) { inc = numHW; } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/adjust_hue.cpp b/libnd4j/include/ops/declarable/helpers/cpu/adjust_hue.cpp index 20d91ee8b..3f37666e7 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/adjust_hue.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/adjust_hue.cpp @@ -59,8 +59,8 @@ static void adjustHue_(const NDArray *input, const NDArray* deltaScalarArr, NDAr } else { - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimC); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimC); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimC); + auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimC); const Nd4jLong numOfTads = packX.numberOfTads(); const Nd4jLong xDimCstride = input->stridesOf()[dimC]; diff --git a/libnd4j/include/ops/declarable/helpers/cpu/adjust_saturation.cpp b/libnd4j/include/ops/declarable/helpers/cpu/adjust_saturation.cpp index 6610b69ac..63f26c90f 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/adjust_saturation.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/adjust_saturation.cpp @@ -58,8 +58,8 @@ static void adjustSaturation_(const NDArray *input, const NDArray* factorScalarA 
diff --git a/libnd4j/include/ops/declarable/helpers/cpu/adjust_saturation.cpp b/libnd4j/include/ops/declarable/helpers/cpu/adjust_saturation.cpp
index 6610b69ac..63f26c90f 100644
--- a/libnd4j/include/ops/declarable/helpers/cpu/adjust_saturation.cpp
+++ b/libnd4j/include/ops/declarable/helpers/cpu/adjust_saturation.cpp
@@ -58,8 +58,8 @@ static void adjustSaturation_(const NDArray *input, const NDArray* factorScalarA
         samediff::Threads::parallel_for(func, 0, input->lengthOf(), 3);
     }
     else {
-        auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimC);
-        auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimC);
+        auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimC);
+        auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimC);

         const Nd4jLong numOfTads = packX.numberOfTads();
         const Nd4jLong xDimCstride = input->stridesOf()[dimC];

diff --git a/libnd4j/include/ops/declarable/helpers/cpu/batched_gemm.cpp b/libnd4j/include/ops/declarable/helpers/cpu/batched_gemm.cpp
index ec8f040a9..0c9338a8e 100644
--- a/libnd4j/include/ops/declarable/helpers/cpu/batched_gemm.cpp
+++ b/libnd4j/include/ops/declarable/helpers/cpu/batched_gemm.cpp
@@ -35,7 +35,7 @@ void bgemm_(const std::vector<NDArray*>& vA, const std::vector<NDArray*>& vB, st
     int batchSize = vA.size();

-    if (BlasHelper::getInstance()->hasBatchedGEMM()) {
+    if (BlasHelper::getInstance().hasBatchedGEMM()) {
         auto arr = vA.at(0);
         CBLAS_TRANSPOSE *tA, *tB;
         int *tM, *tN, *tK, *tldA, *tldB, *tldC, *tsize;
@@ -72,9 +72,9 @@
         }

         if (std::is_same<T, double>::value) {
-            BlasHelper::getInstance()->dgemmBatched()(CblasColMajor, tA, tB, tM, tN, tK, (double *) alphas->buffer(), (double **) buffersA.data(), tldA, (double **) buffersB.data(), tldB, (double *) betas->buffer(),(double **) buffersC.data(), tldC, vA.size(), tsize);
+            BlasHelper::getInstance().dgemmBatched()(CblasColMajor, tA, tB, tM, tN, tK, (double *) alphas->buffer(), (double **) buffersA.data(), tldA, (double **) buffersB.data(), tldB, (double *) betas->buffer(),(double **) buffersC.data(), tldC, vA.size(), tsize);
         } else if (std::is_same<T, float>::value) {
-            BlasHelper::getInstance()->sgemmBatched()(CblasColMajor, tA, tB, tM, tN, tK, (float *) alphas->buffer(), (float **) buffersA.data(), tldA, (float **) buffersB.data(), tldB, (float *) betas->buffer(), (float **) buffersC.data(), tldC, vA.size(), tsize);
+            BlasHelper::getInstance().sgemmBatched()(CblasColMajor, tA, tB, tM, tN, tK, (float *) alphas->buffer(), (float **) buffersA.data(), tldA, (float **) buffersB.data(), tldB, (float *) betas->buffer(), (float **) buffersC.data(), tldC, vA.size(), tsize);
         }

         // release temporary arrays
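NOTE (editor): the bgemm_ hunk above dispatches to a double or float BLAS backend with
std::is_same on the template parameter (the extraction had stripped the template
arguments; they are restored above as std::is_same<T, double> / <T, float>). A minimal,
self-contained sketch of the same pattern; gemmF/gemmD are hypothetical stand-ins for the
sgemm/dgemm function pointers:

    #include <cstdio>
    #include <type_traits>

    static void gemmF(const float*)  { std::printf("float gemm\n"); }
    static void gemmD(const double*) { std::printf("double gemm\n"); }

    template <typename T>
    void bgemmSketch(const T* a) {
        if (std::is_same<T, double>::value)
            gemmD(reinterpret_cast<const double*>(a));   // double backend
        else if (std::is_same<T, float>::value)
            gemmF(reinterpret_cast<const float*>(a));    // float backend
    }

    int main() {
        float x = 1.f; double y = 2.0;
        bgemmSketch(&x);   // prints "float gemm"
        bgemmSketch(&y);   // prints "double gemm"
    }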
diff --git a/libnd4j/include/ops/declarable/helpers/cpu/dynamic.cpp b/libnd4j/include/ops/declarable/helpers/cpu/dynamic.cpp
index 2b6b4cd02..89cf680d4 100644
--- a/libnd4j/include/ops/declarable/helpers/cpu/dynamic.cpp
+++ b/libnd4j/include/ops/declarable/helpers/cpu/dynamic.cpp
@@ -38,7 +38,7 @@ namespace sd {
             unsigned int outSize = outputList.size();

-            //PRAGMA_OMP_PARALLEL_FOR_IF(outSize > Environment::getInstance()->tadThreshold())
+            //PRAGMA_OMP_PARALLEL_FOR_IF(outSize > Environment::getInstance().tadThreshold())
             for (unsigned int i = 0; i < outSize; i++) {
                 outputs[i].first = outputList[i];
                 std::vector<int> outDims(outputs[i].first->rankOf() - 1);
@@ -52,7 +52,7 @@ namespace sd {
                 outputs[i].second = 0;

-                //PRAGMA_OMP_PARALLEL_FOR_IF(indices->lengthOf() > Environment::getInstance()->elementwiseThreshold())
+                //PRAGMA_OMP_PARALLEL_FOR_IF(indices->lengthOf() > Environment::getInstance().elementwiseThreshold())
                 for (Nd4jLong e = 0; e < indices->lengthOf(); ++e)
                     if ((*indices).e(e) == i)
                         listOutForCurrent.at(outputs[i].second++)->assign(listOfTensors.at(e));

diff --git a/libnd4j/include/ops/declarable/helpers/cpu/gather.cpp b/libnd4j/include/ops/declarable/helpers/cpu/gather.cpp
index 1deb12752..c28101558 100644
--- a/libnd4j/include/ops/declarable/helpers/cpu/gather.cpp
+++ b/libnd4j/include/ops/declarable/helpers/cpu/gather.cpp
@@ -78,8 +78,8 @@ void gather(sd::LaunchContext * context, const NDArray* input, const NDArray* in
     const Nd4jLong numOfSubArrs = indices->lengthOf();

-    auto inTadPack = ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimsIn);
-    auto outTadPack = ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimsOut);
+    auto inTadPack = ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimsIn);
+    auto outTadPack = ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimsOut);

     auto inTadShapeInfo = inTadPack.primaryShapeInfo();
     auto outTadShapeInfo = outTadPack.primaryShapeInfo();
@@ -105,8 +105,8 @@ void gather(sd::LaunchContext * context, const NDArray* input, const NDArray* in
             auto outBuff = output->bufferWithOffset(outTadPack.primaryOffsets()[i]);

             NativeOpExecutioner::execTransformAny(input->getContext(), transform::Assign,
-                inBuff, inTadShapeInfo, nullptr/*input specialBuffer*/, nullptr/*input specialShapeInfo*/,
-                outBuff, outTadShapeInfo, nullptr/*output specialBuffer*/, nullptr/*output specialShapeInfo*/,
+                inBuff, inTadShapeInfo, nullptr/*input specialBuffer*/, nullptr/*input specialShapeInfo*/,
+                outBuff, outTadShapeInfo, nullptr/*output specialBuffer*/, nullptr/*output specialShapeInfo*/,
                 nullptr, nullptr, nullptr, false/*allowParallelism*/);
         }
     };
@@ -129,8 +129,8 @@ void gather(sd::LaunchContext * context, const NDArray* input, const NDArray* in
             std::vector<int> dims = ShapeUtils::evalDimsToExclude(input->rankOf(), {axis});

-            auto inTadPack = ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dims);
-            auto outTadPack = ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dims);
+            auto inTadPack = ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dims);
+            auto outTadPack = ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dims);

             auto inTadShapeInfo = inTadPack.primaryShapeInfo();
             auto outTadShapeInfo = outTadPack.primaryShapeInfo();
@@ -158,8 +158,8 @@ void gather(sd::LaunchContext * context, const NDArray* input, const NDArray* in
                     auto outBuff = output->bufferWithOffset(outTadPack.primaryOffsets()[i]);

                     NativeOpExecutioner::execTransformAny(input->getContext(), transform::Assign,
-                        inBuff, inTadShapeInfo, nullptr/*input specialBuffer*/, nullptr/*input specialShapeInfo*/,
-                        outBuff, outTadShapeInfo, nullptr/*output specialBuffer*/, nullptr/*output specialShapeInfo*/,
+                        inBuff, inTadShapeInfo, nullptr/*input specialBuffer*/, nullptr/*input specialShapeInfo*/,
+                        outBuff, outTadShapeInfo, nullptr/*output specialBuffer*/, nullptr/*output specialShapeInfo*/,
                         nullptr, nullptr, nullptr, false/*allowParallelism*/);
                 }
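NOTE (editor): the gather helper above works on TADs (tensors along dimensions): the TAD
pack enumerates the offsets of every sub-array along the excluded axis, and gather copies
input TAD indices[i] into output TAD i. A toy row-major, rank-2 version of that idea;
gatherRows is hypothetical and the real code delegates the copy to execTransformAny:

    #include <cstring>
    #include <vector>

    void gatherRows(const float* in, float* out, int cols,
                    const std::vector<int>& indices) {
        for (std::size_t i = 0; i < indices.size(); ++i) {
            const float* src = in + static_cast<std::size_t>(indices[i]) * cols; // input TAD offset
            float* dst = out + i * static_cast<std::size_t>(cols);               // output TAD offset
            std::memcpy(dst, src, cols * sizeof(float));
        }
    }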
diff --git a/libnd4j/include/ops/declarable/helpers/cpu/gatherTransforms.cpp b/libnd4j/include/ops/declarable/helpers/cpu/gatherTransforms.cpp
index db62c4b4f..e6f1a3896 100644
--- a/libnd4j/include/ops/declarable/helpers/cpu/gatherTransforms.cpp
+++ b/libnd4j/include/ops/declarable/helpers/cpu/gatherTransforms.cpp
@@ -116,7 +116,7 @@ static void gather_(NDArray* input, const NDArray* indices, NDArray* output, con
             output->assign(scalarNDArray);
         } else {
             auto dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {axis});
-            auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions);
+            auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimensions);

             auto tadArr = NDArray(reinterpret_cast(reinterpret_cast(input->buffer()) + tadPack.primaryOffsets()[indices->e(0)]), tadPack.primaryShapeInfo(), output->getContext());
             output->assign(&tadArr);

diff --git a/libnd4j/include/ops/declarable/helpers/cpu/imagesHelpers.cpp b/libnd4j/include/ops/declarable/helpers/cpu/imagesHelpers.cpp
index 2183b7d5a..108804f38 100644
--- a/libnd4j/include/ops/declarable/helpers/cpu/imagesHelpers.cpp
+++ b/libnd4j/include/ops/declarable/helpers/cpu/imagesHelpers.cpp
@@ -91,8 +91,8 @@ FORCEINLINE static void rgbToFromYuv_(const NDArray& input, NDArray& output, con
         return;
     }

-    auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), dimC);
-    auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output.shapeInfo(), dimC);
+    auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input.shapeInfo(), dimC);
+    auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output.shapeInfo(), dimC);

     const Nd4jLong numOfTads = packX.numberOfTads();
     const Nd4jLong xDimCstride = input.stridesOf()[dimC];
@@ -149,8 +149,8 @@ FORCEINLINE static void tripleTransformer(const NDArray* input, NDArray* output,
         samediff::Threads::parallel_for(func, 0, input->lengthOf(), 3);
     }
     else {
-        auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimC);
-        auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimC);
+        auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimC);
+        auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimC);

         const Nd4jLong numOfTads = packX.numberOfTads();
         const Nd4jLong xDimCstride = input->stridesOf()[dimC];
@@ -199,8 +199,8 @@ FORCEINLINE static void tripleTransformer(const NDArray* input, NDArray* output,
         samediff::Threads::parallel_for(func, 0, input->lengthOf(), 3);
     }
     else {
-        auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimC);
-        auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimC);
+        auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimC);
+        auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimC);

         const Nd4jLong numOfTads = packX.numberOfTads();
         const Nd4jLong xDimCstride = input->stridesOf()[dimC];

diff --git a/libnd4j/include/ops/declarable/helpers/cpu/indexReductions.hpp b/libnd4j/include/ops/declarable/helpers/cpu/indexReductions.hpp
index 7d376e012..910e10314 100644
--- a/libnd4j/include/ops/declarable/helpers/cpu/indexReductions.hpp
+++ b/libnd4j/include/ops/declarable/helpers/cpu/indexReductions.hpp
@@ -389,7 +389,7 @@ namespace sd {
     {
         Nd4jLong inner_total;
         Nd4jLong inner_last = 0;
-        int maxThreads = sd::Environment::getInstance()->maxMasterThreads();
+        int maxThreads = sd::Environment::getInstance().maxMasterThreads();
         if (second_rank == 1) {
             inner_total = inner_bases[0];
             if (inner_total < threadingThreshold) {
@@ -764,7 +764,7 @@ namespace sd {
         func(0, 0, total, 1);
#else
        //
-        uint32_t numThreads = sd::Environment::getInstance()->maxMasterThreads();
+        uint32_t numThreads = sd::Environment::getInstance().maxMasterThreads();
         Nd4jLong inner_total = getLength(inner_bases, second_rank);
         if (total * inner_total <= threadingThreshold) {
             numThreads = 1;
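NOTE (editor): the indexReductions.hpp hunks above show the new index-reduction kernels
clamping their thread count: start from the backend's maxMasterThreads() and drop to one
thread when total work is below a threshold. A small sketch of that decision, assuming
only the logic visible in the hunks (pickThreadCount is illustrative, not the real API):

    #include <algorithm>
    #include <cstdint>

    int pickThreadCount(std::int64_t total, std::int64_t innerTotal,
                        int maxThreads, std::int64_t threshold) {
        if (total * innerTotal <= threshold)
            return 1;                               // too small to amortize fork/join
        return static_cast<int>(std::max<std::int64_t>(1,
                   std::min<std::int64_t>(maxThreads, total)));
    }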
diff --git a/libnd4j/include/ops/declarable/helpers/cpu/ismax.cpp b/libnd4j/include/ops/declarable/helpers/cpu/ismax.cpp
index 687153f99..c2bcb8399 100644
--- a/libnd4j/include/ops/declarable/helpers/cpu/ismax.cpp
+++ b/libnd4j/include/ops/declarable/helpers/cpu/ismax.cpp
@@ -125,8 +125,8 @@ static void ismax_(const NDArray* input, NDArray* output, const std::vector
         //moving all dimensions (in sorted order)
         //to the back.
         //permuted version of the input shape info for setting up the tad problem
-        auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), const_cast<int*>(dimensions.data()), dimensionsLength);
-        auto tadPackZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), const_cast<int*>(dimensions.data()), dimensionsLength);
+        auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), const_cast<int*>(dimensions.data()), dimensionsLength);
+        auto tadPackZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), const_cast<int*>(dimensions.data()), dimensionsLength);

         auto tadShapeShapeInfo = tadPack.primaryShapeInfo();

diff --git a/libnd4j/include/ops/declarable/helpers/cpu/lrn.cpp b/libnd4j/include/ops/declarable/helpers/cpu/lrn.cpp
index 8dc31d8c0..b49f8e61c 100644
--- a/libnd4j/include/ops/declarable/helpers/cpu/lrn.cpp
+++ b/libnd4j/include/ops/declarable/helpers/cpu/lrn.cpp
@@ -35,13 +35,13 @@ static int lrnFunctor_(sd::graph::Context& block, NDArray* input, NDArray* outpu
     const int rank = input->rankOf();

-    TadPack inTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), {rank - 1});
+    TadPack inTadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), {rank - 1});
     TadPack outTadPack;

     if(shape::haveSameShapeAndStrides(input->shapeInfo(), output->shapeInfo()))
         outTadPack = inTadPack;
     else
-        outTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {rank - 1});
+        outTadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), {rank - 1});

     const Nd4jLong numOfTads = inTadPack.numberOfTads();
     const Nd4jLong tadLen = input->sizeAt(-1);
@@ -151,13 +151,13 @@ static void lrnBP_(const NDArray& input, const NDArray& gradO, NDArray& gradI, c
     const int rank = input.rankOf();

-    TadPack inTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), {rank - 1});
+    TadPack inTadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(input.shapeInfo(), {rank - 1});
     TadPack gradITadPack;

     if(shape::haveSameShapeAndStrides(input.shapeInfo(), gradI.shapeInfo()))
         gradITadPack = inTadPack;
     else
-        gradITadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradI.shapeInfo(), {rank - 1});
+        gradITadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(gradI.shapeInfo(), {rank - 1});

     const Nd4jLong numOfTads = inTadPack.numberOfTads();
     const Nd4jLong tadLen = input.sizeAt(-1);

diff --git a/libnd4j/include/ops/declarable/helpers/cpu/lup.cpp b/libnd4j/include/ops/declarable/helpers/cpu/lup.cpp
index 482709455..8f45c696b 100644
--- a/libnd4j/include/ops/declarable/helpers/cpu/lup.cpp
+++ b/libnd4j/include/ops/declarable/helpers/cpu/lup.cpp
@@ -111,7 +111,7 @@ namespace helpers {
             invertedMatrix->r(i, i) /= inputMatrix->t(i, i);
         };

-        //PRAGMA_OMP_PARALLEL_FOR_IF(n > Environment::getInstance()->elementwiseThreshold())
+        //PRAGMA_OMP_PARALLEL_FOR_IF(n > Environment::getInstance().elementwiseThreshold())
         auto invertUpDiagonals = PRAGMA_THREADS_FOR {
             for (auto i = start; i < stop; i += increment)
                invertedMatrix->r(i, i + 1) -= (inputMatrix->t(i, i + 1) * invertedMatrix->t(i + 1, i + 1) /

diff --git a/libnd4j/include/ops/declarable/helpers/cpu/nth_element.cpp b/libnd4j/include/ops/declarable/helpers/cpu/nth_element.cpp
index 53565f3c1..b9225e40d 100644
--- a/libnd4j/include/ops/declarable/helpers/cpu/nth_element.cpp
+++ b/libnd4j/include/ops/declarable/helpers/cpu/nth_element.cpp
@@ -47,7 +47,7 @@ namespace helpers {
         else { // rank greater than 1
             std::vector<int> lastDims({input->rankOf() - 1});// = ShapeUtils::evalDimsToExclude(input->rankOf(), {input->rankOf() - 1});

-            auto pack = sd::ConstantTadHelper::getInstance()->tadForDimensions(sortedVals.shapeInfo(), lastDims);
+            auto pack = sd::ConstantTadHelper::getInstance().tadForDimensions(sortedVals.shapeInfo(), lastDims);

             SpecialMethods<T>::sortTadGeneric(sortedVals.buffer(), sortedVals.shapeInfo(), lastDims.data(), lastDims.size(), pack.primaryShapeInfo(), pack.primaryOffsets(), reverse);

diff --git a/libnd4j/include/ops/declarable/helpers/cpu/one_hot.cpp b/libnd4j/include/ops/declarable/helpers/cpu/one_hot.cpp
index 2aa14585b..41a265ca9 100644
--- a/libnd4j/include/ops/declarable/helpers/cpu/one_hot.cpp
+++ b/libnd4j/include/ops/declarable/helpers/cpu/one_hot.cpp
@@ -31,7 +31,7 @@ namespace sd {
             auto output = reinterpret_cast(voutput);
             auto indices = reinterpret_cast(vindices);

-            auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(zShapeInfo, {axis});
+            auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(zShapeInfo, {axis});

             auto iLen = static_cast(shape::length(iShapeInfo));
             auto tLen = static_cast(shape::length(tadPack.primaryShapeInfo()));

diff --git a/libnd4j/include/ops/declarable/helpers/cpu/randomShuffle.cpp b/libnd4j/include/ops/declarable/helpers/cpu/randomShuffle.cpp
index a7f40899a..ea529112d 100644
--- a/libnd4j/include/ops/declarable/helpers/cpu/randomShuffle.cpp
+++ b/libnd4j/include/ops/declarable/helpers/cpu/randomShuffle.cpp
@@ -46,7 +46,7 @@ void randomShuffle_(NDArray& input, NDArray& output, sd::graph::RandomGenerator&
     // apply Fisher-Yates shuffle
     if(isInplace) {
-        //PRAGMA_OMP_PARALLEL_FOR_IF((firstDim-1) > Environment::getInstance()->tadThreshold())
+        //PRAGMA_OMP_PARALLEL_FOR_IF((firstDim-1) > Environment::getInstance().tadThreshold())
         for(int i = firstDim-1; i > 0; --i) {
             int r = rng.relativeInt(i) % i;
             if(i == r)
@@ -84,7 +84,7 @@ void randomShuffle_(NDArray& input, NDArray& output, sd::graph::RandomGenerator&
         // apply Fisher-Yates shuffle
         if(isInplace) {
-            //PRAGMA_OMP_PARALLEL_FOR_IF((firstDim-1) > Environment::getInstance()->elementwiseThreshold())
+            //PRAGMA_OMP_PARALLEL_FOR_IF((firstDim-1) > Environment::getInstance().elementwiseThreshold())
             for(int i = firstDim - 1; i > 0; --i) {
                 int r = rng.relativeInt(i) % i;
@@ -99,7 +99,7 @@ void randomShuffle_(NDArray& input, NDArray& output, sd::graph::RandomGenerator&
             std::vector<int> indices(firstDim);
             std::iota(indices.begin(), indices.end(), 0);
             bool isZeroShuffled = false;
-            //PRAGMA_OMP_PARALLEL_FOR_IF((firstDim-1) > Environment::getInstance()->tadThreshold())
+            //PRAGMA_OMP_PARALLEL_FOR_IF((firstDim-1) > Environment::getInstance().tadThreshold())
             for(int i = firstDim - 1; i > 0; --i) {
                 int r = rng.relativeInt(i) % i;
                 subArrsListOut.at(i)->assign(subArrsListIn.at(indices[r]));
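NOTE (editor): randomShuffle_ above is a Fisher-Yates shuffle walking i from the last
index down and swapping with a random earlier element (the helper draws
rng.relativeInt(i) % i, i.e. r in [0, i)). A minimal standalone version of the same loop
shape, using std::mt19937 in place of sd's RandomGenerator:

    #include <numeric>
    #include <random>
    #include <vector>

    std::vector<int> shuffledIndices(int n, std::mt19937& rng) {
        std::vector<int> idx(n);
        std::iota(idx.begin(), idx.end(), 0);       // 0, 1, ..., n-1
        for (int i = n - 1; i > 0; --i) {
            int r = static_cast<int>(rng() % static_cast<unsigned>(i)); // r in [0, i)
            std::swap(idx[i], idx[r]);
        }
        return idx;
    }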
diff --git a/libnd4j/include/ops/declarable/helpers/cpu/roll.cpp b/libnd4j/include/ops/declarable/helpers/cpu/roll.cpp
index 278f3bcf5..2e3d983cd 100644
--- a/libnd4j/include/ops/declarable/helpers/cpu/roll.cpp
+++ b/libnd4j/include/ops/declarable/helpers/cpu/roll.cpp
@@ -43,7 +43,7 @@ namespace helpers {
     int remainShift = fullLen % actualShift;

     // stage 1) swap last actualShift elements with first ones.
-    //PRAGMA_OMP_PARALLEL_FOR //_IF(actualShift > Environment::getInstance()->elementwiseThreshold())
+    //PRAGMA_OMP_PARALLEL_FOR //_IF(actualShift > Environment::getInstance().elementwiseThreshold())
     for (int e = 0; e < actualShift; ++e) {
         int sourceIndex = fullLen - actualShift + e;
@@ -56,7 +56,7 @@ namespace helpers {
     }

     // stage 2) swap swapped actualShift elements with rest remainShiftCount times.
-    //PRAGMA_OMP_PARALLEL_FOR //_IF(shiftCount > Environment::getInstance()->tadThreshold())
+    //PRAGMA_OMP_PARALLEL_FOR //_IF(shiftCount > Environment::getInstance().tadThreshold())
     for (int count = 1; count < shiftCount; ++count) {
         for (int e = 0; e < actualShift; ++e) {
             int destinationIndex = fullLen - (count + 1) * actualShift + e;

diff --git a/libnd4j/include/ops/declarable/helpers/cpu/scatter.cpp b/libnd4j/include/ops/declarable/helpers/cpu/scatter.cpp
index e19eb5dea..0693406bf 100644
--- a/libnd4j/include/ops/declarable/helpers/cpu/scatter.cpp
+++ b/libnd4j/include/ops/declarable/helpers/cpu/scatter.cpp
@@ -87,7 +87,7 @@ void scatter(sd::LaunchContext *context, pairwise::Ops op, const NDArray& indic
             }
         };

-        samediff::Threads::parallel_tad(func, 0, indLen, 1, lock ? 1 : sd::Environment::getInstance()->maxThreads());
+        samediff::Threads::parallel_tad(func, 0, indLen, 1, lock ? 1 : sd::Environment::getInstance().maxThreads());
     }
     else {      // outRank > 1
@@ -107,7 +107,7 @@ void scatter(sd::LaunchContext *context, pairwise::Ops op, const NDArray& indic
             }
         };

-        samediff::Threads::parallel_tad(func, 0, indLen, 1, lock ? 1 : sd::Environment::getInstance()->maxThreads());
+        samediff::Threads::parallel_tad(func, 0, indLen, 1, lock ? 1 : sd::Environment::getInstance().maxThreads());
     }
 }
@@ -129,7 +129,7 @@ void scatterND(sd::LaunchContext *context, pairwise::Ops op, const NDArray& ind
             }
         };

-        samediff::Threads::parallel_tad(func, 0, indLen, 1, lock ? 1 : sd::Environment::getInstance()->maxThreads());
+        samediff::Threads::parallel_tad(func, 0, indLen, 1, lock ? 1 : sd::Environment::getInstance().maxThreads());
     }
     else {
         std::vector<int> dimsToExcludeInd = ShapeUtils::evalDimsToExclude(indRank, {indRank-1});
@@ -154,7 +154,7 @@ void scatterND(sd::LaunchContext *context, pairwise::Ops op, const NDArray& ind
             }
         };
-        samediff::Threads::parallel_tad(func, 0, indLen / indLastDim, 1, lock ? 1 : sd::Environment::getInstance()->maxThreads());
+        samediff::Threads::parallel_tad(func, 0, indLen / indLastDim, 1, lock ? 1 : sd::Environment::getInstance().maxThreads());
     }
 }

diff --git a/libnd4j/include/ops/declarable/helpers/cpu/softmax.cpp b/libnd4j/include/ops/declarable/helpers/cpu/softmax.cpp
index bfd44629c..7fd03f8e4 100644
--- a/libnd4j/include/ops/declarable/helpers/cpu/softmax.cpp
+++ b/libnd4j/include/ops/declarable/helpers/cpu/softmax.cpp
@@ -187,7 +187,7 @@ namespace sd {
         }
         else if(input.isSameShapeStrict(output)) {
-            TadPack tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), dimension);
+            TadPack tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(input.shapeInfo(), dimension);
             auto tadShapeInfo = tadPack.primaryShapeInfo();
             auto tadOffsets = tadPack.primaryOffsets();
             const uint numOfSubArrs = tadPack.numberOfTads();
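NOTE (editor): the softmax helper above normalizes each TAD along the chosen dimension
independently. For reference, a contiguous rank-2 version of what each TAD computes, with
the standard max-subtraction for numerical stability (softmaxRows is illustrative only):

    #include <cmath>

    void softmaxRows(float* x, int rows, int cols) {
        for (int r = 0; r < rows; ++r) {
            float* row = x + static_cast<long>(r) * cols;   // one TAD
            float mx = row[0];
            for (int c = 1; c < cols; ++c) mx = std::fmax(mx, row[c]);
            float sum = 0.f;
            for (int c = 0; c < cols; ++c) { row[c] = std::exp(row[c] - mx); sum += row[c]; }
            for (int c = 0; c < cols; ++c) row[c] /= sum;
        }
    }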
diff --git a/libnd4j/include/ops/declarable/helpers/cpu/stack.cpp b/libnd4j/include/ops/declarable/helpers/cpu/stack.cpp
index 694ced4cb..3db322fc8 100644
--- a/libnd4j/include/ops/declarable/helpers/cpu/stack.cpp
+++ b/libnd4j/include/ops/declarable/helpers/cpu/stack.cpp
@@ -47,7 +47,7 @@ static void stack_(const std::vector<const NDArray*>& inArrs, NDArray& output, c
     }
     else {
-        auto zTadPack = ConstantTadHelper::getInstance()->tadForDimensions(output.shapeInfo(), ShapeUtils::evalDimsToExclude(output.rankOf(), {dim}));
+        auto zTadPack = ConstantTadHelper::getInstance().tadForDimensions(output.shapeInfo(), ShapeUtils::evalDimsToExclude(output.rankOf(), {dim}));
         auto zTadShapeInfo = zTadPack.primaryShapeInfo();

         auto func = PRAGMA_THREADS_FOR {
@@ -57,8 +57,8 @@ static void stack_(const std::vector<const NDArray*>& inArrs, NDArray& output, c
                 void* zBuff = output.bufferWithOffset(zTadPack.primaryOffsets()[i]);

                 NativeOpExecutioner::execTransformAny(inArrs[0]->getContext(), transform::Assign,
-                    inArrs[i]->buffer(), inArrs[i]->shapeInfo(), nullptr/*input specialBuffer*/, nullptr/*input specialShapeInfo*/,
-                    zBuff, zTadShapeInfo, nullptr/*output specialBuffer*/, nullptr/*output specialShapeInfo*/,
+                    inArrs[i]->buffer(), inArrs[i]->shapeInfo(), nullptr/*input specialBuffer*/, nullptr/*input specialShapeInfo*/,
+                    zBuff, zTadShapeInfo, nullptr/*output specialBuffer*/, nullptr/*output specialShapeInfo*/,
                     nullptr, nullptr, nullptr, false/*allowParallelism*/);
             }
         };
@@ -92,7 +92,7 @@ static void unstack_(const NDArray& input, const std::vector<NDArray*>& outArrs,
     }
     else {
-        auto xTadPack = ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), ShapeUtils::evalDimsToExclude(input.rankOf(), {dim}));
+        auto xTadPack = ConstantTadHelper::getInstance().tadForDimensions(input.shapeInfo(), ShapeUtils::evalDimsToExclude(input.rankOf(), {dim}));
         auto xTadShapeInfo = xTadPack.primaryShapeInfo();

         auto func = PRAGMA_THREADS_FOR {
@@ -100,8 +100,8 @@ static void unstack_(const NDArray& input, const std::vector<NDArray*>& outArrs,
                 auto xBuff = input.bufferWithOffset(xTadPack.primaryOffsets()[i]);

                 NativeOpExecutioner::execTransformAny(input.getContext(), transform::Assign,
-                    xBuff, xTadShapeInfo, nullptr/*input specialBuffer*/, nullptr/*input specialShapeInfo*/,
-                    outArrs[i]->buffer(), outArrs[i]->shapeInfo(), nullptr/*output specialBuffer*/, nullptr/*output specialShapeInfo*/,
+                    xBuff, xTadShapeInfo, nullptr/*input specialBuffer*/, nullptr/*input specialShapeInfo*/,
+                    outArrs[i]->buffer(), outArrs[i]->shapeInfo(), nullptr/*output specialBuffer*/, nullptr/*output specialShapeInfo*/,
                     nullptr, nullptr, nullptr, false/*allowParallelism*/);
             }
         };

diff --git a/libnd4j/include/ops/declarable/helpers/cuda/activations.cu b/libnd4j/include/ops/declarable/helpers/cuda/activations.cu
index c8bc709a0..e675342d9 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/activations.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/activations.cu
@@ -321,8 +321,8 @@ void softmax(sd::LaunchContext * context, const NDArray& input, NDArray& output,
     }
     else {
-        auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), {dimension});
-        auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output.shapeInfo(), {dimension});
+        auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input.shapeInfo(), {dimension});
+        auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output.shapeInfo(), {dimension});

         const int threadsPerBlock = MAX_NUM_THREADS / 4;
         const int blocksPerGrid = packZ.numberOfTads();
diff --git a/libnd4j/include/ops/declarable/helpers/cuda/adjust_hue.cu b/libnd4j/include/ops/declarable/helpers/cuda/adjust_hue.cu
index 9ce00f318..fff4bfb11 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/adjust_hue.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/adjust_hue.cu
@@ -81,8 +81,8 @@ static _CUDA_H void adjustHueCudaLauncher(const int blocksPerGrid, const int thr
////////////////////////////////////////////////////////////////////////
void adjustHue(sd::LaunchContext* context, const NDArray *input, const NDArray* deltaScalarArr, NDArray *output, const int dimC) {

-    auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), {dimC});
-    auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {dimC});
+    auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), {dimC});
+    auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), {dimC});

     const Nd4jLong numOfTads = packX.numberOfTads();
@@ -170,11 +170,11 @@ static void _adjust_hue_single(sd::LaunchContext * context, NDArray *array, NDAr
     // numChannels is always 3
     auto tuples = array->lengthOf() / 3;
     if (isNHWC) {
-        adjustHueSingleNHWCKernel<<<256, 256, 1024, *context->getCudaStream()>>>(array->specialBuffer(), array->specialShapeInfo(), output->specialBuffer(), output->specialShapeInfo(), tuples, delta);
+        adjustHueSingleNHWCKernel<<<256, 256, 1024, *context->getCudaStream()>>>(array->specialBuffer(), array->specialShapeInfo(), output->specialBuffer(), output->specialShapeInfo(), tuples, delta);
     } else {
         // TODO: check this one
-        auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(array->shapeInfo(), {1, 2});
-        auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {1, 2});
+        auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(array->shapeInfo(), {1, 2});
+        auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), {1, 2});

         auto tadLength = shape::length(packX.primaryShapeInfo());
@@ -195,12 +195,12 @@ static void _adjust_hue_batch(sd::LaunchContext * context, NDArray *array, NDArr
         BUILD_SINGLE_SELECTOR(xType, _adjust_hue_single, (context, array, output, delta, isNHWC);, FLOAT_TYPES);
     } else {
         // TODO: check this one
-        auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(array->shapeInfo(), {0, 2, 3});
-        auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {0, 2, 3});
+        auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(array->shapeInfo(), {0, 2, 3});
+        auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), {0, 2, 3});

-        auto tadLength = shape::length(packX.primaryShapeInfo());
+        auto tadLength = shape::length(packX.primaryShapeInfo());

-        adjustHueSingleNCHWKernel<<<256, 256, 1024, *context->getCudaStream()>>>(array->specialBuffer(), packX.platformShapeInfo(), packX.platformOffsets(), output->specialBuffer(), packZ.platformShapeInfo(), packZ.platformOffsets(), tadLength, tuples, delta);
+        adjustHueSingleNCHWKernel<<<256, 256, 1024, *context->getCudaStream()>>>(array->specialBuffer(), packX.platformShapeInfo(), packX.platformOffsets(), output->specialBuffer(), packZ.platformShapeInfo(), packZ.platformOffsets(), tadLength, tuples, delta);
     }
 }

diff --git a/libnd4j/include/ops/declarable/helpers/cuda/adjust_saturation.cu b/libnd4j/include/ops/declarable/helpers/cuda/adjust_saturation.cu
index fd413f8cd..36837db29 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/adjust_saturation.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/adjust_saturation.cu
@@ -83,8 +83,8 @@ static _CUDA_H void adjustSaturationCudaLauncher(const int blocksPerGrid, const
////////////////////////////////////////////////////////////////////////
void adjustSaturation(sd::LaunchContext* context, const NDArray *input, const NDArray* factorScalarArr, NDArray *output, const int dimC) {

-    auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), {dimC});
-    auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {dimC});
+    auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), {dimC});
+    auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), {dimC});

     const Nd4jLong numOfTads = packX.numberOfTads();
@@ -162,10 +162,10 @@ static void _adjust_saturation_single(sd::LaunchContext * context, NDArray *arra
     auto tuples = array->lengthOf() / 3;

     if (isNHWC) {
-        adjustSaturationSingleNHWCKernel<<<256, 256, 1024, *context->getCudaStream()>>>(array->specialBuffer(), array->specialShapeInfo(), output->specialBuffer(), output->specialShapeInfo(), tuples, delta);
+        adjustSaturationSingleNHWCKernel<<<256, 256, 1024, *context->getCudaStream()>>>(array->specialBuffer(), array->specialShapeInfo(), output->specialBuffer(), output->specialShapeInfo(), tuples, delta);
     } else {
-        auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(array->shapeInfo(), {1, 2});
-        auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {1, 2});
+        auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(array->shapeInfo(), {1, 2});
+        auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), {1, 2});

         auto tadLength = shape::length(packX.primaryShapeInfo());
@@ -185,12 +185,12 @@ static void _adjust_saturation_batch(sd::LaunchContext * context, NDArray *array
         BUILD_SINGLE_SELECTOR(xType, _adjust_saturation_single, (context, array, output, delta, isNHWC);, FLOAT_TYPES);
     } else {
         // TODO: check this one
-        auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(array->shapeInfo(), {0, 2, 3});
-        auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {0, 2, 3});
+        auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(array->shapeInfo(), {0, 2, 3});
+        auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), {0, 2, 3});

-        auto tadLength = shape::length(packX.primaryShapeInfo());
+        auto tadLength = shape::length(packX.primaryShapeInfo());
-        adjustSaturationSingleNCHWKernel<<<256, 256, 1024, *context->getCudaStream()>>>(array->specialBuffer(), packX.platformShapeInfo(), packX.platformOffsets(), output->specialBuffer(), packZ.platformShapeInfo(), packZ.platformOffsets(), tadLength, tuples, delta);
+        adjustSaturationSingleNCHWKernel<<<256, 256, 1024, *context->getCudaStream()>>>(array->specialBuffer(), packX.platformShapeInfo(), packX.platformOffsets(), output->specialBuffer(), packZ.platformShapeInfo(), packZ.platformOffsets(), tadLength, tuples, delta);
     }
 }

diff --git a/libnd4j/include/ops/declarable/helpers/cuda/batchnorm.cu b/libnd4j/include/ops/declarable/helpers/cuda/batchnorm.cu
index 791953ab7..f7f8bf966 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/batchnorm.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/batchnorm.cu
@@ -201,8 +201,8 @@ void batchnorm(const NDArray* input, const NDArray* mean, const NDArray* varianc
     // std::vector dimsToExclude = ShapeUtils::evalDimsToExclude(input->rankOf(), axes);

-    // auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimsToExclude);
-    // auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimsToExclude);
+    // auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimsToExclude);
+    // auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimsToExclude);

     // const int threadsPerBlock = MAX_NUM_THREADS / 2;
     // const int blocksPerGrid = (mean->lengthOf() + threadsPerBlock - 1) / threadsPerBlock;
@@ -210,7 +210,7 @@ void batchnorm(const NDArray* input, const NDArray* mean, const NDArray* varianc
     // PointersManager manager(input->getContext(), "batchnorm");

     // NDArray::prepareSpecialUse({output}, {input, mean, variance, gamma, beta});
-    // BUILD_SINGLE_SELECTOR(input->dataType(), batchnormCudaLauncher, (blocksPerGrid, threadsPerBlock, input->getContext()->getCudaStream(), input->specialBuffer(), input->specialShapeInfo(), mean->specialBuffer(), mean->specialShapeInfo(), variance->specialBuffer(), variance->specialShapeInfo(), gamma ? gamma->specialBuffer() : nullptr, gamma ? gamma->specialShapeInfo() : nullptr, beta ? beta->specialBuffer() : nullptr, beta ? beta->specialShapeInfo() : nullptr, output->specialBuffer(), output->specialShapeInfo(), packX.platformShapeInfo(), packX.platformOffsets(), packZ.platformShapeInfo(), packZ.platformOffsets(), epsilon), FLOAT_TYPES);
+    // BUILD_SINGLE_SELECTOR(input->dataType(), batchnormCudaLauncher, (blocksPerGrid, threadsPerBlock, input->getContext()->getCudaStream(), input->specialBuffer(), input->specialShapeInfo(), mean->specialBuffer(), mean->specialShapeInfo(), variance->specialBuffer(), variance->specialShapeInfo(), gamma ? gamma->specialBuffer() : nullptr, gamma ? gamma->specialShapeInfo() : nullptr, beta ? beta->specialBuffer() : nullptr, beta ? beta->specialShapeInfo() : nullptr, output->specialBuffer(), output->specialShapeInfo(), packX.platformShapeInfo(), packX.platformOffsets(), packZ.platformShapeInfo(), packZ.platformOffsets(), epsilon), FLOAT_TYPES);

     // NDArray::registerSpecialUse({output}, {input, mean, variance, gamma, beta});
     // manager.synchronize();

diff --git a/libnd4j/include/ops/declarable/helpers/cuda/confusion.cu b/libnd4j/include/ops/declarable/helpers/cuda/confusion.cu
index dfa86124a..fd676ba83 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/confusion.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/confusion.cu
@@ -68,7 +68,7 @@ namespace helpers {
     void _confusionFunctor(sd::LaunchContext * context, NDArray* labels, NDArray* predictions, NDArray* weights, NDArray* output) {
         auto stream = context->getCudaStream();

-        auto pack = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), 1);
+        auto pack = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), 1);

         PointersManager manager(context, "helpers::confusion");

diff --git a/libnd4j/include/ops/declarable/helpers/cuda/dynamic.cu b/libnd4j/include/ops/declarable/helpers/cuda/dynamic.cu
index 6f29995d3..bce7316ef 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/dynamic.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/dynamic.cu
@@ -124,7 +124,7 @@ namespace sd {
                 for (int i = sourceDimsLen; i > 0; i--)
                     sourceDims[sourceDimsLen - i] = input->rankOf() - i;
                 //compute tad array for given dimensions
-                auto packX = ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), sourceDims);
+                auto packX = ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), sourceDims);

                 std::vector outBuffers(outSize);
                 std::vector tadShapes(outSize);
@@ -140,7 +140,7 @@ namespace sd {
                     for (int k = 1; k < r; k++)
                         outDims[k - 1] = k;

-                    auto packZ = ConstantTadHelper::getInstance()->tadForDimensions(outputList.at(i)->shapeInfo(), outDims);
+                    auto packZ = ConstantTadHelper::getInstance().tadForDimensions(outputList.at(i)->shapeInfo(), outDims);

                     outBuffers[i] = outputList.at(i)->specialBuffer();
                     tadShapes[i] = packZ.platformShapeInfo();
@@ -262,7 +262,7 @@ namespace sd {
                 for (int i = restDims.size(); i > 0; i--)
                     restDims[restDims.size() - i] = output->rankOf() - i;

-                auto packZ = ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), restDims);
+                auto packZ = ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), restDims);

                 std::vector inputBuffers(inputSize);
                 std::vector inputTadShapes(inputSize);
@@ -276,7 +276,7 @@ namespace sd {
                     for (int i = sourceDims.size(); i > 0; i--)
                         sourceDims[sourceDims.size() - i] = inputs[e]->rankOf() - i;

-                    auto packX = ConstantTadHelper::getInstance()->tadForDimensions(inputs[e]->shapeInfo(), sourceDims);
+                    auto packX = ConstantTadHelper::getInstance().tadForDimensions(inputs[e]->shapeInfo(), sourceDims);

                     indicesBuffers[e] = indices[e]->specialBuffer();
                     indicesShapes[e] = indices[e]->specialShapeInfo();

diff --git a/libnd4j/include/ops/declarable/helpers/cuda/extract_patches.cu b/libnd4j/include/ops/declarable/helpers/cuda/extract_patches.cu
index c5e8848cb..e1c506879 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/extract_patches.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/extract_patches.cu
@@ -114,8 +114,8 @@ namespace helpers {
         if (sizeCol * rateCol < 3)
             colCast = 0;

-        auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(images->shapeInfo(), restDims.data(), restDims.size());
-        auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), restDims.data(), restDims.size());
+        auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(images->shapeInfo(), restDims.data(), restDims.size());
+        auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), restDims.data(), restDims.size());

         int batchCount = packX.numberOfTads();

         PointersManager manager(context, "helpers::extractPatches");

diff --git a/libnd4j/include/ops/declarable/helpers/cuda/histogramFixedWidth.cu b/libnd4j/include/ops/declarable/helpers/cuda/histogramFixedWidth.cu
index adb5a3ec4..c6041b33b 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/histogramFixedWidth.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/histogramFixedWidth.cu
@@ -164,9 +164,9 @@ void histogramFixedWidth(sd::LaunchContext* context, const NDArray& input, const
//     cudaError_t err = cudaMalloc(&outputBuffer, output.lengthOf() * sizeof(Nd4jLong));
//     if (err != 0)
//         throw cuda_exception::build("helpers::histogramFixedWidth: Cannot allocate memory for output", err);
-//     copyBuffers<<<256, 512, 8192, *stream>>>(outputBuffer, output.specialBuffer(), output.specialShapeInfo(), output.lengthOf());
-//     histogramFixedWidthKernel<<<256, 512, 8192, *stream>>>(outputBuffer, output.lengthOf(), input.specialBuffer(), input.specialShapeInfo(), input.lengthOf(), leftEdge, binWidth, secondEdge, lastButOneEdge);
-//     returnBuffers<<<256, 512, 8192, *stream>>>(output.specialBuffer(), outputBuffer, output.specialShapeInfo(), output.lengthOf());
+//     copyBuffers<<<256, 512, 8192, *stream>>>(outputBuffer, output.specialBuffer(), output.specialShapeInfo(), output.lengthOf());
+//     histogramFixedWidthKernel<<<256, 512, 8192, *stream>>>(outputBuffer, output.lengthOf(), input.specialBuffer(), input.specialShapeInfo(), input.lengthOf(), leftEdge, binWidth, secondEdge, lastButOneEdge);
+//     returnBuffers<<<256, 512, 8192, *stream>>>(output.specialBuffer(), outputBuffer, output.specialShapeInfo(), output.lengthOf());
//     //cudaSyncStream(*stream);
//     err = cudaFree(outputBuffer);
//     if (err != 0)
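NOTE (editor): for reference on the commented-out launcher above, fixed-width histogram
binning places each value into one of nbins equal bins over [leftEdge, rightEdge], with
out-of-range values clamped into the first/last bin. A host-side sketch under those
assumed semantics (the real helper reads the range from an input array):

    #include <cstdint>
    #include <vector>

    std::vector<std::int64_t> histogramFixedWidthSketch(const std::vector<float>& v,
                                                        float leftEdge, float rightEdge,
                                                        int nbins) {
        std::vector<std::int64_t> bins(nbins, 0);
        const float width = (rightEdge - leftEdge) / nbins;   // assumes rightEdge > leftEdge
        for (float x : v) {
            int b = static_cast<int>((x - leftEdge) / width);
            if (b < 0) b = 0;                                 // clamp below-range values
            if (b >= nbins) b = nbins - 1;                    // clamp above-range values
            ++bins[b];
        }
        return bins;
    }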
diff --git a/libnd4j/include/ops/declarable/helpers/cuda/imagesHelpers.cu b/libnd4j/include/ops/declarable/helpers/cuda/imagesHelpers.cu
index c26b79ee6..749f60c11 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/imagesHelpers.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/imagesHelpers.cu
@@ -69,8 +69,8 @@ linkage void rgbToYuvCudaLauncher(const int blocksPerGrid, const int threadsPerB
///////////////////////////////////////////////////////////////////
void transformRgbYuv(sd::LaunchContext* context, const NDArray& input, NDArray& output, const int dimC) {

-    auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), { dimC });
-    auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output.shapeInfo(), { dimC });
+    auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input.shapeInfo(), { dimC });
+    auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output.shapeInfo(), { dimC });

     const Nd4jLong numOfTads = packX.numberOfTads();
@@ -124,8 +124,8 @@ linkage void yuvToRgbCudaLauncher(const int blocksPerGrid, const int threadsPerB
///////////////////////////////////////////////////////////////////
void transformYuvRgb(sd::LaunchContext* context, const NDArray& input, NDArray& output, const int dimC) {

-    auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), { dimC });
-    auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output.shapeInfo(), { dimC });
+    auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input.shapeInfo(), { dimC });
+    auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output.shapeInfo(), { dimC });

     const Nd4jLong numOfTads = packX.numberOfTads();
@@ -287,8 +287,8 @@ static _CUDA_H void rgbToHsvCudaLauncher(const int blocksPerGrid, const int thre
///////////////////////////////////////////////////////////////////
void transformHsvRgb(sd::LaunchContext* context, const NDArray* input, NDArray* output, const int dimC) {

-    auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), {dimC});
-    auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {dimC});
+    auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), {dimC});
+    auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), {dimC});

     const Nd4jLong numOfTads = packX.numberOfTads();
@@ -306,8 +306,8 @@ void transformHsvRgb(sd::LaunchContext* context, const NDArray* input, NDArray*
///////////////////////////////////////////////////////////////////
void transformRgbHsv(sd::LaunchContext* context, const NDArray* input, NDArray* output, const int dimC) {

-    auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), {dimC});
-    auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {dimC});
+    auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), {dimC});
+    auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), {dimC});

     const Nd4jLong numOfTads = packX.numberOfTads();
@@ -389,8 +389,8 @@ __global__ void tripleTransformerCuda(const void *vx, const Nd4jLong *xShapeInfo
template <typename T>
static void rgbYiq(sd::LaunchContext* context, const NDArray* input, NDArray* output, const int dimC) {

-    auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimC);
-    auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimC);
+    auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimC);
+    auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimC);

     NDArray::prepareSpecialUse({output}, {input});
     return tripleTransformerCuda<<<256, 256, 8192, *context->getCudaStream()>>>(input->specialBuffer(), input->specialShapeInfo(), packX.platformShapeInfo(), packX.platformOffsets(), output->specialBuffer(), output->specialShapeInfo(), packZ.platformShapeInfo(), packZ.platformOffsets(), dimC, 1, packZ.numberOfTads());
@@ -399,8 +399,8 @@ static void rgbYiq(sd::LaunchContext* context, const NDArray* input, NDArray* ou
template <typename T>
FORCEINLINE static void yiqRgb(sd::LaunchContext* context, const NDArray* input, NDArray* output, const int dimC) {

-    auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimC);
-    auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimC);
+    auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimC);
+    auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimC);

     NDArray::prepareSpecialUse({output}, {input});
     return tripleTransformerCuda<<<256, 256, 8192, *context->getCudaStream()>>>(input->specialBuffer(), input->specialShapeInfo(), packX.platformShapeInfo(), packX.platformOffsets(), output->specialBuffer(), output->specialShapeInfo(), packZ.platformShapeInfo(), packZ.platformOffsets(), dimC, 2, packZ.numberOfTads());

diff --git a/libnd4j/include/ops/declarable/helpers/cuda/indexReductions.cu b/libnd4j/include/ops/declarable/helpers/cuda/indexReductions.cu
index 9876417df..820a6c258 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/indexReductions.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/indexReductions.cu
@@ -32,7 +32,7 @@ namespace sd {
             NativeOpExecutioner::execIndexReduceScalar(LaunchContext::defaultContext(), indexreduce::Ops::IndexMax, input.buffer(), input.shapeInfo(), input.specialBuffer(), input.specialShapeInfo(), nullptr, output.buffer(), output.shapeInfo(), output.specialBuffer(), output.specialShapeInfo());
         } else {
-            auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), dimensions);
+            auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(input.shapeInfo(), dimensions);

             NativeOpExecutioner::execIndexReduce(LaunchContext::defaultContext(), indexreduce::Ops::IndexMax,
                                                  input.buffer(), input.shapeInfo(), input.specialBuffer(), input.specialShapeInfo(),
@@ -51,7 +51,7 @@ namespace sd {
             NativeOpExecutioner::execIndexReduceScalar(LaunchContext::defaultContext(), indexreduce::Ops::IndexMin, input.buffer(), input.shapeInfo(), input.specialBuffer(), input.specialShapeInfo(), nullptr, output.buffer(), output.shapeInfo(), output.specialBuffer(), output.specialShapeInfo());
         } else {
-            auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), dimensions);
+            auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(input.shapeInfo(), dimensions);

             NativeOpExecutioner::execIndexReduce(LaunchContext::defaultContext(), indexreduce::Ops::IndexMin,
                                                  input.buffer(), input.shapeInfo(), input.specialBuffer(), input.specialShapeInfo(),
@@ -70,7 +70,7 @@ namespace sd {
             NativeOpExecutioner::execIndexReduceScalar(LaunchContext::defaultContext(), indexreduce::Ops::IndexAbsoluteMax, input.buffer(), input.shapeInfo(), input.specialBuffer(), input.specialShapeInfo(), nullptr, output.buffer(), output.shapeInfo(), output.specialBuffer(), output.specialShapeInfo());
         } else {
-            auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), dimensions);
+            auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(input.shapeInfo(), dimensions);

             NativeOpExecutioner::execIndexReduce(LaunchContext::defaultContext(), indexreduce::Ops::IndexAbsoluteMax,
                                                  input.buffer(), input.shapeInfo(), input.specialBuffer(), input.specialShapeInfo(),
@@ -89,7 +89,7 @@ namespace sd {
             NativeOpExecutioner::execIndexReduceScalar(LaunchContext::defaultContext(), indexreduce::Ops::IndexAbsoluteMin, input.buffer(), input.shapeInfo(), input.specialBuffer(), input.specialShapeInfo(), nullptr, output.buffer(), output.shapeInfo(), output.specialBuffer(), output.specialShapeInfo());
         } else {
-            auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), dimensions);
+            auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(input.shapeInfo(), dimensions);

             NativeOpExecutioner::execIndexReduce(LaunchContext::defaultContext(), indexreduce::Ops::IndexAbsoluteMin,
                                                  input.buffer(), input.shapeInfo(), input.specialBuffer(), input.specialShapeInfo(),
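NOTE (editor): the indexReductions.cu hunks above show the dispatch shape shared by
argMax/argMin/argAbsMax/argAbsMin: a reduction over the whole array produces one scalar
index, while a reduction along dimensions produces one index per TAD. A plain C++ toy of
the per-TAD case for a row-major matrix (argMaxPerRow is illustrative only):

    #include <cstdint>
    #include <vector>

    std::vector<std::int64_t> argMaxPerRow(const float* x, int rows, int cols) {
        std::vector<std::int64_t> out(rows);
        for (int r = 0; r < rows; ++r) {
            const float* row = x + static_cast<std::size_t>(r) * cols;  // one TAD
            std::int64_t best = 0;
            for (int c = 1; c < cols; ++c)
                if (row[c] > row[best]) best = c;   // first maximum wins on ties
            out[r] = best;
        }
        return out;
    }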
diff --git a/libnd4j/include/ops/declarable/helpers/cuda/ismax.cu b/libnd4j/include/ops/declarable/helpers/cuda/ismax.cu
index 723b0f215..f6e233aab 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/ismax.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/ismax.cu
@@ -61,7 +61,7 @@ static void ismax_(sd::LaunchContext * context, const NDArray* input, NDArray* o
     int dimensionLength = dimensions.size();
     std::vector<int> copy(dimensions);

-    auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), copy.data(), copy.size());
+    auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), copy.data(), copy.size());

     // we launch legacy IndexMax op, to get indices of max values along dimension
     auto indexMaxArr = input->applyIndexReduce(indexreduce::IndexMax, dimensions);

diff --git a/libnd4j/include/ops/declarable/helpers/cuda/lrn.cu b/libnd4j/include/ops/declarable/helpers/cuda/lrn.cu
index ebc0732e2..123c06ac5 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/lrn.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/lrn.cu
@@ -116,8 +116,8 @@ namespace helpers {
    template <typename T>
    static void lrnBP_(sd::graph::Context& block, const NDArray& input, const NDArray& gradO, NDArray& gradI, const int depth, const float bias, const float alpha, const float beta) {
        auto rank = input.rankOf();
-       auto packX = ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), {rank - 1});
-       auto packZ = ConstantTadHelper::getInstance()->tadForDimensions(gradI.shapeInfo(), {rank - 1});
+       auto packX = ConstantTadHelper::getInstance().tadForDimensions(input.shapeInfo(), {rank - 1});
+       auto packZ = ConstantTadHelper::getInstance().tadForDimensions(gradI.shapeInfo(), {rank - 1});

        const auto tadLength = shape::length(packX.primaryShapeInfo());
        const int numBlocks = sd::math::nd4j_min(1024, packX.numberOfTads());
@@ -144,8 +144,8 @@ namespace helpers {
    template <typename T>
    static void lrnFunctor_(sd::graph::Context& block, NDArray* input, NDArray* output, int depth, double bias, double alpha, double beta) {
        auto rank = input->rankOf();
-       auto packX = ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), {rank - 1});
-       auto packZ = ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {rank - 1});
+       auto packX = ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), {rank - 1});
+       auto packZ = ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), {rank - 1});

        const auto tadLength = shape::length(packX.primaryShapeInfo());
        const int numBlocks = sd::math::nd4j_min(1024, packX.numberOfTads());

diff --git a/libnd4j/include/ops/declarable/helpers/cuda/lstsq.cu b/libnd4j/include/ops/declarable/helpers/cuda/lstsq.cu
index 8d8548be5..b28efff80 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/lstsq.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/lstsq.cu
@@ -48,7 +48,7 @@ namespace helpers {
    template <typename T>
    static void fillRegularizer(sd::LaunchContext* context, NDArray& ioMatrix, double const value) {
-       auto lastDimsTads = ConstantTadHelper::getInstance()->tadForDimensions(ioMatrix.shapeInfo(), {-2, -1});
+       auto lastDimsTads = ConstantTadHelper::getInstance().tadForDimensions(ioMatrix.shapeInfo(), {-2, -1});
        auto stream = context->getCudaStream();
        auto rows = ioMatrix.sizeAt(-2);
        //auto cols = ioMatrix.sizeAt(-1);
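NOTE (editor): fillRegularizer above writes a constant onto the main diagonal of each
trailing [rows, cols] matrix (the {-2, -1} TADs). A single-matrix host-side toy of that
operation, assuming row-major storage (fillDiagonal is illustrative only):

    void fillDiagonal(float* m, int rows, int cols, float value) {
        int n = rows < cols ? rows : cols;   // diagonal length of a non-square matrix
        for (int i = 0; i < n; ++i)
            m[i * cols + i] = value;
    }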
diff --git a/libnd4j/include/ops/declarable/helpers/cuda/lup.cu b/libnd4j/include/ops/declarable/helpers/cuda/lup.cu
index 682b2eee9..c59ef9489 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/lup.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/lup.cu
@@ -604,8 +604,8 @@ namespace helpers {
//        output->tickWriteDevice();
        permutationVectors->applyTrueBroadcast(sd::BroadcastOpsTuple::Assign(), iota, *permutationVectors, true, nullptr);
//        permutationVectors->tickWriteDevice();
-       auto tads = ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {-2, -1});
-       auto permutaionTads = ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {-1});
+       auto tads = ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), {-2, -1});
+       auto permutaionTads = ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), {-1});
        auto batchNum = tads.numberOfTads();
        luBatchedKernel<<>>(reinterpret_cast(output->platformBuffer()), output->specialShapeInfo(), reinterpret_cast(permutationVectors->platformBuffer()),
@@ -624,8 +624,8 @@ namespace helpers {
        Nd4jLong n = input->sizeAt(-1);
        Nd4jLong n2 = n * n;
        std::vector dims();
-       auto packX = ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), {input->rankOf() - 2, input->rankOf() - 1});
-       //auto packZ = ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {output->rankOf() - 1});
+       auto packX = ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), {input->rankOf() - 2, input->rankOf() - 1});
+       //auto packZ = ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), {output->rankOf() - 1});
//        DataType dtype = input->dataType();
//        if (dtype != DataType::DOUBLE)
//            dtype = DataType::FLOAT32;
@@ -640,7 +640,7 @@ namespace helpers {
//            if (matrix.dataType() == input->dataType())
            fillMatrix<<>>(matrix.specialBuffer(), matrix.specialShapeInfo(), input->specialBuffer(), input->specialShapeInfo(), pos, n);
//            else
-//                fillMatrix<<>>(matrix.specialBuffer(), matrix.specialShapeInfo(), input->specialBuffer(), input->specialShapeInfo(), pos, n);
+//                fillMatrix<<>>(matrix.specialBuffer(), matrix.specialShapeInfo(), input->specialBuffer(), input->specialShapeInfo(), pos, n);
            lup_(context, &matrix, nullptr, nullptr);
//            else
//                lup_(context, &matrix, nullptr, nullptr);
@@ -668,8 +668,8 @@ namespace helpers {
        Nd4jLong n = input->sizeAt(-1);
        Nd4jLong n2 = n * n;
        std::vector dims();
-       auto packX = ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), {input->rankOf() - 2, input->rankOf() - 1});
-       //auto packZ = ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {output->rankOf() - 1});
+       auto packX = ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), {input->rankOf() - 2, input->rankOf() - 1});
+       //auto packZ = ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), {output->rankOf() - 1});
        DataType dtype = input->dataType();
        if (dtype != DataType::DOUBLE)
            dtype = DataType::FLOAT32;
@@ -685,7 +685,7 @@ namespace helpers {
//            if (matrix.dataType() == input->dataType())
            fillMatrix<<>>(matrix.specialBuffer(), matrix.specialShapeInfo(), input->specialBuffer(), input->specialShapeInfo(), pos, n);
//            else
-//                fillMatrix<<>>(matrix.specialBuffer(), matrix.specialShapeInfo(), input->specialBuffer(), input->specialShapeInfo(), pos, n);
+//                fillMatrix<<>>(matrix.specialBuffer(), matrix.specialShapeInfo(), input->specialBuffer(), input->specialShapeInfo(), pos, n);
//            if (matrix.dataType() == input->dataType())
            lup_(context, &matrix, nullptr, nullptr);
@@ -759,10 +759,10 @@ namespace helpers {
            NDArray lower = NDArrayFactory::create('c', {n, n}, dtype, context);
            NDArray compound = NDArrayFactory::create('c', {n, n}, dtype, context);
            NDArray permutation = NDArrayFactory::create('c', {n, n}, dtype, context);
-           auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(),
+           auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(),
                                                                                {input->rankOf() - 2, input->rankOf() - 1});
-           auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(),
+           auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(),
                                                                                {output->rankOf() - 2, output->rankOf() - 1});
            auto stream = context->getCudaStream();
@@ -849,7 +849,7 @@ namespace helpers {
                throw cuda_exception::build("helpers::cholesky_: Cannot create solver handle", status);
            }
            F **dArrayBatch = nullptr;
-           auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempOutput.shapeInfo(),
+           auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(tempOutput.shapeInfo(),
                                                                                {tempOutput.rankOf() - 2, tempOutput.rankOf() - 1});
            const Nd4jLong batchSize = packX.numberOfTads();
@@ -980,7 +980,7 @@ namespace helpers {
            auto outputBuf = output->dataBuffer()->specialAsT(); //reinterpret_cast(output->specialBuffer()); // + e * n2; // + e * n2;
            auto inputBuf = tempOutput.dataBuffer()->specialAsT(); //reinterpret_cast(tempOutput.specialBuffer());
            output->nullify();
-           auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempOutput.shapeInfo(),
+           auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(tempOutput.shapeInfo(),
                                                                                {tempOutput.rankOf() - 2, tempOutput.rankOf() - 1});
            logDetKernel<<<128, 512, 256, *stream>>>(inputBuf, tempOutput.specialShapeInfo(),

diff --git a/libnd4j/include/ops/declarable/helpers/cuda/matrix_band.cu b/libnd4j/include/ops/declarable/helpers/cuda/matrix_band.cu
index 78249bc38..446d57b27 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/matrix_band.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/matrix_band.cu
@@ -94,8 +94,8 @@ namespace helpers {
            std::vector<int> lastDims({input->rankOf() - 2, input->rankOf() - 1});
            std::vector<int> dimsToExclude = ShapeUtils::evalDimsToExclude(input->rankOf(), lastDims);

-           auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), lastDims);
-           auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), lastDims);
+           auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), lastDims);
+           auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), lastDims);

            const Nd4jLong numTads = packX.numberOfTads();
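NOTE (editor): the matrix_band helper above processes each trailing [rows, cols] TAD
independently. A single-matrix host-side sketch, assuming the usual matrix_band_part
semantics (keep element (i, j) when i - j <= lower and j - i <= upper, with a negative
bound meaning unbounded); bandPart is illustrative, not the kernel itself:

    void bandPart(float* m, int n, int lower, int upper) {
        for (int i = 0; i < n; ++i)
            for (int j = 0; j < n; ++j) {
                bool keep = (lower < 0 || i - j <= lower) &&
                            (upper < 0 || j - i <= upper);
                if (!keep) m[i * n + j] = 0.f;   // zero everything outside the band
            }
    }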
b/libnd4j/include/ops/declarable/helpers/cuda/meshgrid.cu index 3f2ed13b5..918dca510 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/meshgrid.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/meshgrid.cu @@ -104,7 +104,7 @@ namespace helpers { hOutBuffers[i] = outArrs[i]->specialBuffer(); - auto pack = ConstantTadHelper::getInstance()->tadForDimensions(outArrs[i]->shapeInfo(), {inIndices[i]}); + auto pack = ConstantTadHelper::getInstance().tadForDimensions(outArrs[i]->shapeInfo(), {inIndices[i]}); hOutTadShapes[i] = pack.specialShapeInfo(); hOutTadOffsets[i] = pack.specialOffsets(); hNumTads[i] = pack.numberOfTads(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/nth_element.cu b/libnd4j/include/ops/declarable/helpers/cuda/nth_element.cu index c3b4abc51..c2f34f9fe 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/nth_element.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/nth_element.cu @@ -66,7 +66,7 @@ namespace helpers { else { // rank greater than 1 std::vector lastDims({input->rankOf() - 1});// = ShapeUtils::evalDimsToExclude(input->rankOf(), {input->rankOf() - 1}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(sortedVals.shapeInfo(), lastDims); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(sortedVals.shapeInfo(), lastDims); auto pTadShape = packX.specialShapeInfo(); auto pTadShapeH = packX.primaryShapeInfo(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/percentile.cu b/libnd4j/include/ops/declarable/helpers/cuda/percentile.cu index 7f2bcdcfd..1bc50fad7 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/percentile.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/percentile.cu @@ -97,7 +97,7 @@ namespace helpers { shape::checkDimensions(inputRank, axis); auto tempArray = input.dup(); - auto packX = ConstantTadHelper::getInstance()->tadForDimensions(tempArray.shapeInfo(), axis); + auto packX = ConstantTadHelper::getInstance().tadForDimensions(tempArray.shapeInfo(), axis); auto tadLength = shape::length(packX.primaryShapeInfo()); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/prefix.cu b/libnd4j/include/ops/declarable/helpers/cuda/prefix.cu index d2832ec80..959b45865 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/prefix.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/prefix.cu @@ -148,8 +148,8 @@ static void prefixPerBlockCudaLauncher(const int blocksPerGrid, const int thread /////////////////////////////////////////////////////////////////// void prefix(sd::LaunchContext * context, scalar::Ops op, const NDArray* x, NDArray* z, const std::vector& dims, bool exclusive, bool reverse) { - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), dims); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(z->shapeInfo(), dims); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(x->shapeInfo(), dims); + auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(z->shapeInfo(), dims); const Nd4jLong numTads = packX.numberOfTads(); const Nd4jLong tadLen = x->lengthOf() / numTads; diff --git a/libnd4j/include/ops/declarable/helpers/cuda/qr.cu b/libnd4j/include/ops/declarable/helpers/cuda/qr.cu index 828867b4e..e499f21d0 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/qr.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/qr.cu @@ -60,9 +60,9 @@ namespace helpers { m({col, m.rows(), col, m.columns()}).assign(in({col, m.rows(), col, m.columns()})); // auto stream = context->getCudaStream(); -// 
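The mechanical change running through every hunk above is the same one: the helper singletons' getInstance() now returns a reference rather than a pointer, so every call site switches from `->` to `.`. A minimal sketch of that pattern follows; the `Holder` class and its method are illustrative stand-ins, not the actual libnd4j declarations:

    #include <vector>

    // Hypothetical stand-in for helpers like ConstantTadHelper: a Meyers
    // singleton whose getInstance() returns a reference to a function-local
    // static, which C++11 guarantees is initialized exactly once, thread-safely,
    // and which never needs an explicit delete.
    class Holder {
    public:
        static Holder& getInstance() {
            static Holder instance;   // constructed on first use
            return instance;
        }
        int tadForDimensions(const std::vector<int>& dims) const {
            return static_cast<int>(dims.size()); // placeholder body
        }
    private:
        Holder() = default;
        Holder(const Holder&) = delete;
        Holder& operator=(const Holder&) = delete;
    };

    int main() {
        // old style: Holder::getInstance()->tadForDimensions({0, 1});
        // new style, as in the hunks above:
        return Holder::getInstance().tadForDimensions({0, 1});
    }

Returning a reference removes the null-pointer case from every call site and sidesteps the ownership question a raw-pointer singleton raises.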
diff --git a/libnd4j/include/ops/declarable/helpers/cuda/reverse.cu b/libnd4j/include/ops/declarable/helpers/cuda/reverse.cu
index 2ed45356e..6ae1b22a8 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/reverse.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/reverse.cu
@@ -212,8 +212,8 @@ namespace helpers {
 //////////////////////////////////////////////////////////////////////////
 void reverse(sd::LaunchContext * context, const NDArray* input, NDArray* output, const std::vector<int>* intArgs) {
-    auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), *intArgs);
-    auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), *intArgs);
+    auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), *intArgs);
+    auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), *intArgs);
     NDArray::prepareSpecialUse({output}, {input});
diff --git a/libnd4j/include/ops/declarable/helpers/cuda/roll.cu b/libnd4j/include/ops/declarable/helpers/cuda/roll.cu
index 773f7279d..a5149c978 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/roll.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/roll.cu
@@ -253,7 +253,7 @@ namespace helpers {
             for (int i = 0; i < dims.size(); ++i)
                 dims[i] = axe + 1 + i;
-            auto packZ = ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dims);
+            auto packZ = ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dims);
             int numTads = packZ.numberOfTads();
             int sizeAt = input->sizeAt(axe);
diff --git a/libnd4j/include/ops/declarable/helpers/cuda/scatter.cu b/libnd4j/include/ops/declarable/helpers/cuda/scatter.cu
index 94b0e0080..cbe8895b2 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/scatter.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/scatter.cu
@@ -736,8 +736,8 @@ __global__ static void scatterLockCuda(const int opCode,
         std::vector<int> yTadDims(sizeOfUpdDims);
         std::iota(yTadDims.begin(), yTadDims.end(), 0);
-        auto packY = sd::ConstantTadHelper::getInstance()->tadForDimensions(updates.shapeInfo(), ShapeUtils::evalDimsToExclude(updates.rankOf(), yTadDims));
-        auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output.shapeInfo(), zTadDims);
+        auto packY = sd::ConstantTadHelper::getInstance().tadForDimensions(updates.shapeInfo(), ShapeUtils::evalDimsToExclude(updates.rankOf(), yTadDims));
+        auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output.shapeInfo(), zTadDims);
         const Nd4jLong zTadLen = shape::length(packZ.primaryShapeInfo());
         const Nd4jLong yTadLen = shape::length(packY.primaryShapeInfo());
@@ -963,21 +963,21 @@
         std::vector<int> dims = {0};
         auto inverted = ShapeUtils::evalDimsToExclude(output.rankOf(), dims);
-        auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(output.shapeInfo(), inverted);
-        auto packY = sd::ConstantTadHelper::getInstance()->tadForDimensions(updates.shapeInfo(), inverted);
+        auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(output.shapeInfo(), inverted);
+        auto packY = sd::ConstantTadHelper::getInstance().tadForDimensions(updates.shapeInfo(), inverted);
         auto psX = packX.specialShapeInfo();
         auto psY = packY.specialShapeInfo();
 
         PointersManager manager(context, "scatter");
 
         auto poX = packX.specialOffsets();
         auto poY = packY.specialOffsets();
 
         NDArray::prepareSpecialUse({&output}, {&updates, &indices});
 
         unsigned int tadLengthX = shape::length(packX.primaryShapeInfo());
         unsigned int tadLengthY = shape::length(packY.primaryShapeInfo());
         if (tadLengthX != tadLengthY)
             throw std::runtime_error("scatter: Lengths of TADs must be equal");
@@ -1016,9 +1016,9 @@ const int xLastDim = indices.sizeAt(-1);
             zTadDims[i] = zRank - 1 - j;
         }
-        auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(indices.shapeInfo(), {xRank - 1});
-        auto packY = sd::ConstantTadHelper::getInstance()->tadForDimensions(updates.shapeInfo(), yTadDims);
-        auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output.shapeInfo(), zTadDims);
+        auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(indices.shapeInfo(), {xRank - 1});
+        auto packY = sd::ConstantTadHelper::getInstance().tadForDimensions(updates.shapeInfo(), yTadDims);
+        auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output.shapeInfo(), zTadDims);
         const int threadsPerBlock = MAX_NUM_THREADS / 4;
         const int blocksPerGrid = packZ.numberOfTads();
@@ -1152,16 +1152,16 @@ const int xLastDim = indices.sizeAt(-1);
             // PointersManager::printDevContentOnDev(zShapeInfo, 8);
             // manager.printDevContentOnHost(indices.specialBuffer(), indices.lengthOf());
             // manager.printDevContentOnHost(indices.specialShapeInfo(), shape::shapeInfoLength(indices.rankOf()));
             // manager.printDevContentOnHost(updates.specialBuffer(), updates.lengthOf());
             // manager.printDevContentOnHost(updates.specialShapeInfo(), shape::shapeInfoLength(updates.rankOf()));
             // manager.printDevContentOnHost(output.specialShapeInfo(), shape::shapeInfoLength(output.rankOf()));
             // printf("!!!!!!!\n");
             // manager.printDevContentOnHost(packX.specialShapeInfo(), 2*shape::rank(packX.primaryShapeInfo()) + 4);
             // manager.printDevContentOnHost(packX.specialOffsets(), packX.numberOfTads());
             // manager.printDevContentOnHost(packY.specialShapeInfo(), 2*shape::rank(packY.primaryShapeInfo()) + 4);
             // manager.printDevContentOnHost(packY.specialOffsets(), packY.numberOfTads());
             // manager.printDevContentOnHost(packZ.specialShapeInfo(), 2*shape::rank(packZ.primaryShapeInfo()) + 4);
             // manager.printDevContentOnHost(packZ.specialOffsets(), packZ.numberOfTads());
             // printf("dddddddd\n");
             // shape::printShapeInfoLinear(packY.primaryShapeInfo());
\ No newline at end of file
diff --git a/libnd4j/include/ops/declarable/helpers/cuda/scatter_simple.cu b/libnd4j/include/ops/declarable/helpers/cuda/scatter_simple.cu
index a17464cbd..3b422a5c2 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/scatter_simple.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/scatter_simple.cu
@@ -51,7 +51,7 @@ namespace sd {
         void scatterSimple_(sd::LaunchContext * context, const int opId, NDArray& input, const NDArray& updates, const NDArray& indices, const std::vector<int>& dimensions) {
             auto dims = ShapeUtils::evalDimsToExclude(input.rankOf(), dimensions);
-            auto packX = ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), dims);
+            auto packX = ConstantTadHelper::getInstance().tadForDimensions(input.shapeInfo(), dims);
             auto xLength = shape::length(packX.primaryShapeInfo());
             auto iLength = indices.lengthOf();
diff --git a/libnd4j/include/ops/declarable/helpers/cuda/scatter_update.cu b/libnd4j/include/ops/declarable/helpers/cuda/scatter_update.cu
index 51f917a79..3a3bfef12 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/scatter_update.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/scatter_update.cu
@@ -114,8 +114,8 @@ namespace sd {
             for (int e = 2; e < 2 + numOfDims; e++)
                 tadDimensions[e-2] = (*intArgs)[e];
-            auto packX = ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), tadDimensions);
-            auto packY = ConstantTadHelper::getInstance()->tadForDimensions(updates.shapeInfo(), tadDimensions);
+            auto packX = ConstantTadHelper::getInstance().tadForDimensions(input.shapeInfo(), tadDimensions);
+            auto packY = ConstantTadHelper::getInstance().tadForDimensions(updates.shapeInfo(), tadDimensions);
             NDArray indices(const_cast(intArgs->data()) + numOfDims + 3, 'c', {numOfInd}, sd::DataType::INT32, context);
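Nearly every hunk in these files builds a TAD (tensor-along-dimension) pack and then reads its count, shape info, and per-sub-array offsets before launching a kernel. A toy sketch of that access pattern follows; the struct and helper below are simplified stand-ins mirroring the TadPack accessor names used above, not the real ConstantTadHelper implementation:

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Toy model of a TAD pack: splitting a row-major {batch, rows, cols}
    // array into `batch` contiguous sub-arrays over dims {-2, -1}.
    struct ToyTadPack {
        int64_t numTads;                  // how many sub-arrays the split yields
        std::vector<int64_t> offsets;     // element offset of each sub-array
    };

    static ToyTadPack tadForLastTwoDims(int64_t batch, int64_t rows, int64_t cols) {
        ToyTadPack pack{batch, {}};
        for (int64_t b = 0; b < batch; b++)
            pack.offsets.push_back(b * rows * cols);  // contiguous slices
        return pack;
    }

    int main() {
        auto pack = tadForLastTwoDims(4, 3, 3);       // e.g. 4 stacked 3x3 matrices
        for (int64_t t = 0; t < pack.numTads; t++)    // one kernel block per TAD
            std::printf("tad %lld starts at element %lld\n",
                        (long long)t, (long long)pack.offsets[t]);
        return 0;
    }

The CUDA launches above typically size the grid from numberOfTads() and hand the device-side shape info and offsets (specialShapeInfo()/specialOffsets()) to the kernel.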
diff --git a/libnd4j/include/ops/declarable/helpers/cuda/segment_max.cu b/libnd4j/include/ops/declarable/helpers/cuda/segment_max.cu
index 927b1bb2f..d623c8734 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/segment_max.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/segment_max.cu
@@ -185,8 +185,8 @@ namespace sd {
         }
         else {
             std::vector<int> dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});
-            auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions);
-            auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions);
+            auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimensions);
+            auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimensions);
             auto inputTads = packX.specialShapeInfo();
             auto inputTadOffsets = packX.specialOffsets();
             auto outputTads = packZ.specialShapeInfo();
@@ -226,8 +226,8 @@ namespace sd {
         }
         else {
             std::vector<int> dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});
-            auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions);
-            auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions);
+            auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimensions);
+            auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimensions);
             auto inputTads = packX.specialShapeInfo();
             auto inputTadOffsets = packX.specialOffsets();
             auto outputTads = packZ.specialShapeInfo();
@@ -349,10 +349,10 @@ namespace sd {
         }
         else {
             std::vector<int> dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});
-            auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions);
-            auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions);
-            auto packGradIn = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempRes.shapeInfo(), dimensions);
-            auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->shapeInfo(), dimensions);
+            auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimensions);
+            auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimensions);
+            auto packGradIn = sd::ConstantTadHelper::getInstance().tadForDimensions(tempRes.shapeInfo(), dimensions);
+            auto packGradOut = sd::ConstantTadHelper::getInstance().tadForDimensions(gradOut->shapeInfo(), dimensions);
             Nd4jLong const* inputTads = packX.specialShapeInfo();
             Nd4jLong const* inputTadOffsets = packX.specialOffsets();
             Nd4jLong const* outputTads = packZ.specialShapeInfo();
@@ -397,10 +397,10 @@ namespace sd {
         }
         else {
             std::vector<int> dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});
-            auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions);
-            auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions);
-            auto packGradIn = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempRes.shapeInfo(), dimensions);
-            auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->shapeInfo(), dimensions);
+            auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimensions);
+            auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimensions);
+            auto packGradIn = sd::ConstantTadHelper::getInstance().tadForDimensions(tempRes.shapeInfo(), dimensions);
+            auto packGradOut = sd::ConstantTadHelper::getInstance().tadForDimensions(gradOut->shapeInfo(), dimensions);
             Nd4jLong const* inputTads = packX.specialShapeInfo();
             Nd4jLong const* inputTadOffsets = packX.specialOffsets();
             Nd4jLong const* outputTads = packZ.specialShapeInfo();
diff --git a/libnd4j/include/ops/declarable/helpers/cuda/segment_mean.cu b/libnd4j/include/ops/declarable/helpers/cuda/segment_mean.cu
index c75293c1d..5ccecf37c 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/segment_mean.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/segment_mean.cu
@@ -174,8 +174,8 @@ namespace helpers {
         }
         else {
             std::vector<int> dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});
-            auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions);
-            auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions);
+            auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimensions);
+            auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimensions);
             auto inputTads = packX.specialShapeInfo();
             auto inputTadOffsets = packX.specialOffsets();
             auto outputTads = packZ.specialShapeInfo();
@@ -216,8 +216,8 @@ namespace helpers {
         else {
             output->assign(0);
             std::vector<int> dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});
-            auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions);
-            auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions);
+            auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimensions);
+            auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimensions);
             Nd4jLong const* inputTads = packX.specialShapeInfo();
             Nd4jLong const* inputTadOffsets = packX.specialOffsets();
             Nd4jLong const* outputTads = packZ.specialShapeInfo();
@@ -333,10 +333,10 @@ namespace helpers {
         }
         else {
             std::vector<int> dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});
-            auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions);
-            auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions);
-//            auto packGradIn = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempRes.shapeInfo(), dimensions);
-            auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->shapeInfo(), dimensions);
+            auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimensions);
+            auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimensions);
+//            auto packGradIn = sd::ConstantTadHelper::getInstance().tadForDimensions(tempRes.shapeInfo(), dimensions);
+            auto packGradOut = sd::ConstantTadHelper::getInstance().tadForDimensions(gradOut->shapeInfo(), dimensions);
             Nd4jLong const* inputTads = packX.specialShapeInfo();
             Nd4jLong const* inputTadOffsets = packX.specialOffsets();
             Nd4jLong const* outputTads = packZ.specialShapeInfo();
@@ -386,10 +386,10 @@ namespace helpers {
         }
         else {
             std::vector<int> dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});
-            auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions);
-            auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions);
-//            auto packGradIn = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempRes.shapeInfo(), dimensions);
-            auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->shapeInfo(), dimensions);
+            auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimensions);
+            auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimensions);
+//            auto packGradIn = sd::ConstantTadHelper::getInstance().tadForDimensions(tempRes.shapeInfo(), dimensions);
+            auto packGradOut = sd::ConstantTadHelper::getInstance().tadForDimensions(gradOut->shapeInfo(), dimensions);
             Nd4jLong const* inputTads = packX.specialShapeInfo();
             Nd4jLong const* inputTadOffsets = packX.specialOffsets();
             Nd4jLong const* outputTads = packZ.specialShapeInfo();
diff --git a/libnd4j/include/ops/declarable/helpers/cuda/segment_min.cu b/libnd4j/include/ops/declarable/helpers/cuda/segment_min.cu
index c6f2d4ed2..9e825c701 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/segment_min.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/segment_min.cu
@@ -177,8 +177,8 @@ namespace helpers {
         }
         else {
             std::vector<int> dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});
-            auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions);
-            auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions);
+            auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimensions);
+            auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimensions);
             auto inputTads = packX.specialShapeInfo();
             auto inputTadOffsets = packX.specialOffsets();
             auto outputTads = packZ.specialShapeInfo();
@@ -222,8 +222,8 @@ namespace helpers {
         else {
             output->assign(DataTypeUtils::max());
             std::vector<int> dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});
-            auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions);
-            auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions);
+            auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimensions);
+            auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimensions);
             auto inputTads = packX.specialShapeInfo();
             auto inputTadOffsets = packX.specialOffsets();
             auto outputTads = packZ.specialShapeInfo();
@@ -347,10 +347,10 @@ namespace helpers {
         }
         else {
             std::vector<int> dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});
-            auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions);
-            auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions);
-            auto packGradIn = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempRes.shapeInfo(), dimensions);
-            auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->shapeInfo(), dimensions);
+            auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimensions);
+            auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimensions);
+            auto packGradIn = sd::ConstantTadHelper::getInstance().tadForDimensions(tempRes.shapeInfo(), dimensions);
+            auto packGradOut = sd::ConstantTadHelper::getInstance().tadForDimensions(gradOut->shapeInfo(), dimensions);
             auto inputTads = packX.specialShapeInfo();
             auto inputTadOffsets = packX.specialOffsets();
             auto outputTads = packZ.specialShapeInfo();
@@ -395,10 +395,10 @@ namespace helpers {
         }
         else {
             std::vector<int> dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});
-            auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions);
-            auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions);
-            auto packGradIn = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempRes.shapeInfo(), dimensions);
-            auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->shapeInfo(), dimensions);
+            auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimensions);
+            auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimensions);
+            auto packGradIn = sd::ConstantTadHelper::getInstance().tadForDimensions(tempRes.shapeInfo(), dimensions);
+            auto packGradOut = sd::ConstantTadHelper::getInstance().tadForDimensions(gradOut->shapeInfo(), dimensions);
             auto inputTads = packX.specialShapeInfo();
             auto inputTadOffsets = packX.specialOffsets();
             auto outputTads = packZ.specialShapeInfo();
diff --git a/libnd4j/include/ops/declarable/helpers/cuda/segment_prod.cu b/libnd4j/include/ops/declarable/helpers/cuda/segment_prod.cu
index 026ded3e7..44e077300 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/segment_prod.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/segment_prod.cu
@@ -138,8 +138,8 @@ namespace helpers {
         }
         else {
             std::vector<int> dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});
-            auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions);
-            auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions);
+            auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimensions);
+            auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimensions);
             auto inputTads = packX.specialShapeInfo();
             auto inputTadOffsets = packX.specialOffsets();
             auto outputTads = packZ.specialShapeInfo();
@@ -181,8 +181,8 @@ namespace helpers {
         }
         else {
             std::vector<int> dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});
-            auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions);
-            auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions);
+            auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimensions);
+            auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimensions);
             auto inputTads = packX.specialShapeInfo();
             auto inputTadOffsets = packX.specialOffsets();
             auto outputTads = packZ.specialShapeInfo();
@@ -296,10 +296,10 @@ namespace helpers {
         }
         else {
             std::vector<int> dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});
-            auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions);
-            auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions);
-            auto packGradIn = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempRes.shapeInfo(), dimensions);
-            auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->shapeInfo(), dimensions);
+            auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimensions);
+            auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimensions);
+            auto packGradIn = sd::ConstantTadHelper::getInstance().tadForDimensions(tempRes.shapeInfo(), dimensions);
+            auto packGradOut = sd::ConstantTadHelper::getInstance().tadForDimensions(gradOut->shapeInfo(), dimensions);
             auto inputTads = packX.specialShapeInfo();
             auto inputTadOffsets = packX.specialOffsets();
             auto outputTads = packZ.specialShapeInfo();
@@ -346,10 +346,10 @@ namespace helpers {
         }
         else {
             std::vector<int> dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});
-            auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions);
-            auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions);
-            auto packGradIn = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempRes.shapeInfo(), dimensions);
-            auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->shapeInfo(), dimensions);
+            auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimensions);
+            auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimensions);
+            auto packGradIn = sd::ConstantTadHelper::getInstance().tadForDimensions(tempRes.shapeInfo(), dimensions);
+            auto packGradOut = sd::ConstantTadHelper::getInstance().tadForDimensions(gradOut->shapeInfo(), dimensions);
             auto inputTads = packX.specialShapeInfo();
             auto inputTadOffsets = packX.specialOffsets();
             auto outputTads = packZ.specialShapeInfo();
diff --git a/libnd4j/include/ops/declarable/helpers/cuda/segment_sqrtn.cu b/libnd4j/include/ops/declarable/helpers/cuda/segment_sqrtn.cu
index b72abeffc..20f232332 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/segment_sqrtn.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/segment_sqrtn.cu
@@ -108,8 +108,8 @@ namespace helpers {
         else {
             output->nullify();
             std::vector<int> dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});
-            auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions);
-            auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions);
+            auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimensions);
+            auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimensions);
             auto inputTads = packX.specialShapeInfo();
             auto inputTadOffsets = packX.specialOffsets();
             auto outputTads = packZ.specialShapeInfo();
@@ -226,10 +226,10 @@ namespace helpers {
         }
         else {
             std::vector<int> dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});
-            auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions);
-            auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions);
-//            auto packGradIn = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempRes.shapeInfo(), dimensions);
-            auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->shapeInfo(), dimensions);
+            auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimensions);
+            auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimensions);
+//            auto packGradIn = sd::ConstantTadHelper::getInstance().tadForDimensions(tempRes.shapeInfo(), dimensions);
+            auto packGradOut = sd::ConstantTadHelper::getInstance().tadForDimensions(gradOut->shapeInfo(), dimensions);
             auto inputTads = packX.specialShapeInfo();
             auto inputTadOffsets = packX.specialOffsets();
             auto outputTads = packZ.specialShapeInfo();
diff --git a/libnd4j/include/ops/declarable/helpers/cuda/segment_sum.cu b/libnd4j/include/ops/declarable/helpers/cuda/segment_sum.cu
index 7a762a526..a2050d695 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/segment_sum.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/segment_sum.cu
@@ -186,8 +186,8 @@ namespace helpers {
         }
         else {
             std::vector<int> dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});
-            auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions);
-            auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions);
+            auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimensions);
+            auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimensions);
             auto inputTads = packX.specialShapeInfo();
             auto inputTadOffsets = packX.specialOffsets();
             auto outputTads = packZ.specialShapeInfo();
@@ -227,8 +227,8 @@ namespace helpers {
         else {
             output->assign(0);
             std::vector<int> dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});
-            auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions);
-            auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions);
+            auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimensions);
+            auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimensions);
             auto inputTads = packX.specialShapeInfo();
             auto inputTadOffsets = packX.specialOffsets();
             auto outputTads = packZ.specialShapeInfo();
@@ -338,9 +338,9 @@ namespace helpers {
         }
         else {
             std::vector<int> dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});
-            auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions);
-            auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions);
-            auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->shapeInfo(), dimensions);
+            auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimensions);
+            auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimensions);
+            auto packGradOut = sd::ConstantTadHelper::getInstance().tadForDimensions(gradOut->shapeInfo(), dimensions);
             auto inputTads = packX.specialShapeInfo();
             auto inputTadOffsets = packX.specialOffsets();
             auto outputTads = packZ.specialShapeInfo();
@@ -379,9 +379,9 @@ namespace helpers {
         }
         else {
             std::vector<int> dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});
-            auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions);
-            auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions);
-            auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->shapeInfo(), dimensions);
+            auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), dimensions);
+            auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), dimensions);
+            auto packGradOut = sd::ConstantTadHelper::getInstance().tadForDimensions(gradOut->shapeInfo(), dimensions);
             auto inputTads = packX.specialShapeInfo();
             auto inputTadOffsets = packX.specialOffsets();
             auto outputTads = packZ.specialShapeInfo();
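All of the segment_* hunks above compute `dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0})`, i.e. the axes the TADs should cover are everything except the segment axis 0. A toy reimplementation of what that helper computes follows, for illustration only; the real helper lives in libnd4j's shape utilities:

    #include <algorithm>
    #include <cassert>
    #include <vector>

    // Toy version of ShapeUtils::evalDimsToExclude: given a rank and the axes
    // a reduction operates on, return the complementary axes.
    static std::vector<int> evalDimsToExcludeSketch(int rank, std::vector<int> axes) {
        std::sort(axes.begin(), axes.end());
        std::vector<int> out;
        for (int d = 0; d < rank; d++)
            if (!std::binary_search(axes.begin(), axes.end(), d))
                out.push_back(d);
        return out;
    }

    int main() {
        // For a rank-3 input segmented along axis 0, TADs cover axes {1, 2}:
        auto dims = evalDimsToExcludeSketch(3, {0});
        assert(dims == (std::vector<int>{1, 2}));
        return 0;
    }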
diff --git a/libnd4j/include/ops/declarable/helpers/cuda/solve.cu b/libnd4j/include/ops/declarable/helpers/cuda/solve.cu
index cf8308bbe..43ef78c3e 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/solve.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/solve.cu
@@ -73,12 +73,12 @@ namespace sd {
             helpers::lu(context, leftInput, &leftOutput, &permutations);
             auto leftLower = leftOutput.dup();
             auto rightOutput = rightInput->ulike();
-            auto leftLowerTad = ConstantTadHelper::getInstance()->tadForDimensions(leftLower.shapeInfo(), {-2, -1});
+            auto leftLowerTad = ConstantTadHelper::getInstance().tadForDimensions(leftLower.shapeInfo(), {-2, -1});
             auto stream = context->getCudaStream();
             oneOnDiagonalKernel<<<128, 256, 256, *stream>>>(leftLower.dataBuffer()->specialAsT(), leftLower.specialShapeInfo(), leftLowerTad.specialShapeInfo(), leftLowerTad.specialOffsets(), leftLowerTad.numberOfTads(), leftLower.sizeAt(-1));
             auto P = leftOutput.ulike(); P.nullify();
-            auto PTad = ConstantTadHelper::getInstance()->tadForDimensions(P.shapeInfo(), {-2, -1});
-            auto permutationsTad = ConstantTadHelper::getInstance()->tadForDimensions(permutations.shapeInfo(), {-1});
+            auto PTad = ConstantTadHelper::getInstance().tadForDimensions(P.shapeInfo(), {-2, -1});
+            auto permutationsTad = ConstantTadHelper::getInstance().tadForDimensions(permutations.shapeInfo(), {-1});
             restorePermutationsKernel<<<128, 256, 256, *stream>>>(P.dataBuffer()->specialAsT(), P.specialShapeInfo(), permutations.dataBuffer()->specialAsT(),
                     PTad.specialShapeInfo(), PTad.specialOffsets(), permutationsTad.specialShapeInfo(), permutationsTad.specialOffsets(), permutationsTad.numberOfTads(), permutations.sizeAt(-1));
             P.tickWriteDevice();
@@ -120,8 +120,8 @@ namespace sd {
         template
         static void adjointMatrix_(sd::LaunchContext* context, NDArray const* input, NDArray* output) {
             NDArray::prepareSpecialUse({output}, {input});
-            auto inputTads = ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), {-2, -1});
-            auto outputTads = ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {-2, -1});
+            auto inputTads = ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), {-2, -1});
+            auto outputTads = ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), {-2, -1});
             auto stream = context->getCudaStream();
             auto outputBuf = reinterpret_cast(output->specialBuffer());
             auto rows = input->sizeAt(-2);
diff --git a/libnd4j/include/ops/declarable/helpers/cuda/stack.cu b/libnd4j/include/ops/declarable/helpers/cuda/stack.cu
index f0983b76c..2bb09c3b5 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/stack.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/stack.cu
@@ -91,7 +91,7 @@ static void stack_(sd::LaunchContext* context, const std::vector
     }
     else {
-        auto zTadPack = ConstantTadHelper::getInstance()->tadForDimensions(output.shapeInfo(), ShapeUtils::evalDimsToExclude(output.rankOf(), {dim}));
+        auto zTadPack = ConstantTadHelper::getInstance().tadForDimensions(output.shapeInfo(), ShapeUtils::evalDimsToExclude(output.rankOf(), {dim}));
         auto zTadShapeInfo = zTadPack.primaryShapeInfo();
         for (uint i = 0; i < numOfSubArrs; ++i) {
@@ -179,7 +179,7 @@ static void unstack_(sd::LaunchContext* context, const NDArray& input, const std
     }
     else {
-        auto xTadPack = ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), ShapeUtils::evalDimsToExclude(input.rankOf(), {dim}));
+        auto xTadPack = ConstantTadHelper::getInstance().tadForDimensions(input.shapeInfo(), ShapeUtils::evalDimsToExclude(input.rankOf(), {dim}));
         auto xTadShapeInfo = xTadPack.primaryShapeInfo();
         for (uint i = 0; i < numOfSubArrs; ++i) {
@@ -272,7 +272,7 @@ BUILD_SINGLE_TEMPLATE(template void unstack_, (sd::LaunchContext* context, const
 //            outArrs[i]->syncToDevice();
 //        input.syncToDevice();
 //        BUILD_SINGLE_SELECTOR(input.dataType(), unstackCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), input.specialBuffer(), input.specialShapeInfo(), dOutBuffers, outArrs[0]->specialShapeInfo(), axis), LIBND4J_TYPES);
 //        manager.synchronize();
@@ -350,7 +350,7 @@ BUILD_SINGLE_TEMPLATE(template void unstack_, (sd::LaunchContext* context, const
 //            inArrs[i]->syncToDevice();
 //        output.syncToDevice();
 //        BUILD_SINGLE_SELECTOR(output.dataType(), stackCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), dInBuffers, inArrs[0]->specialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), axis), LIBND4J_TYPES);
 //        manager.synchronize();
diff --git a/libnd4j/include/ops/declarable/helpers/cuda/top_k.cu b/libnd4j/include/ops/declarable/helpers/cuda/top_k.cu
index ce19d41cc..61aefa255 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/top_k.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/top_k.cu
@@ -91,7 +91,7 @@ int inTopKFunctor(sd::LaunchContext * context, const NDArray* predictions, const
     PointersManager manager(context, "in_top_k");
-    const auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(predictions->shapeInfo(), {1});
+    const auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(predictions->shapeInfo(), {1});
     const int threadsPerBlock = MAX_NUM_THREADS;
     const int blocksPerGrid = static_cast(packX.numberOfTads());
@@ -243,9 +243,9 @@ int inTopKFunctor(sd::LaunchContext * context, const NDArray* predictions, const
     template
     static int topKFunctor_(sd::LaunchContext * context, const NDArray* input, NDArray* values, NDArray* indices, const uint k, bool needSort) {
-        auto packX = ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), {input->rankOf() - 1});
-        auto packI = ConstantTadHelper::getInstance()->tadForDimensions(indices->shapeInfo(), {input->rankOf() - 1});
-        auto packZ = ConstantTadHelper::getInstance()->tadForDimensions(values->shapeInfo(), {input->rankOf() - 1});
+        auto packX = ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), {input->rankOf() - 1});
+        auto packI = ConstantTadHelper::getInstance().tadForDimensions(indices->shapeInfo(), {input->rankOf() - 1});
+        auto packZ = ConstantTadHelper::getInstance().tadForDimensions(values->shapeInfo(), {input->rankOf() - 1});
         auto tadLength = shape::length(packX.primaryShapeInfo());
diff --git a/libnd4j/include/ops/declarable/helpers/cuda/transforms.cu b/libnd4j/include/ops/declarable/helpers/cuda/transforms.cu
index f14b12e35..8d7f700dd 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/transforms.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/transforms.cu
@@ -321,7 +321,7 @@ void tileBP(sd::LaunchContext * context, const NDArray& gradO /*input*/, NDArray
 template
 static __global__ void fillShuffleKernel(T* input, Nd4jLong const* inputShape, T* output, Nd4jLong const* outputShape, Nd4jLong firstDim, int* indices, sd::graph::RandomGenerator* rng) {
-//        PRAGMA_OMP_PARALLEL_FOR_IF((firstDim-1) > Environment::getInstance()->tadThreshold())
+//        PRAGMA_OMP_PARALLEL_FOR_IF((firstDim-1) > Environment::getInstance().tadThreshold())
     auto tid = blockIdx.x * blockDim.x;
     auto step = blockDim.x * gridDim.x;
diff --git a/libnd4j/include/ops/declarable/helpers/cuda/triangular_solve.cu b/libnd4j/include/ops/declarable/helpers/cuda/triangular_solve.cu
index 6302262be..e77bb4e19 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/triangular_solve.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/triangular_solve.cu
@@ -141,9 +141,9 @@ namespace sd {
         static int triangularSolveFunctor_(sd::LaunchContext * context, NDArray* leftInput, NDArray* rightInput, bool lower, bool unitsOnDiag, NDArray* output) {
             NDArray::prepareSpecialUse({output}, {leftInput, rightInput});
-            auto leftTads = ConstantTadHelper::getInstance()->tadForDimensions(leftInput->shapeInfo(), {-2, -1});
-            auto rightTads = ConstantTadHelper::getInstance()->tadForDimensions(rightInput->shapeInfo(), {-2, -1});
-            auto outputTads = ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {-2, -1});
+            auto leftTads = ConstantTadHelper::getInstance().tadForDimensions(leftInput->shapeInfo(), {-2, -1});
+            auto rightTads = ConstantTadHelper::getInstance().tadForDimensions(rightInput->shapeInfo(), {-2, -1});
+            auto outputTads = ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), {-2, -1});
             auto stream = context->getCudaStream();
             T const* leftBuf = reinterpret_cast(leftInput->specialBuffer());
@@ -243,8 +243,8 @@ namespace sd {
         static void adjointTriangularMatrix_(sd::LaunchContext* context, NDArray const* input, bool const lower, NDArray* output) {
-            auto inputTads = ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), {-2, -1});
-            auto outputTads = ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {-2, -1});
+            auto inputTads = ConstantTadHelper::getInstance().tadForDimensions(input->shapeInfo(), {-2, -1});
+            auto outputTads = ConstantTadHelper::getInstance().tadForDimensions(output->shapeInfo(), {-2, -1});
             auto stream = context->getCudaStream();
             auto inputBuf = reinterpret_cast(input->specialBuffer());
             auto outputBuf = reinterpret_cast(output->specialBuffer());
diff --git a/libnd4j/include/ops/declarable/impl/BooleanOp.cpp b/libnd4j/include/ops/declarable/impl/BooleanOp.cpp
index 00079f9ae..07960497a 100644
--- a/libnd4j/include/ops/declarable/impl/BooleanOp.cpp
+++ b/libnd4j/include/ops/declarable/impl/BooleanOp.cpp
@@ -33,7 +33,7 @@ namespace sd {
      * Output shape of any BooleanOp is ALWAYS scalar
      */
    ShapeList *BooleanOp::calculateOutputShape(ShapeList *inputShape, sd::graph::Context &block) {
-        return SHAPELIST(ConstantShapeHelper::getInstance()->scalarShapeInfo(DataType::BOOL));
+        return SHAPELIST(ConstantShapeHelper::getInstance().scalarShapeInfo(DataType::BOOL));
     }
 
     bool BooleanOp::verify(sd::graph::Context &block) {
diff --git a/libnd4j/include/ops/declarable/impl/BroadcastableBoolOp.cpp b/libnd4j/include/ops/declarable/impl/BroadcastableBoolOp.cpp
index 8f0a6dcb8..634236d35 100644
--- a/libnd4j/include/ops/declarable/impl/BroadcastableBoolOp.cpp
+++ b/libnd4j/include/ops/declarable/impl/BroadcastableBoolOp.cpp
@@ -38,32 +38,32 @@ namespace sd {
             if(shape::isEmpty(x) || shape::isEmpty(y)) {
                 // this is edge case, [3, 4] + [] = []
                 if ((shape::isEmpty(x) && shape::rank(x) == 0) || (shape::isEmpty(y) && shape::rank(y) == 0)) {
-                    shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor::emptyDescriptor(dtype)));
+                    shapeList->push_back(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor::emptyDescriptor(dtype)));
                     return shapeList;
                 }
 
                 const Nd4jLong *newshape = nullptr;
                 ShapeUtils::evalBroadcastShapeInfo(x, y, true, newshape, block.workspace());
-                shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(newshape, dtype)));
+                shapeList->push_back(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(newshape, dtype)));
             } else if (shape::isScalar(x) && shape::isScalar(y)) {
                 if (shape::rank(x) >= shape::rank(y)) {
-                    shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(x, dtype)));
+                    shapeList->push_back(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(x, dtype)));
                 } else {
-                    shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(y, dtype)));
+                    shapeList->push_back(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(y, dtype)));
                 }
             } else if (shape::equalsSoft(x, y)) {
-                shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(x, dtype)));
+                shapeList->push_back(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(x, dtype)));
             } else if (shape::isScalar(x) && !shape::isScalar(y)) {
-                shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(y, dtype)));
+                shapeList->push_back(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(y, dtype)));
             } else if (!shape::isScalar(x) && shape::isScalar(y)) {
-                shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(x, dtype)));
+                shapeList->push_back(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(x, dtype)));
             } else if (ShapeUtils::areShapesBroadcastable(x, y)) {
                 const Nd4jLong *newshape = nullptr;
                 ShapeUtils::evalBroadcastShapeInfo(x, y, true, newshape, block.workspace());
-                shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(newshape, dtype)));
+                shapeList->push_back(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(newshape, dtype)));
             } else {
                 // in this case we'll throw exception later
-                shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(x, dtype)));
+                shapeList->push_back(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(x, dtype)));
             }
 
             return shapeList;
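The two broadcastable shape functions above walk the same decision ladder: empty operands, scalar vs scalar, softly equal shapes, scalar vs array, broadcastable shapes, and a fallback. A condensed sketch of the core broadcast step on plain shape vectors follows; the helper and types are simplified stand-ins, since the real code operates on shapeInfo buffers and carries a data type through ShapeDescriptor:

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    using Shape = std::vector<long long>;

    // Toy broadcast of two shapes, aligned from the trailing axis; mirrors the
    // "areShapesBroadcastable -> evalBroadcastShapeInfo" branch above.
    static bool tryBroadcast(const Shape& x, const Shape& y, Shape& out) {
        const size_t rank = std::max(x.size(), y.size());
        out.assign(rank, 1);
        for (size_t i = 0; i < rank; i++) {
            long long xv = i < x.size() ? x[x.size() - 1 - i] : 1;
            long long yv = i < y.size() ? y[y.size() - 1 - i] : 1;
            if (xv != yv && xv != 1 && yv != 1)
                return false;             // incompatible pair of extents
            out[rank - 1 - i] = std::max(xv, yv);
        }
        return true;
    }

    int main() {
        Shape z;
        if (tryBroadcast({3, 1, 4}, {5, 4}, z))   // -> {3, 5, 4}
            std::printf("broadcast rank %zu\n", z.size());
        return 0;
    }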
diff --git a/libnd4j/include/ops/declarable/impl/BroadcastableOp.cpp b/libnd4j/include/ops/declarable/impl/BroadcastableOp.cpp
index 7f7a14861..4611d49cb 100644
--- a/libnd4j/include/ops/declarable/impl/BroadcastableOp.cpp
+++ b/libnd4j/include/ops/declarable/impl/BroadcastableOp.cpp
@@ -36,7 +36,7 @@ namespace sd {
             auto outputs = _descriptor->getOutputTypesForOutput(0);
             sd::DataType dtype = block.dataType(0);
             if (block.dataType(0) != sd::DataType::BOOL && !(outputs.size() == 1 && outputs[0] == sd::DataType::BOOL)) {
-                if (Environment::getInstance()->isExperimentalBuild()) {
+                if (Environment::getInstance().isExperimentalBuild()) {
                     if (shape::length(y) > shape::length(x)) {
                         dtype = DataTypeUtils::pickPairwiseResultType(y, x);
                     } else {
@@ -51,33 +51,33 @@ namespace sd {
             if(shape::isEmpty(x) || shape::isEmpty(y)) {
                 // this is edge case, [3, 4] + [] = []
                 if ((shape::isEmpty(x) && shape::rank(x) == 0) || (shape::isEmpty(y) && shape::rank(y) == 0)) {
-                    shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor::emptyDescriptor(dtype)));
+                    shapeList->push_back(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor::emptyDescriptor(dtype)));
                     return shapeList;
                 }
 
                 const Nd4jLong *newshape = nullptr;
                 ShapeUtils::evalBroadcastShapeInfo(x, y, true, newshape, block.workspace());
-                shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(newshape, dtype)));
+                shapeList->push_back(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(newshape, dtype)));
             } else if (shape::isScalar(x) && shape::isScalar(y)) {
                 if (shape::rank(x) >= shape::rank(y)) {
-                    shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(x, dtype)));
+                    shapeList->push_back(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(x, dtype)));
                 } else {
-                    shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(y, dtype)));
+                    shapeList->push_back(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(y, dtype)));
                 }
             } else if (shape::equalsSoft(x, y)) {
-                shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(x, dtype)));
+                shapeList->push_back(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(x, dtype)));
             } else if (shape::isScalar(x) && !shape::isScalar(y)) {
-                shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(y, dtype)));
+                shapeList->push_back(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(y, dtype)));
             } else if (!shape::isScalar(x) && shape::isScalar(y)) {
-                shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(x, dtype)));
+                shapeList->push_back(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(x, dtype)));
             } else if (ShapeUtils::areShapesBroadcastable(x, y)) {
                 const Nd4jLong *newshape = nullptr;
                 ShapeUtils::evalBroadcastShapeInfo(x, y, true, newshape, block.workspace());
-                shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(newshape, dtype)));
+                shapeList->push_back(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(newshape, dtype)));
             } else {
                 // in this case we'll throw exception later
-                shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(x, dtype)));
+                shapeList->push_back(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(x, dtype)));
             }
 
             return shapeList;
diff --git a/libnd4j/include/ops/declarable/impl/DeclarableListOp.cpp b/libnd4j/include/ops/declarable/impl/DeclarableListOp.cpp
index 13aa763f8..d70355038 100644
--- a/libnd4j/include/ops/declarable/impl/DeclarableListOp.cpp
+++ b/libnd4j/include/ops/declarable/impl/DeclarableListOp.cpp
@@ -47,7 +47,7 @@ namespace sd {
     ShapeList* DeclarableListOp::calculateOutputShape(ShapeList* inputShape, sd::graph::Context& block) {
         // TODO: ensure this method isn't ever called
-        auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(block.dataType(), 'c', {1, 1});
+        auto newShape = ConstantShapeHelper::getInstance().createShapeInfo(block.dataType(), 'c', {1, 1});
         return SHAPELIST(newShape);
     }
diff --git a/libnd4j/include/ops/declarable/impl/DeclarableOp.cpp b/libnd4j/include/ops/declarable/impl/DeclarableOp.cpp
index 713a02666..cd8d0bdd8 100644
--- a/libnd4j/include/ops/declarable/impl/DeclarableOp.cpp
+++ b/libnd4j/include/ops/declarable/impl/DeclarableOp.cpp
@@ -158,7 +158,7 @@ namespace sd {
         auto fp = ctx.isFastPath();
 
-        if (Environment::getInstance()->isProfiling()) {
+        if (Environment::getInstance().isProfiling()) {
             if (ctx.getVariableSpace() != nullptr && ctx.getVariableSpace()->flowPath() != nullptr) {
                 prof = ctx.getVariableSpace()->flowPath()->profile();
                 node = prof->nodeById(ctx.nodeId());
             }
@@ -166,7 +166,7 @@ namespace sd {
         if (ctx.isInplace()) {
-            if (Environment::getInstance()->isProfiling() && node != nullptr) {
+            if (Environment::getInstance().isProfiling() && node != nullptr) {
                 if (fp) {
                     //
                 } else {
@@ -227,7 +227,7 @@ namespace sd {
         ShapeList inSha;
         int results = 0;
 
-        if (Environment::getInstance()->isProfiling() && node != nullptr)
+        if (Environment::getInstance().isProfiling() && node != nullptr)
             inputStart = std::chrono::system_clock::now();
 
         int cntIn = 0;
@@ -263,7 +263,7 @@ namespace sd {
         }
 
         // optionally saving input time
-        if (Environment::getInstance()->isProfiling() && node != nullptr) {
+        if (Environment::getInstance().isProfiling() && node != nullptr) {
             inputEnd = std::chrono::system_clock::now();
             auto inputTime = std::chrono::duration_cast(inputEnd - inputStart).count();
             node->setInputTime(inputTime);
@@ -279,7 +279,7 @@ namespace sd {
         results = outSha->size();
 
         // optionally saving shapeTime
-        if (Environment::getInstance()->isProfiling() && node != nullptr) {
+        if (Environment::getInstance().isProfiling() && node != nullptr) {
             shapeEnd = std::chrono::system_clock::now();
             auto prepTime = std::chrono::duration_cast(shapeEnd - shapeStart).count();
             node->setShapeFunctionTime(prepTime);
@@ -299,7 +299,7 @@ namespace sd {
             std::pair pair(ctx.nodeId(), cnt++);
 
             if (!ctx.isValueAvailable(pair.second)) {
-                if (Environment::getInstance()->isDebugAndVerbose())
+                if (Environment::getInstance().isDebugAndVerbose())
                     shape::printShapeInfoLinear("Going to create variable with shape", out);
 
                 // we're creating non-initialized array here
@@ -367,7 +367,7 @@ namespace sd {
         delete outSha;
 
         // saving arrayTime
-        if (Environment::getInstance()->isProfiling() && node != nullptr) {
+        if (Environment::getInstance().isProfiling() && node != nullptr) {
             arrayEnd = std::chrono::system_clock::now();
             auto arrayTime = std::chrono::duration_cast(arrayEnd - arrayStart).count();
             node->setArrayTime(arrayTime);
@@ -599,7 +599,7 @@ namespace sd {
         Nd4jLong prepTime, outerTime;
 
         Nd4jLong memoryBefore = block->workspace() == nullptr ? 0L : block->workspace()->getSpilledSize() + block->workspace()->getUsedSize();
-        if (Environment::getInstance()->isProfiling())
+        if (Environment::getInstance().isProfiling())
             timeEnter = std::chrono::system_clock::now();
 
         // basic validation: ensure inputs are set
@@ -615,7 +615,7 @@ namespace sd {
         // this method will allocate output NDArrays for this op
         auto numOutputs = this->prepareOutputs(*block);
 
-        if (Environment::getInstance()->isProfiling()) {
+        if (Environment::getInstance().isProfiling()) {
             timeStart = std::chrono::system_clock::now();
             prepTime = std::chrono::duration_cast(timeStart - timeEnter).count();
         }
@@ -625,10 +625,10 @@ namespace sd {
         bool hasHelper = false;
 
         // platform helpers use might be forbidden for various reasons, so we'll check it out first
-        if (block->helpersAllowed() && sd::Environment::getInstance()->helpersAllowed()) {
+        if (block->helpersAllowed() && sd::Environment::getInstance().helpersAllowed()) {
             // if we have platform-specific helper for this op - invoke it
-            if (OpRegistrator::getInstance()->hasHelper(this->getOpHash(), block->engine())) {
-                auto helper = OpRegistrator::getInstance()->getPlatformHelper(this->getOpHash(), block->engine());
+            if (OpRegistrator::getInstance().hasHelper(this->getOpHash(), block->engine())) {
+                auto helper = OpRegistrator::getInstance().getPlatformHelper(this->getOpHash(), block->engine());
                 if (helper->isUsable(*block)) {
                     status = helper->invokeHelper(*block);
                     hasHelper = true;
@@ -641,13 +641,13 @@ namespace sd {
             status = this->validateAndExecute(*block);
 
         // optionally saving execution time
-        if (Environment::getInstance()->isProfiling()) {
+        if (Environment::getInstance().isProfiling()) {
            timeEnd = std::chrono::system_clock::now();
            outerTime = std::chrono::duration_cast(timeEnd - timeStart).count();
            block->setInnerTime(outerTime);
        }
 
-        if (Environment::getInstance()->isProfiling() && block->getVariableSpace() != nullptr) {
+        if (Environment::getInstance().isProfiling() && block->getVariableSpace() != nullptr) {
             auto fp = block->getVariableSpace()->flowPath();
             if (fp != nullptr) {
                 auto p = fp->profile();
@@ -663,7 +663,7 @@ namespace sd {
 
         // now we print out all outputs for this node
-        if (sd::Environment::getInstance()->isDebugAndVerbose()) {
+        if (sd::Environment::getInstance().isDebugAndVerbose()) {
             auto vs = block->getVariableSpace();
 
             for (int e = 0; e < numOutputs; e++) {
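The DeclarableOp hunks above bracket each execution phase (input prep, shape function, array allocation, op execution) with system_clock timestamps whenever profiling is enabled. A condensed sketch of that timing pattern follows; the flag and the template helper are stand-ins for Environment's profiling switch and the NodeProfile setters:

    #include <chrono>
    #include <cstdio>

    // Stand-in for Environment::getInstance().isProfiling().
    static bool profilingEnabled = true;

    // Take a timestamp before and after a phase and return the delta in
    // nanoseconds, as the setInputTime/setShapeFunctionTime calls above do.
    template <typename F>
    static long long timePhaseNs(F&& phase) {
        if (!profilingEnabled) { phase(); return 0; }
        auto start = std::chrono::system_clock::now();
        phase();
        auto end = std::chrono::system_clock::now();
        return std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count();
    }

    int main() {
        long long shapeTime = timePhaseNs([] { /* calculateOutputShape() would run here */ });
        std::printf("shape function took %lld ns\n", shapeTime);
        return 0;
    }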
diff --git a/libnd4j/include/ops/declarable/impl/DeclarableReductionOp.cpp b/libnd4j/include/ops/declarable/impl/DeclarableReductionOp.cpp
index 4f6646694..2dd281991 100644
--- a/libnd4j/include/ops/declarable/impl/DeclarableReductionOp.cpp
+++ b/libnd4j/include/ops/declarable/impl/DeclarableReductionOp.cpp
@@ -52,7 +52,7 @@ namespace sd {
         // special case - output is scalar
         if (dims.size() == 0 || (dims.size() == 1 && dims.at(0) == sd::DataTypeUtils::max())) {
-            auto newShape = ConstantShapeHelper::getInstance()->scalarShapeInfo(block.dataType());
+            auto newShape = ConstantShapeHelper::getInstance().scalarShapeInfo(block.dataType());
             return SHAPELIST(newShape);
         }
diff --git a/libnd4j/include/ops/declarable/impl/LegacyBroadcastBoolOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyBroadcastBoolOp.cpp
index f7cb3de92..a171ff339 100644
--- a/libnd4j/include/ops/declarable/impl/LegacyBroadcastBoolOp.cpp
+++ b/libnd4j/include/ops/declarable/impl/LegacyBroadcastBoolOp.cpp
@@ -41,11 +41,11 @@ namespace sd {
             int opNum = block.opNum() < 0 ? this->_opNum : block.opNum();
 
-            auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), dims);
+            auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(x->shapeInfo(), dims);
 
             PointersManager manager(block.launchContext(), "LegacyBroadcastBoolOp");
-            auto pTadShape = Environment::getInstance()->isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo();  //(Nd4jLong *) manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo));
-            auto pTadOffsets = Environment::getInstance()->isCPU() ? packX.primaryOffsets() : packX.specialOffsets();  //(Nd4jLong *) manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong));
+            auto pTadShape = Environment::getInstance().isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo();  //(Nd4jLong *) manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo));
+            auto pTadOffsets = Environment::getInstance().isCPU() ? packX.primaryOffsets() : packX.specialOffsets();  //(Nd4jLong *) manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong));
 
             REQUIRE_TRUE(shape::length(packX.primaryShapeInfo()) == y->lengthOf(), 0, "Length of broadcast TAD should be equal to length of Y operand, but got [%i] vs [%i]", (int) shape::length(packX.primaryShapeInfo()), (int) y->lengthOf());
@@ -57,10 +57,10 @@ namespace sd {
             else {
                 // this is rare, but possible use case - X and Z might have different shapes/strides/orders. In this case we prepare and pass separate TAD info
-                auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(z->shapeInfo(), dims);
+                auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(z->shapeInfo(), dims);
 
-                auto zTadShape = Environment::getInstance()->isCPU() ? packZ.primaryShapeInfo() : packZ.specialShapeInfo();  //(Nd4jLong *) manager.replicatePointer(tadZ.tadOnlyShapeInfo, shape::shapeInfoByteLength(tadZ.tadOnlyShapeInfo));
-                auto zTadOffsets = Environment::getInstance()->isCPU() ? packZ.primaryOffsets() : packZ.specialOffsets();  //(Nd4jLong *) manager.replicatePointer(tadZ.tadOffsets, tadZ.numTads * sizeof(Nd4jLong));
+                auto zTadShape = Environment::getInstance().isCPU() ? packZ.primaryShapeInfo() : packZ.specialShapeInfo();  //(Nd4jLong *) manager.replicatePointer(tadZ.tadOnlyShapeInfo, shape::shapeInfoByteLength(tadZ.tadOnlyShapeInfo));
+                auto zTadOffsets = Environment::getInstance().isCPU() ? packZ.primaryOffsets() : packZ.specialOffsets();  //(Nd4jLong *) manager.replicatePointer(tadZ.tadOffsets, tadZ.numTads * sizeof(Nd4jLong));
 
                 NativeOpExecutioner::execBroadcast(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(),
                                                    y->buffer(), y->shapeInfo(), y->specialBuffer(), y->specialShapeInfo(),
@@ -91,7 +91,7 @@ namespace sd {
         */
         ShapeList* LegacyBroadcastBoolOp::calculateOutputShape(ShapeList *inputShape, sd::graph::Context &block) {
             auto inShape = inputShape->at(0);
-            return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(inShape, DataType::BOOL)));
+            return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(inShape, DataType::BOOL)));
         }
     }
 }
diff --git a/libnd4j/include/ops/declarable/impl/LegacyBroadcastOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyBroadcastOp.cpp
index 82899bbdb..c47cc9040 100644
--- a/libnd4j/include/ops/declarable/impl/LegacyBroadcastOp.cpp
+++ b/libnd4j/include/ops/declarable/impl/LegacyBroadcastOp.cpp
@@ -47,14 +47,14 @@ namespace sd {
             int opNum = block.opNum() < 0 ? this->_opNum : block.opNum();
 
-            auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), dims);
+            auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(x->shapeInfo(), dims);
             auto tadLen = shape::length(packX.primaryShapeInfo());
             REQUIRE_TRUE(tadLen == y->lengthOf(), 0, "Length of broadcast TAD should be equal to length of Y operand, but got [%i] vs [%i]",tadLen, (int) y->lengthOf());
 
             PointersManager manager(block.launchContext(),"LegacyBroadcastOp");
-            auto pTadShape = Environment::getInstance()->isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo();  //(Nd4jLong *) manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo));
-            auto pTadOffsets = Environment::getInstance()->isCPU() ? packX.primaryOffsets() : packX.specialOffsets();  //(Nd4jLong *) manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong));
+            auto pTadShape = Environment::getInstance().isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo();  //(Nd4jLong *) manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo));
+            auto pTadOffsets = Environment::getInstance().isCPU() ? packX.primaryOffsets() : packX.specialOffsets();  //(Nd4jLong *) manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong));
 
             if (x == z)
                 NativeOpExecutioner::execBroadcast(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(),
@@ -62,10 +62,10 @@ namespace sd {
                                                    z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(),
                                                    dims.data(), dims.size(), pTadShape, pTadOffsets, pTadShape, pTadOffsets);
             else {
                 // this is rare, but possible use case - X and Z might have different shapes/strides/orders. In this case we prepare and pass separate TAD info
-                auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(z->shapeInfo(), dims);
+                auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(z->shapeInfo(), dims);
 
-                auto zTadShape = Environment::getInstance()->isCPU() ? packZ.primaryShapeInfo() : packZ.specialShapeInfo();  //(Nd4jLong *) manager.replicatePointer(tadZ.tadOnlyShapeInfo, shape::shapeInfoByteLength(tadZ.tadOnlyShapeInfo));
-                auto zTadOffsets = Environment::getInstance()->isCPU() ? packZ.primaryOffsets() : packZ.specialOffsets();  //(Nd4jLong *) manager.replicatePointer(tadZ.tadOffsets, tadZ.numTads * sizeof(Nd4jLong));
+                auto zTadShape = Environment::getInstance().isCPU() ? packZ.primaryShapeInfo() : packZ.specialShapeInfo();  //(Nd4jLong *) manager.replicatePointer(tadZ.tadOnlyShapeInfo, shape::shapeInfoByteLength(tadZ.tadOnlyShapeInfo));
+                auto zTadOffsets = Environment::getInstance().isCPU() ? packZ.primaryOffsets() : packZ.specialOffsets();  //(Nd4jLong *) manager.replicatePointer(tadZ.tadOffsets, tadZ.numTads * sizeof(Nd4jLong));
 
                 NativeOpExecutioner::execBroadcast(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(),
                                                    y->buffer(), y->shapeInfo(), y->specialBuffer(), y->specialShapeInfo(),
diff --git a/libnd4j/include/ops/declarable/impl/LegacyIndexReduceOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyIndexReduceOp.cpp
index 7fc6bf793..a9e8475c0 100644
--- a/libnd4j/include/ops/declarable/impl/LegacyIndexReduceOp.cpp
+++ b/libnd4j/include/ops/declarable/impl/LegacyIndexReduceOp.cpp
@@ -54,7 +54,7 @@ namespace sd {
                 newShape[6] = 1;
                 newShape[7] = 99;
 
-                auto result = ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(newShape, DataType::INT64));
+                auto result = ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(newShape, DataType::INT64));
                 RELEASE(newShape, block.getWorkspace());
                 return SHAPELIST(result);
             } else if (block.getAxis()->size()){
@@ -89,7 +89,7 @@ namespace sd {
                 newShape[6] = 1;
                 newShape[7] = 99;
 
-                auto result = ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(newShape, DataType::INT64));
+                auto result = ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(newShape, DataType::INT64));
                 RELEASE(newShape, block.getWorkspace());
                 return SHAPELIST(result);
             } else {
@@ -139,7 +139,7 @@ namespace sd {
                 if (dims.size() > 1)
                     std::sort(dims.begin(), dims.end());
 
-                auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), dims);
+                auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(x->shapeInfo(), dims);
 
                 NativeOpExecutioner::execIndexReduce(block.launchContext(), opNum,
                                                      x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(),
@@ -147,7 +147,7 @@ namespace sd {
                                                      reinterpret_cast(z->buffer()), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(),
                                                      nullptr, (int) dims.size(),
-                                                     Environment::getInstance()->isCPU() ? tadPack.primaryShapeInfo() : tadPack.specialShapeInfo(), Environment::getInstance()->isCPU() ? tadPack.primaryOffsets() : tadPack.specialOffsets());
+                                                     Environment::getInstance().isCPU() ? tadPack.primaryShapeInfo() : tadPack.specialShapeInfo(), Environment::getInstance().isCPU() ? tadPack.primaryOffsets() : tadPack.specialOffsets());
tadPack.primaryOffsets() : tadPack.specialOffsets()); } } else { // TF mode @@ -175,7 +175,7 @@ namespace sd { REQUIRE_TRUE(axis.size() > 0, 0, "Some dimensions required for reduction!"); - auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), axis); + auto tadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(x->shapeInfo(), axis); NativeOpExecutioner::execIndexReduce(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), @@ -183,8 +183,8 @@ namespace sd { reinterpret_cast(z->buffer()), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), nullptr, (int) axis.size(), - Environment::getInstance()->isCPU() ? tadPack.primaryShapeInfo() : tadPack.specialShapeInfo(), - Environment::getInstance()->isCPU() ? tadPack.primaryOffsets() : tadPack.specialOffsets()); + Environment::getInstance().isCPU() ? tadPack.primaryShapeInfo() : tadPack.specialShapeInfo(), + Environment::getInstance().isCPU() ? tadPack.primaryOffsets() : tadPack.specialOffsets()); } } diff --git a/libnd4j/include/ops/declarable/impl/LegacyPairwiseTransformBoolOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyPairwiseTransformBoolOp.cpp index 11a05a76c..8b6e1406e 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyPairwiseTransformBoolOp.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyPairwiseTransformBoolOp.cpp @@ -67,7 +67,7 @@ namespace sd { */ ShapeList *LegacyPairwiseTransformBoolOp::calculateOutputShape(ShapeList *inputShape, sd::graph::Context &block) { auto inShape = inputShape->at(0); - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(inShape, DataType::BOOL))); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(inShape, DataType::BOOL))); } } } \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/impl/LegacyRandomOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyRandomOp.cpp index 085780c56..09c0a054a 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyRandomOp.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyRandomOp.cpp @@ -344,7 +344,7 @@ namespace sd { auto zShapeVector = zShapeArr->asVectorT(); auto dtype = block.dataType(); - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(dtype, 'c', zShapeVector)); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(dtype, 'c', zShapeVector)); } else throw std::runtime_error("LegacyRandomOp: Unknown input data type!"); } diff --git a/libnd4j/include/ops/declarable/impl/LegacyReduce3Op.cpp b/libnd4j/include/ops/declarable/impl/LegacyReduce3Op.cpp index f110c0c55..700e0dba9 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyReduce3Op.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyReduce3Op.cpp @@ -52,16 +52,16 @@ namespace sd { if (dims[e] < 0) dims[e] += x->rankOf(); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), dims); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(z->shapeInfo(), dims); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(x->shapeInfo(), dims); + auto packZ = sd::ConstantTadHelper::getInstance().tadForDimensions(z->shapeInfo(), dims); REQUIRE_TRUE(dims.size() > 0, 0, "Some dimensions required for reduction!"); - auto xTadShape = Environment::getInstance()->isCPU() ?
packX.primaryShapeInfo() : packX.specialShapeInfo(); //(Nd4jLong *) manager.replicatePointer(tadX.tadOnlyShapeInfo, shape::shapeInfoByteLength(tadX.tadOnlyShapeInfo)); - auto xTadOffsets = Environment::getInstance()->isCPU() ? packX.primaryOffsets() : packX.specialOffsets(); //(Nd4jLong *) manager.replicatePointer(tadX.tadOffsets, tadX.numTads * sizeof(Nd4jLong)); + auto xTadShape = Environment::getInstance().isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo(); //(Nd4jLong *) manager.replicatePointer(tadX.tadOnlyShapeInfo, shape::shapeInfoByteLength(tadX.tadOnlyShapeInfo)); + auto xTadOffsets = Environment::getInstance().isCPU() ? packX.primaryOffsets() : packX.specialOffsets(); //(Nd4jLong *) manager.replicatePointer(tadX.tadOffsets, tadX.numTads * sizeof(Nd4jLong)); - auto yTadShape = Environment::getInstance()->isCPU() ? packZ.primaryShapeInfo() : packZ.specialOffsets(); //(Nd4jLong *) manager.replicatePointer(tadY.tadOnlyShapeInfo, shape::shapeInfoByteLength(tadY.tadOnlyShapeInfo)); - auto yTadOffsets = Environment::getInstance()->isCPU() ? packZ.primaryOffsets() : packZ.specialOffsets(); //(Nd4jLong *) manager.replicatePointer(tadY.tadOffsets, tadY.numTads * sizeof(Nd4jLong)); + auto yTadShape = Environment::getInstance().isCPU() ? packZ.primaryShapeInfo() : packZ.specialShapeInfo(); //(Nd4jLong *) manager.replicatePointer(tadY.tadOnlyShapeInfo, shape::shapeInfoByteLength(tadY.tadOnlyShapeInfo)); + auto yTadOffsets = Environment::getInstance().isCPU() ? packZ.primaryOffsets() : packZ.specialOffsets(); //(Nd4jLong *) manager.replicatePointer(tadY.tadOffsets, tadY.numTads * sizeof(Nd4jLong)); NativeOpExecutioner::execReduce3(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(z->dataType()), diff --git a/libnd4j/include/ops/declarable/impl/LegacyReduceBoolOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyReduceBoolOp.cpp index 4aced5aec..e16e71619 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyReduceBoolOp.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyReduceBoolOp.cpp @@ -75,10 +75,10 @@ namespace sd { REQUIRE_TRUE(dims.size() > 0, 0, "Some dimensions required for reduction!"); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), dims); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(x->shapeInfo(), dims); - auto pTadShape = Environment::getInstance()->isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo(); //manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); - auto pTadOffsets = Environment::getInstance()->isCPU() ? packX.primaryOffsets() : packX.specialOffsets(); //manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong)); + auto pTadShape = Environment::getInstance().isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo(); //manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); + auto pTadOffsets = Environment::getInstance().isCPU() ?
packX.primaryOffsets() : packX.specialOffsets(); //manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong)); NativeOpExecutioner::execReduceBool(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(x->dataType()), @@ -111,10 +111,10 @@ namespace sd { REQUIRE_TRUE(dims.size() > 0, 0, "Some dimensions required for reduction!"); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), dims); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(x->shapeInfo(), dims); - auto pTadShape = Environment::getInstance()->isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo(); //(Nd4jLong *) manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); - auto pTadOffsets = Environment::getInstance()->isCPU() ? packX.primaryOffsets() : packX.specialOffsets(); //(Nd4jLong *) manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong)); + auto pTadShape = Environment::getInstance().isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo(); //(Nd4jLong *) manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); + auto pTadOffsets = Environment::getInstance().isCPU() ? packX.primaryOffsets() : packX.specialOffsets(); //(Nd4jLong *) manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong)); NativeOpExecutioner::execReduceBool(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(x->dataType()), z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), dims.data(), (int) dims.size(), pTadShape, pTadOffsets); diff --git a/libnd4j/include/ops/declarable/impl/LegacyReduceFloatOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyReduceFloatOp.cpp index 55197844a..a0ff14858 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyReduceFloatOp.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyReduceFloatOp.cpp @@ -76,10 +76,10 @@ namespace sd { REQUIRE_TRUE(dims.size() > 0, 0, "Some dimensions required for reduction!"); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), dims); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(x->shapeInfo(), dims); - auto pTadShape = Environment::getInstance()->isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo(); //manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); - auto pTadOffsets = Environment::getInstance()->isCPU() ? packX.primaryOffsets() : packX.specialOffsets(); //manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong)); + auto pTadShape = Environment::getInstance().isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo(); //manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); + auto pTadOffsets = Environment::getInstance().isCPU() ? 
packX.primaryOffsets() : packX.specialOffsets(); //manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong)); NativeOpExecutioner::execReduceFloat(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(z->dataType()), z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), @@ -109,10 +109,10 @@ namespace sd { // TAD REQUIRE_TRUE(dims.size() > 0, 0, "Some dimensions required for reduction!"); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), dims); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(x->shapeInfo(), dims); - auto pTadShape = Environment::getInstance()->isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo(); //(Nd4jLong *) manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); - auto pTadOffsets = Environment::getInstance()->isCPU() ? packX.primaryOffsets() : packX.specialOffsets(); //(Nd4jLong *) manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong)); + auto pTadShape = Environment::getInstance().isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo(); //(Nd4jLong *) manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); + auto pTadOffsets = Environment::getInstance().isCPU() ? packX.primaryOffsets() : packX.specialOffsets(); //(Nd4jLong *) manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong)); NativeOpExecutioner::execReduceFloat(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(z->dataType()), z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), diff --git a/libnd4j/include/ops/declarable/impl/LegacyReduceLongOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyReduceLongOp.cpp index 628c4cb5f..f5007ff03 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyReduceLongOp.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyReduceLongOp.cpp @@ -78,10 +78,10 @@ namespace sd { REQUIRE_TRUE(dims.size() > 0, 0, "Some dimensions required for reduction!"); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), dims); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(x->shapeInfo(), dims); - auto pTadShape = Environment::getInstance()->isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo(); //(Nd4jLong *) manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); - auto pTadOffsets = Environment::getInstance()->isCPU() ? packX.primaryOffsets() : packX.specialOffsets(); //(Nd4jLong *) manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong)); + auto pTadShape = Environment::getInstance().isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo(); //(Nd4jLong *) manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); + auto pTadOffsets = Environment::getInstance().isCPU() ? 
packX.primaryOffsets() : packX.specialOffsets(); //(Nd4jLong *) manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong)); NativeOpExecutioner::execReduceLong(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(x->dataType()), @@ -111,10 +111,10 @@ namespace sd { // TAD REQUIRE_TRUE(dims.size() > 0, 0, "Some dimensions required for reduction!"); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), dims); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(x->shapeInfo(), dims); - auto pTadShape = Environment::getInstance()->isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo(); //(Nd4jLong *) manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); - auto pTadOffsets = Environment::getInstance()->isCPU() ? packX.primaryOffsets() : packX.specialOffsets(); //(Nd4jLong *) manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong)); + auto pTadShape = Environment::getInstance().isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo(); //(Nd4jLong *) manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); + auto pTadOffsets = Environment::getInstance().isCPU() ? packX.primaryOffsets() : packX.specialOffsets(); //(Nd4jLong *) manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong)); NativeOpExecutioner::execReduceLong(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(x->dataType()), z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), dims.data(), (int) dims.size(), pTadShape, pTadOffsets); diff --git a/libnd4j/include/ops/declarable/impl/LegacyReduceSameOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyReduceSameOp.cpp index e406a3a2d..299d19f14 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyReduceSameOp.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyReduceSameOp.cpp @@ -73,10 +73,10 @@ namespace sd { REQUIRE_TRUE(dims.size() > 0, 0, "Some dimensions required for reduction!"); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), dims); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(x->shapeInfo(), dims); - auto pTadShape = Environment::getInstance()->isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo(); //(Nd4jLong *) manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); - auto pTadOffsets = Environment::getInstance()->isCPU() ? packX.primaryOffsets() : packX.specialOffsets(); //(Nd4jLong *) manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong)); + auto pTadShape = Environment::getInstance().isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo(); //(Nd4jLong *) manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); + auto pTadOffsets = Environment::getInstance().isCPU() ? 
packX.primaryOffsets() : packX.specialOffsets(); //(Nd4jLong *) manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong)); NativeOpExecutioner::execReduceSame(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(z->dataType()), @@ -106,10 +106,10 @@ namespace sd { // TAD REQUIRE_TRUE(dims.size() > 0, 0, "Some dimensions required for reduction!"); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), dims); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(x->shapeInfo(), dims); - auto pTadShape = Environment::getInstance()->isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo(); //(Nd4jLong *) manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); - auto pTadOffsets = Environment::getInstance()->isCPU() ? packX.primaryOffsets() : packX.specialOffsets(); //(Nd4jLong *) manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong)); + auto pTadShape = Environment::getInstance().isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo(); //(Nd4jLong *) manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); + auto pTadOffsets = Environment::getInstance().isCPU() ? packX.primaryOffsets() : packX.specialOffsets(); //(Nd4jLong *) manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong)); NativeOpExecutioner::execReduceSame(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(z->dataType()), z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), diff --git a/libnd4j/include/ops/declarable/impl/LegacyScalarOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyScalarOp.cpp index 3e73b10f5..0c700b88b 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyScalarOp.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyScalarOp.cpp @@ -72,7 +72,7 @@ namespace sd { x->applyScalarArr(static_cast(opNum), y, *z); // NDArray::prepareSpecialUse({z}, {x, &y}); - // NativeOpExecutioner::execScalar(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), y.buffer(), y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), extras.argumentsAsT(z->dataType(), 1)); + // NativeOpExecutioner::execScalar(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), y.buffer(), y.shapeInfo(), y.specialBuffer(), y.special(), extras.argumentsAsT(z->dataType(), 1)); manager.synchronize(); } else { diff --git a/libnd4j/include/ops/declarable/impl/LegacyStatsOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyStatsOp.cpp index b8694f9ff..4a60064b5 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyStatsOp.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyStatsOp.cpp @@ -58,10 +58,10 @@ namespace sd { REQUIRE_TRUE(dims.size() > 0, 0, "Some dimensions required for reduction!"); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), dims); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(x->shapeInfo(), dims); - auto pTadShape = Environment::getInstance()->isCPU() ?
packX.primaryShapeInfo() : packX.specialShapeInfo(); //(Nd4jLong *) manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); - auto pTadOffsets = Environment::getInstance()->isCPU() ? packX.primaryOffsets() : packX.specialOffsets(); //(Nd4jLong *) manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong)); + auto pTadShape = Environment::getInstance().isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo(); //(Nd4jLong *) manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); + auto pTadOffsets = Environment::getInstance().isCPU() ? packX.primaryOffsets() : packX.specialOffsets(); //(Nd4jLong *) manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong)); NativeOpExecutioner::execSummaryStats(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(z->dataType()), z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), dims.data(), (int) dims.size(), pTadShape, pTadOffsets, biasCorrected); diff --git a/libnd4j/include/ops/declarable/impl/LegacyTransformBoolOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyTransformBoolOp.cpp index a0651d1fc..3bf4f1ff4 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyTransformBoolOp.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyTransformBoolOp.cpp @@ -65,7 +65,7 @@ namespace sd { */ ShapeList *LegacyTransformBoolOp::calculateOutputShape(ShapeList *inputShape, sd::graph::Context &block) { auto inShape = inputShape->at(0); - return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(inShape, DataType::BOOL))); + return SHAPELIST(ConstantShapeHelper::getInstance().createShapeInfo(ShapeDescriptor(inShape, DataType::BOOL))); } } } \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/impl/OpDescriptor.cpp b/libnd4j/include/ops/declarable/impl/OpDescriptor.cpp index 398c11729..84c1bc291 100644 --- a/libnd4j/include/ops/declarable/impl/OpDescriptor.cpp +++ b/libnd4j/include/ops/declarable/impl/OpDescriptor.cpp @@ -33,7 +33,7 @@ namespace sd { _numOutputs = 1; _opName = opName; - _hash = sd::ops::HashHelper::getInstance()->getLongHash(_opName); + _hash = sd::ops::HashHelper::getInstance().getLongHash(_opName); _opClass = sd::graph::OpClass_CONDITIONAL; _scalar = isScalar; @@ -44,7 +44,7 @@ namespace sd { _numOutputs = 1; _opName = opName; - _hash = sd::ops::HashHelper::getInstance()->getLongHash(_opName); + _hash = sd::ops::HashHelper::getInstance().getLongHash(_opName); _opClass = sd::graph::OpClass_CONDITIONAL; _scalar = isScalar; @@ -77,7 +77,7 @@ namespace sd { std::string tmp(opName); _opName = tmp; _allowsInplace = allowsInplace; - _hash = sd::ops::HashHelper::getInstance()->getLongHash(tmp); + _hash = sd::ops::HashHelper::getInstance().getLongHash(tmp); _divergent = false; // just default value diff --git a/libnd4j/include/ops/declarable/impl/OpRegistrator.cpp b/libnd4j/include/ops/declarable/impl/OpRegistrator.cpp index 65d694dea..327cb0482 100644 --- a/libnd4j/include/ops/declarable/impl/OpRegistrator.cpp +++ b/libnd4j/include/ops/declarable/impl/OpRegistrator.cpp @@ -31,31 +31,29 @@ namespace sd { template __registrator::__registrator() { auto ptr = new OpName(); - OpRegistrator::getInstance()->registerOperation(ptr); + OpRegistrator::getInstance().registerOperation(ptr); } template __registratorSynonym::__registratorSynonym(const char *name, const char *oname) { - auto ptr = 
reinterpret_cast(OpRegistrator::getInstance()->getOperation(oname)); + auto ptr = reinterpret_cast(OpRegistrator::getInstance().getOperation(oname)); if (ptr == nullptr) { std::string newName(name); std::string oldName(oname); - OpRegistrator::getInstance()->updateMSVC(sd::ops::HashHelper::getInstance()->getLongHash(newName), oldName); + OpRegistrator::getInstance().updateMSVC(sd::ops::HashHelper::getInstance().getLongHash(newName), oldName); return; } - OpRegistrator::getInstance()->registerOperation(name, ptr); + OpRegistrator::getInstance().registerOperation(name, ptr); } /////////////////////////////// - OpRegistrator* OpRegistrator::getInstance() { - if (!_INSTANCE) - _INSTANCE = new sd::ops::OpRegistrator(); - - return _INSTANCE; + OpRegistrator& OpRegistrator::getInstance() { + static OpRegistrator instance; + return instance; } @@ -89,21 +87,15 @@ namespace sd { } void OpRegistrator::sigIntHandler(int sig) { -#ifndef _RELEASE - delete OpRegistrator::getInstance(); -#endif + } void OpRegistrator::exitHandler() { -#ifndef _RELEASE - delete OpRegistrator::getInstance(); -#endif + } void OpRegistrator::sigSegVHandler(int sig) { -#ifndef _RELEASE - delete OpRegistrator::getInstance(); -#endif + } OpRegistrator::~OpRegistrator() { @@ -156,7 +148,7 @@ namespace sd { std::pair pair(str, op); _declarablesD.insert(pair); - auto hash = sd::ops::HashHelper::getInstance()->getLongHash(str); + auto hash = sd::ops::HashHelper::getInstance().getLongHash(str); std::pair pair2(hash, op); _declarablesLD.insert(pair2); return true; @@ -256,8 +248,6 @@ namespace sd { return result; } - - sd::ops::OpRegistrator* sd::ops::OpRegistrator::_INSTANCE = 0; } } diff --git a/libnd4j/include/ops/declarable/impl/PlatformHelper.cpp b/libnd4j/include/ops/declarable/impl/PlatformHelper.cpp index dfc18d33b..245626c09 100644 --- a/libnd4j/include/ops/declarable/impl/PlatformHelper.cpp +++ b/libnd4j/include/ops/declarable/impl/PlatformHelper.cpp @@ -27,7 +27,7 @@ namespace sd { PlatformHelper::PlatformHelper(const char *name, samediff::Engine engine) { // we just store name/hash of target operation _name = std::string(name); - _hash = HashHelper::getInstance()->getLongHash(_name); + _hash = HashHelper::getInstance().getLongHash(_name); _engine = engine; } diff --git a/libnd4j/include/ops/impl/gemm.cpp b/libnd4j/include/ops/impl/gemm.cpp index 0c4ab167c..8632ddcb9 100644 --- a/libnd4j/include/ops/impl/gemm.cpp +++ b/libnd4j/include/ops/impl/gemm.cpp @@ -68,7 +68,7 @@ namespace sd { if (beta == 0.0) { Z z = 0.f; int length = M*N; - if (length <= Environment::getInstance()->elementwiseThreshold()) { + if (length <= Environment::getInstance().elementwiseThreshold()) { for (int r = 0; r < length; r++) C[r] = z; } else { diff --git a/libnd4j/include/ops/impl/specials_double.hpp b/libnd4j/include/ops/impl/specials_double.hpp index 1eaf3fbc0..d219220ac 100644 --- a/libnd4j/include/ops/impl/specials_double.hpp +++ b/libnd4j/include/ops/impl/specials_double.hpp @@ -224,8 +224,8 @@ PRAGMA_OMP_SINGLE_ARGS(nowait) auto x = reinterpret_cast(vx); auto y = reinterpret_cast(vy); - auto packX = ConstantTadHelper::getInstance()->tadForDimensions(xShapeInfo, dimension, dimensionLength); - auto packY = ConstantTadHelper::getInstance()->tadForDimensions(yShapeInfo, dimension, dimensionLength); + auto packX = ConstantTadHelper::getInstance().tadForDimensions(xShapeInfo, dimension, dimensionLength); + auto packY = ConstantTadHelper::getInstance().tadForDimensions(yShapeInfo, dimension, dimensionLength); auto xLength = 
shape::length(xShapeInfo); auto xTadLength = shape::length(packX.primaryShapeInfo()); @@ -248,8 +248,8 @@ PRAGMA_OMP_SINGLE_ARGS(nowait) auto x = reinterpret_cast(vx); auto y = reinterpret_cast(vy); - auto packX = ConstantTadHelper::getInstance()->tadForDimensions(xShapeInfo, dimension, dimensionLength); - auto packY = ConstantTadHelper::getInstance()->tadForDimensions(yShapeInfo, dimension, dimensionLength); + auto packX = ConstantTadHelper::getInstance().tadForDimensions(xShapeInfo, dimension, dimensionLength); + auto packY = ConstantTadHelper::getInstance().tadForDimensions(yShapeInfo, dimension, dimensionLength); auto xLength = shape::length(xShapeInfo); auto xTadLength = shape::length(packX.primaryShapeInfo()); diff --git a/libnd4j/include/ops/special_random_ops.h b/libnd4j/include/ops/special_random_ops.h index 08808e67c..f9bacf5cb 100644 --- a/libnd4j/include/ops/special_random_ops.h +++ b/libnd4j/include/ops/special_random_ops.h @@ -163,7 +163,7 @@ namespace randomOps { int elementsPerThread = zLength / TAD_THRESHOLD; int _threads = sd::math::nd4j_max(1, elementsPerThread); - _threads = sd::math::nd4j_min(_threads, sd::Environment::getInstance()->maxThreads()); + _threads = sd::math::nd4j_min(_threads, sd::Environment::getInstance().maxThreads()); if (zEWS >= 1 && xEWS >= 1 && yEWS >= 1) { auto func = PRAGMA_THREADS_FOR { @@ -315,7 +315,7 @@ namespace randomOps { int elementsPerThread = middle / TAD_THRESHOLD; int _threads = sd::math::nd4j_max(1, elementsPerThread); - _threads = sd::math::nd4j_min(_threads, sd::Environment::getInstance()->maxThreads()); + _threads = sd::math::nd4j_min(_threads, sd::Environment::getInstance().maxThreads()); int span = (middle / _threads) + 8; @@ -434,7 +434,7 @@ namespace randomOps { int elementsPerThread = zLength / TAD_THRESHOLD; int _threads = sd::math::nd4j_max(1, elementsPerThread); - _threads = sd::math::nd4j_min(_threads, sd::Environment::getInstance()->maxThreads()); + _threads = sd::math::nd4j_min(_threads, sd::Environment::getInstance().maxThreads()); T prob = extraArguments[1]; @@ -542,7 +542,7 @@ namespace randomOps { int elementsPerThread = zLength / TAD_THRESHOLD; int _threads = sd::math::nd4j_max(1, elementsPerThread); - _threads = sd::math::nd4j_min(_threads, sd::Environment::getInstance()->maxThreads()); + _threads = sd::math::nd4j_min(_threads, sd::Environment::getInstance().maxThreads()); T prob = extraArguments[1]; @@ -684,7 +684,7 @@ namespace randomOps { Nd4jLong middle = zLength / 2 + (zLength % 2); int elementsPerThread = middle / TAD_THRESHOLD; int _threads = sd::math::nd4j_max(1, elementsPerThread); - _threads = sd::math::nd4j_min(_threads, sd::Environment::getInstance()->maxThreads()); + _threads = sd::math::nd4j_min(_threads, sd::Environment::getInstance().maxThreads()); const T epsilon = static_cast(1e-5); @@ -801,7 +801,7 @@ namespace randomOps { int elementsPerThread = middle / TAD_THRESHOLD; int _threads = sd::math::nd4j_max(1, elementsPerThread); - _threads = sd::math::nd4j_min(_threads, sd::Environment::getInstance()->maxThreads()); + _threads = sd::math::nd4j_min(_threads, sd::Environment::getInstance().maxThreads()); int span = (zLength / _threads) + 8; diff --git a/libnd4j/include/system/Environment.h b/libnd4j/include/system/Environment.h index 392e70871..9b2a4b65b 100644 --- a/libnd4j/include/system/Environment.h +++ b/libnd4j/include/system/Environment.h @@ -62,11 +62,9 @@ namespace sd{ // device compute capability for CUDA std::vector _capabilities; - static Environment* _instance; - Environment(); - 
~Environment(); public: + ~Environment(); /** * These 3 fields are mostly for CUDA/cuBLAS version tracking */ @@ -74,7 +72,7 @@ namespace sd{ int _blasMinorVersion = 0; int _blasPatchVersion = 0; - static Environment* getInstance(); + static Environment& getInstance(); bool isVerbose(); void setVerbose(bool reallyVerbose); diff --git a/libnd4j/include/system/op_boilerplate.h b/libnd4j/include/system/op_boilerplate.h index 1df4f0047..0c2630f22 100644 --- a/libnd4j/include/system/op_boilerplate.h +++ b/libnd4j/include/system/op_boilerplate.h @@ -118,8 +118,8 @@ #endif -#define ELEMENT_THRESHOLD sd::Environment::getInstance()->elementwiseThreshold() -#define TAD_THRESHOLD sd::Environment::getInstance()->tadThreshold() +#define ELEMENT_THRESHOLD sd::Environment::getInstance().elementwiseThreshold() +#define TAD_THRESHOLD sd::Environment::getInstance().tadThreshold() #define SHAPELIST(...) new ShapeList({__VA_ARGS__}, block.workspace() != nullptr) @@ -129,8 +129,8 @@ #define PRINT_FIRST(...) printf(__VA_ARGS__); fflush(stdout) #endif -#define DEBUG_CALL(STREAM) if (sd::Environment::getInstance()->isDebug()) { cudaError_t tRes = cudaStreamSynchronize(*STREAM); checkCudaErrors(tRes); if (tRes != 0) { throw std::runtime_error(); }; } -#define DEBUG_KERNEL(STREAM, OP_NUM) if (sd::Environment::getInstance()->isDebug()) { cudaError_t tRes = cudaStreamSynchronize(*STREAM); checkCudaErrors(tRes); if (tRes != 0) {std::string tFile(__FILE__); std::string tOp = "Kernel OpNum failed: [" + sd::StringUtils::valueToString(OP_NUM) + std::string("]; File: ") + tFile + std::string(":") + sd::StringUtils::valueToString(__LINE__); throw std::runtime_error(tOp.c_str()); }; } +#define DEBUG_CALL(STREAM) if (sd::Environment::getInstance().isDebug()) { cudaError_t tRes = cudaStreamSynchronize(*STREAM); checkCudaErrors(tRes); if (tRes != 0) { throw std::runtime_error(); }; } +#define DEBUG_KERNEL(STREAM, OP_NUM) if (sd::Environment::getInstance().isDebug()) { cudaError_t tRes = cudaStreamSynchronize(*STREAM); checkCudaErrors(tRes); if (tRes != 0) {std::string tFile(__FILE__); std::string tOp = "Kernel OpNum failed: [" + sd::StringUtils::valueToString(OP_NUM) + std::string("]; File: ") + tFile + std::string(":") + sd::StringUtils::valueToString(__LINE__); throw std::runtime_error(tOp.c_str()); }; } #define LAUNCH(A, B, C, D) <<>> @@ -1112,7 +1112,7 @@ #define _EXPAND_OP_CALL_1(NAME, TYPE, PARAMZ, NUM_A, TYPE_A) NAME>PARAMZ; #define _EXPAND_OP_DIRECT(PARAMZ, NUM_A, TYPE_A) case NUM_A: { z = TYPE_A::op PARAMZ; break; } -#define _EXPAND_OP_CALL_T(TYPE, NUM_A, TYPE_A) OpTracker::getInstance()->storeOperation(TYPE, #TYPE_A, NUM_A); +#define _EXPAND_OP_CALL_T(TYPE, NUM_A, TYPE_A) OpTracker::getInstance().storeOperation(TYPE, #TYPE_A, NUM_A); #define _EXPAND_FACTORY_CALL(TYPE, LAYER_ID, LAYER_NAME, ACTIVATION_ID, ACTIVATION_NAME) if (activationNum == ACTIVATION_ID && layerNum == LAYER_ID) { return new LAYER_NAME>(); }; @@ -1256,7 +1256,7 @@ struct __registrator_##NAME {\ __registrator_##NAME() {\ OpName *ptr = new OpName(); \ - OpRegistrator::getInstance()->registerOperation(ptr); \ + OpRegistrator::getInstance().registerOperation(ptr); \ }\ };\ static sd::ops::__registrator_##NAME zzz_register_opd_##NAME; @@ -1269,7 +1269,7 @@ struct __registrator_##NAME {\ __registrator_##NAME() {\ OpName *ptr = new OpName(); \ - OpRegistrator::getInstance()->registerOperation(ptr); \ + OpRegistrator::getInstance().registerOperation(ptr); \ }\ };\ static sd::ops::__registrator_##NAME zzz_register_opd_##NAME; @@ -1332,7 +1332,7 @@ auto 
shapeList = SHAPELIST(); \ auto opLimit = this->getOpDescriptor()->getNumberOfOutputs() < 1 ? block.width() : this->getOpDescriptor()->getNumberOfOutputs(); \ for (int e = 0; e < opLimit; e++) { \ - auto newshape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inputShape->at(e)), shape::order(inputShape->at(e)), shape::rank(inputShape->at(e)), shape::shapeOf(inputShape->at(e))); \ + auto newshape = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inputShape->at(e)), shape::order(inputShape->at(e)), shape::rank(inputShape->at(e)), shape::shapeOf(inputShape->at(e))); \ shapeList->push_back(newshape); \ } \ return shapeList; \ @@ -1343,14 +1343,14 @@ #define DECLARE_SYN(NAME, ORIGINAL) template \ struct __registratorSynonym_##NAME {\ __registratorSynonym_##NAME(const char *name, const char *oname) {\ - auto ptr = reinterpret_cast(OpRegistrator::getInstance()->getOperation(oname)); \ + auto ptr = reinterpret_cast(OpRegistrator::getInstance().getOperation(oname)); \ if (ptr == nullptr) { \ std::string newName(name); \ std::string oldName(oname); \ - OpRegistrator::getInstance()->updateMSVC(sd::ops::HashHelper::getInstance()->getLongHash(newName), oldName);\ + OpRegistrator::getInstance().updateMSVC(sd::ops::HashHelper::getInstance().getLongHash(newName), oldName);\ return;\ }\ - OpRegistrator::getInstance()->registerOperation(name, ptr);\ + OpRegistrator::getInstance().registerOperation(name, ptr);\ }\ };\ static sd::ops::__registratorSynonym_##NAME zzz_register_opd_##NAME(#NAME, #ORIGINAL) @@ -1394,7 +1394,7 @@ auto shapeList = SHAPELIST(); \ auto opLimit = this->getOpDescriptor()->getNumberOfOutputs() < 1 ? block.width() : this->getOpDescriptor()->getNumberOfOutputs(); \ for (int e = 0; e < opLimit; e++) { \ - auto newshape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inputShape->at(e)), shape::order(inputShape->at(e)), shape::rank(inputShape->at(e)), shape::shapeOf(inputShape->at(e))); \ + auto newshape = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inputShape->at(e)), shape::order(inputShape->at(e)), shape::rank(inputShape->at(e)), shape::shapeOf(inputShape->at(e))); \ shapeList->push_back(newshape); \ } \ return shapeList; \ @@ -1484,8 +1484,8 @@ #else // we intentionally add 8 tail bytes here to avoid problems with atomic operations -#define ALLOCATE_SPECIAL(VARIABLE, WORKSPACE, LENGTH, TT) if (WORKSPACE == nullptr) {auto erc_##VARIABLE = cudaMalloc(reinterpret_cast(&VARIABLE), LENGTH * sizeof(TT) + 8); if (erc_##VARIABLE != 0) {throw cuda_exception::build("[DEVICE] allocation failed", erc_##VARIABLE);} else { sd::memory::MemoryTracker::getInstance()->countIn(sd::memory::MemoryType::DEVICE, VARIABLE, LENGTH * sizeof(TT)); }; } else {VARIABLE = reinterpret_cast(WORKSPACE->allocateBytes(sd::memory::MemoryType::DEVICE, LENGTH * sizeof(TT) + 8)); } -#define RELEASE_SPECIAL(VARIABLE, WORKSPACE) if (VARIABLE != nullptr) {if (WORKSPACE == nullptr) { sd::memory::MemoryTracker::getInstance()->countOut(VARIABLE); auto erc_##VARIABLE = cudaFree(reinterpret_cast(VARIABLE)); if (erc_##VARIABLE != 0) {throw cuda_exception::build("[DEVICE] deallocation failed", erc_##VARIABLE);}; }; }; +#define ALLOCATE_SPECIAL(VARIABLE, WORKSPACE, LENGTH, TT) if (WORKSPACE == nullptr) {auto erc_##VARIABLE = cudaMalloc(reinterpret_cast(&VARIABLE), LENGTH * sizeof(TT) + 8); if (erc_##VARIABLE != 0) {throw cuda_exception::build("[DEVICE] allocation failed", erc_##VARIABLE);} else { 
sd::memory::MemoryTracker::getInstance().countIn(sd::memory::MemoryType::DEVICE, VARIABLE, LENGTH * sizeof(TT)); }; } else {VARIABLE = reinterpret_cast(WORKSPACE->allocateBytes(sd::memory::MemoryType::DEVICE, LENGTH * sizeof(TT) + 8)); } +#define RELEASE_SPECIAL(VARIABLE, WORKSPACE) if (VARIABLE != nullptr) {if (WORKSPACE == nullptr) { sd::memory::MemoryTracker::getInstance().countOut(VARIABLE); auto erc_##VARIABLE = cudaFree(reinterpret_cast(VARIABLE)); if (erc_##VARIABLE != 0) {throw cuda_exception::build("[DEVICE] deallocation failed", erc_##VARIABLE);}; }; }; #endif @@ -1503,12 +1503,12 @@ #else -#define ALLOCATE(VARIABLE, WORKSPACE, LENGTH, TT) if (WORKSPACE == nullptr) {VARIABLE = new TT[LENGTH]; sd::memory::MemoryTracker::getInstance()->countIn(sd::memory::MemoryType::HOST, VARIABLE, LENGTH * sizeof(TT)); } else {VARIABLE = reinterpret_cast(WORKSPACE->allocateBytes(LENGTH * sizeof(TT))); }; memset(VARIABLE, 0, LENGTH * sizeof(TT)); -#define RELEASE(VARIABLE, WORKSPACE) if (WORKSPACE == nullptr) { sd::memory::MemoryTracker::getInstance()->countOut(VARIABLE); delete[] VARIABLE;}; +#define ALLOCATE(VARIABLE, WORKSPACE, LENGTH, TT) if (WORKSPACE == nullptr) {VARIABLE = new TT[LENGTH]; sd::memory::MemoryTracker::getInstance().countIn(sd::memory::MemoryType::HOST, VARIABLE, LENGTH * sizeof(TT)); } else {VARIABLE = reinterpret_cast(WORKSPACE->allocateBytes(LENGTH * sizeof(TT))); }; memset(VARIABLE, 0, LENGTH * sizeof(TT)); +#define RELEASE(VARIABLE, WORKSPACE) if (WORKSPACE == nullptr) { sd::memory::MemoryTracker::getInstance().countOut(VARIABLE); delete[] VARIABLE;}; #endif -#define CONSTANT(SHAPE) ConstantShapeHelper::getInstance()->createFromExisting(SHAPE, block.workspace()) +#define CONSTANT(SHAPE) ConstantShapeHelper::getInstance().createFromExisting(SHAPE, block.workspace()) diff --git a/libnd4j/include/system/platform_boilerplate.h b/libnd4j/include/system/platform_boilerplate.h index bdbb1a051..b74a0530f 100644 --- a/libnd4j/include/system/platform_boilerplate.h +++ b/libnd4j/include/system/platform_boilerplate.h @@ -40,7 +40,7 @@ #define PLATFORM_IMPL_F(NAME, ENGINE, CNAME) struct ND4J_EXPORT __registratorPlatformHelper_##CNAME { \ __registratorPlatformHelper_##CNAME() { \ auto helper = new PLATFORM_##CNAME(); \ - OpRegistrator::getInstance()->registerHelper(helper); \ + OpRegistrator::getInstance().registerHelper(helper); \ } \ }; \ static __registratorPlatformHelper_##CNAME platformHelper_##CNAME; \ diff --git a/libnd4j/minifier/minifier.cpp b/libnd4j/minifier/minifier.cpp index 7846c1846..043f2b696 100644 --- a/libnd4j/minifier/minifier.cpp +++ b/libnd4j/minifier/minifier.cpp @@ -92,7 +92,7 @@ main(int argc, char *argv[]) { arch_arg = opt.arch(); std::vector descriptors; - nd4j_printf("Total available operations: %i\n", OpRegistrator::getInstance()->numberOfOperations()); + nd4j_printf("Total available operations: %i\n", OpRegistrator::getInstance().numberOfOperations()); for (auto file: opt.files()) { // all files will be checked for accessibility & size diff --git a/libnd4j/server/GraphServer.cpp b/libnd4j/server/GraphServer.cpp index a9e8c3ddc..b7615dd5c 100644 --- a/libnd4j/server/GraphServer.cpp +++ b/libnd4j/server/GraphServer.cpp @@ -43,7 +43,7 @@ namespace sd { auto graph = new Graph(flat_graph); // single data type for now - GraphHolder::getInstance()->registerGraph(flat_graph->id(), graph); + GraphHolder::getInstance().registerGraph(flat_graph->id(), graph); // sending out OK response auto response_offset = CreateFlatResponse(mb_, 0); @@ -66,7 +66,7 @@ namespace sd { 
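The reworked ALLOCATE/RELEASE macros above keep the same two-path shape as before: plain heap allocations are registered with the MemoryTracker singleton, while workspace allocations bypass tracking because the workspace reclaims its arena wholesale. A rough sketch of that pattern under simplified, hypothetical Tracker and Workspace types (the real ones are sd::memory::MemoryTracker and the libnd4j workspace; nothing below is the actual API):

#include <cstddef>
#include <cstring>
#include <map>
#include <vector>

class Tracker {
public:
    static Tracker& getInstance() { static Tracker t; return t; }
    void countIn(void* ptr, std::size_t bytes) { _live[ptr] = bytes; }
    void countOut(void* ptr) { _live.erase(ptr); }
    std::size_t liveAllocations() const { return _live.size(); }
private:
    Tracker() = default;
    std::map<void*, std::size_t> _live;
};

struct Workspace {
    std::vector<char> arena = std::vector<char>(1 << 20);  // pre-sized scratch
    std::size_t offset = 0;
    void* allocateBytes(std::size_t bytes) {  // bump allocator, freed wholesale
        void* ptr = arena.data() + offset;
        offset += bytes;
        return ptr;
    }
};

// Mirrors ALLOCATE: track heap allocations; workspace ones stay untracked.
template <typename T>
T* allocate(Workspace* ws, std::size_t length) {
    T* v;
    if (ws == nullptr) {
        v = new T[length];
        Tracker::getInstance().countIn(v, length * sizeof(T));
    } else {
        v = reinterpret_cast<T*>(ws->allocateBytes(length * sizeof(T)));
    }
    std::memset(v, 0, length * sizeof(T));
    return v;
}

// Mirrors RELEASE: only heap allocations are counted out and freed one by one.
template <typename T>
void release(Workspace* ws, T* v) {
    if (ws == nullptr) {
        Tracker::getInstance().countOut(v);
        delete[] v;
    }
}

int main() {
    long long* heap = allocate<long long>(nullptr, 16);  // tracked
    Workspace ws;
    long long* scratch = allocate<long long>(&ws, 16);   // untracked
    release(&ws, scratch);
    release(nullptr, heap);
    return 0;
}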
auto graph = new Graph(flat_graph); // single data type for now - GraphHolder::getInstance()->replaceGraph(flat_graph->id(), graph); + GraphHolder::getInstance().replaceGraph(flat_graph->id(), graph); // sending out OK response auto response_offset = CreateFlatResponse(mb_, 0); @@ -91,7 +91,7 @@ namespace sd { auto request = request_msg->GetRoot(); // dropping out graph (any datatype) - GraphHolder::getInstance()->dropGraphAny(request->id()); + GraphHolder::getInstance().dropGraphAny(request->id()); // sending out OK response auto response_offset = CreateFlatResponse(mb_, 0); @@ -111,7 +111,7 @@ namespace sd { try { // GraphHolder - auto response_offset = GraphHolder::getInstance()->execute(request->id(), mb_, request); + auto response_offset = GraphHolder::getInstance().execute(request->id(), mb_, request); mb_.Finish(response_offset); *response_msg = mb_.ReleaseMessage(); @@ -181,7 +181,7 @@ int main(int argc, char *argv[]) { if(cmdOptionExists(argv, argv+argc, "-f")) { auto file = getCmdOption(argv, argv + argc, "-f"); auto graph = GraphExecutioner::importFromFlatBuffers(file); - sd::graph::GraphHolder::getInstance()->registerGraph(0L, graph); + sd::graph::GraphHolder::getInstance().registerGraph(0L, graph); } RunServer(port); diff --git a/libnd4j/tests_cpu/layers_tests/ConditionalTests.cpp b/libnd4j/tests_cpu/layers_tests/ConditionalTests.cpp index 00752ca0f..5167abcd1 100644 --- a/libnd4j/tests_cpu/layers_tests/ConditionalTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/ConditionalTests.cpp @@ -30,13 +30,13 @@ using namespace sd::graph; class ConditionalTests : public testing::Test { public: ConditionalTests(){ - //Environment::getInstance()->setVerbose(true); - //Environment::getInstance()->setDebug(true); + //Environment::getInstance().setVerbose(true); + //Environment::getInstance().setDebug(true); } ~ConditionalTests(){ - //Environment::getInstance()->setVerbose(false); - //Environment::getInstance()->setDebug(false); + //Environment::getInstance().setVerbose(false); + //Environment::getInstance().setDebug(false); } }; @@ -139,8 +139,8 @@ TEST_F(ConditionalTests, Flat_Test_1) { * Condition is True */ TEST_F(ConditionalTests, Flat_Test_2) { - Environment::getInstance()->setDebug(true); - Environment::getInstance()->setVerbose(true); + Environment::getInstance().setDebug(true); + Environment::getInstance().setVerbose(true); sd::ops::identity op0; auto graph = GraphExecutioner::importFromFlatBuffers("./resources/simpleif_0.fb"); diff --git a/libnd4j/tests_cpu/layers_tests/ConstantShapeHelperTests.cpp b/libnd4j/tests_cpu/layers_tests/ConstantShapeHelperTests.cpp index 5b747ab5b..a9a42ac88 100644 --- a/libnd4j/tests_cpu/layers_tests/ConstantShapeHelperTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/ConstantShapeHelperTests.cpp @@ -45,15 +45,15 @@ public: }; TEST_F(ConstantShapeHelperTests, test_cachedAmount_1) { - auto ttlBefore = ConstantShapeHelper::getInstance()->totalCachedEntries(); + auto ttlBefore = ConstantShapeHelper::getInstance().totalCachedEntries(); auto arrayA = NDArrayFactory::create('c', {7, 11, 17, 23, 31, 43}); - auto ttlMiddle = ConstantShapeHelper::getInstance()->totalCachedEntries(); + auto ttlMiddle = ConstantShapeHelper::getInstance().totalCachedEntries(); auto arrayB = NDArrayFactory::create('c', {7, 11, 17, 23, 31, 43}); - auto ttlAfter = ConstantShapeHelper::getInstance()->totalCachedEntries(); + auto ttlAfter = ConstantShapeHelper::getInstance().totalCachedEntries(); ASSERT_TRUE(ttlBefore <= ttlMiddle); ASSERT_EQ(ttlMiddle, ttlAfter); @@ -61,15 +61,15 @@ 
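The cachedAmount tests above and below assert the one property that makes these constant helpers safe to share: a second request for an identical shape or TAD descriptor must be served from the cache, so the entry count does not grow. A compact model of that behaviour, where ShapeCache and its methods are hypothetical stand-ins for ConstantShapeHelper's bufferForShapeInfo and totalCachedEntries:

#include <cstddef>
#include <cstdio>
#include <map>
#include <vector>

class ShapeCache {
public:
    static ShapeCache& getInstance() {
        static ShapeCache instance;
        return instance;
    }
    // First request inserts; identical later requests hit the cached entry.
    const std::vector<long long>& bufferFor(const std::vector<long long>& shape) {
        return _cache.emplace(shape, shape).first->second;
    }
    std::size_t totalCachedEntries() const { return _cache.size(); }
private:
    ShapeCache() = default;
    std::map<std::vector<long long>, std::vector<long long>> _cache;
};

int main() {
    auto& helper = ShapeCache::getInstance();
    helper.bufferFor({7, 11, 17, 23, 31, 43});
    auto middle = helper.totalCachedEntries();
    helper.bufferFor({7, 11, 17, 23, 31, 43});   // cache hit, no new entry
    auto after = helper.totalCachedEntries();
    printf("grew: %s\n", middle == after ? "no" : "yes");   // prints "no"
    return 0;
}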
TEST_F(ConstantShapeHelperTests, test_cachedAmount_1) { TEST_F(ConstantTadHelperTests, test_cachedAmount_1) { auto arrayA = NDArrayFactory::create('c', {7, 11, 17, 23, 31, 43}); - auto ttlBefore = ConstantTadHelper::getInstance()->totalCachedEntries(); + auto ttlBefore = ConstantTadHelper::getInstance().totalCachedEntries(); - auto packAA = ConstantTadHelper::getInstance()->tadForDimensions(arrayA.shapeInfo(), {3, 4}); + auto packAA = ConstantTadHelper::getInstance().tadForDimensions(arrayA.shapeInfo(), {3, 4}); - auto ttlMiddle = ConstantTadHelper::getInstance()->totalCachedEntries(); + auto ttlMiddle = ConstantTadHelper::getInstance().totalCachedEntries(); - auto packAB = ConstantTadHelper::getInstance()->tadForDimensions(arrayA.shapeInfo(), {3, 4}); + auto packAB = ConstantTadHelper::getInstance().tadForDimensions(arrayA.shapeInfo(), {3, 4}); - auto ttlAfter = ConstantTadHelper::getInstance()->totalCachedEntries(); + auto ttlAfter = ConstantTadHelper::getInstance().totalCachedEntries(); ASSERT_TRUE(ttlBefore <= ttlMiddle); ASSERT_EQ(ttlMiddle, ttlAfter); @@ -88,13 +88,13 @@ TEST_F(ConstantShapeHelperTests, basic_test_1) { ASSERT_EQ(sd::DataType::BFLOAT16, descriptor.dataType()); ASSERT_FALSE(descriptor.isEmpty()); - ASSERT_FALSE(ConstantShapeHelper::getInstance()->checkBufferExistenceForShapeInfo(descriptor)); + ASSERT_FALSE(ConstantShapeHelper::getInstance().checkBufferExistenceForShapeInfo(descriptor)); - auto buffer = ConstantShapeHelper::getInstance()->bufferForShapeInfo(descriptor); + auto buffer = ConstantShapeHelper::getInstance().bufferForShapeInfo(descriptor); - ASSERT_TRUE(ConstantShapeHelper::getInstance()->checkBufferExistenceForShapeInfo(descriptor)); + ASSERT_TRUE(ConstantShapeHelper::getInstance().checkBufferExistenceForShapeInfo(descriptor)); - auto buffer2 = ConstantShapeHelper::getInstance()->bufferForShapeInfo(descriptor2); + auto buffer2 = ConstantShapeHelper::getInstance().bufferForShapeInfo(descriptor2); ASSERT_TRUE(buffer.primary() != nullptr); @@ -109,14 +109,14 @@ TEST_F(ConstantShapeHelperTests, stress_test_1) { for (auto x = 0; x < 1000; x++) { auto ptr = ShapeBuilders::createShapeInfo(sd::DataType::FLOAT32, 'c', {5, x + 10, x + 1}); ShapeDescriptor descriptor(ptr); - ConstantShapeHelper::getInstance()->createShapeInfo(descriptor); + ConstantShapeHelper::getInstance().createShapeInfo(descriptor); delete [] ptr; } ShapeDescriptor aShape(sd::DataType::FLOAT32, 'c', {(Nd4jLong)5, (Nd4jLong)382, (Nd4jLong)373}); -// nd4j_printf("%d\n", ConstantShapeHelper::getInstance()->cachedEntriesForDevice(0)); +// nd4j_printf("%d\n", ConstantShapeHelper::getInstance().cachedEntriesForDevice(0)); auto timeStart = std::chrono::system_clock::now(); - ASSERT_TRUE(ConstantShapeHelper::getInstance()->checkBufferExistenceForShapeInfo(aShape)); + ASSERT_TRUE(ConstantShapeHelper::getInstance().checkBufferExistenceForShapeInfo(aShape)); auto timeEnd = std::chrono::system_clock::now(); auto outerTime = std::chrono::duration_cast(timeEnd - timeStart).count(); @@ -146,7 +146,7 @@ TEST_F(ConstantShapeHelperTests, basic_test_4) { #ifdef __CUDABLAS__ ASSERT_TRUE(dup->specialShapeInfo() != nullptr); PointersManager manager(sd::LaunchContext ::defaultContext(), "test"); - // manager.printDevContentOnDev(dup->specialShapeInfo(), shape::shapeInfoLength(2), 0); + // manager.printDevContentOnDev(dup->special(), shape::shapeInfoLength(2), 0); #endif delete array; @@ -195,14 +195,14 @@ TEST_F(ConstantHelperTests, basic_test_1) { ConstantDescriptor descriptor({1, 2, 3}); - ConstantDataBuffer* fBuffer 
= ConstantHelper::getInstance()->constantBuffer(descriptor, sd::DataType::FLOAT32); + ConstantDataBuffer* fBuffer = ConstantHelper::getInstance().constantBuffer(descriptor, sd::DataType::FLOAT32); auto fPtr = fBuffer->primaryAsT(); ASSERT_NEAR(1.f, fPtr[0], 1e-5); ASSERT_NEAR(2.f, fPtr[1], 1e-5); ASSERT_NEAR(3.f, fPtr[2], 1e-5); - auto iBuffer = ConstantHelper::getInstance()->constantBuffer(descriptor, sd::DataType::INT32); + auto iBuffer = ConstantHelper::getInstance().constantBuffer(descriptor, sd::DataType::INT32); auto iPtr = iBuffer->primaryAsT(); ASSERT_EQ(1, iPtr[0]); @@ -215,14 +215,14 @@ TEST_F(ConstantHelperTests, basic_test_2) { double array[] = {1., 2., 3.}; ConstantDescriptor descriptor(array, 3); - ConstantDataBuffer* fBuffer = ConstantHelper::getInstance()->constantBuffer(descriptor, sd::DataType::FLOAT32); + ConstantDataBuffer* fBuffer = ConstantHelper::getInstance().constantBuffer(descriptor, sd::DataType::FLOAT32); auto fPtr = fBuffer->primaryAsT(); ASSERT_NEAR(1.f, fPtr[0], 1e-5); ASSERT_NEAR(2.f, fPtr[1], 1e-5); ASSERT_NEAR(3.f, fPtr[2], 1e-5); - auto iBuffer = ConstantHelper::getInstance()->constantBuffer(descriptor, sd::DataType::INT32); + auto iBuffer = ConstantHelper::getInstance().constantBuffer(descriptor, sd::DataType::INT32); auto iPtr = iBuffer->primaryAsT(); ASSERT_EQ(1, iPtr[0]); diff --git a/libnd4j/tests_cpu/layers_tests/ConvolutionTests1.cpp b/libnd4j/tests_cpu/layers_tests/ConvolutionTests1.cpp index 4438e5fe6..b87985458 100644 --- a/libnd4j/tests_cpu/layers_tests/ConvolutionTests1.cpp +++ b/libnd4j/tests_cpu/layers_tests/ConvolutionTests1.cpp @@ -2013,7 +2013,6 @@ TYPED_TEST(TypedConvolutionTests1, conv3d_test9) { ASSERT_TRUE(exp.isSameShape(z)); - shapes->destroy(); delete shapes; } diff --git a/libnd4j/tests_cpu/layers_tests/CudaBasicsTests1.cu b/libnd4j/tests_cpu/layers_tests/CudaBasicsTests1.cu index cbcbe2c15..d8ed2a264 100644 --- a/libnd4j/tests_cpu/layers_tests/CudaBasicsTests1.cu +++ b/libnd4j/tests_cpu/layers_tests/CudaBasicsTests1.cu @@ -254,7 +254,7 @@ TEST_F(CudaBasicsTests1, execIndexReduceScalar_1) { //////////////////////////////////////////////////////////////////////////// TEST_F(CudaBasicsTests1, execReduce3Scalar_1) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2,2}, {1,2,3,4}, sd::DataType::INT32); @@ -970,7 +970,7 @@ TEST_F(CudaBasicsTests1, execIndexReduce_3) { //////////////////////////////////////////////////////////////////////////// TEST_F(CudaBasicsTests1, execScalar_1) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x('c', {2,3}, {0,1,2,3,4,5}, sd::DataType::INT64); @@ -1005,7 +1005,7 @@ TEST_F(CudaBasicsTests1, execScalar_1) { //////////////////////////////////////////////////////////////////////////// TEST_F(CudaBasicsTests1, execScalar_2) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x('c', {2,3}, {-1,-2,-3,-4,-5,-6}, sd::DataType::INT64); @@ -1041,7 +1041,7 @@ TEST_F(CudaBasicsTests1, execScalar_2) { //////////////////////////////////////////////////////////////////////////// TEST_F(CudaBasicsTests1, execScalar_3) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x('c', {2,3,2}, {0,1,2,3,4,5,6,7,8,9,10,11}, sd::DataType::INT64); @@ -1192,7 +1192,7 @@ TEST_F(CudaBasicsTests1, 
execScalarBool_2) { //////////////////////////////////////////////////////////////////////////// TEST_F(CudaBasicsTests1, execBroadcast_1) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x('c', {2,3,4}, {100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100}, sd::DataType::INT32); @@ -1252,7 +1252,7 @@ TEST_F(CudaBasicsTests1, execBroadcast_1) { //////////////////////////////////////////////////////////////////////////// TEST_F(CudaBasicsTests1, execBroadcast_2) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x('c', {2,3,4}, {100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100}, sd::DataType::INT32); @@ -1429,7 +1429,7 @@ TEST_F(CudaBasicsTests1, execBroadcastBool_2) { //////////////////////////////////////////////////////////////////////////// TEST_F(CudaBasicsTests1, execPairwiseTransform_1) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x('c', {2,2,2}, {1,5,3,7,2,6,4,8}, sd::DataType::INT32); @@ -2544,7 +2544,7 @@ TEST_F(CudaBasicsTests1, execReduce3TAD_1) { NDArray z('c', {3}, {100,100,100}, sd::DataType::DOUBLE); std::vector dimensions = {0,1}; - auto packX = ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), dimensions); + auto packX = ConstantTadHelper::getInstance().tadForDimensions(x.shapeInfo(), dimensions); LaunchContext* context = x.getContext(); x.syncToDevice(); @@ -2858,7 +2858,7 @@ TEST_F(CudaBasicsTests1, execSummaryStats_3) { NativeOpExecutioner::execSummaryStats(&lc, sd::variance::SummaryStatsStandardDeviation, nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.special(), (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2], true); @@ -2941,13 +2941,13 @@ TEST_F(CudaBasicsTests1, execRandom_1) { // cudaResult = cudaStreamCreate(&stream); ASSERT_EQ(0, cudaResult); // LaunchContext lc(&stream); // -// // ::execRandom(extraPointers, random::GaussianDistribution, &gen, z.buffer(), z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), &extra); +// // ::execRandom(extraPointers, random::GaussianDistribution, &gen, z.buffer(), z.shapeInfo(), z.specialBuffer(), z.special(), &extra); // // call cuda kernel which calculates result // NativeOpExecutioner::execRandom(&lc, sd::random::GaussianDistribution, // &gen, -// nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), -// nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), -// nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), +// nullptr, z.shapeInfo(), z.specialBuffer(), z.special(), +// nullptr, z.shapeInfo(), z.specialBuffer(), z.special(), +// nullptr, z.shapeInfo(), z.specialBuffer(), z.special(), // extraArguments.argumentsAsT(z.dataType())); // // cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); diff --git a/libnd4j/tests_cpu/layers_tests/CudaBasicsTests2.cu b/libnd4j/tests_cpu/layers_tests/CudaBasicsTests2.cu index b425ffcbb..28102cad5 100644 --- a/libnd4j/tests_cpu/layers_tests/CudaBasicsTests2.cu +++ b/libnd4j/tests_cpu/layers_tests/CudaBasicsTests2.cu @@ -40,7 +40,7 @@ public: }; TEST_F(CudaBasicsTests2, test_devices_1) { - auto caps = 
Environment::getInstance()->capabilities(); + auto caps = Environment::getInstance().capabilities(); ASSERT_FALSE(caps.empty()); } @@ -259,7 +259,7 @@ TEST_F(CudaBasicsTests2, mmulMxM_12) { int devCnt = 0; cudaGetDevice(&devCnt); - if(Environment::getInstance()->capabilities()[devCnt].first() < 5) return; + if(Environment::getInstance().capabilities()[devCnt].first() < 5) return; const Nd4jLong M = 4; const Nd4jLong K = 4; @@ -282,7 +282,7 @@ TEST_F(CudaBasicsTests2, mmulMxM_13) { int devCnt = 0; cudaGetDevice(&devCnt); - if(Environment::getInstance()->capabilities()[devCnt].first() < 5) return; + if(Environment::getInstance().capabilities()[devCnt].first() < 5) return; const Nd4jLong M = 3; const Nd4jLong K = 4; @@ -304,7 +304,7 @@ TEST_F(CudaBasicsTests2, mmulMxM_14) { int devCnt = 0; cudaGetDevice(&devCnt); - if(Environment::getInstance()->capabilities()[devCnt].first() < 5) return; + if(Environment::getInstance().capabilities()[devCnt].first() < 5) return; const Nd4jLong M = 3; const Nd4jLong K = 4; @@ -326,7 +326,7 @@ TEST_F(CudaBasicsTests2, mmulMxM_15) { int devCnt = 0; cudaGetDevice(&devCnt); - if(Environment::getInstance()->capabilities()[devCnt].first() < 5) return; + if(Environment::getInstance().capabilities()[devCnt].first() < 5) return; const Nd4jLong M = 3; const Nd4jLong K = 4; @@ -349,7 +349,7 @@ TEST_F(CudaBasicsTests2, mmulMxM_16) { int devCnt = 0; cudaGetDevice(&devCnt); - if(Environment::getInstance()->capabilities()[devCnt].first() < 5) return; + if(Environment::getInstance().capabilities()[devCnt].first() < 5) return; const Nd4jLong M = 3; const Nd4jLong K = 4; @@ -371,7 +371,7 @@ TEST_F(CudaBasicsTests2, mmulMxM_17) { int devCnt = 0; cudaGetDevice(&devCnt); - if(Environment::getInstance()->capabilities()[devCnt].first() < 5) return; + if(Environment::getInstance().capabilities()[devCnt].first() < 5) return; const Nd4jLong M = 3; const Nd4jLong K = 4; @@ -393,7 +393,7 @@ TEST_F(CudaBasicsTests2, mmulMxM_18) { int devCnt = 0; cudaGetDevice(&devCnt); - if(Environment::getInstance()->capabilities()[devCnt].first() < 5.3) return; + if(Environment::getInstance().capabilities()[devCnt].first() < 5.3) return; const Nd4jLong M = 3; const Nd4jLong K = 4; @@ -415,7 +415,7 @@ TEST_F(CudaBasicsTests2, mmulMxM_19) { int devCnt = 0; cudaGetDevice(&devCnt); - if(Environment::getInstance()->capabilities()[devCnt].first() < 5.3) return; + if(Environment::getInstance().capabilities()[devCnt].first() < 5.3) return; const Nd4jLong M = 3; const Nd4jLong K = 4; @@ -437,7 +437,7 @@ TEST_F(CudaBasicsTests2, mmulMxM_20) { int devCnt = 0; cudaGetDevice(&devCnt); - if(Environment::getInstance()->capabilities()[devCnt].first() < 5.3) return; + if(Environment::getInstance().capabilities()[devCnt].first() < 5.3) return; const Nd4jLong M = 3; const Nd4jLong K = 4; diff --git a/libnd4j/tests_cpu/layers_tests/DataBufferTests.cpp b/libnd4j/tests_cpu/layers_tests/DataBufferTests.cpp index 42ab543b1..b22f9e765 100644 --- a/libnd4j/tests_cpu/layers_tests/DataBufferTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/DataBufferTests.cpp @@ -39,31 +39,31 @@ public: }; TEST_F(DataBufferTests, test_alloc_limit_1) { - if (!Environment::getInstance()->isCPU()) + if (!Environment::getInstance().isCPU()) return; auto deviceId = AffinityManager::currentDeviceId(); - auto odLimit = MemoryCounter::getInstance()->deviceLimit(deviceId); - auto ogLimit = MemoryCounter::getInstance()->groupLimit(MemoryType::HOST); - auto odUse = MemoryCounter::getInstance()->allocatedDevice(deviceId); - auto ogUse = 
MemoryCounter::getInstance()->allocatedGroup(MemoryType::HOST); + auto odLimit = MemoryCounter::getInstance().deviceLimit(deviceId); + auto ogLimit = MemoryCounter::getInstance().groupLimit(MemoryType::HOST); + auto odUse = MemoryCounter::getInstance().allocatedDevice(deviceId); + auto ogUse = MemoryCounter::getInstance().allocatedGroup(MemoryType::HOST); auto limitSize = odUse + (150 * 1024 * 1024); auto allocSize = 100000000; - MemoryCounter::getInstance()->setDeviceLimit(deviceId, odLimit + limitSize); - MemoryCounter::getInstance()->setGroupLimit(MemoryType::HOST, odLimit + limitSize); + MemoryCounter::getInstance().setDeviceLimit(deviceId, odLimit + limitSize); + MemoryCounter::getInstance().setGroupLimit(MemoryType::HOST, odLimit + limitSize); DataBuffer buffer(allocSize, DataType::INT32); // separately testing per-device limits and group limits - ASSERT_EQ(odUse + allocSize, MemoryCounter::getInstance()->allocatedDevice(deviceId)); - ASSERT_EQ(ogUse + allocSize, MemoryCounter::getInstance()->allocatedGroup(MemoryType::HOST)); + ASSERT_EQ(odUse + allocSize, MemoryCounter::getInstance().allocatedDevice(deviceId)); + ASSERT_EQ(ogUse + allocSize, MemoryCounter::getInstance().allocatedGroup(MemoryType::HOST)); // setting smaller limits, to make sure next allocation fails with OOM exception - MemoryCounter::getInstance()->setDeviceLimit(deviceId, allocSize - 100); - MemoryCounter::getInstance()->setGroupLimit(MemoryType::HOST, allocSize - 100); + MemoryCounter::getInstance().setDeviceLimit(deviceId, allocSize - 100); + MemoryCounter::getInstance().setGroupLimit(MemoryType::HOST, allocSize - 100); try { DataBuffer bufferFailed(allocSize, DataType::INT32); @@ -73,6 +73,6 @@ TEST_F(DataBufferTests, test_alloc_limit_1) { } // restore original limits, so subsequent tests do not fail - MemoryCounter::getInstance()->setDeviceLimit(deviceId, odLimit); - MemoryCounter::getInstance()->setGroupLimit(MemoryType::HOST, odLimit); + MemoryCounter::getInstance().setDeviceLimit(deviceId, odLimit); + MemoryCounter::getInstance().setGroupLimit(MemoryType::HOST, odLimit); } \ No newline at end of file diff --git a/libnd4j/tests_cpu/layers_tests/DataBufferTestsCuda.cu b/libnd4j/tests_cpu/layers_tests/DataBufferTestsCuda.cu index 730ade824..6f7d38ede 100644 --- a/libnd4j/tests_cpu/layers_tests/DataBufferTestsCuda.cu +++ b/libnd4j/tests_cpu/layers_tests/DataBufferTestsCuda.cu @@ -42,33 +42,33 @@ public: TEST_F(DataBufferTestsCuda, test_alloc_limit_1) { auto deviceId = AffinityManager::currentDeviceId(); - auto odLimit = MemoryCounter::getInstance()->deviceLimit(deviceId); + auto odLimit = MemoryCounter::getInstance().deviceLimit(deviceId); - auto opLimit = MemoryCounter::getInstance()->groupLimit(MemoryType::HOST); - auto osLimit = MemoryCounter::getInstance()->groupLimit(MemoryType::DEVICE); + auto opLimit = MemoryCounter::getInstance().groupLimit(MemoryType::HOST); + auto osLimit = MemoryCounter::getInstance().groupLimit(MemoryType::DEVICE); - auto odUse = MemoryCounter::getInstance()->allocatedDevice(deviceId); + auto odUse = MemoryCounter::getInstance().allocatedDevice(deviceId); - auto opUse = MemoryCounter::getInstance()->allocatedGroup(MemoryType::HOST); - auto osUse = MemoryCounter::getInstance()->allocatedGroup(MemoryType::DEVICE); + auto opUse = MemoryCounter::getInstance().allocatedGroup(MemoryType::HOST); + auto osUse = MemoryCounter::getInstance().allocatedGroup(MemoryType::DEVICE); auto limitSize = odUse + 150000000; auto allocSize = 100000000; - 
MemoryCounter::getInstance()->setDeviceLimit(deviceId, odLimit + limitSize); - MemoryCounter::getInstance()->setGroupLimit(MemoryType::HOST, opLimit + limitSize); - MemoryCounter::getInstance()->setGroupLimit(MemoryType::DEVICE, osLimit + limitSize); + MemoryCounter::getInstance().setDeviceLimit(deviceId, odLimit + limitSize); + MemoryCounter::getInstance().setGroupLimit(MemoryType::HOST, opLimit + limitSize); + MemoryCounter::getInstance().setGroupLimit(MemoryType::DEVICE, osLimit + limitSize); DataBuffer buffer(allocSize, DataType::INT32, nullptr, true); // separately testing per-device limits and group limits - ASSERT_EQ(odUse + allocSize, MemoryCounter::getInstance()->allocatedDevice(deviceId)); - ASSERT_EQ(opUse + allocSize, MemoryCounter::getInstance()->allocatedGroup(MemoryType::HOST)); - ASSERT_EQ(osUse + allocSize, MemoryCounter::getInstance()->allocatedGroup(MemoryType::DEVICE)); + ASSERT_EQ(odUse + allocSize, MemoryCounter::getInstance().allocatedDevice(deviceId)); + ASSERT_EQ(opUse + allocSize, MemoryCounter::getInstance().allocatedGroup(MemoryType::HOST)); + ASSERT_EQ(osUse + allocSize, MemoryCounter::getInstance().allocatedGroup(MemoryType::DEVICE)); // setting smaller limits, to make sure next allocation fails with OOM exception - MemoryCounter::getInstance()->setDeviceLimit(deviceId, allocSize - 100); - MemoryCounter::getInstance()->setGroupLimit(MemoryType::DEVICE, allocSize - 100); + MemoryCounter::getInstance().setDeviceLimit(deviceId, allocSize - 100); + MemoryCounter::getInstance().setGroupLimit(MemoryType::DEVICE, allocSize - 100); // this allocation should fail, since we're allocating too much @@ -82,8 +82,8 @@ TEST_F(DataBufferTestsCuda, test_alloc_limit_1) { // // restore original limits, so subsequent tests do not fail - MemoryCounter::getInstance()->setDeviceLimit(deviceId, odLimit); - MemoryCounter::getInstance()->setGroupLimit(MemoryType::HOST, opLimit); - MemoryCounter::getInstance()->setGroupLimit(MemoryType::DEVICE, osLimit); + MemoryCounter::getInstance().setDeviceLimit(deviceId, odLimit); + MemoryCounter::getInstance().setGroupLimit(MemoryType::HOST, opLimit); + MemoryCounter::getInstance().setGroupLimit(MemoryType::DEVICE, osLimit); } */ \ No newline at end of file diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests1.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests1.cpp index 959362c4d..a5715fd01 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests1.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests1.cpp @@ -55,11 +55,11 @@ public: const int oW = (iW - kW - (kW - 1) * (dW - 1) + 2 * pW) / sW + 1; // output width DeclarableOpsTests1() { - sd::memory::MemoryTracker::getInstance()->reset(); + sd::memory::MemoryTracker::getInstance().reset(); } ~DeclarableOpsTests1() { - sd::memory::MemoryTracker::getInstance()->summarize(); + sd::memory::MemoryTracker::getInstance().summarize(); } }; @@ -144,7 +144,7 @@ TEST_F(DeclarableOpsTests1, BasicInitialization1) { ////////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests1, BasicInitialization2) { - auto op = sd::ops::OpRegistrator::getInstance()->getOperation("concat"); + auto op = sd::ops::OpRegistrator::getInstance().getOperation("concat"); ASSERT_TRUE(op != nullptr); std::string expName("concat"); @@ -217,19 +217,19 @@ TEST_F(DeclarableOpsTests1, AXpY_Test_1) { } TEST_F(DeclarableOpsTests1, BasicInitialization3) { - auto op1 = sd::ops::OpRegistrator::getInstance()->getOperation("concat"); + auto op1 = 
sd::ops::OpRegistrator::getInstance().getOperation("concat"); std::string expName("concat"); - auto hash = sd::ops::HashHelper::getInstance()->getLongHash(expName); + auto hash = sd::ops::HashHelper::getInstance().getLongHash(expName); - auto op2 = sd::ops::OpRegistrator::getInstance()->getOperation(hash); + auto op2 = sd::ops::OpRegistrator::getInstance().getOperation(hash); ASSERT_TRUE(op1 == op2); } TEST_F(DeclarableOpsTests1, SynonymInitialization2) { - auto op = sd::ops::OpRegistrator::getInstance()->getOperation("Mul"); - auto op2 = sd::ops::OpRegistrator::getInstance()->getOperation("multiply"); + auto op = sd::ops::OpRegistrator::getInstance().getOperation("Mul"); + auto op2 = sd::ops::OpRegistrator::getInstance().getOperation("multiply"); ASSERT_TRUE(op != nullptr); std::string expName("multiply"); @@ -597,7 +597,7 @@ TEST_F(DeclarableOpsTests1, TestTensorDot17) { ////////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests1, DivergentCheck1) { - auto op = sd::ops::OpRegistrator::getInstance()->getOperation("switch"); + auto op = sd::ops::OpRegistrator::getInstance().getOperation("switch"); ASSERT_TRUE(op != nullptr); std::string expName("Switch"); @@ -1695,7 +1695,7 @@ TEST_F(DeclarableOpsTests1, Test_Cast_1) { ////////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests1, TestRegistrator1) { - auto res = sd::ops::OpRegistrator::getInstance()->getAllCustomOperations(); + auto res = sd::ops::OpRegistrator::getInstance().getAllCustomOperations(); } // ////////////////////////////////////////////////////////////////////// @@ -1713,7 +1713,7 @@ TEST_F(DeclarableOpsTests1, TestRegistrator1) { // z->assign(120.0f); // std::string opName("add"); -// auto hash = sd::ops::HashHelper::getInstance()->getInstance()->getLongHash(opName); +// auto hash = sd::ops::HashHelper::getInstance().getInstance()->getLongHash(opName); // auto inputBuffers = new Nd4jPointer[2]; // auto inputShapes = new Nd4jPointer[2]; @@ -1763,7 +1763,7 @@ TEST_F(DeclarableOpsTests1, TestRegistrator1) { // std::string opName("add"); -// auto hash = sd::ops::HashHelper::getInstance()->getInstance()->getLongHash(opName); +// auto hash = sd::ops::HashHelper::getInstance().getInstance()->getLongHash(opName); // auto inputBuffers = new Nd4jPointer[2]; // auto inputShapes = new Nd4jPointer[2]; diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests12.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests12.cpp index 66762f79d..9e5281afe 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests12.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests12.cpp @@ -780,8 +780,8 @@ TEST_F(DeclarableOpsTests12, pullRows_1) { std::vector dims = {1}; - auto xTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), dims); - auto zTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(z.shapeInfo(), dims); + auto xTadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(x.shapeInfo(), dims); + auto zTadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(z.shapeInfo(), dims); Nd4jPointer nativeStart[2]; @@ -816,8 +816,8 @@ TEST_F(DeclarableOpsTests12, pullRows_2) { std::vector dims = {1}; - auto xTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), dims); - auto zTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(z.shapeInfo(), dims); + auto xTadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(x.shapeInfo(), dims); + auto zTadPack = 
sd::ConstantTadHelper::getInstance().tadForDimensions(z.shapeInfo(), dims); Nd4jPointer nativeStart[2]; #ifdef __CUDABLAS__ diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests13.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests13.cpp index c7222e6f7..639d90389 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests13.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests13.cpp @@ -2717,7 +2717,7 @@ TEST_F(DeclarableOpsTests13, batchnorm_bp_test9) { int* dims = reinterpret_cast(manager.replicatePointer(dimensions.data(), dimensions.size() * sizeof(int))); input.reduceAlongDimension(sd::reduce::Mean, mean, dimensions); NDArray::prepareSpecialUse({&variance}, {&input}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), dimensions); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input.shapeInfo(), dimensions); NativeOpExecutioner::execSummaryStats(input.getContext(), 0,input.buffer(), input.shapeInfo(),input.specialBuffer(), input.specialShapeInfo(),nullptr,variance.buffer(), variance.shapeInfo(),variance.specialBuffer(), variance.specialShapeInfo(), dims, dimensions.size(),packX.platformShapeInfo(), packX.platformOffsets(),false); manager.synchronize(); NDArray::registerSpecialUse({&variance}, {&input}); @@ -2768,7 +2768,7 @@ TEST_F(DeclarableOpsTests13, batchnorm_bp_test10) { int* dims = reinterpret_cast(manager.replicatePointer(dimensions.data(), dimensions.size() * sizeof(int))); input.reduceAlongDimension(sd::reduce::Mean, mean, dimensions); NDArray::prepareSpecialUse({&variance}, {&input}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), dimensions); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input.shapeInfo(), dimensions); NativeOpExecutioner::execSummaryStats(input.getContext(), 0,input.buffer(), input.shapeInfo(),input.specialBuffer(), input.specialShapeInfo(),nullptr,variance.buffer(), variance.shapeInfo(),variance.specialBuffer(), variance.specialShapeInfo(), dims, dimensions.size(),packX.platformShapeInfo(), packX.platformOffsets(),false); manager.synchronize(); NDArray::registerSpecialUse({&variance}, {&input}); @@ -2831,7 +2831,7 @@ TEST_F(DeclarableOpsTests13, batchnorm_bp_test11) { int* dims = reinterpret_cast(manager.replicatePointer(dimensions.data(), dimensions.size() * sizeof(int))); input.reduceAlongDimension(sd::reduce::Mean, mean, dimensions, true); NDArray::prepareSpecialUse({&variance}, {&input}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), dimensions); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(input.shapeInfo(), dimensions); NativeOpExecutioner::execSummaryStats(input.getContext(), 0,input.buffer(), input.shapeInfo(),input.specialBuffer(), input.specialShapeInfo(),nullptr,variance.buffer(), variance.shapeInfo(),variance.specialBuffer(), variance.specialShapeInfo(), dims, dimensions.size(),packX.platformShapeInfo(), packX.platformOffsets(),false); manager.synchronize(); NDArray::registerSpecialUse({&variance}, {&input}); diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests14.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests14.cpp index b4c9839ab..ef35bfa72 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests14.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests14.cpp @@ -40,7 +40,7 @@ public: TEST_F(DeclarableOpsTests14, Test_Validation_Edge_1) { auto x = NDArrayFactory::create('c', {2}, {2, 2}); - auto exp = 
NDArrayFactory::create('c', {2, 2}, Environment::getInstance()->defaultFloatDataType()); + auto exp = NDArrayFactory::create('c', {2, 2}, Environment::getInstance().defaultFloatDataType()); exp.assign(4.0f); sd::ops::fill op; diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests3.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests3.cpp index 38006dd50..2a099230e 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests3.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests3.cpp @@ -1945,7 +1945,7 @@ TEST_F(DeclarableOpsTests3, svd_test1) { ASSERT_TRUE(expS.equalsTo(s)); - if(sd::Environment::getInstance()->isCPU()) { + if(sd::Environment::getInstance().isCPU()) { ASSERT_TRUE(expU.equalsTo(u)); ASSERT_TRUE(expV.equalsTo(v)); } @@ -1981,7 +1981,7 @@ TEST_F(DeclarableOpsTests3, svd_test2) { ASSERT_TRUE(expS.equalsTo(s)); - if(sd::Environment::getInstance()->isCPU()) { + if(sd::Environment::getInstance().isCPU()) { ASSERT_TRUE(expU.equalsTo(u)); ASSERT_TRUE(expV.equalsTo(v)); } @@ -2017,7 +2017,7 @@ TEST_F(DeclarableOpsTests3, svd_test3) { ASSERT_TRUE(expS.equalsTo(s)); - if(sd::Environment::getInstance()->isCPU()) { + if(sd::Environment::getInstance().isCPU()) { ASSERT_TRUE(expU.equalsTo(u)); ASSERT_TRUE(expV.equalsTo(v)); } @@ -2053,7 +2053,7 @@ TEST_F(DeclarableOpsTests3, svd_test4) { ASSERT_TRUE(expS.equalsTo(s)); - if(sd::Environment::getInstance()->isCPU()) { + if(sd::Environment::getInstance().isCPU()) { ASSERT_TRUE(expU.equalsTo(u)); ASSERT_TRUE(expV.equalsTo(v)); } @@ -2089,7 +2089,7 @@ TEST_F(DeclarableOpsTests3, svd_test5) { ASSERT_TRUE(expS.equalsTo(s)); - if(sd::Environment::getInstance()->isCPU()) { + if(sd::Environment::getInstance().isCPU()) { ASSERT_TRUE(expU.equalsTo(u)); ASSERT_TRUE(expV.equalsTo(v)); } @@ -2143,7 +2143,7 @@ TEST_F(DeclarableOpsTests3, svd_test6) { ASSERT_TRUE(expS.equalsTo(s)); - if(sd::Environment::getInstance()->isCPU()) { + if(sd::Environment::getInstance().isCPU()) { ASSERT_TRUE(expU.equalsTo(u)); ASSERT_TRUE(expV.equalsTo(v)); } @@ -2317,7 +2317,7 @@ TEST_F(DeclarableOpsTests3, svd_test7) { // ASSERT_TRUE(expS.equalsTo(s)); - // if(sd::Environment::getInstance()->isCPU()) { + // if(sd::Environment::getInstance().isCPU()) { // ASSERT_TRUE(expU.equalsTo(u)); // ASSERT_TRUE(expV.equalsTo(v)); // } @@ -2380,7 +2380,7 @@ TEST_F(DeclarableOpsTests3, svd_test9) { ASSERT_TRUE(expS.equalsTo(s)); - if(sd::Environment::getInstance()->isCPU()) { + if(sd::Environment::getInstance().isCPU()) { ASSERT_TRUE(expU.equalsTo(u)); ASSERT_TRUE(expV.equalsTo(v)); } @@ -2438,7 +2438,7 @@ TEST_F(DeclarableOpsTests3, svd_test10) { ASSERT_TRUE(expS.equalsTo(s)); - if(sd::Environment::getInstance()->isCPU()) { + if(sd::Environment::getInstance().isCPU()) { ASSERT_TRUE(expU.equalsTo(u)); ASSERT_TRUE(expV.equalsTo(v)); } @@ -2482,7 +2482,7 @@ TEST_F(DeclarableOpsTests3, svd_test11) { ASSERT_TRUE(expS.equalsTo(s)); - if(sd::Environment::getInstance()->isCPU()) { + if(sd::Environment::getInstance().isCPU()) { ASSERT_TRUE(expU.equalsTo(u)); ASSERT_TRUE(expV.equalsTo(v)); } diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests4.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests4.cpp index 1e877ecc6..56e5e213a 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests4.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests4.cpp @@ -641,7 +641,7 @@ TEST_F(DeclarableOpsTests4, biasadd_bp_2) { } TEST_F(DeclarableOpsTests4, biasadd_4) { - if (!Environment::getInstance()->isExperimentalBuild()) + if 
(!Environment::getInstance().isExperimentalBuild()) return; auto x = NDArrayFactory::create('c', {2, 3}); diff --git a/libnd4j/tests_cpu/layers_tests/EmptyTests.cpp b/libnd4j/tests_cpu/layers_tests/EmptyTests.cpp index 81040185d..c142fb9aa 100644 --- a/libnd4j/tests_cpu/layers_tests/EmptyTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/EmptyTests.cpp @@ -216,7 +216,7 @@ TEST_F(EmptyTests, test_shaped_empty_3) { } TEST_F(EmptyTests, test_shaped_empty_4) { - const auto shape = ConstantShapeHelper::getInstance()->vectorShapeInfo(0, sd::DataType::FLOAT32); + const auto shape = ConstantShapeHelper::getInstance().vectorShapeInfo(0, sd::DataType::FLOAT32); NDArray array(shape, true, sd::LaunchContext::defaultContext()); std::vector shapeOf({0}); diff --git a/libnd4j/tests_cpu/layers_tests/ExtraArgumentsTests.cpp b/libnd4j/tests_cpu/layers_tests/ExtraArgumentsTests.cpp index 87ac750b2..aa4a72f70 100644 --- a/libnd4j/tests_cpu/layers_tests/ExtraArgumentsTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/ExtraArgumentsTests.cpp @@ -34,7 +34,7 @@ public: }; TEST_F(ExtraArgumentsTests, Basic_Test_1) { - if (!Environment::getInstance()->isCPU()) + if (!Environment::getInstance().isCPU()) return; ExtraArguments args({1.0, 2.0, 3.0}); diff --git a/libnd4j/tests_cpu/layers_tests/FlatBuffersTests.cpp b/libnd4j/tests_cpu/layers_tests/FlatBuffersTests.cpp index bdb8bde68..437edb525 100644 --- a/libnd4j/tests_cpu/layers_tests/FlatBuffersTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/FlatBuffersTests.cpp @@ -39,15 +39,15 @@ public: Nd4jLong *fShape = new Nd4jLong[8]{2, 2, 2, 1, 2, 8192, 1, 102}; FlatBuffersTest() { - Environment::getInstance()->setDebug(false); - Environment::getInstance()->setVerbose(false); - Environment::getInstance()->setProfiling(false); + Environment::getInstance().setDebug(false); + Environment::getInstance().setVerbose(false); + Environment::getInstance().setProfiling(false); } ~FlatBuffersTest() { - Environment::getInstance()->setDebug(false); - Environment::getInstance()->setVerbose(false); - Environment::getInstance()->setProfiling(false); + Environment::getInstance().setDebug(false); + Environment::getInstance().setVerbose(false); + Environment::getInstance().setProfiling(false); delete[] cShape; delete[] fShape; @@ -676,8 +676,8 @@ TEST_F(FlatBuffersTest, Test_Stitches) { } TEST_F(FlatBuffersTest, Test_GruDynamicMnist) { - sd::Environment::getInstance()->setDebug(false); - sd::Environment::getInstance()->setVerbose(false); + sd::Environment::getInstance().setDebug(false); + sd::Environment::getInstance().setVerbose(false); auto graph = GraphExecutioner::importFromFlatBuffers("./resources/gru_dynamic_mnist.fb"); //graph->printOut(); @@ -696,8 +696,8 @@ TEST_F(FlatBuffersTest, Test_GruDynamicMnist) { } TEST_F(FlatBuffersTest, Test_Non2D_2) { - sd::Environment::getInstance()->setDebug(false); - sd::Environment::getInstance()->setVerbose(false); + sd::Environment::getInstance().setDebug(false); + sd::Environment::getInstance().setVerbose(false); sd::ops::realdiv op0; auto graph = GraphExecutioner::importFromFlatBuffers("./resources/non2d_2.fb"); @@ -711,8 +711,8 @@ TEST_F(FlatBuffersTest, Test_Non2D_2) { TEST_F(FlatBuffersTest, Test_TensorDotMisc) { - Environment::getInstance()->setVerbose(false); - Environment::getInstance()->setDebug(false); + Environment::getInstance().setVerbose(false); + Environment::getInstance().setDebug(false); auto e = NDArrayFactory::create('c', {1, 3, 16, 20}, {4.f, 6.f, 6.f, 5.f, 6.f, 4.f, 2.f, 3.f, 5.f, 5.f, 1.f, 4.f, 6.f, 3.f, 2.f, 1.f, 5.f, 4.f, 4.f, 4.f, 
4.f, 4.f, 3.f, 4.f, 2.f, 3.f, 3.f, 5.f, 3.f, 6.f, 5.f, 4.f, 4.f, 3.f, 6.f, 1.f, 2.f, 4.f, 2.f, 6.f, 4.f, 2.f, 3.f, 2.f, 3.f, 1.f, 2.f, 4.f, 3.f, 5.f, 3.f, 3.f, 5.f, 2.f, 6.f, 3.f, 4.f, 4.f, 4.f, 4.f, 6.f, 4.f, 5.f, 2.f, 5.f, 5.f, 5.f, 5.f, 2.f, 4.f, 4.f, 4.f, 5.f, 4.f, 3.f, 6.f, 3.f, 4.f, 5.f, 2.f, 5.f, 4.f, 4.f, 5.f, 4.f, 3.f, 4.f, 5.f, 5.f, 3.f, 5.f, 6.f, 6.f, 3.f, 4.f, 5.f, 7.f, 6.f, 5.f, 2.f, 4.f, 5.f, 5.f, 4.f, 5.f, 4.f, 4.f, 6.f, 3.f, 4.f, 5.f, 4.f, 6.f, 2.f, 3.f, 4.f, 3.f, 3.f, 2.f, 2.f, 3.f, 4.f, 7.f, 3.f, 5.f, 4.f, 5.f, 4.f, 4.f, 4.f, 4.f, 6.f, 2.f, 3.f, 2.f, 5.f, 5.f, 4.f, 5.f, 2.f, 2.f, 1.f, 6.f, 2.f, 2.f, 3.f, 4.f, 5.f, 5.f, 3.f, 6.f, 6.f, 4.f, 3.f, 3.f, 3.f, 3.f, 3.f, 4.f, 5.f, 4.f, 4.f, 3.f, 5.f, 2.f, 3.f, 4.f, 5.f, 3.f, 4.f, 5.f, 5.f, 8.f, 4.f, 5.f, 3.f, 3.f, 4.f, 4.f, 5.f, 4.f, 5.f, 3.f, 3.f, 7.f, 2.f, 3.f, 2.f, 6.f, 6.f, 4.f, 4.f, 3.f, 5.f, 6.f, 2.f, 4.f, 3.f, 3.f, 4.f, 5.f, 3.f, 3.f, 6.f, 5.f, 3.f, 2.f, 5.f, 4.f, 4.f, 3.f, 5.f, 5.f, 6.f, 7.f, 3.f, 4.f, 3.f, 5.f, 6.f, 7.f, 5.f, 6.f, 5.f, 7.f, 4.f, 6.f, 5.f, 5.f, 6.f, 4.f, 2.f, 5.f, 4.f, 3.f, 4.f, 1.f, 5.f, 5.f, 3.f, 2.f, 2.f, 6.f, 5.f, 5.f, 2.f, 5.f, 2.f, 4.f, 4.f, 5.f, 5.f, 4.f, 3.f, 7.f, 4.f, 5.f, 3.f, 3.f, 3.f, 2.f, 3.f, 2.f, 3.f, 3.f, 4.f, 4.f, 2.f, 4.f, 5.f, 3.f, 4.f, 5.f, 3.f, 7.f, 2.f, 1.f, 3.f, 2.f, 3.f, 2.f, 3.f, 3.f, 4.f, 3.f, 4.f, 2.f, 4.f, 4.f, 4.f, 5.f, 3.f, 5.f, 3.f, 6.f, 6.f, 5.f, 3.f, 5.f, 3.f, 4.f, 3.f, 5.f, 3.f, 5.f, 6.f, 5.f, 3.f, 4.f, 5.f, 5.f, 3.f, 3.f, 3.f, 4.f, 6.f, 4.f, 3.f, 7.f, 4.f, 4.f, 6.f, 7.f, 5.f, 5.f, 3.f, 1.f, 2.f, 5.f, 5.f, 2.f, 5.f, 7.f, 5.f, 3.f, 1.f, 4.f, 6.f, 5.f, 7.f, 5.f, 6.f, 5.f, 6.f, 4.f, 3.f, 3.f, 4.f, 3.f, 4.f, 4.f, 4.f, 4.f, 3.f, 5.f, 2.f, 4.f, 5.f, 2.f, 5.f, 5.f, 4.f, 5.f, 4.f, 5.f, 2.f, 3.f, 5.f, 3.f, 6.f, 3.f, 4.f, 5.f, 3.f, 6.f, 5.f, 5.f, 6.f, 4.f, 6.f, 7.f, 4.f, 5.f, 3.f, 5.f, 4.f, 4.f, 4.f, 2.f, 2.f, 5.f, 3.f, 5.f, 3.f, 4.f, 6.f, 3.f, 5.f, 5.f, 3.f, 5.f, 4.f, 4.f, 4.f, 5.f, 2.f, 3.f, 5.f, 4.f, 2.f, 4.f, 5.f, 4.f, 2.f, 3.f, 4.f, 4.f, 5.f, 5.f, 1.f, 4.f, 4.f, 4.f, 3.f, 4.f, 5.f, 5.f, 8.f, 4.f, 4.f, 4.f, 3.f, 6.f, 2.f, 3.f, 4.f, 4.f, 4.f, 3.f, 2.f, 3.f, 4.f, 8.f, 3.f, 5.f, 5.f, 5.f, 3.f, 3.f, 4.f, 5.f, 7.f, 3.f, 3.f, 3.f, 6.f, 6.f, 5.f, 5.f, 3.f, 4.f, 3.f, 8.f, 3.f, 4.f, 2.f, 3.f, 4.f, 4.f, 3.f, 5.f, 5.f, 3.f, 2.f, 3.f, 3.f, 3.f, 4.f, 4.f, 4.f, 6.f, 6.f, 5.f, 6.f, 4.f, 5.f, 4.f, 6.f, 4.f, 5.f, 5.f, 4.f, 7.f, 3.f, 5.f, 5.f, 3.f, 5.f, 5.f, 6.f, 4.f, 5.f, 4.f, 2.f, 7.f, 2.f, 3.f, 1.f, 4.f, 5.f, 5.f, 4.f, 4.f, 5.f, 7.f, 2.f, 3.f, 3.f, 4.f, 4.f, 5.f, 3.f, 3.f, 6.f, 6.f, 3.f, 2.f, 4.f, 3.f, 3.f, 3.f, 3.f, 4.f, 4.f, 5.f, 1.f, 2.f, 3.f, 3.f, 4.f, 5.f, 4.f, 5.f, 4.f, 5.f, 6.f, 6.f, 6.f, 6.f, 7.f, 4.f, 3.f, 4.f, 5.f, 4.f, 4.f, 2.f, 5.f, 6.f, 4.f, 2.f, 2.f, 6.f, 5.f, 5.f, 1.f, 4.f, 2.f, 3.f, 4.f, 5.f, 5.f, 4.f, 5.f, 9.f, 4.f, 6.f, 4.f, 5.f, 5.f, 3.f, 4.f, 5.f, 5.f, 5.f, 4.f, 3.f, 1.f, 3.f, 4.f, 3.f, 4.f, 4.f, 3.f, 6.f, 2.f, 3.f, 3.f, 2.f, 3.f, 3.f, 4.f, 5.f, 6.f, 5.f, 5.f, 3.f, 4.f, 5.f, 5.f, 4.f, 3.f, 4.f, 3.f, 6.f, 7.f, 6.f, 4.f, 6.f, 4.f, 3.f, 3.f, 4.f, 3.f, 5.f, 5.f, 4.f, 2.f, 3.f, 4.f, 5.f, 3.f, 4.f, 2.f, 4.f, 5.f, 3.f, 3.f, 7.f, 4.f, 2.f, 5.f, 6.f, 5.f, 5.f, 3.f, 1.f, 2.f, 4.f, 4.f, 1.f, 3.f, 6.f, 3.f, 3.f, 1.f, 4.f, 4.f, 4.f, 5.f, 3.f, 4.f, 3.f, 4.f, 2.f, 3.f, 3.f, 4.f, 3.f, 4.f, 3.f, 3.f, 4.f, 2.f, 5.f, 1.f, 3.f, 4.f, 2.f, 6.f, 4.f, 3.f, 4.f, 3.f, 3.f, 1.f, 2.f, 5.f, 2.f, 6.f, 4.f, 5.f, 6.f, 3.f, 6.f, 4.f, 4.f, 5.f, 3.f, 5.f, 6.f, 3.f, 4.f, 2.f, 4.f, 5.f, 5.f, 5.f, 2.f, 3.f, 4.f, 3.f, 5.f, 3.f, 3.f, 9.f, 6.f, 7.f, 7.f, 4.f, 4.f, 3.f, 3.f, 4.f, 4.f, 3.f, 4.f, 6.f, 5.f, 3.f, 5.f, 
5.f, 5.f, 2.f, 4.f, 6.f, 7.f, 7.f, 5.f, 3.f, 4.f, 5.f, 4.f, 4.f, 5.f, 5.f, 5.f, 8.f, 4.f, 4.f, 4.f, 3.f, 5.f, 3.f, 3.f, 4.f, 4.f, 5.f, 3.f, 3.f, 2.f, 3.f, 6.f, 2.f, 5.f, 4.f, 4.f, 3.f, 3.f, 3.f, 5.f, 7.f, 2.f, 3.f, 2.f, 5.f, 5.f, 4.f, 4.f, 2.f, 2.f, 1.f, 6.f, 1.f, 2.f, 2.f, 3.f, 5.f, 4.f, 3.f, 5.f, 5.f, 3.f, 2.f, 2.f, 2.f, 2.f, 4.f, 3.f, 4.f, 4.f, 4.f, 4.f, 5.f, 2.f, 4.f, 4.f, 5.f, 2.f, 4.f, 4.f, 5.f, 9.f, 4.f, 5.f, 4.f, 3.f, 5.f, 5.f, 6.f, 4.f, 4.f, 3.f, 3.f, 6.f, 2.f, 3.f, 2.f, 5.f, 6.f, 4.f, 4.f, 3.f, 5.f, 6.f, 4.f, 5.f, 5.f, 6.f, 7.f, 4.f, 2.f, 3.f, 5.f, 4.f, 4.f, 3.f, 5.f, 5.f, 4.f, 3.f, 4.f, 5.f, 4.f, 6.f, 3.f, 4.f, 4.f, 5.f, 6.f, 6.f, 4.f, 6.f, 6.f, 6.f, 5.f, 6.f, 6.f, 7.f, 7.f, 4.f, 3.f, 4.f, 4.f, 4.f, 5.f, 2.f, 5.f, 7.f, 5.f, 2.f, 1.f, 5.f, 5.f, 4.f, 1.f, 4.f, 1.f, 3.f, 3.f, 5.f, 4.f, 4.f, 3.f, 7.f, 3.f, 6.f, 3.f, 3.f, 4.f, 1.f, 3.f, 2.f, 3.f, 3.f, 4.f, 3.f, 1.f, 3.f, 4.f, 2.f, 4.f, 4.f, 2.f, 6.f, 1.f, 2.f, 2.f, 2.f, 3.f, 2.f, 3.f, 3.f, 4.f, 4.f, 4.f, 2.f, 4.f, 4.f, 4.f, 5.f, 5.f, 5.f, 4.f, 8.f, 5.f, 5.f, 3.f, 5.f, 3.f, 3.f, 2.f, 4.f, 3.f, 5.f, 6.f, 5.f, 3.f, 4.f, 5.f, 5.f, 3.f, 4.f, 3.f, 4.f, 8.f, 6.f, 5.f, 9.f, 6.f}); diff --git a/libnd4j/tests_cpu/layers_tests/GraphHolderTests.cpp b/libnd4j/tests_cpu/layers_tests/GraphHolderTests.cpp index f1f7195e7..a50091840 100644 --- a/libnd4j/tests_cpu/layers_tests/GraphHolderTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/GraphHolderTests.cpp @@ -33,13 +33,13 @@ public: TEST_F(GraphHolderTests, SimpleTests_1) { Graph graph; Nd4jLong graphId = 119; - GraphHolder::getInstance()->registerGraph(graphId, &graph); + GraphHolder::getInstance().registerGraph(graphId, &graph); - ASSERT_TRUE(GraphHolder::getInstance()->hasGraph(graphId)); + ASSERT_TRUE(GraphHolder::getInstance().hasGraph(graphId)); - GraphHolder::getInstance()->forgetGraph(graphId); + GraphHolder::getInstance().forgetGraph(graphId); - ASSERT_FALSE(GraphHolder::getInstance()->hasGraph(graphId)); + ASSERT_FALSE(GraphHolder::getInstance().hasGraph(graphId)); } @@ -47,18 +47,18 @@ TEST_F(GraphHolderTests, SimpleTests_1) { TEST_F(GraphHolderTests, SimpleTests_2) { auto graph = new Graph; Nd4jLong graphId = 117; - GraphHolder::getInstance()->registerGraph(graphId, graph); + GraphHolder::getInstance().registerGraph(graphId, graph); - ASSERT_TRUE(GraphHolder::getInstance()->hasGraph(graphId)); + ASSERT_TRUE(GraphHolder::getInstance().hasGraph(graphId)); - auto graph2 = GraphHolder::getInstance()->cloneGraph(graphId); + auto graph2 = GraphHolder::getInstance().cloneGraph(graphId); ASSERT_TRUE(graph != graph2); ASSERT_TRUE(graph2 != nullptr); - GraphHolder::getInstance()->forgetGraph(graphId); + GraphHolder::getInstance().forgetGraph(graphId); - ASSERT_FALSE(GraphHolder::getInstance()->hasGraph(graphId)); + ASSERT_FALSE(GraphHolder::getInstance().hasGraph(graphId)); delete graph; delete graph2; @@ -68,18 +68,18 @@ TEST_F(GraphHolderTests, SimpleTests_2) { TEST_F(GraphHolderTests, SimpleTests_3) { auto graph = new Graph; Nd4jLong graphId = 117; - GraphHolder::getInstance()->registerGraph(graphId, graph); + GraphHolder::getInstance().registerGraph(graphId, graph); - ASSERT_TRUE(GraphHolder::getInstance()->hasGraph(graphId)); + ASSERT_TRUE(GraphHolder::getInstance().hasGraph(graphId)); - auto graph2 = GraphHolder::getInstance()->cloneGraph(graphId); + auto graph2 = GraphHolder::getInstance().cloneGraph(graphId); ASSERT_TRUE(graph != graph2); ASSERT_TRUE(graph2 != nullptr); - GraphHolder::getInstance()->dropGraph(graphId); + GraphHolder::getInstance().dropGraph(graphId); - 
ASSERT_FALSE(GraphHolder::getInstance()->hasGraph(graphId)); + ASSERT_FALSE(GraphHolder::getInstance().hasGraph(graphId)); delete graph2; diff --git a/libnd4j/tests_cpu/layers_tests/GraphStateTests.cpp b/libnd4j/tests_cpu/layers_tests/GraphStateTests.cpp index 878b05712..16c1ed623 100644 --- a/libnd4j/tests_cpu/layers_tests/GraphStateTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/GraphStateTests.cpp @@ -31,13 +31,13 @@ using namespace sd::graph; class GraphStateTests : public testing::Test { public: GraphStateTests() { - Environment::getInstance()->setDebug(false); - Environment::getInstance()->setVerbose(false); + Environment::getInstance().setDebug(false); + Environment::getInstance().setVerbose(false); }; ~GraphStateTests() { - Environment::getInstance()->setDebug(false); - Environment::getInstance()->setVerbose(false); + Environment::getInstance().setDebug(false); + Environment::getInstance().setVerbose(false); } }; diff --git a/libnd4j/tests_cpu/layers_tests/GraphTests.cpp b/libnd4j/tests_cpu/layers_tests/GraphTests.cpp index 73aac9c3b..6d21b00f2 100644 --- a/libnd4j/tests_cpu/layers_tests/GraphTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/GraphTests.cpp @@ -39,8 +39,8 @@ public: int fShape[] = {2, 2, 2, 1, 2, 0, 1, 102}; */ GraphTests() { - //Environment::getInstance()->setDebug(true); - //Environment::getInstance()->setVerbose(true); + //Environment::getInstance().setDebug(true); + //Environment::getInstance().setVerbose(true); } }; @@ -910,7 +910,7 @@ TEST_F(GraphTests, TestMultiOutput1) { auto nodeB0 = new Node(OpType_TRANSFORM_SAME, transform::Abs, 2, {-2}, {11}); nodeB0->markInplace(false); - auto op = sd::ops::OpRegistrator::getInstance()->getOperation("testop2i2o"); + auto op = sd::ops::OpRegistrator::getInstance().getOperation("testop2i2o"); // this op will add 1.0 to first input, and 2.0 for second input auto nodeT = new Node(op, 11, {1, 2}, {21, 31}, {}, 0.0f); @@ -951,7 +951,7 @@ TEST_F(GraphTests, TestMultiOutput1) { } TEST_F(GraphTests, TestDivergentNode1) { - auto op = sd::ops::OpRegistrator::getInstance()->getOperation("Switch"); + auto op = sd::ops::OpRegistrator::getInstance().getOperation("Switch"); auto nodeY = new Node(op, 1); ASSERT_TRUE(nodeY->isDivergencePoint()); diff --git a/libnd4j/tests_cpu/layers_tests/HashUtilsTests.cpp b/libnd4j/tests_cpu/layers_tests/HashUtilsTests.cpp index da513f7d4..431a4bc14 100644 --- a/libnd4j/tests_cpu/layers_tests/HashUtilsTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/HashUtilsTests.cpp @@ -29,7 +29,7 @@ class HashUtilsTests : public testing::Test { TEST_F(HashUtilsTests, TestEquality1) { std::string str("Conv2D"); - Nd4jLong hash1 = sd::ops::HashHelper::getInstance()->getLongHash(str); + Nd4jLong hash1 = sd::ops::HashHelper::getInstance().getLongHash(str); ASSERT_EQ(-1637140380760460323L, hash1); } @@ -38,6 +38,6 @@ TEST_F(HashUtilsTests, TestEquality1) { TEST_F(HashUtilsTests, TestEquality2) { std::string str("switch"); - Nd4jLong hash1 = sd::ops::HashHelper::getInstance()->getLongHash(str); + Nd4jLong hash1 = sd::ops::HashHelper::getInstance().getLongHash(str); ASSERT_EQ(-1988317239813741487L, hash1); } \ No newline at end of file diff --git a/libnd4j/tests_cpu/layers_tests/JavaInteropTests.cpp b/libnd4j/tests_cpu/layers_tests/JavaInteropTests.cpp index e6992d7a2..23080161a 100644 --- a/libnd4j/tests_cpu/layers_tests/JavaInteropTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/JavaInteropTests.cpp @@ -356,8 +356,8 @@ TEST_F(JavaInteropTests, TestInplace_1) { } TEST_F(JavaInteropTests, Test_Synonyms_1) { - auto op = 
OpRegistrator::getInstance()->getOperation("RDiv"); - auto opRef = OpRegistrator::getInstance()->getOperation("reversedivide"); + auto op = OpRegistrator::getInstance().getOperation("RDiv"); + auto opRef = OpRegistrator::getInstance().getOperation("reversedivide"); std::string nameExp("reversedivide"); ASSERT_TRUE(op != nullptr); @@ -371,8 +371,8 @@ TEST_F(JavaInteropTests, Test_Synonyms_1) { } TEST_F(JavaInteropTests, Test_Synonyms_2) { - auto op = OpRegistrator::getInstance()->getOperation("RDiv"); - auto opRef = OpRegistrator::getInstance()->getOperation("reversedivide"); + auto op = OpRegistrator::getInstance().getOperation("RDiv"); + auto opRef = OpRegistrator::getInstance().getOperation("reversedivide"); std::string nameExp("reversedivide"); ASSERT_TRUE(op != nullptr); @@ -386,8 +386,8 @@ TEST_F(JavaInteropTests, Test_Synonyms_2) { } TEST_F(JavaInteropTests, Test_Synonyms_3) { - auto op = OpRegistrator::getInstance()->getOperation("RDiv"); - auto opRef = OpRegistrator::getInstance()->getOperation("reversedivide"); + auto op = OpRegistrator::getInstance().getOperation("RDiv"); + auto opRef = OpRegistrator::getInstance().getOperation("reversedivide"); std::string nameExp("reversedivide"); ASSERT_TRUE(op != nullptr); @@ -486,7 +486,7 @@ TEST_F(JavaInteropTests, test_avgpooling_edge_1) { Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) x.shapeInfo(), x.specialShapeInfo()}; Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) z.buffer(), z.specialBuffer()}; - Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) z.shapeInfo(), z.specialShapeInfo()}; + Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) z.shapeInfo(), z.special()}; auto result = execCustomOp(nullptr, op.getOpHash(), ptrsInBuffer, ptrsInShapes, 1, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, exp, 11, nullptr, 0, false); @@ -563,19 +563,19 @@ TEST_F(JavaInteropTests, Test_GraphReuse_1) { registerGraph(nullptr, 119, (Nd4jPointer) data); - ASSERT_TRUE(GraphHolder::getInstance()->hasGraph(119)); + ASSERT_TRUE(GraphHolder::getInstance().hasGraph(119)); unregisterGraph(nullptr, 119); - ASSERT_FALSE(GraphHolder::getInstance()->hasGraph(119)); + ASSERT_FALSE(GraphHolder::getInstance().hasGraph(119)); delete[] data; } TEST_F(JavaInteropTests, Test_GraphReuse_2) { - //Environment::getInstance()->setDebug(true); - //Environment::getInstance()->setVerbose(true); + //Environment::getInstance().setDebug(true); + //Environment::getInstance().setVerbose(true); auto exp0 = NDArrayFactory::create('c', {3}, {3, 3, 3}); auto exp1 = NDArrayFactory::create('c', {3}, {6, 6, 6}); @@ -585,13 +585,13 @@ TEST_F(JavaInteropTests, Test_GraphReuse_2) { uint8_t* data = sd::graph::readFlatBuffers("./resources/reduce_dim_false.fb"); // we ensure that there's no such a graph stored earlier - ASSERT_FALSE(GraphHolder::getInstance()->hasGraph(119)); + ASSERT_FALSE(GraphHolder::getInstance().hasGraph(119)); // register the graph, to call for it later registerGraph(nullptr, 119, (Nd4jPointer) data); // and ensure we're ok - ASSERT_TRUE(GraphHolder::getInstance()->hasGraph(119)); + ASSERT_TRUE(GraphHolder::getInstance().hasGraph(119)); @@ -647,7 +647,7 @@ TEST_F(JavaInteropTests, Test_GraphReuse_2) { //////// clean out unregisterGraph(nullptr, 119); - ASSERT_FALSE(GraphHolder::getInstance()->hasGraph(119)); + ASSERT_FALSE(GraphHolder::getInstance().hasGraph(119)); delete[] data; @@ -830,8 +830,8 @@ TEST_F(JavaInteropTests, Test_Reduce3_EdgeCase) { extraPointers = new Nd4jPointer[6] {nullptr, context->getCudaStream(), context->getScalarPointer(), nullptr, context->getCudaSpecialStream(), 
context->getReductionPointer()}; #endif - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), {0,1}); - auto packY = sd::ConstantTadHelper::getInstance()->tadForDimensions(y.shapeInfo(), {0,1}); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(x.shapeInfo(), {0,1}); + auto packY = sd::ConstantTadHelper::getInstance().tadForDimensions(y.shapeInfo(), {0,1}); NDArray::prepareSpecialUse({&z}, {&x, &y, &dims}); OpaqueDataBuffer xBuf(x.dataBuffer()); @@ -853,14 +853,14 @@ TEST_F(JavaInteropTests, Test_Reduce3_EdgeCase) { /* TEST_F(JavaInteropTests, Test_SimpleIf_Output) { - Environment::getInstance()->setDebug(true); - Environment::getInstance()->setVerbose(false); + Environment::getInstance().setDebug(true); + Environment::getInstance().setVerbose(false); auto pl = sd::graph::readFlatBuffers("./resources/simpleif_0_1.fb"); auto ptr = executeFlatGraph(nullptr, pl); - Environment::getInstance()->setDebug(false); - Environment::getInstance()->setVerbose(false); + Environment::getInstance().setDebug(false); + Environment::getInstance().setVerbose(false); delete[] pl; delete ptr; @@ -979,7 +979,7 @@ TEST_F(JavaInteropTests, Test_AveragePooling_FF_TF_float) { } TEST_F(JavaInteropTests, Test_Mixed_Add_1) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; auto arrayX = NDArrayFactory::create({1, 2, 3, 4}); @@ -1226,7 +1226,7 @@ TEST_F(JavaInteropTests, Test_Fastpath_7) { } TEST_F(JavaInteropTests, test_bfloat16_rng) { - if (!Environment::getInstance()->isCPU()) + if (!Environment::getInstance().isCPU()) return; auto z = NDArrayFactory::create('c', {10}); @@ -1307,7 +1307,7 @@ TEST_F(JavaInteropTests, test_expandable_array_op_1) { } TEST_F(JavaInteropTests, test_workspace_backed_arrays_1) { - if (!Environment::getInstance()->isCPU()) + if (!Environment::getInstance().isCPU()) return; auto x = NDArrayFactory::create('c', {4, 3, 4, 4}); @@ -1338,7 +1338,7 @@ TEST_F(JavaInteropTests, test_workspace_backed_arrays_1) { } TEST_F(JavaInteropTests, test_linspace_shape_1) { - if (!Environment::getInstance()->isCPU()) + if (!Environment::getInstance().isCPU()) return; sd::ops::lin_space op; diff --git a/libnd4j/tests_cpu/layers_tests/LegacyOpsCudaTests.cu b/libnd4j/tests_cpu/layers_tests/LegacyOpsCudaTests.cu index 622ce9fbb..922d94afd 100644 --- a/libnd4j/tests_cpu/layers_tests/LegacyOpsCudaTests.cu +++ b/libnd4j/tests_cpu/layers_tests/LegacyOpsCudaTests.cu @@ -48,7 +48,7 @@ TEST_F(LegacyOpsCudaTests, test_sortTad_1) { auto e = NDArrayFactory::create('c', {3, 5}, {0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, 13.f, 14.f}); int axis = 1; - auto packX = ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), axis); + auto packX = ConstantTadHelper::getInstance().tadForDimensions(x.shapeInfo(), axis); Nd4jPointer extras[2] = {nullptr, LaunchContext::defaultContext()->getCudaStream()}; diff --git a/libnd4j/tests_cpu/layers_tests/LegacyOpsTests.cpp b/libnd4j/tests_cpu/layers_tests/LegacyOpsTests.cpp index 7c7734b38..fe9c5a7a0 100644 --- a/libnd4j/tests_cpu/layers_tests/LegacyOpsTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/LegacyOpsTests.cpp @@ -394,7 +394,7 @@ TEST_F(LegacyOpsTests, BroadcastingTests_2) { int axis = 1; // shape::printShapeInfoLinear("tad shape", tad.tadOnlyShapeInfo); - auto packY = sd::ConstantTadHelper::getInstance()->tadForDimensions(y.shapeInfo(), {axis}); + auto packY = sd::ConstantTadHelper::getInstance().tadForDimensions(y.shapeInfo(), 
{axis}); NDArray::prepareSpecialUse({&y}, {&x}); @@ -466,8 +466,8 @@ TEST_F(LegacyOpsTests, Reduce3_2) { extraPointers = new Nd4jPointer[7] {nullptr, context->getCudaStream(), context->getScalarPointer(), nullptr, context->getCudaSpecialStream(), context->getReductionPointer(), context->getAllocationPointer()}; #endif - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), {1}); - auto packY = sd::ConstantTadHelper::getInstance()->tadForDimensions(y.shapeInfo(), {1}); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(x.shapeInfo(), {1}); + auto packY = sd::ConstantTadHelper::getInstance().tadForDimensions(y.shapeInfo(), {1}); NDArray::prepareSpecialUse({&z}, {&x, &y, &dim}); OpaqueDataBuffer xBuf(x.dataBuffer()); @@ -506,8 +506,8 @@ TEST_F(LegacyOpsTests, Reduce3_3) { extraPointers = new Nd4jPointer[7] {nullptr, context->getCudaStream(), context->getScalarPointer(), nullptr, context->getCudaSpecialStream(), context->getReductionPointer(), context->getAllocationPointer()}; #endif - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), {1}); - auto packY = sd::ConstantTadHelper::getInstance()->tadForDimensions(y.shapeInfo(), {1}); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(x.shapeInfo(), {1}); + auto packY = sd::ConstantTadHelper::getInstance().tadForDimensions(y.shapeInfo(), {1}); NDArray::prepareSpecialUse({&z}, {&x, &y, &dim}); OpaqueDataBuffer xBuf(x.dataBuffer()); @@ -546,8 +546,8 @@ TEST_F(LegacyOpsTests, Reduce3_4) { extraPointers = new Nd4jPointer[7] {nullptr, context->getCudaStream(), context->getScalarPointer(), nullptr, context->getCudaSpecialStream(), context->getReductionPointer(), context->getAllocationPointer()}; #endif - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), {1}); - auto packY = sd::ConstantTadHelper::getInstance()->tadForDimensions(y.shapeInfo(), {1}); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(x.shapeInfo(), {1}); + auto packY = sd::ConstantTadHelper::getInstance().tadForDimensions(y.shapeInfo(), {1}); NDArray::prepareSpecialUse({&z}, {&x, &y, &dim}); OpaqueDataBuffer xBuf(x.dataBuffer()); @@ -588,8 +588,8 @@ TEST_F(LegacyOpsTests, Reduce3_5) { extraPointers = new Nd4jPointer[7] {nullptr, context->getCudaStream(), context->getScalarPointer(), nullptr, context->getCudaSpecialStream(), context->getReductionPointer(), context->getAllocationPointer()}; #endif - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), {1}); - auto packY = sd::ConstantTadHelper::getInstance()->tadForDimensions(y.shapeInfo(), {1}); + auto packX = sd::ConstantTadHelper::getInstance().tadForDimensions(x.shapeInfo(), {1}); + auto packY = sd::ConstantTadHelper::getInstance().tadForDimensions(y.shapeInfo(), {1}); NDArray::prepareSpecialUse({&z}, {&x, &y, &dim}); @@ -616,8 +616,8 @@ TEST_F(LegacyOpsTests, test_Reduce3_All_1) { auto z = NDArrayFactory::create('c', {1000, 1}); auto dim = NDArrayFactory::create('c', {1}, {-1}); - auto tadPackX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), -1); - auto tadPackY = sd::ConstantTadHelper::getInstance()->tadForDimensions(y.shapeInfo(), -1); + auto tadPackX = sd::ConstantTadHelper::getInstance().tadForDimensions(x.shapeInfo(), -1); + auto tadPackY = sd::ConstantTadHelper::getInstance().tadForDimensions(y.shapeInfo(), -1); sd::LaunchContext* context = sd::LaunchContext::defaultContext(); @@ -652,7 +652,7 @@ TEST_F(LegacyOpsTests, 
test_inverse_broadcast_1) { auto e = NDArrayFactory::create('c', {3, 4}); e.assign(2.0f); - auto tadPackY = sd::ConstantTadHelper::getInstance()->tadForDimensions(y.shapeInfo(), 1); + auto tadPackY = sd::ConstantTadHelper::getInstance().tadForDimensions(y.shapeInfo(), 1); y.tickWriteDevice(); @@ -680,7 +680,7 @@ TEST_F(LegacyOpsTests, test_inverse_broadcast_2) { auto erow = e(1, {0}); erow.assign(true); - auto tadPackY = sd::ConstantTadHelper::getInstance()->tadForDimensions(y.shapeInfo(), 1); + auto tadPackY = sd::ConstantTadHelper::getInstance().tadForDimensions(y.shapeInfo(), 1); z.tickWriteDevice(); @@ -739,7 +739,7 @@ TEST_F(LegacyOpsTests, test_legacy_reduce_empty_3) { } TEST_F(LegacyOpsTests, test_legacy_reduce_empty_4) { - if (!Environment::getInstance()->isCPU()) + if (!Environment::getInstance().isCPU()) return; int a = 0; diff --git a/libnd4j/tests_cpu/layers_tests/MmapTests.cpp b/libnd4j/tests_cpu/layers_tests/MmapTests.cpp index c1df42fd1..7200dc034 100644 --- a/libnd4j/tests_cpu/layers_tests/MmapTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/MmapTests.cpp @@ -34,7 +34,7 @@ public: TEST_F(MmapTests, Test_Basic_Mmap_1) { // FIXME: we must adopt this for CUDA as well - if (!Environment::getInstance()->isCPU()) + if (!Environment::getInstance().isCPU()) return; // just 10GB diff --git a/libnd4j/tests_cpu/layers_tests/MultiDataTypeTests.cpp b/libnd4j/tests_cpu/layers_tests/MultiDataTypeTests.cpp index 803029216..79f2ffa1e 100644 --- a/libnd4j/tests_cpu/layers_tests/MultiDataTypeTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/MultiDataTypeTests.cpp @@ -56,7 +56,7 @@ TEST_F(MultiDataTypeTests, DataTypeUtils_Test_3) { //////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, Basic_Test_1) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; auto x = NDArrayFactory::create('c', {2, 3}, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f}); @@ -70,7 +70,7 @@ TEST_F(MultiDataTypeTests, Basic_Test_1) { //////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, Basic_Test_2) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; auto x = NDArrayFactory::create('c', {2, 3}, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f}); @@ -84,7 +84,7 @@ TEST_F(MultiDataTypeTests, Basic_Test_2) { //////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, Basic_Test_3) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; auto x = NDArrayFactory::create('c', {2, 3}, {0, 1, 2, 3, 4, 5}); @@ -98,7 +98,7 @@ TEST_F(MultiDataTypeTests, Basic_Test_3) { //////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, Basic_Test_4) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; auto x = NDArrayFactory::create('c', {2, 3}, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f}); @@ -112,7 +112,7 @@ TEST_F(MultiDataTypeTests, Basic_Test_4) { //////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, Basic_Test_5) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; auto x = NDArrayFactory::create('c', {2, 3}, {0, 1, 2, 3, 4, 5}); @@ -125,7 +125,7 @@ 
TEST_F(MultiDataTypeTests, Basic_Test_5) { } TEST_F(MultiDataTypeTests, Basic_Test_7) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; auto x = NDArrayFactory::create('c', {2, 3}, {0, 1, 2, 3, 4, 5}); @@ -143,7 +143,7 @@ TEST_F(MultiDataTypeTests, Basic_Test_7) { //////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, Basic_Test_6) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; auto x = NDArrayFactory::create('c', {2, 3}, {0, 1, 2, 3, 4, 5}); @@ -301,7 +301,7 @@ TEST_F(MultiDataTypeTests, ndarray_varianceNumber_test1) { //////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, ndarray_operatorPlus_test1) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2, 2}, {0, 1, 2, 3}, sd::DataType::INT64); @@ -316,7 +316,7 @@ TEST_F(MultiDataTypeTests, ndarray_operatorPlus_test1) { //////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, ndarray_operatorPlus_test2) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2, 2}, {0, 1, 2, 3}, sd::DataType::INT64); @@ -340,7 +340,7 @@ TEST_F(MultiDataTypeTests, ndarray_operatorPlus_test2) { //////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, ndarray_operatorMinus_test1) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2, 2}, {0, 1, 2, 3}, sd::DataType::INT64); @@ -355,7 +355,7 @@ TEST_F(MultiDataTypeTests, ndarray_operatorMinus_test1) { //////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, ndarray_operatorMinus_test2) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2, 2}, {0, 1, 2, 3}, sd::DataType::INT64); @@ -382,7 +382,7 @@ TEST_F(MultiDataTypeTests, ndarray_operatorMinus_test2) { //////////////////////////////////////////////////////////////////////////////// multiply TEST_F(MultiDataTypeTests, ndarray_operatorMultiply_test1) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2, 2}, {0, 1, 2, 3}, sd::DataType::INT64); @@ -397,7 +397,7 @@ TEST_F(MultiDataTypeTests, ndarray_operatorMultiply_test1) { //////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, ndarray_operatorMultiply_test2) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2, 2}, {0, 1, 2, 3}, sd::DataType::INT64); @@ -422,7 +422,7 @@ TEST_F(MultiDataTypeTests, ndarray_operatorMultiply_test2) { //////////////////////////////////////////////////////////////////////////////// multiply TEST_F(MultiDataTypeTests, ndarray_operatorDivide_test1) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2, 2}, {4, 1, 2, 3}, sd::DataType::HALF); @@ -438,7 +438,7 @@ TEST_F(MultiDataTypeTests, 
ndarray_operatorDivide_test1) { //////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, ndarray_operatorDivide_test2) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2, 2}, {1, 2, 3, 4}, sd::DataType::INT64); @@ -470,7 +470,7 @@ TEST_F(MultiDataTypeTests, ndarray_operatorDivide_test2) { //////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, ndarray_operatorPlusEqual_test1) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray scalar1('c', {0}, std::vector{4}, sd::DataType::INT32); @@ -510,7 +510,7 @@ TEST_F(MultiDataTypeTests, ndarray_operatorPlusEqual_test1) { //////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, ndarray_operatorPlusEqual_test2) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2,2}, {0, 1, 2, 3}, sd::DataType::FLOAT32); @@ -548,7 +548,7 @@ TEST_F(MultiDataTypeTests, ndarray_operatorPlusEqual_test2) { //////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, ndarray_operatorMinusEqual_test1) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray scalar1('c', {0}, std::vector{4}, sd::DataType::INT32); @@ -588,7 +588,7 @@ TEST_F(MultiDataTypeTests, ndarray_operatorMinusEqual_test1) { //////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, ndarray_operatorMinusEqual_test2) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2,2}, {0, 1, 2, 3}, sd::DataType::FLOAT32); @@ -626,7 +626,7 @@ TEST_F(MultiDataTypeTests, ndarray_operatorMinusEqual_test2) { //////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, ndarray_operatorMultiplyEqual_test1) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray scalar1('c', {0}, std::vector{3}, sd::DataType::INT32); @@ -666,7 +666,7 @@ TEST_F(MultiDataTypeTests, ndarray_operatorMultiplyEqual_test1) { //////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, ndarray_operatorMultiplyEqual_test2) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2,2}, {0, 1, 2, 3}, sd::DataType::FLOAT32); @@ -704,7 +704,7 @@ TEST_F(MultiDataTypeTests, ndarray_operatorMultiplyEqual_test2) { //////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, ndarray_operatorDivideEqual_test1) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray scalar1('c', {0}, std::vector{3}, sd::DataType::INT32); @@ -744,7 +744,7 @@ TEST_F(MultiDataTypeTests, ndarray_operatorDivideEqual_test1) { //////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, ndarray_operatorDivideEqual_test2) { - if (!Environment::getInstance()->isExperimentalBuild()) + if 
(!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2,2}, {0, 2, 4, 6}, sd::DataType::FLOAT32); @@ -782,7 +782,7 @@ TEST_F(MultiDataTypeTests, ndarray_operatorDivideEqual_test2) { //////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, ndarray_reduceNumberFloat_test1) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2,2}, {0, 1, 2, 3}, sd::DataType::INT64); @@ -819,7 +819,7 @@ TEST_F(MultiDataTypeTests, ndarray_reduceNumberFloat_test1) { //////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, ndarray_reduceNumberSame_test1) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2,2}, {0, 1, 2, 3}, sd::DataType::INT64); @@ -856,7 +856,7 @@ TEST_F(MultiDataTypeTests, ndarray_reduceNumberSame_test1) { //////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, ndarray_reduceNumberBool_test1) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2,2}, {0, -1, 2, -3}, sd::DataType::INT64); @@ -889,7 +889,7 @@ TEST_F(MultiDataTypeTests, ndarray_reduceNumberBool_test1) { ////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, ndarray_reduceNumberLong_test1) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2,2}, {0, 1, 2, 3}, sd::DataType::INT64); @@ -925,7 +925,7 @@ TEST_F(MultiDataTypeTests, ndarray_reduceNumberLong_test1) { ////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, ndarray_indexReduceNumber_test1) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2,2}, {0, 1, 2, 3}, sd::DataType::INT32); @@ -948,7 +948,7 @@ TEST_F(MultiDataTypeTests, ndarray_indexReduceNumber_test1) { ////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, ndarray_applyTransformFloat_test1) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2,2}, {0, 4, 9, 16}, sd::DataType::INT64); @@ -986,7 +986,7 @@ TEST_F(MultiDataTypeTests, ndarray_applyTransformFloat_test1) { ////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, ndarray_applyTransformSame_test1) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2,2}, {0, 1, 2, 3}, sd::DataType::INT64); @@ -1031,7 +1031,7 @@ TEST_F(MultiDataTypeTests, ndarray_applyTransformSame_test1) { ////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, ndarray_applyTransformBool_test1) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2,2}, {0, 1, 2, 3}, sd::DataType::INT64); @@ -1067,7 +1067,7 @@ TEST_F(MultiDataTypeTests, ndarray_applyTransformBool_test1) { ////////////////////////////////////////////////////////////////////////////// 
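Every hunk in this suite is the same mechanical rewrite: Environment::getInstance() now returns a reference instead of a pointer, so each -> member access becomes a plain dot. A minimal sketch of the new API shape, assuming a Meyers-style singleton (the class body is illustrative, not the actual sd::Environment):

class EnvironmentSketch {
public:
    // old form returned a pointer:  static EnvironmentSketch* getInstance();
    // new form returns a reference, so callers use '.' and never hold null:
    static EnvironmentSketch& getInstance() {
        static EnvironmentSketch instance;  // constructed once; thread-safe since C++11
        return instance;
    }

    bool isExperimentalBuild() const { return _experimental; }

private:
    EnvironmentSketch() = default;
    bool _experimental = false;
};

// the guard pattern used throughout these tests then reads:
//   if (!EnvironmentSketch::getInstance().isExperimentalBuild()) return;

A reference also cannot be deleted or reseated by callers, which is presumably part of the point of the migration.
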
TEST_F(MultiDataTypeTests, ndarray_applyTransformStrict_test1) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2,2}, {0, 1, 2, 3}, sd::DataType::HALF); @@ -1113,7 +1113,7 @@ TEST_F(MultiDataTypeTests, ndarray_applyTransformStrict_test1) { ////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, ndarray_applyPairwiseTransform_test1) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2,3}, {0, 1, 2, 3, 4, 5}, sd::DataType::INT32); @@ -1147,7 +1147,7 @@ TEST_F(MultiDataTypeTests, ndarray_applyPairwiseTransform_test1) { ////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, ndarray_applyPairwiseTransform_test2) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2,3}, {1, 1, 2, 3, 4, 5}, sd::DataType::INT32); @@ -1176,7 +1176,7 @@ TEST_F(MultiDataTypeTests, ndarray_applyPairwiseTransform_test2) { ////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, ndarray_applyBroadcast_test1) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2,3}, {10, 20, 30, 40, 50, 60}, sd::DataType::INT32); @@ -1222,7 +1222,7 @@ TEST_F(MultiDataTypeTests, ndarray_applyBroadcast_test2) { ////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, ndarray_applyTrueBroadcast_test1) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2,2}, {10, 20, 30, 40}, sd::DataType::INT32); @@ -1281,7 +1281,7 @@ TEST_F(MultiDataTypeTests, ndarray_applyTrueBroadcast_test1) { ////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, ndarray_applyTrueBroadcast_test2) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2,2}, {10, 20, 30, 40}, sd::DataType::HALF); @@ -1310,7 +1310,7 @@ TEST_F(MultiDataTypeTests, ndarray_applyTrueBroadcast_test2) { ////////////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, ndarray_applyScalar_test1) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2,2}, {0, 1, 2, 3}, sd::DataType::INT64); @@ -1697,7 +1697,7 @@ TEST_F(MultiDataTypeTests, applyAllReduce3_test1) { ////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, RowCol_test1) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2,3}, {1,2,3,4,5,6}, sd::DataType::INT32); @@ -1726,7 +1726,7 @@ TEST_F(MultiDataTypeTests, RowCol_test1) { ////////////////////////////////////////////////////////////////////// TEST_F(MultiDataTypeTests, RowCol_test2) { - if (!Environment::getInstance()->isExperimentalBuild()) + if (!Environment::getInstance().isExperimentalBuild()) return; NDArray x1('c', {2,3}, {1,2,3,4,5,6}, sd::DataType::INT32); diff --git a/libnd4j/tests_cpu/layers_tests/NDArrayCudaBasicsTests.cu 
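The CUDA and NativeOps tests that follow migrate sd::ConstantTadHelper to the same reference-returning getInstance(), with tadForDimensions(shapeInfo, dims) handing back a cached TadPack of sub-array shapes and buffer offsets. As a rough, self-contained illustration of what a TAD ("tensor along dimension") split produces for a dense C-ordered array, restricted to trailing dimensions (stand-in types; not the real helper, which handles arbitrary dimension sets and caches the result):

#include <cstdint>
#include <vector>

struct TadView {
    std::vector<int64_t> shape;  // shape of one sub-array
    int64_t offset;              // element offset of that sub-array in the source
};

// Splitting a C-ordered, contiguous array over its trailing dimensions
// starting at `axis` yields one view per index combination of the leading axes.
std::vector<TadView> tadsForTrailingDims(const std::vector<int64_t>& shape, int axis) {
    int64_t numTads = 1;
    for (int i = 0; i < axis; i++)
        numTads *= shape[i];

    int64_t tadLength = 1;
    for (size_t i = axis; i < shape.size(); i++)
        tadLength *= shape[i];

    std::vector<TadView> tads(numTads);
    for (int64_t t = 0; t < numTads; t++) {
        tads[t].shape.assign(shape.begin() + axis, shape.end());
        tads[t].offset = t * tadLength;  // contiguous C-order layout assumed
    }
    return tads;
}

Caching matters here because reductions repeatedly request the same dimension split; that cache is exactly what the ConstantTadHelper singleton guards.
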
b/libnd4j/tests_cpu/layers_tests/NDArrayCudaBasicsTests.cu index f95705f08..01510dc91 100644 --- a/libnd4j/tests_cpu/layers_tests/NDArrayCudaBasicsTests.cu +++ b/libnd4j/tests_cpu/layers_tests/NDArrayCudaBasicsTests.cu @@ -686,7 +686,7 @@ TEST_F(NDArrayCudaBasicsTests, Test_PrimitiveCosine_3) { TEST_F(NDArrayCudaBasicsTests, TestRawBroadcast_2) { - //if (!Environment::getInstance()->isExperimentalBuild()) + //if (!Environment::getInstance().isExperimentalBuild()) // return; NDArray x = NDArrayFactory::create('c', {2,3,4}); @@ -746,7 +746,7 @@ TEST_F(NDArrayCudaBasicsTests, TestRawBroadcast_2) { TEST_F(NDArrayCudaBasicsTests, TestRawBroadcast_3) { - //if (!Environment::getInstance()->isExperimentalBuild()) + //if (!Environment::getInstance().isExperimentalBuild()) // return; NDArray x('c', {2,3,4}, sd::DataType::DOUBLE); @@ -944,7 +944,7 @@ TEST_F(NDArrayCudaBasicsTests, TestBroadcastMultiply_002) { //////////////////////////////////////////////////////////////////////////// TEST_F(NDArrayCudaBasicsTests, TestBroadcastRaw_1) { - //if (!Environment::getInstance()->isExperimentalBuild()) + //if (!Environment::getInstance().isExperimentalBuild()) // return; NDArray x('c', {2,3,4}, {100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100}, sd::DataType::INT32); diff --git a/libnd4j/tests_cpu/layers_tests/NativeOpsTests.cpp b/libnd4j/tests_cpu/layers_tests/NativeOpsTests.cpp index 3d0df208f..2f87b5099 100644 --- a/libnd4j/tests_cpu/layers_tests/NativeOpsTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/NativeOpsTests.cpp @@ -95,7 +95,7 @@ TEST_F(NativeOpsTests, ThresholdTests_1) { printf("Unsupported for cuda now.\n"); #else ::setElementThreshold(4); - ASSERT_TRUE(4 == sd::Environment::getInstance()->elementwiseThreshold()); + ASSERT_TRUE(4 == sd::Environment::getInstance().elementwiseThreshold()); #endif } @@ -107,7 +107,7 @@ TEST_F(NativeOpsTests, ThresholdTests_2) { printf("Unsupported for cuda now.\n"); #else ::setTADThreshold(4); - ASSERT_TRUE(4 == sd::Environment::getInstance()->tadThreshold()); + ASSERT_TRUE(4 == sd::Environment::getInstance().tadThreshold()); #endif } @@ -644,8 +644,8 @@ TEST_F(NativeOpsTests, Reduce3Test_4) { x.syncToDevice(); dimension.syncToHost(); int* dimensions = reinterpret_cast(dimension.buffer()); - auto tadPackX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), dimensions, dimension.lengthOf()); - auto tadPackY = sd::ConstantTadHelper::getInstance()->tadForDimensions(y.shapeInfo(), dimensions, dimension.lengthOf()); + auto tadPackX = sd::ConstantTadHelper::getInstance().tadForDimensions(x.shapeInfo(), dimensions, dimension.lengthOf()); + auto tadPackY = sd::ConstantTadHelper::getInstance().tadForDimensions(y.shapeInfo(), dimensions, dimension.lengthOf()); auto hTADShapeInfoX = tadPackX.primaryShapeInfo(); auto hTADOffsetsX = tadPackX.primaryOffsets(); @@ -963,8 +963,8 @@ TEST_F(NativeOpsTests, ScalarTadTest_1) { z.syncToDevice(); auto dimension = NDArrayFactory::create({0, 1}); auto dimensions = reinterpret_cast(dimension.buffer()); - auto tadPackX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), dimensions, dimension.lengthOf()); - auto tadPackZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(z.shapeInfo(), dimensions, dimension.lengthOf()); + auto tadPackX = sd::ConstantTadHelper::getInstance().tadForDimensions(x.shapeInfo(), dimensions, dimension.lengthOf()); + auto tadPackZ = sd::ConstantTadHelper::getInstance().tadForDimensions(z.shapeInfo(), dimensions, 
dimension.lengthOf()); OpaqueDataBuffer xBuf(x.dataBuffer()); OpaqueDataBuffer yBuf(y.dataBuffer()); @@ -1008,8 +1008,8 @@ TEST_F(NativeOpsTests, ScalarTadTest_2) { z.syncToDevice(); auto dimension = NDArrayFactory::create({0, 1}); auto dimensions = reinterpret_cast(dimension.buffer()); - auto tadPackX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), dimensions, dimension.lengthOf()); - auto tadPackZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(z.shapeInfo(), dimensions, dimension.lengthOf()); + auto tadPackX = sd::ConstantTadHelper::getInstance().tadForDimensions(x.shapeInfo(), dimensions, dimension.lengthOf()); + auto tadPackZ = sd::ConstantTadHelper::getInstance().tadForDimensions(z.shapeInfo(), dimensions, dimension.lengthOf()); z.assign(true); OpaqueDataBuffer xBuf(x.dataBuffer()); @@ -1057,8 +1057,8 @@ TEST_F(NativeOpsTests, ConcatTest_2) { int d = 0; auto dimension = NDArrayFactory::create('c', {1}, {d}); auto dimensions = reinterpret_cast(dimension.buffer()); - //auto tadPackX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), dimensions, dimension.lengthOf()); - auto tadPackZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(z.shapeInfo(), dimensions, dimension.lengthOf()); + //auto tadPackX = sd::ConstantTadHelper::getInstance().tadForDimensions(x.shapeInfo(), dimensions, dimension.lengthOf()); + auto tadPackZ = sd::ConstantTadHelper::getInstance().tadForDimensions(z.shapeInfo(), dimensions, dimension.lengthOf()); exp.linspace(1); Nd4jPointer datas[] = {x.buffer(), y.buffer()}; Nd4jPointer shapes[] = {(Nd4jPointer)x.shapeInfo(), (Nd4jPointer)y.shapeInfo()}; @@ -1125,8 +1125,8 @@ TEST_F(NativeOpsTests, PullRowsTest_1) { std::vector dims = {1}; - auto xTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), dims); - auto zTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(z.shapeInfo(), dims); + auto xTadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(x.shapeInfo(), dims); + auto zTadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(z.shapeInfo(), dims); Nd4jPointer nativeStart[2]; @@ -1230,7 +1230,7 @@ TEST_F(NativeOpsTests, ShuffleTest_1) { Nd4jPointer zShapeList[] = {(Nd4jPointer)z.shapeInfo(), (Nd4jPointer)z.shapeInfo()}; Nd4jPointer dzShapeList[] = {(Nd4jPointer)z.specialShapeInfo(), (Nd4jPointer)z.specialShapeInfo()}; int shuffleMap[] = {1, 0, 4, 3, 2}; - auto zTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), {1}); + auto zTadPack = sd::ConstantTadHelper::getInstance().tadForDimensions(x.shapeInfo(), {1}); Nd4jPointer zListOffset[] = {(Nd4jPointer)zTadPack.platformOffsets(), (Nd4jPointer)zTadPack.platformOffsets()}; Nd4jPointer zListTADs[] = {(Nd4jPointer)zTadPack.platformShapeInfo(), (Nd4jPointer)zTadPack.platformShapeInfo()}; ::shuffle(nullptr, @@ -1411,7 +1411,7 @@ TEST_F(NativeOpsTests, SortTest_4) { auto exp = NDArrayFactory::create('c', {3, 6}, {1, 5, 5, 10, 34, 120, 3, 29, 78, 111, 138, 331, 4, 50, 56, 71, 73, 91}); std::vector dims({1}); - auto packX = ConstantTadHelper::getInstance()->tadForDimensions(sortedVals.shapeInfo(), {1}); + auto packX = ConstantTadHelper::getInstance().tadForDimensions(sortedVals.shapeInfo(), {1}); ::sortTad(nullptr, sortedVals.buffer(), sortedVals.shapeInfo(), sortedVals.specialBuffer(), sortedVals.specialShapeInfo(), dims.data(), dims.size(), packX.platformShapeInfo(), packX.platformOffsets(), false); // sortedVals.printBuffer("OUT"); diff --git 
a/libnd4j/tests_cpu/layers_tests/OmpLaunchHelperTests.cpp b/libnd4j/tests_cpu/layers_tests/OmpLaunchHelperTests.cpp index a7c7eae24..af327d653 100644 --- a/libnd4j/tests_cpu/layers_tests/OmpLaunchHelperTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/OmpLaunchHelperTests.cpp @@ -31,12 +31,12 @@ private: int ewt = 0; public: OmpLaunchHelperTests() { - this->ewt = Environment::getInstance()->elementwiseThreshold(); - Environment::getInstance()->setElementwiseThreshold(1000); + this->ewt = Environment::getInstance().elementwiseThreshold(); + Environment::getInstance().setElementwiseThreshold(1000); }; ~OmpLaunchHelperTests() { - Environment::getInstance()->setElementwiseThreshold(this->ewt); + Environment::getInstance().setElementwiseThreshold(this->ewt); } }; @@ -85,7 +85,7 @@ TEST_F(OmpLaunchHelperTests, test_tad_threads_1) { Nd4jLong numTads = 16; Nd4jLong tadLength = 16; -// nd4j_printf("TT: [%i]; ET: [%i];\n", Environment::getInstance()->tadThreshold(), Environment::getInstance()->elementwiseThreshold()); +// nd4j_printf("TT: [%i]; ET: [%i];\n", Environment::getInstance().tadThreshold(), Environment::getInstance().elementwiseThreshold()); ASSERT_EQ(1, OmpLaunchHelper::tadThreads(tadLength, numTads)); } @@ -94,7 +94,7 @@ TEST_F(OmpLaunchHelperTests, test_tad_threads_2) { return; Nd4jLong numTads = 2; - Nd4jLong tadLength = Environment::getInstance()->elementwiseThreshold(); + Nd4jLong tadLength = Environment::getInstance().elementwiseThreshold(); ASSERT_EQ(2, OmpLaunchHelper::tadThreads(tadLength, numTads)); } @@ -117,7 +117,7 @@ TEST_F(OmpLaunchHelperTests, test_tad_threads_5) { auto exp = omp_get_max_threads(); Nd4jLong numTads = exp; - Nd4jLong tadLength = Environment::getInstance()->elementwiseThreshold(); + Nd4jLong tadLength = Environment::getInstance().elementwiseThreshold(); ASSERT_EQ(exp, OmpLaunchHelper::tadThreads(tadLength, numTads)); } \ No newline at end of file diff --git a/libnd4j/tests_cpu/layers_tests/OpTrackerTests.cpp b/libnd4j/tests_cpu/layers_tests/OpTrackerTests.cpp index fe581e09e..a14971ad5 100644 --- a/libnd4j/tests_cpu/layers_tests/OpTrackerTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/OpTrackerTests.cpp @@ -42,24 +42,24 @@ public: TEST_F(OpTrackerTests, Test_Existence_1) { sd::_loader loader; - // nd4j_printf("Groups: %i; Operations: %i\n", OpTracker::getInstance()->totalGroups(), OpTracker::getInstance()->totalOperations()); + // nd4j_printf("Groups: %i; Operations: %i\n", OpTracker::getInstance().totalGroups(), OpTracker::getInstance().totalOperations()); - ASSERT_TRUE(OpTracker::getInstance()->totalGroups() > 0); - ASSERT_TRUE(OpTracker::getInstance()->totalOperations() > 0); + ASSERT_TRUE(OpTracker::getInstance().totalGroups() > 0); + ASSERT_TRUE(OpTracker::getInstance().totalOperations() > 0); - OpTracker::getInstance()->exportOperations(); + OpTracker::getInstance().exportOperations(); } TEST_F(OpTrackerTests, Test_Ops_List_1) { sd::ops::less op; - auto vec = OpRegistrator::getInstance()->getAllHashes(); + auto vec = OpRegistrator::getInstance().getAllHashes(); // nd4j_printf("Total ops: %lld\n", vec.size()); // nd4j_printf("Less hash: %lld\n", op.getOpHash()); for (const auto &v: vec) { if (v == 5484196977525668316L) { - auto op = OpRegistrator::getInstance()->getOperation(v); + auto op = OpRegistrator::getInstance().getOperation(v); // nd4j_printf("OpName: %s\n", op->getOpName()->c_str()); } } diff --git a/libnd4j/tests_cpu/layers_tests/PlaygroundTests.cpp b/libnd4j/tests_cpu/layers_tests/PlaygroundTests.cpp index 91ddcbd30..a8f45cc48 100644 --- 
a/libnd4j/tests_cpu/layers_tests/PlaygroundTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/PlaygroundTests.cpp @@ -124,12 +124,12 @@ TEST_F(PlaygroundTests, test_bert_full_1) { */ - sd::Environment::getInstance()->setProfiling(true); + sd::Environment::getInstance().setProfiling(true); auto profile = GraphProfilingHelper::profile(graph, 1); profile->printOut(); - sd::Environment::getInstance()->setProfiling(false); + sd::Environment::getInstance().setProfiling(false); delete profile; /* @@ -185,12 +185,12 @@ TEST_F(PlaygroundTests, test_bert_1) { ASSERT_EQ(z, *array); */ - sd::Environment::getInstance()->setProfiling(true); + sd::Environment::getInstance().setProfiling(true); auto profile = GraphProfilingHelper::profile(graph, 1); profile->printOut(); - sd::Environment::getInstance()->setProfiling(false); + sd::Environment::getInstance().setProfiling(false); delete profile; /* @@ -237,12 +237,12 @@ TEST_F(PlaygroundTests, test_bert_2) { ASSERT_EQ(z, *array); */ - sd::Environment::getInstance()->setProfiling(true); + sd::Environment::getInstance().setProfiling(true); auto profile = GraphProfilingHelper::profile(graph, 1); profile->printOut(); - sd::Environment::getInstance()->setProfiling(false); + sd::Environment::getInstance().setProfiling(false); delete profile; /* @@ -631,7 +631,7 @@ TEST_F(PlaygroundTests, test_s_0) { for (auto shape: shapes) { for (auto t: threads) { - sd::Environment::getInstance()->setMaxMasterThreads(t); + sd::Environment::getInstance().setMaxMasterThreads(t); auto x = NDArrayFactory::create('c', shape); auto y = NDArrayFactory::create('c', {shape[3]}); @@ -670,7 +670,7 @@ TEST_F(PlaygroundTests, test_s_1) { for (auto shape: shapes) { for (auto t: threads) { - sd::Environment::getInstance()->setMaxMasterThreads(t); + sd::Environment::getInstance().setMaxMasterThreads(t); auto x = NDArrayFactory::create('c', shape); auto y = NDArrayFactory::create('c', {shape[1]}); diff --git a/libnd4j/tests_cpu/layers_tests/ServerRelatedTests.cpp b/libnd4j/tests_cpu/layers_tests/ServerRelatedTests.cpp index e0d03731b..50c1f4b19 100644 --- a/libnd4j/tests_cpu/layers_tests/ServerRelatedTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/ServerRelatedTests.cpp @@ -29,13 +29,13 @@ using namespace sd::graph; class ServerRelatedTests : public testing::Test { public: ServerRelatedTests() { - Environment::getInstance()->setDebug(true); - Environment::getInstance()->setVerbose(true); + Environment::getInstance().setDebug(true); + Environment::getInstance().setVerbose(true); } ~ServerRelatedTests() { - Environment::getInstance()->setDebug(false); - Environment::getInstance()->setVerbose(false); + Environment::getInstance().setDebug(false); + Environment::getInstance().setVerbose(false); } }; /* @@ -89,9 +89,9 @@ TEST_F(ServerRelatedTests, Basic_Execution_Test_1) { auto exp = NDArrayFactory::create('c', {3}, {3.f, 3.f, 3.f}); - GraphHolder::getInstance()->registerGraph(11901L, oGraph); + GraphHolder::getInstance().registerGraph(11901L, oGraph); - auto cGraph = GraphHolder::getInstance()->cloneGraph(11901L); + auto cGraph = GraphHolder::getInstance().cloneGraph(11901L); ASSERT_TRUE(oGraph != cGraph); @@ -108,7 +108,7 @@ TEST_F(ServerRelatedTests, Basic_Execution_Test_1) { delete cGraph; - GraphHolder::getInstance()->dropGraphAny(11901L); + GraphHolder::getInstance().dropGraphAny(11901L); } TEST_F(ServerRelatedTests, Basic_Execution_Test_2) { @@ -120,9 +120,9 @@ TEST_F(ServerRelatedTests, Basic_Execution_Test_2) { auto input0 = NDArrayFactory::create('c', {3, 3}, {2.f,2.f,2.f, 2.f,2.f,2.f, 
2.f,2.f,2.f}); auto exp = NDArrayFactory::create('c', {3}, {6.f, 6.f, 6.f}); - GraphHolder::getInstance()->registerGraph(11902L, oGraph); + GraphHolder::getInstance().registerGraph(11902L, oGraph); - auto cGraph = GraphHolder::getInstance()->cloneGraph(11902L); + auto cGraph = GraphHolder::getInstance().cloneGraph(11902L); ASSERT_TRUE(oGraph != cGraph); @@ -148,7 +148,7 @@ TEST_F(ServerRelatedTests, Basic_Execution_Test_2) { delete cGraph; - GraphHolder::getInstance()->dropGraphAny(11902L); + GraphHolder::getInstance().dropGraphAny(11902L); } TEST_F(ServerRelatedTests, BasicExecutionTests_3) { @@ -160,7 +160,7 @@ TEST_F(ServerRelatedTests, BasicExecutionTests_3) { auto input0 = NDArrayFactory::create('c', {3, 3}, {2.f,2.f,2.f, 2.f,2.f,2.f, 2.f,2.f,2.f}); auto exp = NDArrayFactory::create('c', {3}, {6.f, 6.f, 6.f}); - GraphHolder::getInstance()->registerGraph(11903L, oGraph); + GraphHolder::getInstance().registerGraph(11903L, oGraph); // mastering InferenceRequest InferenceRequest ir(11903L); @@ -172,7 +172,7 @@ TEST_F(ServerRelatedTests, BasicExecutionTests_3) { auto fir = GetFlatInferenceRequest(fptr); - auto flatResult = GraphHolder::getInstance()->execute(fir->id(), builder, fir); + auto flatResult = GraphHolder::getInstance().execute(fir->id(), builder, fir); builder.Finish(flatResult); auto ptr = builder.GetBufferPointer(); @@ -183,6 +183,6 @@ TEST_F(ServerRelatedTests, BasicExecutionTests_3) { ASSERT_EQ(exp, *restored.at(0)->getNDArray()); - GraphHolder::getInstance()->dropGraphAny(11903L); + GraphHolder::getInstance().dropGraphAny(11903L); } #endif diff --git a/libnd4j/tests_cpu/layers_tests/SortCpuTests.cpp b/libnd4j/tests_cpu/layers_tests/SortCpuTests.cpp index 4dcedf035..a31547561 100644 --- a/libnd4j/tests_cpu/layers_tests/SortCpuTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/SortCpuTests.cpp @@ -34,7 +34,7 @@ public: TEST_F(SortCpuTests, test_linear_sort_by_key_1) { - if (!Environment::getInstance()->isCPU()) + if (!Environment::getInstance().isCPU()) return; auto k = NDArrayFactory::create('c', {10}, {1, 3, 5, 9, 0, 2, 4, 6, 7, 8}); @@ -51,7 +51,7 @@ TEST_F(SortCpuTests, test_linear_sort_by_key_1) { } TEST_F(SortCpuTests, test_linear_sort_by_val_1) { - if (!Environment::getInstance()->isCPU()) + if (!Environment::getInstance().isCPU()) return; auto k = NDArrayFactory::create('c', {10}, {1, 3, 5, 9, 0, 2, 4, 6, 7, 8}); @@ -68,7 +68,7 @@ TEST_F(SortCpuTests, test_linear_sort_by_val_1) { } TEST_F(SortCpuTests, test_tad_sort_by_key_1) { - if (!Environment::getInstance()->isCPU()) + if (!Environment::getInstance().isCPU()) return; auto k = NDArrayFactory::create('c', {2, 10}, {1, 3, 5, 9, 0, 2, 4, 6, 7, 8, 1, 3, 5, 9, 0, 2, 4, 6, 7, 8}); @@ -86,7 +86,7 @@ TEST_F(SortCpuTests, test_tad_sort_by_key_1) { } TEST_F(SortCpuTests, test_tad_sort_by_val_1) { - if (!Environment::getInstance()->isCPU()) + if (!Environment::getInstance().isCPU()) return; auto k = NDArrayFactory::create('c', {2, 10}, {1, 3, 5, 9, 0, 2, 4, 6, 7, 8, 1, 3, 5, 9, 0, 2, 4, 6, 7, 8}); diff --git a/libnd4j/tests_cpu/layers_tests/TadTests.cpp b/libnd4j/tests_cpu/layers_tests/TadTests.cpp index a2cdec003..947927bfb 100644 --- a/libnd4j/tests_cpu/layers_tests/TadTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/TadTests.cpp @@ -245,13 +245,13 @@ TEST_F(TadTests, test_tad_order_4) { TEST_F(TadTests, test_column_1) { auto x = NDArrayFactory::create('c', {5, 2}); - auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), 0); + auto tadPack = 
sd::ConstantTadHelper::getInstance().tadForDimensions(x.shapeInfo(), 0);
 
     ASSERT_EQ(1, shape::rank(tadPack.primaryShapeInfo()));
     ASSERT_EQ(5, shape::length(tadPack.primaryShapeInfo()));
     ASSERT_TRUE(shape::isVector(tadPack.primaryShapeInfo()));
 
-    auto scalarViewPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(tadPack.primaryShapeInfo(), 0);
+    auto scalarViewPack = sd::ConstantTadHelper::getInstance().tadForDimensions(tadPack.primaryShapeInfo(), 0);
 
     ASSERT_TRUE(shape::equalsStrict(tadPack.primaryShapeInfo(), scalarViewPack.primaryShapeInfo()));
 }
diff --git a/libnd4j/tests_cpu/layers_tests/ThreadsTests.cpp b/libnd4j/tests_cpu/layers_tests/ThreadsTests.cpp
index a9450e9d0..71957bc59 100644
--- a/libnd4j/tests_cpu/layers_tests/ThreadsTests.cpp
+++ b/libnd4j/tests_cpu/layers_tests/ThreadsTests.cpp
@@ -208,9 +208,31 @@ TEST_F(ThreadsTests, reduction_test_1) {
     ASSERT_EQ(8192, sum);
 }
 
+static void _code(int thread_id) {
+    auto x = NDArrayFactory::create<float>('c', {65536 * 16});
+    x.assign(1.1f);
+}
+
+TEST_F(ThreadsTests, crash_test_1) {
+    if (!Environment::getInstance().isCPU())
+        return;
+
+    for (int e = 0; e < 3; e++) {
+        std::vector<std::thread> threads(std::thread::hardware_concurrency());
+
+        // creating some threads
+        for (int t = 0; t < threads.size(); t++)
+            threads[t] = std::thread(_code, t);
+
+        // blocking until everything is finished
+        for (auto &t:threads)
+            t.join();
+    }
+}
+
 /*
 TEST_F(ThreadsTests, basic_test_1) {
-    if (!Environment::getInstance()->isCPU())
+    if (!Environment::getInstance().isCPU())
         return;
 
     auto instance = samediff::ThreadPool::getInstance();
diff --git a/libnd4j/tests_cpu/layers_tests/WorkspaceTests.cpp b/libnd4j/tests_cpu/layers_tests/WorkspaceTests.cpp
index 571db71f3..b291e5fbb 100644
--- a/libnd4j/tests_cpu/layers_tests/WorkspaceTests.cpp
+++ b/libnd4j/tests_cpu/layers_tests/WorkspaceTests.cpp
@@ -112,7 +112,7 @@ TEST_F(WorkspaceTests, ResetTest1) {
 
 TEST_F(WorkspaceTests, StretchTest1) {
-    if (!Environment::getInstance()->isCPU())
+    if (!Environment::getInstance().isCPU())
         return;
 
     Workspace workspace(128);
@@ -147,7 +147,7 @@ TEST_F(WorkspaceTests, StretchTest1) {
 }
 
 TEST_F(WorkspaceTests, NewInWorkspaceTest1) {
-    if (!Environment::getInstance()->isCPU())
+    if (!Environment::getInstance().isCPU())
         return;
 
     Workspace ws(65536);
@@ -155,11 +155,11 @@ TEST_F(WorkspaceTests, NewInWorkspaceTest1) {
     ASSERT_EQ(65536, ws.getCurrentSize());
     ASSERT_EQ(0, ws.getCurrentOffset());
 
-    ASSERT_FALSE(MemoryRegistrator::getInstance()->hasWorkspaceAttached());
+    ASSERT_FALSE(MemoryRegistrator::getInstance().hasWorkspaceAttached());
 
-    MemoryRegistrator::getInstance()->attachWorkspace(&ws);
+    MemoryRegistrator::getInstance().attachWorkspace(&ws);
 
-    ASSERT_TRUE(MemoryRegistrator::getInstance()->hasWorkspaceAttached());
+    ASSERT_TRUE(MemoryRegistrator::getInstance().hasWorkspaceAttached());
 
     auto ast = NDArrayFactory::create_('c', {5, 5});
@@ -167,10 +167,10 @@
     delete ast;
 
-    MemoryRegistrator::getInstance()->forgetWorkspace();
+    MemoryRegistrator::getInstance().forgetWorkspace();
 
-    ASSERT_FALSE(MemoryRegistrator::getInstance()->hasWorkspaceAttached());
-    ASSERT_TRUE(MemoryRegistrator::getInstance()->getWorkspace() == nullptr);
+    ASSERT_FALSE(MemoryRegistrator::getInstance().hasWorkspaceAttached());
+    ASSERT_TRUE(MemoryRegistrator::getInstance().getWorkspace() == nullptr);
 }
 
@@ -182,7 +182,7 @@ TEST_F(WorkspaceTests, NewInWorkspaceTest2) {
     ASSERT_EQ(65536, ws.getCurrentSize());
     ASSERT_EQ(0, ws.getCurrentOffset());
 
-
MemoryRegistrator::getInstance()->attachWorkspace(&ws); + MemoryRegistrator::getInstance().attachWorkspace(&ws); auto ast = NDArrayFactory::create_('c', {5, 5}, &ctx); @@ -190,11 +190,11 @@ TEST_F(WorkspaceTests, NewInWorkspaceTest2) { delete ast; - MemoryRegistrator::getInstance()->forgetWorkspace(); + MemoryRegistrator::getInstance().forgetWorkspace(); } TEST_F(WorkspaceTests, CloneTest1) { - if (!Environment::getInstance()->isCPU()) + if (!Environment::getInstance().isCPU()) return; Workspace ws(65536); @@ -250,7 +250,7 @@ TEST_F(WorkspaceTests, Test_Graph_1) { #endif TEST_F(WorkspaceTests, Test_Externalized_1) { - if (!Environment::getInstance()->isCPU()) + if (!Environment::getInstance().isCPU()) return; char buffer[10000]; diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-native-api/src/main/java/org/nd4j/nativeblas/NativeOps.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-native-api/src/main/java/org/nd4j/nativeblas/NativeOps.java index ae9ff1e94..b4bd62096 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-native-api/src/main/java/org/nd4j/nativeblas/NativeOps.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-native-api/src/main/java/org/nd4j/nativeblas/NativeOps.java @@ -1119,7 +1119,7 @@ public interface NativeOps { */ int dataTypeFromNpyHeader(Pointer numpyHeader); - OpaqueConstantDataBuffer shapeBuffer(int rank, LongPointer shape, LongPointer strides, int dtype, char order, long ews, boolean empty); + OpaqueConstantShapeBuffer shapeBuffer(int rank, LongPointer shape, LongPointer strides, int dtype, char order, long ews, boolean empty); OpaqueConstantDataBuffer constantBufferDouble(int dtype, DoublePointer data, int length); @@ -1128,9 +1128,12 @@ public interface NativeOps { Pointer getConstantDataBufferPrimary(OpaqueConstantDataBuffer dbf); Pointer getConstantDataBufferSpecial(OpaqueConstantDataBuffer dbf); long getConstantDataBufferLength(OpaqueConstantDataBuffer dbf); - long getConstantDataBufferSizeOf(OpaqueConstantDataBuffer dbf); - void deleteShapeBuffer(OpaqueConstantDataBuffer state); + Pointer getConstantShapeBufferPrimary(OpaqueConstantShapeBuffer dbf); + Pointer getConstantShapeBufferSpecial(OpaqueConstantShapeBuffer dbf); + + void deleteConstantShapeBuffer(OpaqueConstantShapeBuffer state); + void deleteConstantDataBuffer(OpaqueConstantDataBuffer state); OpaqueContext createGraphContext(int nodeId); OpaqueRandomGenerator getGraphContextRandomGenerator(OpaqueContext ptr); diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-native-api/src/main/java/org/nd4j/nativeblas/OpaqueConstantShapeBuffer.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-native-api/src/main/java/org/nd4j/nativeblas/OpaqueConstantShapeBuffer.java new file mode 100644 index 000000000..977747fb6 --- /dev/null +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-native-api/src/main/java/org/nd4j/nativeblas/OpaqueConstantShapeBuffer.java @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2015-2019 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
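On the interface side, shape buffers now come back as their own opaque handle: shapeBuffer(...) returns OpaqueConstantShapeBuffer, its host/device pointers are read through getConstantShapeBufferPrimary/getConstantShapeBufferSpecial, and the handle is released with deleteConstantShapeBuffer (the old getConstantDataBufferSizeOf/deleteShapeBuffer pair disappears with the type split). A compact sketch of that create/read/delete lifecycle with stand-in types; the real handle is opaque, and unlike this stub, its shape data is owned by libnd4j's constant cache rather than by the handle:

#include <cstdint>
#include <cstring>

struct ShapeBufferStub {      // stand-in for OpaqueConstantShapeBuffer
    int64_t* primary;         // host-side shapeInfo
    int64_t* special;         // device-side copy; null on pure CPU
};

ShapeBufferStub* makeShapeBufferStub(int rank, const int64_t* shape) {
    // the real shapeInfo packs rank, shape, strides, ews and order; the stub
    // fills only rank and shape to keep the lifecycle visible
    auto* b = new ShapeBufferStub{new int64_t[2 * rank + 4](), nullptr};
    b->primary[0] = rank;
    std::memcpy(b->primary + 1, shape, rank * sizeof(int64_t));
    return b;
}

void deleteShapeBufferStub(ShapeBufferStub* b) {
    delete[] b->primary;
    delete b;
}

Judging by the executioner changes further down, callers wrap or copy the primary pointer and then delete only the opaque handle; the cached shape data itself stays alive natively.
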
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +package org.nd4j.nativeblas; + +import org.bytedeco.javacpp.Pointer; + +/** + * + * @author saudet + */ +public class OpaqueConstantShapeBuffer extends Pointer { + public OpaqueConstantShapeBuffer(Pointer p) { super(p); } +} diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaExecutioner.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaExecutioner.java index afca1daa5..65bfa24fc 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaExecutioner.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaExecutioner.java @@ -2156,14 +2156,14 @@ public class CudaExecutioner extends DefaultOpExecutioner { if (nativeOps.lastErrorCode() != 0) throw new RuntimeException(nativeOps.lastErrorMessage()); - OpaqueConstantDataBuffer dbf = nativeOps.shapeBuffer(shape.length, new LongPointer(shape), new LongPointer(stride), dtype.toInt(), order, elementWiseStride, empty); + val dbf = nativeOps.shapeBuffer(shape.length, new LongPointer(shape), new LongPointer(stride), dtype.toInt(), order, elementWiseStride, empty); if (nativeOps.lastErrorCode() != 0) throw new RuntimeException(nativeOps.lastErrorMessage()); - val result = new CudaLongDataBuffer(nativeOps.getConstantDataBufferPrimary(dbf), nativeOps.getConstantDataBufferSpecial(dbf), Shape.shapeInfoLength(shape.length)); + val result = new CudaLongDataBuffer(nativeOps.getConstantShapeBufferPrimary(dbf), nativeOps.getConstantShapeBufferSpecial(dbf), Shape.shapeInfoLength(shape.length)); - nativeOps.deleteShapeBuffer(dbf); + nativeOps.deleteConstantShapeBuffer(dbf); return result; } @@ -2191,7 +2191,7 @@ public class CudaExecutioner extends DefaultOpExecutioner { if (nativeOps.lastErrorCode() != 0) throw new RuntimeException(nativeOps.lastErrorMessage()); - OpaqueConstantDataBuffer dbf = nativeOps.constantBufferLong(desiredType.toInt(), new LongPointer(values), values.length); + val dbf = nativeOps.constantBufferLong(desiredType.toInt(), new LongPointer(values), values.length); if (nativeOps.lastErrorCode() != 0) throw new RuntimeException(nativeOps.lastErrorMessage()); @@ -2207,7 +2207,7 @@ public class CudaExecutioner extends DefaultOpExecutioner { if (nativeOps.lastErrorCode() != 0) throw new RuntimeException(nativeOps.lastErrorMessage()); - OpaqueConstantDataBuffer dbf = nativeOps.constantBufferDouble(desiredType.toInt(), new DoublePointer(values), values.length); + val dbf = nativeOps.constantBufferDouble(desiredType.toInt(), new DoublePointer(values), values.length); if (nativeOps.lastErrorCode() != 0) throw new RuntimeException(nativeOps.lastErrorMessage()); diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCuda.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCuda.java index cc6ffc19a..38c7188f1 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCuda.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCuda.java @@ -469,6 +469,73 @@ public class Nd4jCuda extends org.nd4j.nativeblas.Nd4jCudaHelper { // #endif //DEV_TESTS_DATABUFFER_H +// Parsed from array/PointerDeallocator.h + 
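Two new headers, array/PointerDeallocator.h and array/PointerWrapper.h, join the parsed set here, and the presets later skip sd::PointerWrapper, sd::PointerDeallocator and std::shared_ptr outright, so none of this surfaces in Java: it is native ownership plumbing. A guess at the underlying idea, sketched as a minimal RAII pair (names derived from the header names, not the actual classes):

#include <memory>
#include <utility>

class DeallocatorSketch {
public:
    virtual ~DeallocatorSketch() = default;
    // default: do nothing, appropriate for cache-owned constant memory
    virtual void release(void* ptr) const { (void)ptr; }
};

class PointerWrapperSketch {
public:
    PointerWrapperSketch(void* ptr, std::shared_ptr<DeallocatorSketch> dealloc)
        : _ptr(ptr), _dealloc(std::move(dealloc)) {}
    ~PointerWrapperSketch() {
        if (_dealloc) _dealloc->release(_ptr);
    }

    void* pointer() const { return _ptr; }

private:
    void* _ptr;
    std::shared_ptr<DeallocatorSketch> _dealloc;
};

Pairing a pointer with its deallocation policy would let buffers share cleanup logic without raw-pointer constructors, which fits the ConstantDataBuffer changes below (its raw four-argument constructor disappears).
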
+/******************************************************************************* + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +// #ifndef SD_POINTERDEALLOCATOR_H_ +// #define SD_POINTERDEALLOCATOR_H_ + +// #include +// #include + + + +// #endif //SD_POINTERDEALLOCATOR_H_ + + +// Parsed from array/PointerWrapper.h + +/******************************************************************************* + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +// #ifndef SD_ARRAY_POINTER_H_ +// #define SD_ARRAY_POINTER_H_ + +// #include +// #include +// #include +// #include + // namespace sd + +// #endif //SD_ARRAY_POINTER_H_ + + // Parsed from array/ConstantDescriptor.h /******************************************************************************* @@ -581,6 +648,9 @@ public class Nd4jCuda extends org.nd4j.nativeblas.Nd4jCudaHelper { // #include // #include +// #include +// #include +// #include @Namespace("sd") @NoOffset public static class ConstantDataBuffer extends Pointer { static { Loader.load(); } /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ @@ -592,18 +662,16 @@ public class Nd4jCuda extends org.nd4j.nativeblas.Nd4jCudaHelper { return (ConstantDataBuffer)super.position(position); } - public ConstantDataBuffer(@Cast("Nd4jPointer") Pointer primary, @Cast("Nd4jPointer") Pointer special, @Cast("Nd4jLong") long numEelements, @Cast("Nd4jLong") long sizeOf) { super((Pointer)null); allocate(primary, special, numEelements, sizeOf); } - private native void allocate(@Cast("Nd4jPointer") Pointer primary, @Cast("Nd4jPointer") Pointer special, @Cast("Nd4jLong") long numEelements, @Cast("Nd4jLong") long sizeOf); public ConstantDataBuffer(@Const @ByRef ConstantDataBuffer other) { super((Pointer)null); allocate(other); } private native void allocate(@Const @ByRef ConstantDataBuffer other); public ConstantDataBuffer() { super((Pointer)null); allocate(); } private native void allocate(); - public native @Cast("Nd4jLong") long sizeOf(); - public native @Cast("Nd4jLong") long length(); + public native @Cast("uint8_t") byte sizeOf(); + public native @Cast("uint64_t") long length(); - public native @Cast("Nd4jPointer") Pointer primary(); - public native @Cast("Nd4jPointer") Pointer special(); + public native Pointer primary(); + public native Pointer special(); public native @ByRef @Name("operator =") ConstantDataBuffer put(@Const @ByRef ConstantDataBuffer other); } @@ -612,6 +680,114 @@ public class Nd4jCuda extends org.nd4j.nativeblas.Nd4jCudaHelper { // #endif //DEV_TESTS_CONSTANTDATABUFFER_H +// Parsed from array/ConstantShapeBuffer.h + +/******************************************************************************* + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +// #ifndef SD_ARRAY_CONSTANTSHAPEBUFFER_H_ +// #define SD_ARRAY_CONSTANTSHAPEBUFFER_H_ + +// #include +// #include +// #include +// #include + +@Namespace("sd") public static class ConstantShapeBuffer extends Pointer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public ConstantShapeBuffer(Pointer p) { super(p); } + /** Native array allocator. Access with {@link Pointer#position(long)}. 
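In the regenerated ConstantDataBuffer binding above, length() becomes an unsigned 64-bit count, sizeOf() a single byte, and primary()/special() plain Pointers rather than Nd4jPointer casts. The ConstantShapeBuffer class being introduced here adds a third accessor, platform(). A stub of that trio, under the plausible reading that platform() resolves to whichever pointer the current backend executes against (not the real class):

#include <cstdint>

class ConstantShapeBufferSketch {
public:
    const int64_t* primary() const { return _primary; }  // host shapeInfo
    const int64_t* special() const { return _special; }  // device copy; may be null
    const int64_t* platform() const {
#ifdef __CUDABLAS__
        return _special;   // CUDA backend executes against the device copy
#else
        return _primary;   // CPU backend uses host memory directly
#endif
    }

private:
    const int64_t* _primary = nullptr;
    const int64_t* _special = nullptr;
};

The __CUDABLAS__ macro is the same backend switch the presets define for the CUDA build.
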
*/ + public ConstantShapeBuffer(long size) { super((Pointer)null); allocateArray(size); } + private native void allocateArray(long size); + @Override public ConstantShapeBuffer position(long position) { + return (ConstantShapeBuffer)super.position(position); + } + + public ConstantShapeBuffer() { super((Pointer)null); allocate(); } + private native void allocate(); + + public native @Cast("const Nd4jLong*") LongPointer primary(); + public native @Cast("const Nd4jLong*") LongPointer special(); + public native @Cast("const Nd4jLong*") LongPointer platform(); +} + + // namespace sd + +// #endif //SD_ARRAY_CONSTANTSHAPEBUFFER_H_ + + +// Parsed from array/ConstantOffsetsBuffer.h + +/******************************************************************************* + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +// #ifndef SD_ARRAY_CONSTANTOFFSETSBUFFER_H_ +// #define SD_ARRAY_CONSTANTOFFSETSBUFFER_H_ + +// #include +// #include +// #include +// #include + +@Namespace("sd") public static class ConstantOffsetsBuffer extends Pointer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public ConstantOffsetsBuffer(Pointer p) { super(p); } + /** Native array allocator. Access with {@link Pointer#position(long)}. */ + public ConstantOffsetsBuffer(long size) { super((Pointer)null); allocateArray(size); } + private native void allocateArray(long size); + @Override public ConstantOffsetsBuffer position(long position) { + return (ConstantOffsetsBuffer)super.position(position); + } + + public ConstantOffsetsBuffer() { super((Pointer)null); allocate(); } + private native void allocate(); + + public native @Cast("const Nd4jLong*") LongPointer primary(); + public native @Cast("const Nd4jLong*") LongPointer special(); + public native @Cast("const Nd4jLong*") LongPointer platform(); +} + + // namespace sd + +// #endif //SD_ARRAY_CONSTANTOFFSETSBUFFER_H_ + + // Parsed from array/TadPack.h /******************************************************************************* @@ -637,7 +813,8 @@ public class Nd4jCuda extends org.nd4j.nativeblas.Nd4jCudaHelper { // #ifndef DEV_TESTS_TADPACK_H // #define DEV_TESTS_TADPACK_H -// #include "ConstantDataBuffer.h" +// #include +// #include @Namespace("sd") @NoOffset public static class TadPack extends Pointer { static { Loader.load(); } /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
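ConstantOffsetsBuffer, next, is the same three-accessor pattern applied to TAD offset arrays, and immediately after it TadPack's constructor trades its two untyped ConstantDataBuffer arguments for a ConstantShapeBuffer plus a ConstantOffsetsBuffer, both now taken by const reference (the generated parameter name "offets" is a typo carried over from the native signature). Sketched with stand-ins:

#include <cstdint>

struct ConstantShapeBufferStub {};    // cached shapeInfo for one TAD
struct ConstantOffsetsBufferStub {};  // cached per-TAD buffer offsets

class TadPackSketch {
public:
    // old: TadPack(ConstantDataBuffer&, ConstantDataBuffer&, Nd4jLong)
    // new: dedicated, const-correct buffer types
    TadPackSketch(const ConstantShapeBufferStub& shapes,
                  const ConstantOffsetsBufferStub& offsets,
                  int64_t numTads)
        : _shapes(shapes), _offsets(offsets), _numTads(numTads) {}

    int64_t numberOfTads() const { return _numTads; }

private:
    ConstantShapeBufferStub _shapes;
    ConstantOffsetsBufferStub _offsets;
    int64_t _numTads;
};

Typed buffers make it impossible to pass offsets where shapes are expected, which the old two-ConstantDataBuffer signature allowed.
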
*/ @@ -649,8 +826,8 @@ public class Nd4jCuda extends org.nd4j.nativeblas.Nd4jCudaHelper { return (TadPack)super.position(position); } - public TadPack(@ByRef ConstantDataBuffer shapes, @ByRef ConstantDataBuffer offets, @Cast("Nd4jLong") long numTads) { super((Pointer)null); allocate(shapes, offets, numTads); } - private native void allocate(@ByRef ConstantDataBuffer shapes, @ByRef ConstantDataBuffer offets, @Cast("Nd4jLong") long numTads); + public TadPack(@Const @ByRef ConstantShapeBuffer shapes, @Const @ByRef ConstantOffsetsBuffer offets, @Cast("Nd4jLong") long numTads) { super((Pointer)null); allocate(shapes, offets, numTads); } + private native void allocate(@Const @ByRef ConstantShapeBuffer shapes, @Const @ByRef ConstantOffsetsBuffer offets, @Cast("Nd4jLong") long numTads); public TadPack() { super((Pointer)null); allocate(); } private native void allocate(); @@ -859,7 +1036,7 @@ public class Nd4jCuda extends org.nd4j.nativeblas.Nd4jCudaHelper { public native int _blasMinorVersion(); public native Environment _blasMinorVersion(int setter); public native int _blasPatchVersion(); public native Environment _blasPatchVersion(int setter); - public static native Environment getInstance(); + public static native @ByRef Environment getInstance(); public native @Cast("bool") boolean isVerbose(); public native void setVerbose(@Cast("bool") boolean reallyVerbose); @@ -3048,9 +3225,9 @@ public native void inspectArray(@Cast("Nd4jPointer*") PointerPointer extraPointe public native void inspectArray(@Cast("Nd4jPointer*") PointerPointer extraPointers, @Cast("Nd4jPointer") Pointer buffer, @Cast("Nd4jLong*") LongBuffer shapeInfo, @Cast("Nd4jPointer") Pointer specialBuffer, @Cast("Nd4jLong*") LongBuffer specialShapeInfo, @Cast("Nd4jPointer") Pointer debugInfo); public native void inspectArray(@Cast("Nd4jPointer*") PointerPointer extraPointers, @Cast("Nd4jPointer") Pointer buffer, @Cast("Nd4jLong*") long[] shapeInfo, @Cast("Nd4jPointer") Pointer specialBuffer, @Cast("Nd4jLong*") long[] specialShapeInfo, @Cast("Nd4jPointer") Pointer debugInfo); -public native OpaqueConstantDataBuffer shapeBuffer(int rank, @Cast("Nd4jLong*") LongPointer shape, @Cast("Nd4jLong*") LongPointer strides, @Cast("sd::DataType") int dtype, char order, @Cast("Nd4jLong") long ews, @Cast("bool") boolean empty); -public native OpaqueConstantDataBuffer shapeBuffer(int rank, @Cast("Nd4jLong*") LongBuffer shape, @Cast("Nd4jLong*") LongBuffer strides, @Cast("sd::DataType") int dtype, char order, @Cast("Nd4jLong") long ews, @Cast("bool") boolean empty); -public native OpaqueConstantDataBuffer shapeBuffer(int rank, @Cast("Nd4jLong*") long[] shape, @Cast("Nd4jLong*") long[] strides, @Cast("sd::DataType") int dtype, char order, @Cast("Nd4jLong") long ews, @Cast("bool") boolean empty); +public native OpaqueConstantShapeBuffer shapeBuffer(int rank, @Cast("Nd4jLong*") LongPointer shape, @Cast("Nd4jLong*") LongPointer strides, @Cast("sd::DataType") int dtype, char order, @Cast("Nd4jLong") long ews, @Cast("bool") boolean empty); +public native OpaqueConstantShapeBuffer shapeBuffer(int rank, @Cast("Nd4jLong*") LongBuffer shape, @Cast("Nd4jLong*") LongBuffer strides, @Cast("sd::DataType") int dtype, char order, @Cast("Nd4jLong") long ews, @Cast("bool") boolean empty); +public native OpaqueConstantShapeBuffer shapeBuffer(int rank, @Cast("Nd4jLong*") long[] shape, @Cast("Nd4jLong*") long[] strides, @Cast("sd::DataType") int dtype, char order, @Cast("Nd4jLong") long ews, @Cast("bool") boolean empty); public native OpaqueConstantDataBuffer 
constantBufferLong(@Cast("sd::DataType") int dtype, @Cast("const Nd4jLong*") LongPointer data, int length); public native OpaqueConstantDataBuffer constantBufferLong(@Cast("sd::DataType") int dtype, @Cast("const Nd4jLong*") LongBuffer data, int length); @@ -3063,9 +3240,12 @@ public native OpaqueConstantDataBuffer constantBuffer(@Cast("sd::DataType") int public native @Cast("Nd4jPointer") Pointer getConstantDataBufferPrimary(OpaqueConstantDataBuffer dbf); public native @Cast("Nd4jPointer") Pointer getConstantDataBufferSpecial(OpaqueConstantDataBuffer dbf); public native @Cast("Nd4jLong") long getConstantDataBufferLength(OpaqueConstantDataBuffer dbf); -public native @Cast("Nd4jLong") long getConstantDataBufferSizeOf(OpaqueConstantDataBuffer dbf); -public native void deleteShapeBuffer(OpaqueConstantDataBuffer ptr); +public native @Cast("Nd4jPointer") Pointer getConstantShapeBufferPrimary(OpaqueConstantShapeBuffer dbf); +public native @Cast("Nd4jPointer") Pointer getConstantShapeBufferSpecial(OpaqueConstantShapeBuffer dbf); + +public native void deleteConstantShapeBuffer(OpaqueConstantShapeBuffer ptr); +public native void deleteConstantDataBuffer(OpaqueConstantDataBuffer ptr); public native OpaqueContext createGraphContext(int nodeId); public native OpaqueRandomGenerator getGraphContextRandomGenerator(OpaqueContext ptr); @@ -3639,6 +3819,7 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); // #include // #include // #include +// #include @@ -4478,7 +4659,7 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); public native void setShapeInfo(@Cast("const Nd4jLong*") LongBuffer shapeInfo, @Cast("const sd::DataType") int dtype); public native void setShapeInfo(@Cast("const Nd4jLong*") long[] shapeInfo, @Cast("const sd::DataType") int dtype); public native void setShapeInfo(@Const @ByRef ShapeDescriptor descriptor); - public native void setShapeInfo(@Const @ByRef ConstantDataBuffer shapeBuffer); + public native void setShapeInfo(@Const @ByRef ConstantShapeBuffer shapeBuffer); /** * returns absolute offset which corresponds to given sequential index @@ -7202,13 +7383,13 @@ public static final int PREALLOC_SIZE = 33554432; * @param rank the rank of the shape */ - @Namespace("shape") public static native int isMatrix(@Cast("Nd4jLong*") LongPointer shape, int rank); - @Namespace("shape") public static native int isMatrix(@Cast("Nd4jLong*") LongBuffer shape, int rank); - @Namespace("shape") public static native int isMatrix(@Cast("Nd4jLong*") long[] shape, int rank); + @Namespace("shape") public static native int isMatrix(@Cast("const Nd4jLong*") LongPointer shape, int rank); + @Namespace("shape") public static native int isMatrix(@Cast("const Nd4jLong*") LongBuffer shape, int rank); + @Namespace("shape") public static native int isMatrix(@Cast("const Nd4jLong*") long[] shape, int rank); - @Namespace("shape") public static native int isMatrix(@Cast("Nd4jLong*") LongPointer shapeInfo); - @Namespace("shape") public static native int isMatrix(@Cast("Nd4jLong*") LongBuffer shapeInfo); - @Namespace("shape") public static native int isMatrix(@Cast("Nd4jLong*") long[] shapeInfo); + @Namespace("shape") public static native int isMatrix(@Cast("const Nd4jLong*") LongPointer shapeInfo); + @Namespace("shape") public static native int isMatrix(@Cast("const Nd4jLong*") LongBuffer shapeInfo); + @Namespace("shape") public static native int isMatrix(@Cast("const Nd4jLong*") long[] shapeInfo); /** * Returns the shape portion of an information * buffer @@ -10143,7 +10324,7 @@ public static 
final int PREALLOC_SIZE = 33554432; public OpRegistrator(Pointer p) { super(p); } - public static native OpRegistrator getInstance(); + public static native @ByRef OpRegistrator getInstance(); public static native void exitHandler(); public static native void sigIntHandler(int sig); diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCudaPresets.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCudaPresets.java index 05b335c87..6ac8e133a 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCudaPresets.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCudaPresets.java @@ -34,8 +34,12 @@ import org.bytedeco.javacpp.tools.InfoMapper; value = {@Platform(define = "LIBND4J_ALL_OPS", include = { "array/DataType.h", "array/DataBuffer.h", + "array/PointerDeallocator.h", + "array/PointerWrapper.h", "array/ConstantDescriptor.h", "array/ConstantDataBuffer.h", + "array/ConstantShapeBuffer.h", + "array/ConstantOffsetsBuffer.h", "array/TadPack.h", "execution/ErrorReference.h", "execution/Engine.h", @@ -166,6 +170,8 @@ public class Nd4jCudaPresets implements LoadEnabled, InfoMapper { .put(new Info("OpaqueVariablesSet").pointerTypes("OpaqueVariablesSet")) .put(new Info("OpaqueVariable").pointerTypes("OpaqueVariable")) .put(new Info("OpaqueConstantDataBuffer").pointerTypes("OpaqueConstantDataBuffer")) + .put(new Info("OpaqueConstantShapeBuffer").pointerTypes("OpaqueConstantShapeBuffer")) + .put(new Info("OpaqueConstantOffsetsBuffer").pointerTypes("OpaqueConstantOffsetsBuffer")) .put(new Info("OpaqueContext").pointerTypes("OpaqueContext")) .put(new Info("OpaqueRandomGenerator").pointerTypes("OpaqueRandomGenerator")) .put(new Info("OpaqueLaunchContext").pointerTypes("OpaqueLaunchContext")) @@ -187,7 +193,7 @@ public class Nd4jCudaPresets implements LoadEnabled, InfoMapper { infoMap.put(new Info("__CUDACC__", "MAX_UINT", "HAVE_MKLDNN").define(false)) .put(new Info("__JAVACPP_HACK__", "LIBND4J_ALL_OPS","__CUDABLAS__").define(true)) .put(new Info("std::initializer_list", "cnpy::NpyArray", "sd::NDArray::applyLambda", "sd::NDArray::applyPairwiseLambda", - "sd::graph::FlatResult", "sd::graph::FlatVariable", "sd::NDArray::subarray").skip()) + "sd::graph::FlatResult", "sd::graph::FlatVariable", "sd::NDArray::subarray", "std::shared_ptr", "sd::PointerWrapper", "sd::PointerDeallocator").skip()) .put(new Info("std::string").annotations("@StdString").valueTypes("BytePointer", "String") .pointerTypes("@Cast({\"char*\", \"std::string*\"}) BytePointer")) .put(new Info("std::pair").pointerTypes("IntIntPair").define()) diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/linalg/cpu/nativecpu/ops/NativeOpExecutioner.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/linalg/cpu/nativecpu/ops/NativeOpExecutioner.java index 508144f26..5e12f1dfd 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/linalg/cpu/nativecpu/ops/NativeOpExecutioner.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/linalg/cpu/nativecpu/ops/NativeOpExecutioner.java @@ -2018,13 +2018,13 @@ public class NativeOpExecutioner extends DefaultOpExecutioner { @Override public DataBuffer createShapeInfo(long[] shape, long[] stride, long elementWiseStride, char order, DataType dtype, boolean empty) { - OpaqueConstantDataBuffer dbf = 
loop.shapeBuffer(shape.length, new LongPointer(shape), new LongPointer(stride), dtype.toInt(), order, elementWiseStride, empty); + val dbf = loop.shapeBuffer(shape.length, new LongPointer(shape), new LongPointer(stride), dtype.toInt(), order, elementWiseStride, empty); if (loop.lastErrorCode() != 0) throw new RuntimeException(loop.lastErrorMessage()); - val result = new LongBuffer(loop.getConstantDataBufferPrimary(dbf), Shape.shapeInfoLength(shape.length)); + val result = new LongBuffer(loop.getConstantShapeBufferPrimary(dbf), Shape.shapeInfoLength(shape.length)); - loop.deleteShapeBuffer(dbf); + loop.deleteConstantShapeBuffer(dbf); return result; } diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java index f17f11093..2926c06b9 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java @@ -491,6 +491,73 @@ public class Nd4jCpu extends org.nd4j.nativeblas.Nd4jCpuHelper { // #endif //DEV_TESTS_DATABUFFER_H +// Parsed from array/PointerDeallocator.h + +/******************************************************************************* + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +// #ifndef SD_POINTERDEALLOCATOR_H_ +// #define SD_POINTERDEALLOCATOR_H_ + +// #include +// #include + + + +// #endif //SD_POINTERDEALLOCATOR_H_ + + +// Parsed from array/PointerWrapper.h + +/******************************************************************************* + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +// #ifndef SD_ARRAY_POINTER_H_ +// #define SD_ARRAY_POINTER_H_ + +// #include +// #include +// #include +// #include + // namespace sd + +// #endif //SD_ARRAY_POINTER_H_ + + // Parsed from array/ConstantDataBuffer.h /******************************************************************************* @@ -517,6 +584,9 @@ public class Nd4jCpu extends org.nd4j.nativeblas.Nd4jCpuHelper { // #include // #include +// #include +// #include +// #include @Namespace("sd") @NoOffset public static class ConstantDataBuffer extends Pointer { static { Loader.load(); } /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ @@ -528,18 +598,16 @@ public class Nd4jCpu extends org.nd4j.nativeblas.Nd4jCpuHelper { return (ConstantDataBuffer)super.position(position); } - public ConstantDataBuffer(@Cast("Nd4jPointer") Pointer primary, @Cast("Nd4jPointer") Pointer special, @Cast("Nd4jLong") long numEelements, @Cast("Nd4jLong") long sizeOf) { super((Pointer)null); allocate(primary, special, numEelements, sizeOf); } - private native void allocate(@Cast("Nd4jPointer") Pointer primary, @Cast("Nd4jPointer") Pointer special, @Cast("Nd4jLong") long numEelements, @Cast("Nd4jLong") long sizeOf); public ConstantDataBuffer(@Const @ByRef ConstantDataBuffer other) { super((Pointer)null); allocate(other); } private native void allocate(@Const @ByRef ConstantDataBuffer other); public ConstantDataBuffer() { super((Pointer)null); allocate(); } private native void allocate(); - public native @Cast("Nd4jLong") long sizeOf(); - public native @Cast("Nd4jLong") long length(); + public native @Cast("uint8_t") byte sizeOf(); + public native @Cast("uint64_t") long length(); - public native @Cast("Nd4jPointer") Pointer primary(); - public native @Cast("Nd4jPointer") Pointer special(); + public native Pointer primary(); + public native Pointer special(); public native @ByRef @Name("operator =") ConstantDataBuffer put(@Const @ByRef ConstantDataBuffer other); } @@ -548,6 +616,114 @@ public class Nd4jCpu extends org.nd4j.nativeblas.Nd4jCpuHelper { // #endif //DEV_TESTS_CONSTANTDATABUFFER_H +// Parsed from array/ConstantShapeBuffer.h + +/******************************************************************************* + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +// #ifndef SD_ARRAY_CONSTANTSHAPEBUFFER_H_ +// #define SD_ARRAY_CONSTANTSHAPEBUFFER_H_ + +// #include +// #include +// #include +// #include + +@Namespace("sd") public static class ConstantShapeBuffer extends Pointer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public ConstantShapeBuffer(Pointer p) { super(p); } + /** Native array allocator. 
Access with {@link Pointer#position(long)}. */ + public ConstantShapeBuffer(long size) { super((Pointer)null); allocateArray(size); } + private native void allocateArray(long size); + @Override public ConstantShapeBuffer position(long position) { + return (ConstantShapeBuffer)super.position(position); + } + + public ConstantShapeBuffer() { super((Pointer)null); allocate(); } + private native void allocate(); + + public native @Cast("const Nd4jLong*") LongPointer primary(); + public native @Cast("const Nd4jLong*") LongPointer special(); + public native @Cast("const Nd4jLong*") LongPointer platform(); +} + + // namespace sd + +// #endif //SD_ARRAY_CONSTANTSHAPEBUFFER_H_ + + +// Parsed from array/ConstantOffsetsBuffer.h + +/******************************************************************************* + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +// #ifndef SD_ARRAY_CONSTANTOFFSETSBUFFER_H_ +// #define SD_ARRAY_CONSTANTOFFSETSBUFFER_H_ + +// #include +// #include +// #include +// #include + +@Namespace("sd") public static class ConstantOffsetsBuffer extends Pointer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public ConstantOffsetsBuffer(Pointer p) { super(p); } + /** Native array allocator. Access with {@link Pointer#position(long)}. */ + public ConstantOffsetsBuffer(long size) { super((Pointer)null); allocateArray(size); } + private native void allocateArray(long size); + @Override public ConstantOffsetsBuffer position(long position) { + return (ConstantOffsetsBuffer)super.position(position); + } + + public ConstantOffsetsBuffer() { super((Pointer)null); allocate(); } + private native void allocate(); + + public native @Cast("const Nd4jLong*") LongPointer primary(); + public native @Cast("const Nd4jLong*") LongPointer special(); + public native @Cast("const Nd4jLong*") LongPointer platform(); +} + + // namespace sd + +// #endif //SD_ARRAY_CONSTANTOFFSETSBUFFER_H_ + + // Parsed from array/ConstantDescriptor.h /******************************************************************************* @@ -659,7 +835,8 @@ public class Nd4jCpu extends org.nd4j.nativeblas.Nd4jCpuHelper { // #ifndef DEV_TESTS_TADPACK_H // #define DEV_TESTS_TADPACK_H -// #include "ConstantDataBuffer.h" +// #include +// #include @Namespace("sd") @NoOffset public static class TadPack extends Pointer { static { Loader.load(); } /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ @@ -671,8 +848,8 @@ public class Nd4jCpu extends org.nd4j.nativeblas.Nd4jCpuHelper { return (TadPack)super.position(position); } - public TadPack(@ByRef ConstantDataBuffer shapes, @ByRef ConstantDataBuffer offets, @Cast("Nd4jLong") long numTads) { super((Pointer)null); allocate(shapes, offets, numTads); } - private native void allocate(@ByRef ConstantDataBuffer shapes, @ByRef ConstantDataBuffer offets, @Cast("Nd4jLong") long numTads); + public TadPack(@Const @ByRef ConstantShapeBuffer shapes, @Const @ByRef ConstantOffsetsBuffer offets, @Cast("Nd4jLong") long numTads) { super((Pointer)null); allocate(shapes, offets, numTads); } + private native void allocate(@Const @ByRef ConstantShapeBuffer shapes, @Const @ByRef ConstantOffsetsBuffer offets, @Cast("Nd4jLong") long numTads); public TadPack() { super((Pointer)null); allocate(); } private native void allocate(); @@ -863,7 +1040,7 @@ public class Nd4jCpu extends org.nd4j.nativeblas.Nd4jCpuHelper { public native int _blasMinorVersion(); public native Environment _blasMinorVersion(int setter); public native int _blasPatchVersion(); public native Environment _blasPatchVersion(int setter); - public static native Environment getInstance(); + public static native @ByRef Environment getInstance(); public native @Cast("bool") boolean isVerbose(); public native void setVerbose(@Cast("bool") boolean reallyVerbose); @@ -3052,9 +3229,9 @@ public native void inspectArray(@Cast("Nd4jPointer*") PointerPointer extraPointe public native void inspectArray(@Cast("Nd4jPointer*") PointerPointer extraPointers, @Cast("Nd4jPointer") Pointer buffer, @Cast("Nd4jLong*") LongBuffer shapeInfo, @Cast("Nd4jPointer") Pointer specialBuffer, @Cast("Nd4jLong*") LongBuffer specialShapeInfo, @Cast("Nd4jPointer") Pointer debugInfo); public native void inspectArray(@Cast("Nd4jPointer*") PointerPointer extraPointers, @Cast("Nd4jPointer") Pointer buffer, @Cast("Nd4jLong*") long[] shapeInfo, @Cast("Nd4jPointer") Pointer specialBuffer, @Cast("Nd4jLong*") long[] specialShapeInfo, @Cast("Nd4jPointer") Pointer debugInfo); -public native OpaqueConstantDataBuffer shapeBuffer(int rank, @Cast("Nd4jLong*") LongPointer shape, @Cast("Nd4jLong*") LongPointer strides, @Cast("sd::DataType") int dtype, char order, @Cast("Nd4jLong") long ews, @Cast("bool") boolean empty); -public native OpaqueConstantDataBuffer shapeBuffer(int rank, @Cast("Nd4jLong*") LongBuffer shape, @Cast("Nd4jLong*") LongBuffer strides, @Cast("sd::DataType") int dtype, char order, @Cast("Nd4jLong") long ews, @Cast("bool") boolean empty); -public native OpaqueConstantDataBuffer shapeBuffer(int rank, @Cast("Nd4jLong*") long[] shape, @Cast("Nd4jLong*") long[] strides, @Cast("sd::DataType") int dtype, char order, @Cast("Nd4jLong") long ews, @Cast("bool") boolean empty); +public native OpaqueConstantShapeBuffer shapeBuffer(int rank, @Cast("Nd4jLong*") LongPointer shape, @Cast("Nd4jLong*") LongPointer strides, @Cast("sd::DataType") int dtype, char order, @Cast("Nd4jLong") long ews, @Cast("bool") boolean empty); +public native OpaqueConstantShapeBuffer shapeBuffer(int rank, @Cast("Nd4jLong*") LongBuffer shape, @Cast("Nd4jLong*") LongBuffer strides, @Cast("sd::DataType") int dtype, char order, @Cast("Nd4jLong") long ews, @Cast("bool") boolean empty); +public native OpaqueConstantShapeBuffer shapeBuffer(int rank, @Cast("Nd4jLong*") long[] shape, @Cast("Nd4jLong*") long[] strides, @Cast("sd::DataType") int dtype, char order, @Cast("Nd4jLong") long ews, @Cast("bool") boolean empty); public native OpaqueConstantDataBuffer 
constantBufferLong(@Cast("sd::DataType") int dtype, @Cast("const Nd4jLong*") LongPointer data, int length); public native OpaqueConstantDataBuffer constantBufferLong(@Cast("sd::DataType") int dtype, @Cast("const Nd4jLong*") LongBuffer data, int length); @@ -3067,9 +3244,12 @@ public native OpaqueConstantDataBuffer constantBuffer(@Cast("sd::DataType") int public native @Cast("Nd4jPointer") Pointer getConstantDataBufferPrimary(OpaqueConstantDataBuffer dbf); public native @Cast("Nd4jPointer") Pointer getConstantDataBufferSpecial(OpaqueConstantDataBuffer dbf); public native @Cast("Nd4jLong") long getConstantDataBufferLength(OpaqueConstantDataBuffer dbf); -public native @Cast("Nd4jLong") long getConstantDataBufferSizeOf(OpaqueConstantDataBuffer dbf); -public native void deleteShapeBuffer(OpaqueConstantDataBuffer ptr); +public native @Cast("Nd4jPointer") Pointer getConstantShapeBufferPrimary(OpaqueConstantShapeBuffer dbf); +public native @Cast("Nd4jPointer") Pointer getConstantShapeBufferSpecial(OpaqueConstantShapeBuffer dbf); + +public native void deleteConstantShapeBuffer(OpaqueConstantShapeBuffer ptr); +public native void deleteConstantDataBuffer(OpaqueConstantDataBuffer ptr); public native OpaqueContext createGraphContext(int nodeId); public native OpaqueRandomGenerator getGraphContextRandomGenerator(OpaqueContext ptr); @@ -3643,6 +3823,7 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); // #include // #include // #include +// #include @@ -4482,7 +4663,7 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); public native void setShapeInfo(@Cast("const Nd4jLong*") LongBuffer shapeInfo, @Cast("const sd::DataType") int dtype); public native void setShapeInfo(@Cast("const Nd4jLong*") long[] shapeInfo, @Cast("const sd::DataType") int dtype); public native void setShapeInfo(@Const @ByRef ShapeDescriptor descriptor); - public native void setShapeInfo(@Const @ByRef ConstantDataBuffer shapeBuffer); + public native void setShapeInfo(@Const @ByRef ConstantShapeBuffer shapeBuffer); /** * returns absolute offset which corresponds to given sequential index @@ -7206,13 +7387,13 @@ public static final int PREALLOC_SIZE = 33554432; * @param rank the rank of the shape */ - @Namespace("shape") public static native int isMatrix(@Cast("Nd4jLong*") LongPointer shape, int rank); - @Namespace("shape") public static native int isMatrix(@Cast("Nd4jLong*") LongBuffer shape, int rank); - @Namespace("shape") public static native int isMatrix(@Cast("Nd4jLong*") long[] shape, int rank); + @Namespace("shape") public static native int isMatrix(@Cast("const Nd4jLong*") LongPointer shape, int rank); + @Namespace("shape") public static native int isMatrix(@Cast("const Nd4jLong*") LongBuffer shape, int rank); + @Namespace("shape") public static native int isMatrix(@Cast("const Nd4jLong*") long[] shape, int rank); - @Namespace("shape") public static native int isMatrix(@Cast("Nd4jLong*") LongPointer shapeInfo); - @Namespace("shape") public static native int isMatrix(@Cast("Nd4jLong*") LongBuffer shapeInfo); - @Namespace("shape") public static native int isMatrix(@Cast("Nd4jLong*") long[] shapeInfo); + @Namespace("shape") public static native int isMatrix(@Cast("const Nd4jLong*") LongPointer shapeInfo); + @Namespace("shape") public static native int isMatrix(@Cast("const Nd4jLong*") LongBuffer shapeInfo); + @Namespace("shape") public static native int isMatrix(@Cast("const Nd4jLong*") long[] shapeInfo); /** * Returns the shape portion of an information * buffer @@ -10072,10 +10253,10 @@ public static 
final int ALL_FLOATS =BFLOAT16; // #endif -public static native @MemberGetter int ELEMENT_THRESHOLD(); -public static final int ELEMENT_THRESHOLD = ELEMENT_THRESHOLD(); -public static native @MemberGetter int TAD_THRESHOLD(); -public static final int TAD_THRESHOLD = TAD_THRESHOLD(); +public static native @MemberGetter double ELEMENT_THRESHOLD(); +public static final double ELEMENT_THRESHOLD = ELEMENT_THRESHOLD(); +public static native @MemberGetter double TAD_THRESHOLD(); +public static final double TAD_THRESHOLD = TAD_THRESHOLD(); // #define SHAPELIST(...) new ShapeList({__VA_ARGS__}, block.workspace() != nullptr) @@ -10085,8 +10266,8 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD(); // #define PRINT_FIRST(...) printf(__VA_ARGS__); fflush(stdout) // #endif -// #define DEBUG_CALL(STREAM) if (sd::Environment::getInstance()->isDebug()) { cudaError_t tRes = cudaStreamSynchronize(*STREAM); checkCudaErrors(tRes); if (tRes != 0) { throw std::runtime_error(); }; } -// #define DEBUG_KERNEL(STREAM, OP_NUM) if (sd::Environment::getInstance()->isDebug()) { cudaError_t tRes = cudaStreamSynchronize(*STREAM); checkCudaErrors(tRes); if (tRes != 0) {std::string tFile(__FILE__); std::string tOp = "Kernel OpNum failed: [" + sd::StringUtils::valueToString(OP_NUM) + std::string("]; File: ") + tFile + std::string(":") + sd::StringUtils::valueToString(__LINE__); throw std::runtime_error(tOp.c_str()); }; } +// #define DEBUG_CALL(STREAM) if (sd::Environment::getInstance().isDebug()) { cudaError_t tRes = cudaStreamSynchronize(*STREAM); checkCudaErrors(tRes); if (tRes != 0) { throw std::runtime_error(); }; } +// #define DEBUG_KERNEL(STREAM, OP_NUM) if (sd::Environment::getInstance().isDebug()) { cudaError_t tRes = cudaStreamSynchronize(*STREAM); checkCudaErrors(tRes); if (tRes != 0) {std::string tFile(__FILE__); std::string tOp = "Kernel OpNum failed: [" + sd::StringUtils::valueToString(OP_NUM) + std::string("]; File: ") + tFile + std::string(":") + sd::StringUtils::valueToString(__LINE__); throw std::runtime_error(tOp.c_str()); }; } // #define LAUNCH(A, B, C, D) <<>> @@ -11067,7 +11248,7 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD(); // #define _EXPAND_OP_CALL_1(NAME, TYPE, PARAMZ, NUM_A, TYPE_A) NAME>PARAMZ; // #define _EXPAND_OP_DIRECT(PARAMZ, NUM_A, TYPE_A) case NUM_A: { z = TYPE_A::op PARAMZ; break; } -// #define _EXPAND_OP_CALL_T(TYPE, NUM_A, TYPE_A) OpTracker::getInstance()->storeOperation(TYPE, #TYPE_A, NUM_A); +// #define _EXPAND_OP_CALL_T(TYPE, NUM_A, TYPE_A) OpTracker::getInstance().storeOperation(TYPE, #TYPE_A, NUM_A); // #define _EXPAND_FACTORY_CALL(TYPE, LAYER_ID, LAYER_NAME, ACTIVATION_ID, ACTIVATION_NAME) if (activationNum == ACTIVATION_ID && layerNum == LAYER_ID) { return new LAYER_NAME>(); }; @@ -11209,7 +11390,7 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD(); // struct __registrator_##NAME { // __registrator_##NAME() { // OpName *ptr = new OpName(); -// OpRegistrator::getInstance()->registerOperation(ptr); +// OpRegistrator::getInstance().registerOperation(ptr); // } // }; // static sd::ops::__registrator_##NAME zzz_register_opd_##NAME; @@ -11277,7 +11458,7 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD(); // auto shapeList = SHAPELIST(); // auto opLimit = this->getOpDescriptor()->getNumberOfOutputs() < 1 ? 
block.width() : this->getOpDescriptor()->getNumberOfOutputs(); // for (int e = 0; e < opLimit; e++) { -// auto newshape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inputShape->at(e)), shape::order(inputShape->at(e)), shape::rank(inputShape->at(e)), shape::shapeOf(inputShape->at(e))); +// auto newshape = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inputShape->at(e)), shape::order(inputShape->at(e)), shape::rank(inputShape->at(e)), shape::shapeOf(inputShape->at(e))); // shapeList->push_back(newshape); // } // return shapeList; @@ -11288,14 +11469,14 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD(); // #define DECLARE_SYN(NAME, ORIGINAL) template // struct __registratorSynonym_##NAME { // __registratorSynonym_##NAME(const char *name, const char *oname) { -// auto ptr = reinterpret_cast(OpRegistrator::getInstance()->getOperation(oname)); +// auto ptr = reinterpret_cast(OpRegistrator::getInstance().getOperation(oname)); // if (ptr == nullptr) { // std::string newName(name); // std::string oldName(oname); -// OpRegistrator::getInstance()->updateMSVC(sd::ops::HashHelper::getInstance()->getLongHash(newName), oldName); +// OpRegistrator::getInstance().updateMSVC(sd::ops::HashHelper::getInstance().getLongHash(newName), oldName); // return; // } -// OpRegistrator::getInstance()->registerOperation(name, ptr); +// OpRegistrator::getInstance().registerOperation(name, ptr); // } // }; // static sd::ops::__registratorSynonym_##NAME zzz_register_opd_##NAME(#NAME, #ORIGINAL) @@ -11339,7 +11520,7 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD(); // auto shapeList = SHAPELIST(); // auto opLimit = this->getOpDescriptor()->getNumberOfOutputs() < 1 ? block.width() : this->getOpDescriptor()->getNumberOfOutputs(); // for (int e = 0; e < opLimit; e++) { -// auto newshape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inputShape->at(e)), shape::order(inputShape->at(e)), shape::rank(inputShape->at(e)), shape::shapeOf(inputShape->at(e))); +// auto newshape = ConstantShapeHelper::getInstance().createShapeInfo(ArrayOptions::dataType(inputShape->at(e)), shape::order(inputShape->at(e)), shape::rank(inputShape->at(e)), shape::shapeOf(inputShape->at(e))); // shapeList->push_back(newshape); // } // return shapeList; @@ -11434,12 +11615,12 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD(); // #else -// #define ALLOCATE(VARIABLE, WORKSPACE, LENGTH, TT) if (WORKSPACE == nullptr) {VARIABLE = new TT[LENGTH]; sd::memory::MemoryTracker::getInstance()->countIn(sd::memory::MemoryType::HOST, VARIABLE, LENGTH * sizeof(TT)); } else {VARIABLE = reinterpret_cast(WORKSPACE->allocateBytes(LENGTH * sizeof(TT))); }; memset(VARIABLE, 0, LENGTH * sizeof(TT)); -// #define RELEASE(VARIABLE, WORKSPACE) if (WORKSPACE == nullptr) { sd::memory::MemoryTracker::getInstance()->countOut(VARIABLE); delete[] VARIABLE;}; +// #define ALLOCATE(VARIABLE, WORKSPACE, LENGTH, TT) if (WORKSPACE == nullptr) {VARIABLE = new TT[LENGTH]; sd::memory::MemoryTracker::getInstance().countIn(sd::memory::MemoryType::HOST, VARIABLE, LENGTH * sizeof(TT)); } else {VARIABLE = reinterpret_cast(WORKSPACE->allocateBytes(LENGTH * sizeof(TT))); }; memset(VARIABLE, 0, LENGTH * sizeof(TT)); +// #define RELEASE(VARIABLE, WORKSPACE) if (WORKSPACE == nullptr) { sd::memory::MemoryTracker::getInstance().countOut(VARIABLE); delete[] VARIABLE;}; // #endif -// #define CONSTANT(SHAPE) ConstantShapeHelper::getInstance()->createFromExisting(SHAPE, block.workspace()) +// #define
CONSTANT(SHAPE) ConstantShapeHelper::getInstance().createFromExisting(SHAPE, block.workspace()) @@ -12372,7 +12553,7 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD(); public OpRegistrator(Pointer p) { super(p); } - public static native OpRegistrator getInstance(); + public static native @ByRef OpRegistrator getInstance(); public static native void exitHandler(); public static native void sigIntHandler(int sig); diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpuPresets.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpuPresets.java index c6e57e876..f10410314 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpuPresets.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpuPresets.java @@ -37,7 +37,11 @@ import java.util.Scanner; "memory/MemoryType.h", "array/DataType.h", "array/DataBuffer.h", + "array/PointerDeallocator.h", + "array/PointerWrapper.h", "array/ConstantDataBuffer.h", + "array/ConstantShapeBuffer.h", + "array/ConstantOffsetsBuffer.h", "array/ConstantDescriptor.h", "array/TadPack.h", "execution/ErrorReference.h", @@ -164,6 +168,8 @@ public class Nd4jCpuPresets implements InfoMapper, BuildEnabled { .put(new Info("OpaqueVariablesSet").pointerTypes("OpaqueVariablesSet")) .put(new Info("OpaqueVariable").pointerTypes("OpaqueVariable")) .put(new Info("OpaqueConstantDataBuffer").pointerTypes("OpaqueConstantDataBuffer")) + .put(new Info("OpaqueConstantShapeBuffer").pointerTypes("OpaqueConstantShapeBuffer")) + .put(new Info("OpaqueConstantOffsetsBuffer").pointerTypes("OpaqueConstantOffsetsBuffer")) .put(new Info("OpaqueDataBuffer").pointerTypes("OpaqueDataBuffer")) .put(new Info("OpaqueContext").pointerTypes("OpaqueContext")) .put(new Info("OpaqueRandomGenerator").pointerTypes("OpaqueRandomGenerator")) @@ -185,7 +191,7 @@ public class Nd4jCpuPresets implements InfoMapper, BuildEnabled { infoMap.put(new Info("__CUDACC__", "MAX_UINT", "HAVE_MKLDNN", "__CUDABLAS__").define(false)) .put(new Info("__JAVACPP_HACK__", "LIBND4J_ALL_OPS").define(true)) .put(new Info("std::initializer_list", "cnpy::NpyArray", "sd::NDArray::applyLambda", "sd::NDArray::applyPairwiseLambda", - "sd::graph::FlatResult", "sd::graph::FlatVariable", "sd::NDArray::subarray").skip()) + "sd::graph::FlatResult", "sd::graph::FlatVariable", "sd::NDArray::subarray", "std::shared_ptr", "sd::PointerWrapper", "sd::PointerDeallocator").skip()) .put(new Info("std::string").annotations("@StdString").valueTypes("BytePointer", "String") .pointerTypes("@Cast({\"char*\", \"std::string*\"}) BytePointer")) .put(new Info("std::pair").pointerTypes("IntIntPair").define()) diff --git a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/profiling/PerformanceTrackerTests.java b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/profiling/PerformanceTrackerTests.java index b4dfe31f1..24ba20057 100644 --- a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/profiling/PerformanceTrackerTests.java +++ b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/profiling/PerformanceTrackerTests.java @@ -124,6 +124,7 @@ public class PerformanceTrackerTests extends BaseNd4jTest { } @Test + @Ignore("useless these days") public void testTrackerGpu_1() { if (!Nd4j.getExecutioner().getClass().getCanonicalName().toLowerCase().contains("cuda")) return;
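
Note on the binding changes above: shape descriptors now travel through the new OpaqueConstantShapeBuffer type instead of OpaqueConstantDataBuffer, with getConstantShapeBufferPrimary(...) replacing getConstantDataBufferPrimary(...) for reads and deleteConstantShapeBuffer(...) replacing deleteShapeBuffer(...) for cleanup. A minimal Java sketch of that lifecycle, assuming a Nd4jCpu handle named `loop` and the same imports NativeOpExecutioner already uses; the shape/stride/dtype values here are illustrative only, not part of the patch:

    // Sketch of the new constant-shape-buffer calling convention from this patch.
    long[] shape  = {2, 3};
    long[] stride = {3, 1};
    // shapeBuffer(...) now returns an OpaqueConstantShapeBuffer
    val dbf = loop.shapeBuffer(shape.length, new LongPointer(shape), new LongPointer(stride),
            DataType.FLOAT.toInt(), 'c', 1, false);
    if (loop.lastErrorCode() != 0)
        throw new RuntimeException(loop.lastErrorMessage());
    // read the host-side shapeInfo through the buffer's primary pointer
    val shapeInfo = new LongBuffer(loop.getConstantShapeBufferPrimary(dbf),
            Shape.shapeInfoLength(shape.length));
    // release with the renamed deleter
    loop.deleteConstantShapeBuffer(dbf);

The getInstance() signature changes scattered through the generated code follow the same native-side refactoring: singletons such as Environment and OpRegistrator are now returned by reference (hence @ByRef) rather than by pointer, which is why the commented macro bodies switch from getInstance()-> to getInstance().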