diff --git a/libnd4j/blas/cpu/NDArrayFactory.cpp b/libnd4j/blas/cpu/NDArrayFactory.cpp index ec99ef7db..d8b686b12 100644 --- a/libnd4j/blas/cpu/NDArrayFactory.cpp +++ b/libnd4j/blas/cpu/NDArrayFactory.cpp @@ -172,7 +172,9 @@ template void NDArrayFactory::memcpyFromVector(void *ptr, const std::vector &shape, const std::initializer_list& data, nd4j::LaunchContext * context); template NDArray NDArrayFactory::create(const char order, const std::vector &shape, const std::initializer_list& data, nd4j::LaunchContext * context); template NDArray NDArrayFactory::create(const char order, const std::vector &shape, const std::initializer_list& data, nd4j::LaunchContext * context); + template NDArray NDArrayFactory::create(const char order, const std::vector &shape, const std::initializer_list& data, nd4j::LaunchContext * context); template NDArray NDArrayFactory::create(const char order, const std::vector &shape, const std::initializer_list& data, nd4j::LaunchContext * context); + template NDArray NDArrayFactory::create(const char order, const std::vector &shape, const std::initializer_list& data, nd4j::LaunchContext * context); template NDArray NDArrayFactory::create(const char order, const std::vector &shape, const std::initializer_list& data, nd4j::LaunchContext * context); template NDArray NDArrayFactory::create(const char order, const std::vector &shape, const std::initializer_list& data, nd4j::LaunchContext * context); template NDArray NDArrayFactory::create(const char order, const std::vector &shape, const std::initializer_list& data, nd4j::LaunchContext * context); diff --git a/libnd4j/include/array/impl/NDArrayList.cpp b/libnd4j/include/array/impl/NDArrayList.cpp index 1762565a1..75df72e70 100644 --- a/libnd4j/include/array/impl/NDArrayList.cpp +++ b/libnd4j/include/array/impl/NDArrayList.cpp @@ -137,8 +137,8 @@ namespace nd4j { auto newAxis = ShapeUtils::evalDimsToExclude(array->rankOf(), args); auto result = array->allTensorsAlongDimension(newAxis); for (int e = 0; e < result->size(); e++) { - auto chunk = result->at(e)->dup(array->ordering()); - write(e, chunk); + auto chunk = result->at(e);//->dup(array->ordering()); + write(e, chunk->dup(array->ordering())); } delete result; } diff --git a/libnd4j/include/op_boilerplate.h b/libnd4j/include/op_boilerplate.h index ad37a1618..ac30b28d8 100644 --- a/libnd4j/include/op_boilerplate.h +++ b/libnd4j/include/op_boilerplate.h @@ -1328,7 +1328,7 @@ REGISTER_C(NAME) \ nd4j::ShapeList* nd4j::ops::NAME::calculateOutputShape(nd4j::ShapeList* inputShape, nd4j::graph::Context& block) { \ auto shapeList = SHAPELIST(); \ - for (int e = 0; e < this->getOpDescriptor()->getNumberOfOutputs(); e++) { \ + for (int e = 0; e < block.width(); e++) { \ auto newshape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inputShape->at(e)), shape::order(inputShape->at(e)), shape::rank(inputShape->at(e)), shape::shapeOf(inputShape->at(e))); \ shapeList->push_back(newshape); \ } \ @@ -1365,7 +1365,7 @@ REGISTER_C(NAME) \ nd4j::ShapeList* nd4j::ops::NAME::calculateOutputShape(nd4j::ShapeList* inputShape, nd4j::graph::Context& block) { \ auto shapeList = SHAPELIST(); \ - for (int e = 0; e < this->getOpDescriptor()->getNumberOfOutputs(); e++) { \ + for (int e = 0; e < block.width(); e++) { \ Nd4jLong* newshape; \ COPY_SHAPE(inputShape->at(0), newshape); \ shapeList->push_back(CONSTANT(newshape)); \ @@ -1388,7 +1388,7 @@ REGISTER_C(NAME) \ nd4j::ShapeList* nd4j::ops::NAME::calculateOutputShape(nd4j::ShapeList* inputShape, nd4j::graph::Context& block) 
{ \ auto shapeList = SHAPELIST(); \ - for (int e = 0; e < this->getOpDescriptor()->getNumberOfOutputs(); e++) { \ + for (int e = 0; e < block.width(); e++) { \ auto newshape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inputShape->at(e)), shape::order(inputShape->at(e)), shape::rank(inputShape->at(e)), shape::shapeOf(inputShape->at(e))); \ shapeList->push_back(newshape); \ } \ diff --git a/libnd4j/include/ops/declarable/generic/bitwise/toggle_bits.cpp b/libnd4j/include/ops/declarable/generic/bitwise/toggle_bits.cpp index d554d46ef..1813946d0 100644 --- a/libnd4j/include/ops/declarable/generic/bitwise/toggle_bits.cpp +++ b/libnd4j/include/ops/declarable/generic/bitwise/toggle_bits.cpp @@ -27,14 +27,14 @@ namespace nd4j { namespace ops { - OP_IMPL(toggle_bits, -1, -1, true) { + OP_IMPL(toggle_bits, -1, 1, true) { for (int i = 0; i < block.width(); i++) { auto x = INPUT_VARIABLE(i); auto z = OUTPUT_VARIABLE(i); REQUIRE_TRUE(x->dataType() == z->dataType(), 0, "Toggle bits requires input and output to have same type"); - REQUIRE_TRUE(x->isR(),0, "Toggle bits requires input and output to be integer type (int8, int16, int32, int64)"); + REQUIRE_TRUE(x->isZ(),0, "Toggle bits requires input and output to be integer type (int8, int16, int32, int64)"); helpers::__toggle_bits(block.launchContext(), *x, *z); } @@ -44,7 +44,8 @@ namespace nd4j { DECLARE_TYPES(toggle_bits) { getOpDescriptor() ->setAllowedInputTypes({ALL_INTS}) - ->setSameMode(true); + ->setAllowedOutputTypes({ALL_INTS}) + ->setSameMode(false); } } } diff --git a/libnd4j/include/ops/declarable/generic/datatypes/to_double.cpp b/libnd4j/include/ops/declarable/generic/datatypes/to_double.cpp index 59a7b7546..2cee857fa 100644 --- a/libnd4j/include/ops/declarable/generic/datatypes/to_double.cpp +++ b/libnd4j/include/ops/declarable/generic/datatypes/to_double.cpp @@ -25,7 +25,7 @@ namespace nd4j { namespace ops { - OP_IMPL(to_double, 1, 1, true) { + CUSTOM_OP_IMPL(to_double, 1, 1, true, 0, 0) { auto input = INPUT_VARIABLE(0); auto output = OUTPUT_VARIABLE(0); @@ -42,6 +42,12 @@ namespace nd4j { ->setAllowedInputTypes(nd4j::DataType::ANY) ->setAllowedOutputTypes(nd4j::DataType::DOUBLE); } + + DECLARE_SHAPE_FN(to_double) { + auto outShape = ShapeBuilders::copyShapeInfoAndType(inputShape->at(0), DataType::DOUBLE, true, block.workspace()); + return SHAPELIST(outShape); + } + } } diff --git a/libnd4j/include/ops/declarable/generic/datatypes/to_float16.cpp b/libnd4j/include/ops/declarable/generic/datatypes/to_float16.cpp index 8cdd38e4f..d3e2f3cd0 100644 --- a/libnd4j/include/ops/declarable/generic/datatypes/to_float16.cpp +++ b/libnd4j/include/ops/declarable/generic/datatypes/to_float16.cpp @@ -25,7 +25,7 @@ namespace nd4j { namespace ops { - OP_IMPL(to_float16, 1, 1, true) { + CUSTOM_OP_IMPL(to_float16, 1, 1, true, 0, 0) { auto input = INPUT_VARIABLE(0); auto output = OUTPUT_VARIABLE(0); @@ -42,6 +42,12 @@ namespace nd4j { ->setAllowedInputTypes(nd4j::DataType::ANY) ->setAllowedOutputTypes(nd4j::DataType::HALF); } + + DECLARE_SHAPE_FN(to_float16) { + auto outShape = ShapeBuilders::copyShapeInfoAndType(inputShape->at(0), DataType::HALF, true, block.workspace()); + return SHAPELIST(outShape); + } + } } diff --git a/libnd4j/include/ops/declarable/generic/datatypes/to_float32.cpp b/libnd4j/include/ops/declarable/generic/datatypes/to_float32.cpp index 3fdcafaab..c558f4d78 100644 --- a/libnd4j/include/ops/declarable/generic/datatypes/to_float32.cpp +++ b/libnd4j/include/ops/declarable/generic/datatypes/to_float32.cpp @@ 
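The op_boilerplate.h hunks above change the shape-function loop bound from the descriptor's declared output count to block.width(). For variadic ops such as toggle_bits the descriptor presumably reports -1 outputs, so the old loop produced nothing useful; the number of outputs actually tracks the number of inputs wired into the node, which is exactly what block.width() returns. A minimal sketch of the resulting pass-through shape function, using only the helpers already visible in the hunks:

```cpp
// Sketch (not the macro itself): pass-through shape function for a variadic op.
// block.width() is the number of input arrays attached to the node; for ops like
// toggle_bits that is also the number of outputs, while the descriptor says -1.
nd4j::ShapeList* passthroughShapes(nd4j::ShapeList* inputShape, nd4j::graph::Context& block) {
    auto shapeList = SHAPELIST();
    for (int e = 0; e < block.width(); e++) {
        auto in = inputShape->at(e);
        // reuse the input's dtype, order, rank and dimensions for the matching output
        shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(
                ArrayOptions::dataType(in), shape::order(in), shape::rank(in), shape::shapeOf(in)));
    }
    return shapeList;
}
```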
-25,7 +25,7 @@ namespace nd4j { namespace ops { - OP_IMPL(to_float32, 1, 1, true) { + CUSTOM_OP_IMPL(to_float32, 1, 1, true, 0, 0) { auto input = INPUT_VARIABLE(0); auto output = OUTPUT_VARIABLE(0); @@ -42,6 +42,12 @@ namespace nd4j { ->setAllowedInputTypes(nd4j::DataType::ANY) ->setAllowedOutputTypes(nd4j::DataType::FLOAT32); } + + DECLARE_SHAPE_FN(to_float32) { + auto outShape = ShapeBuilders::copyShapeInfoAndType(inputShape->at(0), DataType::FLOAT32, true, block.workspace()); + return SHAPELIST(outShape); + } + } } diff --git a/libnd4j/include/ops/declarable/generic/datatypes/to_int32.cpp b/libnd4j/include/ops/declarable/generic/datatypes/to_int32.cpp index a5eef8595..960aab483 100644 --- a/libnd4j/include/ops/declarable/generic/datatypes/to_int32.cpp +++ b/libnd4j/include/ops/declarable/generic/datatypes/to_int32.cpp @@ -25,7 +25,7 @@ namespace nd4j { namespace ops { - OP_IMPL(to_int32, 1, 1, true) { + CUSTOM_OP_IMPL(to_int32, 1, 1, true, 0, 0) { auto input = INPUT_VARIABLE(0); auto output = OUTPUT_VARIABLE(0); @@ -42,6 +42,11 @@ namespace nd4j { ->setAllowedInputTypes(nd4j::DataType::ANY) ->setAllowedOutputTypes(nd4j::DataType::INT32); } + DECLARE_SHAPE_FN(to_int32) { + auto outShape = ShapeBuilders::copyShapeInfoAndType(inputShape->at(0), DataType::INT32, true, block.workspace()); + return SHAPELIST(outShape); + } + } } diff --git a/libnd4j/include/ops/declarable/generic/datatypes/to_int64.cpp b/libnd4j/include/ops/declarable/generic/datatypes/to_int64.cpp index 450c57c1d..d5721b266 100644 --- a/libnd4j/include/ops/declarable/generic/datatypes/to_int64.cpp +++ b/libnd4j/include/ops/declarable/generic/datatypes/to_int64.cpp @@ -25,7 +25,7 @@ namespace nd4j { namespace ops { - OP_IMPL(to_int64, 1, 1, true) { + CUSTOM_OP_IMPL(to_int64, 1, 1, true, 0, 0) { auto input = INPUT_VARIABLE(0); auto output = OUTPUT_VARIABLE(0); @@ -42,6 +42,11 @@ namespace nd4j { ->setAllowedInputTypes(nd4j::DataType::ANY) ->setAllowedOutputTypes(nd4j::DataType::INT64); } + DECLARE_SHAPE_FN(to_int64) { + auto outShape = ShapeBuilders::copyShapeInfoAndType(inputShape->at(0), DataType::INT64, true, block.workspace()); + return SHAPELIST(outShape); + } + } } diff --git a/libnd4j/include/ops/declarable/generic/datatypes/to_uint32.cpp b/libnd4j/include/ops/declarable/generic/datatypes/to_uint32.cpp index 5b6822797..b0833dd25 100644 --- a/libnd4j/include/ops/declarable/generic/datatypes/to_uint32.cpp +++ b/libnd4j/include/ops/declarable/generic/datatypes/to_uint32.cpp @@ -25,7 +25,7 @@ namespace nd4j { namespace ops { - OP_IMPL(to_uint32, 1, 1, true) { + CUSTOM_OP_IMPL(to_uint32, 1, 1, true, 0, 0) { auto input = INPUT_VARIABLE(0); auto output = OUTPUT_VARIABLE(0); @@ -40,8 +40,13 @@ namespace nd4j { DECLARE_TYPES(to_uint32) { getOpDescriptor() ->setAllowedInputTypes(nd4j::DataType::ANY) - ->setAllowedOutputTypes(nd4j::DataType::INT16); + ->setAllowedOutputTypes(nd4j::DataType::INT32); } + DECLARE_SHAPE_FN(to_uint32) { + auto outShape = ShapeBuilders::copyShapeInfoAndType(inputShape->at(0), DataType::UINT32, true, block.workspace()); + return SHAPELIST(outShape); + } + } } diff --git a/libnd4j/include/ops/declarable/generic/datatypes/to_uint64.cpp b/libnd4j/include/ops/declarable/generic/datatypes/to_uint64.cpp index a0402cdb7..64633d34b 100644 --- a/libnd4j/include/ops/declarable/generic/datatypes/to_uint64.cpp +++ b/libnd4j/include/ops/declarable/generic/datatypes/to_uint64.cpp @@ -25,7 +25,7 @@ namespace nd4j { namespace ops { - OP_IMPL(to_uint64, 1, 1, true) { + CUSTOM_OP_IMPL(to_uint64, 1, 1, true, 0, 0) { auto 
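Each to_* cast op above follows the same recipe: the element-wise body stays, but the op is promoted to a CUSTOM_OP so it gains a shape function that copies the input's shape and order while forcing the target data type via ShapeBuilders::copyShapeInfoAndType. A hedged, test-style usage sketch of what that buys (names and scaffolding mirror the DeclarableOpsTests files later in this diff; this particular test is hypothetical):

```cpp
// Hypothetical test sketch: with the explicit shape function, to_double now yields a
// DOUBLE output even for an integral input, instead of inheriting the input's type.
TEST_F(DeclarableOpsTests7, ToDouble_usage_sketch) {
    auto x = NDArrayFactory::create<int>('c', {2, 2}, {1, 2, 3, 4});

    nd4j::ops::to_double op;
    auto result = op.execute({&x}, {}, {});
    ASSERT_EQ(Status::OK(), result->status());

    auto z = result->at(0);
    ASSERT_EQ(nd4j::DataType::DOUBLE, z->dataType()); // dtype comes from the new shape function
    ASSERT_TRUE(x.isSameShape(z));                     // shape and order are copied from the input

    delete result;
}
```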
input = INPUT_VARIABLE(0); auto output = OUTPUT_VARIABLE(0); @@ -42,6 +42,10 @@ namespace nd4j { ->setAllowedInputTypes(nd4j::DataType::ANY) ->setAllowedOutputTypes(nd4j::DataType::INT8); } + DECLARE_SHAPE_FN(to_uint64) { + auto outShape = ShapeBuilders::copyShapeInfoAndType(inputShape->at(0), DataType::UINT64, true, block.workspace()); + return SHAPELIST(outShape); + } } } diff --git a/libnd4j/include/ops/declarable/generic/list/unstack_list.cpp b/libnd4j/include/ops/declarable/generic/list/unstack_list.cpp index 886959943..b5e5f207e 100644 --- a/libnd4j/include/ops/declarable/generic/list/unstack_list.cpp +++ b/libnd4j/include/ops/declarable/generic/list/unstack_list.cpp @@ -26,13 +26,19 @@ namespace nd4j { namespace ops { LIST_OP_IMPL(unstack_list, 1, 1, 0, 0) { - auto input = INPUT_VARIABLE(0); + auto outputList = INPUT_LIST(0); + auto input = INPUT_VARIABLE(int(outputList != nullptr) ); - auto list = new NDArrayList(0, true); - list->unstack(input, 0); + if (outputList == nullptr) { + outputList = new NDArrayList(0, true); + //block.trackList(outputList); + setupResultList(outputList, block); + } + outputList->unstack(input, INT_ARG(0)); //OVERWRITE_RESULT(list); - setupResultList(list, block); + + // return Status::OK(); } } diff --git a/libnd4j/include/ops/declarable/generic/random/get_seed.cpp b/libnd4j/include/ops/declarable/generic/random/get_seed.cpp index e8acfa067..2161a2378 100644 --- a/libnd4j/include/ops/declarable/generic/random/get_seed.cpp +++ b/libnd4j/include/ops/declarable/generic/random/get_seed.cpp @@ -26,11 +26,11 @@ namespace nd4j { namespace ops { CUSTOM_OP_IMPL(get_seed, -2, 1, false, 0, 0) { - REQUIRE_TRUE(block.getRNG() != nullptr, 0, "RNG should be defined in Graph"); - auto rng = block.getRNG(); +// REQUIRE_TRUE(block.getRNG() != nullptr, 0, "RNG should be defined in Graph"); + auto rng = block.getRng(); auto z = OUTPUT_VARIABLE(0); - z->p(Nd4jLong(0), rng->getSeed()); + z->p(Nd4jLong(0), rng.rootState()); return Status::OK(); } diff --git a/libnd4j/include/ops/declarable/generic/random/set_seed.cpp b/libnd4j/include/ops/declarable/generic/random/set_seed.cpp index b42c7c763..fa9dcf992 100644 --- a/libnd4j/include/ops/declarable/generic/random/set_seed.cpp +++ b/libnd4j/include/ops/declarable/generic/random/set_seed.cpp @@ -27,8 +27,9 @@ namespace nd4j { namespace ops { CUSTOM_OP_IMPL(set_seed, -2, 1, false, 0, -2) { - REQUIRE_TRUE(block.getRNG() != nullptr, 0, "RNG should be defined in Graph"); - auto rng = block.getRNG(); +// REQUIRE_TRUE(block.getRNG() != nullptr, 0, "RNG should be defined in Graph"); + auto rng = block.getRng(); //.getRNG(); + Nd4jLong seed = 0; if (block.getIArguments()->size() > 0) { seed = INT_ARG(0); @@ -41,8 +42,8 @@ namespace nd4j { } // FIXME: this approach isn't really good for cuda, since it'll assume that CUDA might get nullptr instead of stream - refreshBuffer(nullptr, seed, (Nd4jPointer) rng); - + //refreshBuffer(nullptr, seed, (Nd4jPointer) rng); + rng.setSeed((int)seed); return Status::OK(); } diff --git a/libnd4j/include/ops/declarable/generic/transforms/log1p.cpp b/libnd4j/include/ops/declarable/generic/transforms/log1p.cpp index 1398eae47..3d45bcf42 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/log1p.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/log1p.cpp @@ -25,7 +25,7 @@ namespace nd4j { namespace ops { - OP_IMPL(Log1p, 2, 1, true) { + OP_IMPL(Log1p, 1, 1, true) { auto x = INPUT_VARIABLE(0); auto z = OUTPUT_VARIABLE(0); diff --git 
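get_seed and set_seed above switch from the legacy block.getRNG() pointer to the graph-level RandomGenerator returned by block.getRng(): the seed is applied with setSeed() and read back through rootState(). A compact sketch of the round trip, limited to the calls visible in the hunks (Context& block assumed in scope; purely illustrative):

```cpp
// Sketch of the seed round trip used by the updated ops.
auto rng = block.getRng();              // graph-level RandomGenerator, no null check needed
rng.setSeed((int) 119);                 // set_seed path: reseed the generator
auto z = OUTPUT_VARIABLE(0);
z->p(Nd4jLong(0), rng.rootState());     // get_seed path: expose the root state as the seed
```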
a/libnd4j/include/ops/declarable/generic/transforms/merge_max_idx.cpp b/libnd4j/include/ops/declarable/generic/transforms/merge_max_idx.cpp index f087eaf1b..f6aeeebcf 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/merge_max_idx.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/merge_max_idx.cpp @@ -27,7 +27,7 @@ namespace nd4j { namespace ops { -OP_IMPL(mergemaxindex, -1, 1, false) { +CUSTOM_OP_IMPL(mergemaxindex, -1, 1, false, 0, 0) { REQUIRE_OK(this->validateInputDimensionsMatch(block)); auto output = OUTPUT_VARIABLE(0); @@ -49,6 +49,15 @@ DECLARE_SYN(MergeMaxIndex, mergemaxindex); ->setAllowedInputTypes({ALL_INTS, ALL_FLOATS}); } } +DECLARE_SHAPE_FN(mergemaxindex) { + auto in = inputShape->at(0); + auto dtype = DataType::INT32; + if (block.getIArguments()->size()> 0) + dtype = (DataType)INT_ARG(0); + + auto resShape = ShapeBuilders::copyShapeInfoAndType(in, dtype, block.workspace()); + return SHAPELIST(resShape); +} } #endif \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/headers/datatypes.h b/libnd4j/include/ops/declarable/headers/datatypes.h index 43983ecb6..d8ff39d48 100644 --- a/libnd4j/include/ops/declarable/headers/datatypes.h +++ b/libnd4j/include/ops/declarable/headers/datatypes.h @@ -30,7 +30,7 @@ namespace nd4j { * PLEASE NOTE: This op is disabled atm, and reserved for future releases. */ #if NOT_EXCLUDED(OP_to_double) - DECLARE_OP(to_double, 1, 1, true); + DECLARE_CUSTOM_OP(to_double, 1, 1, true, 0, 0); #endif /** @@ -39,7 +39,7 @@ namespace nd4j { * PLEASE NOTE: This op is disabled atm, and reserved for future releases. */ #if NOT_EXCLUDED(OP_to_float16) - DECLARE_OP(to_float16, 1, 1, true); + DECLARE_CUSTOM_OP(to_float16, 1, 1, true, 0, 0); #endif /** @@ -48,7 +48,7 @@ namespace nd4j { * PLEASE NOTE: This op is disabled atm, and reserved for future releases. */ #if NOT_EXCLUDED(OP_to_float32) - DECLARE_OP(to_float32, 1, 1, true); + DECLARE_CUSTOM_OP(to_float32, 1, 1, true, 0, 0); #endif /** @@ -57,7 +57,7 @@ namespace nd4j { * PLEASE NOTE: This op is disabled atm, and reserved for future releases. */ #if NOT_EXCLUDED(OP_to_int32) - DECLARE_OP(to_int32, 1, 1, true); + DECLARE_CUSTOM_OP(to_int32, 1, 1, true, 0, 0); #endif /** @@ -66,7 +66,7 @@ namespace nd4j { * PLEASE NOTE: This op is disabled atm, and reserved for future releases. */ #if NOT_EXCLUDED(OP_to_int64) - DECLARE_OP(to_int64, 1, 1, true); + DECLARE_CUSTOM_OP(to_int64, 1, 1, true, 0, 0); #endif /** @@ -75,7 +75,7 @@ namespace nd4j { * PLEASE NOTE: This op is disabled atm, and reserved for future releases. */ #if NOT_EXCLUDED(OP_to_uint32) - DECLARE_OP(to_uint32, 1, 1, true); + DECLARE_CUSTOM_OP(to_uint32, 1, 1, true, 0, 0); #endif /** @@ -84,7 +84,7 @@ namespace nd4j { * PLEASE NOTE: This op is disabled atm, and reserved for future releases. 
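mergemaxindex above becomes a CUSTOM_OP so its shape function can emit an integer output over the common input shape, INT32 by default or whatever INT_ARG(0) requests. As a reference for what the helper computes, here is a hedged host-side sketch of the element-wise semantics; it matches the corrected mergeMaxIndex kernel in merge.cu further down, where the running maximum and its index must be updated together:

```cpp
#include <cstddef>
#include <vector>

// Host-side reference sketch (not the library kernel): for every element position, emit the
// index of the input tensor holding the maximum value there. Inputs are flat buffers of equal
// length standing in for NDArrays of identical shape.
std::vector<int> mergeMaxIndexReference(const std::vector<std::vector<float>>& inputs) {
    std::vector<int> out(inputs.empty() ? 0 : inputs[0].size(), 0);
    for (std::size_t e = 0; e < out.size(); e++) {
        float mVal = inputs[0][e];
        int mIdx = 0;
        for (std::size_t i = 1; i < inputs.size(); i++) {
            if (mVal < inputs[i][e]) {   // the index and the running max must be updated together
                mVal = inputs[i][e];
                mIdx = static_cast<int>(i);
            }
        }
        out[e] = mIdx;
    }
    return out;
}
```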
*/ #if NOT_EXCLUDED(OP_to_uint64) - DECLARE_OP(to_uint64, 1, 1, true); + DECLARE_CUSTOM_OP(to_uint64, 1, 1, true, 0, 0); #endif /** diff --git a/libnd4j/include/ops/declarable/headers/transforms.h b/libnd4j/include/ops/declarable/headers/transforms.h index b24fad482..75715f78e 100644 --- a/libnd4j/include/ops/declarable/headers/transforms.h +++ b/libnd4j/include/ops/declarable/headers/transforms.h @@ -65,9 +65,15 @@ namespace nd4j { #if NOT_EXCLUDED(OP_mergemax) DECLARE_OP(mergemax, -1, 1, false); #endif - + /* + * Complete tensor with max indices merged from all input tensors list + * + * INPUT: tensors with the same shape + * OUTPUT: integer tensor with the same shape + * INT_ARG: result type (one of int), INT32 by default + */ #if NOT_EXCLUDED(OP_mergemaxindex) - DECLARE_OP(mergemaxindex, -1, 1, false); + DECLARE_CUSTOM_OP(mergemaxindex, -1, 1, false, 0, 0); #endif #if NOT_EXCLUDED(OP_mergeadd) diff --git a/libnd4j/include/ops/declarable/helpers/cuda/histogram.cu b/libnd4j/include/ops/declarable/helpers/cuda/histogram.cu index eda19ccd8..52b059dad 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/histogram.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/histogram.cu @@ -25,7 +25,7 @@ namespace nd4j { namespace ops { namespace helpers { template - void _CUDA_G histogramKernel(void *xBuffer, Nd4jLong *xShapeInfo, void *zBuffer, Nd4jLong *zShapeInfo, void *allocationPointer, void *reductionPointer, Nd4jLong numBins, double min_val, double max_val) { + void _CUDA_G histogramKernel(void *xBuffer, Nd4jLong *xShapeInfo, void *zBuffer, Nd4jLong *zShapeInfo, void *allocationPointer, void *reductionPointer, Nd4jLong numBins, X* min_val, X* max_val) { int tid = blockIdx.x * blockDim.x + threadIdx.x; auto dx = reinterpret_cast(xBuffer); auto result = reinterpret_cast(zBuffer); @@ -42,19 +42,19 @@ namespace nd4j { } __syncthreads(); - Z binSize = (max_val - min_val) / (numBins); + X binSize = X((*max_val - *min_val) / numBins); for (int e = threadIdx.x; e < numBins; e += blockDim.x) { - bins[e] = (Z) 0.0f; + bins[e] = (Z) 0; } __syncthreads(); - for (int e = tid; e < length; e+= blockDim.x * gridDim.x) { - int idx = (int) ((dx[e] - min_val) / binSize); - if (idx < 0) idx = 0; - else if (idx >= numBins) idx = numBins - 1; - - nd4j::math::atomics::nd4j_atomicAdd(&bins[idx], (Z) 1.0f); + for (int e = tid; e < length; e += blockDim.x * gridDim.x) { + int idx = int((dx[e] - *min_val) / binSize); + idx = math::nd4j_max(idx, 0); //atomicMax(&idx, 0);//atomicMax(&idx, 0); + idx = math::nd4j_min(idx, int(numBins - 1)); //atomicMin(&idx, int(numBins - 1)); + nd4j::math::atomics::nd4j_atomicAdd(&bins[idx], (Z)1); +// bins[idx]++; } __syncthreads(); @@ -82,7 +82,7 @@ namespace nd4j { // nullify shared memory for future accumulation for (int e = threadIdx.x; e < numBins; e += blockDim.x) { - bins[e] = (Z) 0.0f; + bins[e] = (Z) 0; } // accumulate reduced bins @@ -90,7 +90,7 @@ namespace nd4j { Z *ptrBuf = ((Z *)allocationPointer) + (r * numBins); for (int e = threadIdx.x; e < numBins; e += blockDim.x) { - bins[e] += ptrBuf[e]; + math::atomics::nd4j_atomicAdd(&bins[e], ptrBuf[e]); } } __syncthreads(); @@ -109,24 +109,26 @@ namespace nd4j { } template - static void histogram_(nd4j::LaunchContext *context, void *xBuffer, Nd4jLong *xShapeInfo, void *zBuffer, Nd4jLong *zShapeInfo, Nd4jLong numBins, double min_val, double max_val) { + static void histogram_(nd4j::LaunchContext *context, void *xBuffer, Nd4jLong *xShapeInfo, Nd4jLong *dxShapeInfo, void *zBuffer, Nd4jLong *zShapeInfo, Nd4jLong numBins, void* 
min_val, void* max_val) { int numThreads = 256; int numBlocks = nd4j::math::nd4j_max(256, nd4j::math::nd4j_min(1, shape::length(xShapeInfo) / numThreads)); int workspaceSize = numBlocks * numBins; - auto tmp = NDArrayFactory::create('c',{workspaceSize}); + auto tmp = NDArrayFactory::create('c', {workspaceSize}); - histogramKernel<<getCudaStream()>>>(xBuffer, xShapeInfo, zBuffer, zShapeInfo, tmp.getSpecialBuffer(), context->getReductionPointer(), numBins, min_val, max_val); + histogramKernel<<getCudaStream()>>>(xBuffer, dxShapeInfo, zBuffer, zShapeInfo, tmp.getSpecialBuffer(), context->getReductionPointer(), numBins, reinterpret_cast(min_val), reinterpret_cast(max_val)); cudaStreamSynchronize(*context->getCudaStream()); } void histogramHelper(nd4j::LaunchContext *context, NDArray &input, NDArray &output) { Nd4jLong numBins = output.lengthOf(); - double min_val = input.reduceNumber(reduce::SameOps::Min).e(0); - double max_val = input.reduceNumber(reduce::SameOps::Max).e(0); - - BUILD_DOUBLE_SELECTOR(input.dataType(), output.dataType(), histogram_, (context, input.specialBuffer(), input.specialShapeInfo(), output.getSpecialBuffer(), output.getSpecialShapeInfo(), numBins, min_val, max_val), LIBND4J_TYPES, INDEXING_TYPES); + NDArray::registerSpecialUse({&output}, {&input}); + auto min_val = input.reduceNumber(reduce::SameOps::Min); + auto max_val = input.reduceNumber(reduce::SameOps::Max); +// min_val.printIndexedBuffer("MIN"); +// max_val.printIndexedBuffer("MAX"); + BUILD_DOUBLE_SELECTOR(input.dataType(), output.dataType(), histogram_, (context, input.specialBuffer(), input.shapeInfo(), input.specialShapeInfo(), output.getSpecialBuffer(), output.getSpecialShapeInfo(), numBins, min_val.specialBuffer(), max_val.specialBuffer()), LIBND4J_TYPES, INTEGER_TYPES); NDArray::registerSpecialUse({&output}, {&input}); } } diff --git a/libnd4j/include/ops/declarable/helpers/cuda/image_suppression.cu b/libnd4j/include/ops/declarable/helpers/cuda/image_suppression.cu index 2cec0a065..0da1fbc28 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/image_suppression.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/image_suppression.cu @@ -68,21 +68,21 @@ namespace helpers { static __global__ void shouldSelectKernel(T* boxesBuf, Nd4jLong* boxesShape, I* indexBuf, I* selectedIndicesData, double threshold, int numSelected, int i, bool* shouldSelect) { auto tid = blockIdx.x * blockDim.x + threadIdx.x; auto step = gridDim.x * blockDim.x; - __shared__ bool shouldSelectShared; + __shared__ unsigned int shouldSelectShared; if (threadIdx.x == 0) { - shouldSelectShared = shouldSelect[0]; + shouldSelectShared = (unsigned int)shouldSelect[0]; } __syncthreads(); for (int j = numSelected - 1 - tid; j >= 0; j -= step) { if (shouldSelectShared) { if (needToSuppressWithThreshold(boxesBuf, boxesShape, indexBuf[i], indexBuf[selectedIndicesData[j]], T(threshold))) - shouldSelectShared = false; + atomicCAS(&shouldSelectShared, 1, 0); } } __syncthreads(); if (threadIdx.x == 0) { - *shouldSelect = shouldSelectShared; + *shouldSelect = shouldSelectShared > 0; } } diff --git a/libnd4j/include/ops/declarable/helpers/cuda/merge.cu b/libnd4j/include/ops/declarable/helpers/cuda/merge.cu index ceb748453..082472fce 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/merge.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/merge.cu @@ -48,8 +48,10 @@ namespace nd4j { auto x = reinterpret_cast(inArrs[i]); auto xShape = reinterpret_cast(inShapes[i]); auto val = x[shape::getIndexOffset(e, xShape, length)];; - if (mVal < val) - 
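The shouldSelectKernel change in image_suppression.cu above replaces a __shared__ bool, which multiple threads could race on with plain stores, by a 32-bit unsigned int cleared through atomicCAS; only thread 0 publishes the result back to global memory. A stripped-down sketch of the pattern, with a hypothetical predicate standing in for needToSuppressWithThreshold:

```cuda
// Minimal sketch of the shared-flag pattern: many threads may clear the flag, exactly one
// thread writes it out. atomicCAS needs a 32-bit word, hence unsigned int rather than bool.
__global__ void anyPredicateKernel(const float* data, int n, bool* shouldSelect) {
    __shared__ unsigned int flag;
    if (threadIdx.x == 0)
        flag = (unsigned int) shouldSelect[0];       // seed from the previous value
    __syncthreads();

    for (int j = threadIdx.x; j < n; j += blockDim.x) {
        if (flag && data[j] > 0.5f)                  // hypothetical suppression predicate
            atomicCAS(&flag, 1u, 0u);                // race-free "set to false"
    }
    __syncthreads();

    if (threadIdx.x == 0)
        shouldSelect[0] = flag > 0;
}
```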
mIdx = static_cast(e); + if (mVal < val) { + mIdx = static_cast(i); + mVal = val; + } } __syncthreads(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/sg_cb.cu b/libnd4j/include/ops/declarable/helpers/cuda/sg_cb.cu index da212d287..6a9fd28e6 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/sg_cb.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/sg_cb.cu @@ -123,14 +123,236 @@ namespace nd4j { nSamplingKernel<<<1,1,128, *stream>>>(vsyn0, vsyn1Neg, vexpTable, vneu1e, alpha, vectorLength, code, expLength, isInference); } + /* + * binarySearch - find element in haystack buffer (haystack - sorted device memory) + * */ int binarySearch(const int *haystack, const int needle, const int totalElements) { - return 0; + int firstIndex = 0; + int lastIndex = totalElements - 1; + int halfIndex = nd4j::math::nd4j_floor((lastIndex + firstIndex) / (float) 2); + + while(haystack[halfIndex] != needle && firstIndex < lastIndex) { + if (needle < haystack[halfIndex]) { + lastIndex = halfIndex - 1; + } else if (needle > haystack[halfIndex]) { + firstIndex = halfIndex + 1; + } + halfIndex = nd4j::math::nd4j_floor((lastIndex + firstIndex) / (float) 2); + } + + return (haystack[halfIndex] == needle) ? halfIndex : -1; + } + template + __global__ void addInfVectorKernel(T* neu1, T* infVector, int vectorLength) { + auto start = blockIdx.x * blockDim.x + threadIdx.x; + auto step = blockDim.x * gridDim.x; + + for (auto i = start; i < vectorLength; i += step) { + neu1[i] += infVector[i]; + } } - void skipgram(NDArray &syn0, NDArray &syn1, NDArray &syn1Neg, NDArray &expTable, NDArray &negTable, NDArray &target, NDArray &ngStarter, int nsRounds, NDArray &indices, NDArray &codes, NDArray &alpha, NDArray &randomValue, NDArray &inferenceVector, const bool preciseMode, const int numWorkers) { + template + void skipgram_(NDArray& s0, NDArray& s1, NDArray& s1n, NDArray& expTableV, NDArray& negTableV, NDArray& infV, int target, int ngStarter, NDArray& indices, NDArray& codes, double alpha, Nd4jLong randomValue, const int hsRounds, const int nsRounds) { +// void *vsyn0, void *vsyn1, void *vsyn1Neg, void *vexpTable, void *vnegTable, void *vinfVector, int target, int ngStarter, int *indices, int8_t *codes, double alpha, Nd4jLong randomValue, const int hsRounds, const int nsRounds, const int vocabSize, const int vectorLength, const int expLength, const int negLength) { + auto syn0 = reinterpret_cast(s0.specialBuffer()); + auto syn1 = reinterpret_cast(s1.specialBuffer()); + auto syn1Neg = reinterpret_cast(s1n.specialBuffer()); + auto expTable = reinterpret_cast(expTableV.specialBuffer()); + auto negTable = reinterpret_cast(negTableV.specialBuffer()); + auto infVector = reinterpret_cast(infV.specialBuffer()); + const int vocabSize = s0.sizeAt(0); + const int vectorLength = s0.sizeAt(1); + const int expLength = expTableV.lengthOf(); + const int negLength = negTableV.lengthOf(); + indices.tickReadDevice(); + indices.syncToHost(); + codes.tickReadDevice(); + codes.syncToHost(); + auto stream = s0.getContext()->getCudaStream(); + + T* neu1e; // = new T[vectorLength]; + //memset(neu1e, 0, vectorLength * sizeof(T)); + auto err = cudaMalloc(&neu1e, sizeof(T) * vectorLength); + err = cudaMemset(neu1e, 0, sizeof(T) * vectorLength); + // hierarchic softmax goes first (if enabled) + + auto syn0row = infVector != nullptr ? 
infVector : syn0 + (target * vectorLength); + auto irow = 0; + if (hsRounds > 0) { + for (int r = 0; r < hsRounds; r++) { + irow = indices.t(r); + if (irow < 0 || irow >= vocabSize) + break; + + hSoftmax_(syn0row, syn1 + (irow * vectorLength), expTable, neu1e, alpha, vectorLength, codes.t(r), expLength, infVector != nullptr, stream); + } + } + + // negative sampling goes second (if enabled) + auto nsStarter = ngStarter; + irow = nsStarter; + if (nsRounds > 0) { + for (int r = 0; r < nsRounds + 1; r++) { + if (r == 0) { + // target is known in advance + } else { + randomValue = randomValue * (unsigned long long) 25214903917 + 11; + auto idx = nd4j::math::nd4j_abs((randomValue >> 16) % negLength); + irow = idx >= negLength ? -1 : negTableV.e(idx); + + if (irow < 0 || irow >= vocabSize) irow = randomValue % (vocabSize - 1) + 1; + if (irow == nsStarter) + continue; + } + + nSampling_(syn0row, syn1Neg + (irow * vectorLength), expTable, neu1e, alpha, vectorLength, r == 0 ? 1 : 0, expLength, infVector != nullptr, stream); + } + } + + if (infVector == nullptr) { + addInfVectorKernel<<<128, 256, 256, *stream>>>(syn0row, neu1e, vectorLength); + } else { + addInfVectorKernel<<<128, 256, 256, *stream>>>(infVector, neu1e, vectorLength); + } + + err = cudaFree(neu1e); + if (0 != err) { + throw cuda_exception::build("helpers::skipgram_: Cannot deallocate temp memory for lingual net", err); + } + } + BUILD_SINGLE_TEMPLATE(template void skipgram_, (NDArray& syn0, NDArray& syn1, NDArray& syn1Neg, NDArray& expTable, NDArray& negTable, NDArray& infVector, int target, int ngStarter, NDArray& indices, NDArray& codes, double alpha, Nd4jLong randomValue, const int hsRounds, const int nsRounds), FLOAT_TYPES); + + /* + * batched version of skipgram routine + * */ + template + void skipgramBatchExec_(NDArray &s0, NDArray &s1, NDArray &s1n, NDArray& expTableV, NDArray& negTableV, NDArray &targets, NDArray &negStarters, NDArray &indices, NDArray &codes, NDArray &lr, NDArray &nextRandom, const int nsRounds, const bool preciseMode, const int numThreads) { +// (NDArray &s0, NDArray &s1, NDArray &s1n, NDArray& expTable, NDArray& negTable, NDArray& infVector, NDArray& targets, NDArray& negStarters, NDArray& indices, NDArray& codes, NDArray& lr, NDArray& nextRandom, const int nsRounds, const bool preciseMode, const int numThreads) { + //auto syn0 = reinterpret_cast(vsyn0); + //auto syn1 = reinterpret_cast(vsyn1); + //auto syn1Neg = reinterpret_cast(vsyn1Neg); + auto stream = s0.getContext()->getCudaStream(); + negTableV.tickReadDevice(); + negTableV.syncToHost(); + const auto expTable = reinterpret_cast(expTableV.specialBuffer()); + const auto negTable = reinterpret_cast(negTableV.buffer()); + const auto infVector = (T*)nullptr; //reinterpret_cast(infVector.specialBuffer()); + + const int vocabSize = s0.sizeAt(0); + const int vectorLength = s0.sizeAt(1); + const int expLength = expTableV.lengthOf(); + const int negLength = negTableV.lengthOf(); + + //T sneu1e[600]; + + //const auto numThreads = omp_get_max_threads(); + const auto idxShift = indices.isEmpty() ? 0 : indices.sizeAt(1); + const auto hsRounds = codes.isEmpty() ? 
0 : codes.sizeAt(1); + + // regular mode provides 0 guarantees for reproducibility + auto numTargets = targets.lengthOf(); + targets.syncToHost(); + indices.syncToHost(); + codes.syncToHost(); + lr.syncToHost(); + nextRandom.syncToHost(); + negStarters.tickReadDevice(); + negStarters.syncToHost(); + auto bTarget = reinterpret_cast(targets.buffer()); //targets.bufferAsT(); + auto bIndices = reinterpret_cast(indices.buffer()); //indices.bufferAsT(); + auto bCodes = reinterpret_cast(codes.buffer()); //codes.bufferAsT(); + +// PRAGMA_OMP_PARALLEL_FOR_ARGS(num_threads(numThreads)) + for (int t = 0; t < numTargets; t++) { + T* neu1e;//lvectorLength <= 600 ? sneu1e : new T[vectorLength]; + auto err = cudaMalloc(&neu1e, vectorLength * sizeof(T)); + err = cudaMemset(neu1e, 0, vectorLength * sizeof(T)); + //memset(neu1e, 0, vectorLength * sizeof(T)); + + auto target = bTarget[t]; + auto alpha = lr.e(t); + unsigned long long randomValue = nextRandom.e(t); + + auto syn0row = reinterpret_cast(s0.specialBuffer()) + (target * vectorLength); + + if (hsRounds > 0) { + int irow = 0; + auto cShift = t * idxShift; + + for (int e = 0; e < hsRounds; e++) { + irow = bIndices[e + cShift]; + if (irow < 0 || irow >= vocabSize) + continue; + + auto syn1row = reinterpret_cast(s1.getSpecialBuffer()) + (irow * vectorLength); + auto code = bCodes[e + cShift]; + + //nd4j_printf("syn0: [%i]; syn1: [%i]; code: [%i]\n", target, irow, code); + hSoftmax_(syn0row, syn1row, expTable, neu1e, alpha, vectorLength, code, expLength, false, stream); + } + } + + + if (nsRounds > 0) { + int irow = negStarters.e(t); + int nsStarter = irow; + for (int r = 0; r < nsRounds + 1; r++) { + if (r == 0) { + // target is known in advance + } else { + randomValue = randomValue * (unsigned long long) 25214903917 + 11; + auto idx = nd4j::math::nd4j_abs((randomValue >> 16) % negLength); + irow = idx >= negLength ? -1 : static_cast(negTable[idx]); + + if (irow < 0 || irow >= vocabSize) + irow = randomValue % (vocabSize - 1) + 1; + + if (irow == nsStarter) + continue; + } + auto syn1row = reinterpret_cast(s1n.getSpecialBuffer()) + (irow * vectorLength); + + nSampling_(syn0row, syn1row, expTable, neu1e, alpha, vectorLength, r == 0 ? 1 : 0, expLength, false, stream); + } + } + addInfVectorKernel<<<128, 256, 256, *stream>>>(syn0row, neu1e, vectorLength); + + // optionally release temp arrays + err = cudaFree(neu1e); + if (err != 0) { + break; + } +// if (vectorLength > 600) +// delete[] neu1e; + } + } + BUILD_SINGLE_TEMPLATE(template void skipgramBatchExec_, (NDArray &s0, NDArray &s1, NDArray &s1n, NDArray& expTable, NDArray& negTable, NDArray &targets, NDArray &negStarters, NDArray &indices, NDArray &codes, NDArray &lr, NDArray &nextRandom, const int nsRounds, const bool preciseMode, const int numThreads), FLOAT_TYPES); + + void skipgram(NDArray &syn0, NDArray &syn1, NDArray &syn1Neg, NDArray &expTable, NDArray &negTable, + NDArray &target, NDArray &ngStarter, int nsRounds, NDArray &indices, NDArray &codes, NDArray &alpha, NDArray &randomValue, NDArray &inferenceVector, const bool preciseMode, const int numWorkers) { auto xType = syn0.dataType(); - + // single round case + if ((ngStarter.isScalar() && !ngStarter.isEmpty())|| (target.isScalar() && !target.isEmpty())) { + auto hsRounds = codes.lengthOf(); + target.syncToHost(); + ngStarter.syncToHost(); + alpha.syncToHost(); + randomValue.syncToHost(); + + auto targetV = target.isEmpty() ? -1 : target.e(0); + auto starterV = ngStarter.isEmpty() ? 
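Both skipgram_ and skipgramBatchExec_ above draw negative samples with the same inline generator: the state is advanced with the java.util.Random LCG constants (25214903917, 11), the bits above position 16 index the pre-built unigram negTable, and out-of-range rows are remapped into the vocabulary (hits on the starter row are skipped by the caller). A self-contained host-side sketch of one draw, with negTable, negLength and vocabSize as assumed inputs:

```cpp
#include <cstdlib>

// Hedged sketch of a single negative-sample draw as used in the skipgram helpers above.
// Advances randomValue in place and returns the sampled row.
int drawNegativeSample(unsigned long long& randomValue, const int* negTable,
                       long long negLength, int vocabSize) {
    randomValue = randomValue * 25214903917ULL + 11;          // java.util.Random LCG step
    auto idx = std::llabs((long long) ((randomValue >> 16) % negLength));
    int irow = idx >= negLength ? -1 : negTable[idx];         // unigram table lookup
    if (irow < 0 || irow >= vocabSize)                        // remap invalid rows
        irow = (int) (randomValue % (vocabSize - 1)) + 1;
    return irow;
}
```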
-1 : ngStarter.e(0); + auto alphaV = alpha.e(0); + auto randomV = randomValue.e(0); + BUILD_SINGLE_SELECTOR(xType, skipgram_, (syn0, syn1, syn1Neg, expTable, negTable, inferenceVector, targetV, starterV, indices, codes, alphaV, randomV, hsRounds, nsRounds), FLOAT_TYPES); + } else if (ngStarter.isVector() || target.isVector()){ + // batch mode +// NDArray& infVector, NDArray &targets, NDArray &negStarters, NDArray &indices, NDArray &codes, NDArray &lr, NDArray &nextRandom, const int nsRounds, const bool preciseMode, const int numThreads) + BUILD_SINGLE_SELECTOR(xType, skipgramBatchExec_, (syn0, syn1, syn1Neg, expTable, negTable, target, ngStarter, indices, codes, alpha, randomValue, nsRounds, preciseMode, numWorkers), FLOAT_TYPES); + } else + throw std::runtime_error("SkipGram: target must have rank 0 or 1"); } + template static __global__ void checkContextKernel(int* context, T* syn0, T* neu1, int contextWidth, int vectorLength, int vocabSize) { __shared__ bool hasError; @@ -157,16 +379,6 @@ namespace nd4j { } } - template - __global__ void addInfVectorKernel(T* neu1, T* infVector, int vectorLength) { - auto start = blockIdx.x * blockDim.x + threadIdx.x; - auto step = blockDim.x * gridDim.x; - - for (auto i = start; i < vectorLength; i += step) { - neu1[i] += infVector[i]; - } - } - template __global__ void shiftKernel(T* neu1, T* infVector, int contextWidth, int vectorLength) { auto start = blockIdx.x * blockDim.x + threadIdx.x; diff --git a/libnd4j/include/ops/declarable/helpers/cuda/toggle_bits.cu b/libnd4j/include/ops/declarable/helpers/cuda/toggle_bits.cu index c8db7b0e1..f90c9f77f 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/toggle_bits.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/toggle_bits.cu @@ -26,7 +26,13 @@ namespace nd4j { namespace helpers { template void toggle_bits__(NDArray &in, NDArray &out) { + NDArray::prepareSpecialUse({&out}, {&in}); + auto lambda = LAMBDA_T(_x) { + return ~_x;//eUtils::flip_bits(_x); + }; + in.applyLambda(lambda, &out); + NDArray::registerSpecialUse({&out}, {&in}); } BUILD_SINGLE_TEMPLATE(template void toggle_bits__, (NDArray &in, NDArray &out), INTEGER_TYPES); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/transforms.cu b/libnd4j/include/ops/declarable/helpers/cuda/transforms.cu index bb311ed01..a457d4c94 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/transforms.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/transforms.cu @@ -685,13 +685,12 @@ void clipByNormBP(nd4j::LaunchContext* context, const NDArray& input, const NDAr BUILD_SINGLE_TEMPLATE(template void randomShuffle_, (nd4j::LaunchContext * context, NDArray& input, NDArray& output, nd4j::graph::RandomGenerator& rng, const bool isInplace), LIBND4J_TYPES); - ////////////////////////////////////////////////////////////////////////// void eye(nd4j::LaunchContext * context, NDArray& output) { + output.setIdentity(); } - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// template static __global__ void clipByNormInplaceKernel(Nd4jLong numOfSubArrs, T* inputBuffer, Nd4jLong* shape, Nd4jLong* inputOffsets, T* norm2Buf, Nd4jLong* norm2shape, T clipNorm) { diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests2.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests2.cpp index c2af3cef4..d02ddcb69 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests2.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests2.cpp @@ -502,6 +502,7 @@ TEST_F(DeclarableOpsTests2, 
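The toggle_bits CUDA helper above now applies a plain bitwise NOT through a lambda, which is exactly what the ToggleBits tests further down expect: in two's-complement arithmetic ~x equals -x - 1, so toggling the bits of 2 gives -3 and toggling 1 gives -2. A compile-time check:

```cpp
#include <cstdint>

// Two's-complement sanity check for the toggle_bits semantics used above and asserted
// by ToggleBits_test1 / ToggleBits_test2 below.
static_assert(~std::int32_t{2} == -3, "~x == -x - 1 for two's-complement integers");
static_assert(~std::int32_t{1} == -2, "matches the second output of ToggleBits_test2");
```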
Test_FloorDiv_2) { auto x = NDArrayFactory::create('c', {1, 3}, {3.0, 6.0, -3.0}); auto y = NDArrayFactory::create('c', {1, 3}, {-2.0, 2.0, -2.0}); auto eps = NDArrayFactory::create('c', {1, 3}, {1, 2, 3}); + auto exp1 = NDArrayFactory::create('c', {1, 3}, {0.f, 0.f, 0.f}); auto exp2 = NDArrayFactory::create('c', {1, 3}, {0.f, 0.f, 0.f}); diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests5.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests5.cpp index 2e1833548..b596ebcd5 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests5.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests5.cpp @@ -223,21 +223,221 @@ TEST_F(DeclarableOpsTests5, Test_Boolean_diff_1) { delete result; } +TEST_F(DeclarableOpsTests5, Test_SetSeed_1) { + auto x = NDArrayFactory::create('c', {1, 1}, {120}); + auto y = NDArrayFactory::create(5); + nd4j::ops::set_seed op; + auto result = op.execute({&x, &y}, {}, {120, 5}, {}, false, nd4j::DataType::INT32); + ASSERT_EQ(Status::OK(), result->status()); +// result->at(0)->printIndexedBuffer("RES SEED"); + nd4j::ops::get_seed getOp; + auto getRes = getOp.execute({}, {}, {}); + ASSERT_EQ(Status::OK(), getRes->status()); +// getRes->at(0)->printIndexedBuffer("Output RES GET SEED"); +// ASSERT_EQ(result->at(0)->t(0), true); + delete result; + delete getRes; +} +//////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests5, scatterMul_test1) { + auto matrix = NDArrayFactory::create('c', {2, 2}, {1, 2, 3, 4}); + NDArray idc('c', {1}, {0LL}, nd4j::DataType::INT64); + auto updates = NDArrayFactory::create('c', {1, 2}, {10, 1}); + auto exp = NDArrayFactory::create('c', {2, 2}, {10, 2, 3, 4}); + nd4j::ops::scatter_mul op; + auto result = op.execute({&matrix, &idc, &updates}, {}, {}, {}); + ASSERT_EQ(ND4J_STATUS_OK, result->status()); + auto z = result->at(0); + ASSERT_TRUE(exp.equalsTo(z)); + delete result; +} +//////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests5, scatterDiv_test1) { + auto matrix = NDArrayFactory::create('c', {2, 2}, {1, 2, 3, 4}); + NDArray idc('c', {1}, {0LL}, nd4j::DataType::INT64); + auto updates = NDArrayFactory::create('c', {1, 2}, {10, 1}); + auto exp = NDArrayFactory::create('c', {2, 2}, {0.10, 2, 3, 4}); + nd4j::ops::scatter_div op; + auto result = op.execute({&matrix, &idc, &updates}, {}, {}, {}); + ASSERT_EQ(ND4J_STATUS_OK, result->status()); + auto z = result->at(0); +// z->printIndexedBuffer("Scatter Div"); + ASSERT_TRUE(exp.equalsTo(z)); + delete result; +} +//////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests5, scatterSub_test1) { + auto matrix = NDArrayFactory::create('c', {2, 2}, {1, 2, 3, 4}); + NDArray idc('c', {1}, {0LL}, nd4j::DataType::INT64); + auto updates = NDArrayFactory::create('c', {1, 2}, {10, 1}); + auto exp = NDArrayFactory::create('c', {2, 2}, {-9, 1, 3, 4}); + nd4j::ops::scatter_sub op; + auto result = op.execute({&matrix, &idc, &updates}, {}, {}, {}); + ASSERT_EQ(ND4J_STATUS_OK, result->status()); + auto z = result->at(0); +// z->printIndexedBuffer("Scatter Sub"); + ASSERT_TRUE(exp.equalsTo(z)); + delete result; +} +//////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests5, hardsigmoid_test1) { + auto matrix = NDArrayFactory::create('c', {2, 2}, {1, 2, 3, 4}); + auto exp = NDArrayFactory::create('c', {2, 2}, {0.7, 0.9, 1, 1}); + + nd4j::ops::hardsigmoid op; + auto result = op.execute({&matrix}, {}, {}, {}); + ASSERT_EQ(ND4J_STATUS_OK, 
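The scatter tests above all exercise one pattern: row idc[k] of the reference matrix is combined elementwise with row k of updates, and every other row is left untouched. For scatterMul_test1 that means row 0 becomes {1 * 10, 2 * 1} = {10, 2}, giving the expected {10, 2, 3, 4}. A tiny host-side sketch of that semantics (multiplication shown; scatter_sub and scatter_div only swap the operator):

```cpp
#include <cstddef>
#include <vector>

// Hedged reference sketch of scatter_mul on a row-major matrix: for each k, multiply
// row indices[k] elementwise by row k of updates. Not the library kernel, just the
// arithmetic the tests above verify.
void scatterMulRows(std::vector<float>& matrix, std::size_t cols,
                    const std::vector<std::size_t>& indices,
                    const std::vector<float>& updates) {
    for (std::size_t k = 0; k < indices.size(); k++)
        for (std::size_t c = 0; c < cols; c++)
            matrix[indices[k] * cols + c] *= updates[k * cols + c];
}
// e.g. matrix = {1, 2, 3, 4} viewed as 2x2, cols = 2, indices = {0}, updates = {10, 1}
// leaves matrix as {10, 2, 3, 4}, matching scatterMul_test1.
```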
result->status()); + + auto z = result->at(0); + z->printIndexedBuffer("Hadrdsigmoid 2x2"); + ASSERT_TRUE(exp.equalsTo(z)); + + delete result; +} + +//////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests5, hardsigmoid_test2) { + auto matrix = NDArrayFactory::create('c', {2, 2}, {1, 2, 3, 4}); + auto eps = NDArrayFactory::create('c', {2, 2}, {1, 2, 3, 4}); + auto exp = NDArrayFactory::create('c', {2, 2}, {0.2, 0.4, 0, 0}); + + nd4j::ops::hardsigmoid_bp op; + auto result = op.execute({&matrix, &eps}, {}, {}, {}); + ASSERT_EQ(ND4J_STATUS_OK, result->status()); + + auto z = result->at(0); + z->printIndexedBuffer("Hadrdsigmoid 2x2"); + ASSERT_TRUE(exp.equalsTo(z)); + + delete result; +} + +//////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests5, hardtanh_test1) { + auto matrix = NDArrayFactory::create('c', {3, 3}, {-4, -3, -2, -1, 0, 1, 2, 3, 4}); + auto exp = NDArrayFactory::create('c', {3, 3}, {-1, -1, -1, -1, 0, 1, 1, 1, 1}); + + nd4j::ops::hardtanh op; + auto result = op.execute({&matrix}, {}, {}, {}); + ASSERT_EQ(ND4J_STATUS_OK, result->status()); + + auto z = result->at(0); +// z->printIndexedBuffer("Hardtanh 2x2"); + ASSERT_TRUE(exp.equalsTo(z)); + + delete result; +} +//////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests5, hardtanh_test2) { + auto matrix = NDArrayFactory::create('c', {3, 3}, {-4, -3, -2, -1, 0, 1, 2, 3, 4}); + auto eps = NDArrayFactory::create('c', {3, 3}, {1, 2, 3, 4, 5, 6, 7, 8, 9}); + auto exp = NDArrayFactory::create('c', {3, 3}, {0, 0, 0, 4, 5, 6, 0, 0, 0}); + + nd4j::ops::hardtanh_bp op; + auto result = op.execute({&matrix, &eps}, {}, {}, {}); + ASSERT_EQ(ND4J_STATUS_OK, result->status()); + + auto z = result->at(0); +// z->printIndexedBuffer("Hardtanh_bp 2x2"); + ASSERT_TRUE(exp.equalsTo(z)); + + delete result; +} + +//////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests5, histogram_test1) { + auto matrix = NDArrayFactory::create('c', {3, 3}, {-4, -3, -2, -1, 0, 1, 2, 3, 4}); + auto exp = NDArrayFactory::create('c', {3}, {3, 3, 3}); + + nd4j::ops::histogram op; + auto result = op.execute({&matrix}, {}, {3}, {}); + ASSERT_EQ(ND4J_STATUS_OK, result->status()); + + auto z = result->at(0); +// z->printIndexedBuffer("Histogram3"); + ASSERT_TRUE(exp.equalsTo(z)); + + delete result; +} +//////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests5, histogram_test2) { + auto matrix = NDArrayFactory::create('c', {3}, {1, 2, 1}); + auto exp = NDArrayFactory::create('c', {4}, {2, 0, 0, 1}); + + nd4j::ops::histogram op; + auto result = op.execute({&matrix}, {}, {4}, {}); + ASSERT_EQ(ND4J_STATUS_OK, result->status()); + + auto z = result->at(0); + z->printIndexedBuffer("Histogram4"); + ASSERT_TRUE(exp.equalsTo(z)); + + delete result; +} + +//////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests5, Identity_test1) { + auto matrix = NDArrayFactory::create('c', {3, 3}, {-4, -3, -2, -1, 0, 1, 2, 3, 4}); +// auto exp = NDArrayFactory::create('c', {3, 3}, {3, 3, 3}); + + nd4j::ops::identity op; + auto result = op.execute({&matrix}, {}, {}, {}); + ASSERT_EQ(ND4J_STATUS_OK, result->status()); + + auto z = result->at(0); +// z->printIndexedBuffer("Histogram3"); + ASSERT_TRUE(matrix.equalsTo(z)); + + delete result; +} + +//////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests5, Identity_test2) { + auto matrix = 
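histogram_test2 above is a handy worked check of the kernel math changed in histogram.cu: for input {1, 2, 1} and 4 bins, min = 1 and max = 2, so binSize = (max - min) / numBins = 0.25; both 1s fall into bin (1 - 1) / 0.25 = 0, while 2 computes to bin (2 - 1) / 0.25 = 4 and is clamped to numBins - 1 = 3, producing {2, 0, 0, 1}. The same clamping yields {3, 3, 3} for histogram_test1. A host-side sketch of the indexing:

```cpp
#include <algorithm>
#include <vector>

// Host-side sketch of the bin indexing used by histogramKernel above, clamping included.
// Assumes max > min, as in the tests.
std::vector<long long> histogramReference(const std::vector<double>& x, long long numBins) {
    auto mn = *std::min_element(x.begin(), x.end());
    auto mx = *std::max_element(x.begin(), x.end());
    double binSize = (mx - mn) / (double) numBins;
    std::vector<long long> bins(numBins, 0);
    for (double v : x) {
        auto idx = (long long) ((v - mn) / binSize);
        idx = std::max(idx, 0LL);            // same clamping as nd4j_max / nd4j_min in the kernel
        idx = std::min(idx, numBins - 1);
        bins[idx]++;
    }
    return bins;
}
// histogramReference({1, 2, 1}, 4) == {2, 0, 0, 1}, matching histogram_test2.
```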
NDArrayFactory::create('c', {3, 3}, {-4, -3, -2, -1, 0, 1, 2, 3, 4}); + auto eps = NDArrayFactory::create('c', {3, 3}, {1,2,3,4,5,6,7,8,9}); +// auto exp = NDArrayFactory::create('c', {3,3}); + nd4j::ops::identity_bp op; + auto result = op.execute({&matrix, &eps}, {}, {}, {}); + ASSERT_EQ(ND4J_STATUS_OK, result->status()); + + auto z = result->at(0); + z->printIndexedBuffer("Identity_BP"); + ASSERT_TRUE(z->equalsTo(eps)); + + delete result; +} +//////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests5, Log1p_test1) { + auto matrix = NDArrayFactory::create('c', {3, 3}, {4, 3, 2, 1, 0, 1, 2, 3, 4}); + auto y = NDArrayFactory::create('c', {3,3}, {5,4,3,2,1,2,3,4,5}); + // auto eps = NDArrayFactory::create('c', {3, 3}, {1,2,3,4,5,6,7,8,9}); +// auto exp = NDArrayFactory::create('c', {3,3}); + nd4j::ops::Log1p op; + y.applyTransform(nd4j::transform::Log, nullptr, nullptr); + auto result = op.execute({&matrix}, {}, {}, {}); + ASSERT_EQ(ND4J_STATUS_OK, result->status()); + + auto z = result->at(0); + z->printIndexedBuffer("Log1p"); + ASSERT_TRUE(z->equalsTo(y)); + + delete result; +} TEST_F(DeclarableOpsTests5, Test_SpaceToBatch_1) { diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests6.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests6.cpp index 24701f70f..81086594d 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests6.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests6.cpp @@ -737,6 +737,44 @@ TEST_F(DeclarableOpsTests6, cumSum_20) { delete result; } +//////////////////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests6, TestMergeMaxIndex_1) { + + auto x = NDArrayFactory::create('c', {2, 2, 2}, {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f}); + auto y = NDArrayFactory::create('c', {2, 2, 2}, {10.f, 2.f, 30.f, 4.f, 50.f, 6.f, 70.f, 8.f}); + auto z = NDArrayFactory::create('c', {2, 2, 2}, {1.f, 20.f, 3.f, 40.f, 5.f, 60.f, 7.f, 80.f}); + auto exp = NDArrayFactory::create('c', {2, 2, 2}, {1, 2, 1, 2, 1, 2, 1, 2}); + nd4j::ops::mergemaxindex op; + + auto ress = op.execute({&x, &y, &z}, {}, {}, {}); + + ASSERT_EQ(ND4J_STATUS_OK, ress->status()); +// ress->at(0)->printIndexedBuffer("MergeMaxIndex Result is "); +// ress->at(0)->printShapeInfo("Shape info for MergeMaxIdex"); +// x.printIndexedBuffer("Input is"); + ASSERT_TRUE(ress->at(0)->equalsTo(exp)); + delete ress; +} + +//////////////////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests6, TestMergeMaxIndex_2) { + + auto x = NDArrayFactory::create('c', {2, 2, 2}, {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f}); + auto y = NDArrayFactory::create('c', {2, 2, 2}, {10.f, 2.f, 30.f, 4.f, 50.f, 6.f, 70.f, 8.f}); + auto z = NDArrayFactory::create('c', {2, 2, 2}, {1.f, 20.f, 3.f, 40.f, 5.f, 60.f, 7.f, 80.f}); + auto exp = NDArrayFactory::create('c', {2, 2, 2}, {1, 2, 1, 2, 1, 2, 1, 2}); + nd4j::ops::mergemaxindex op; + + auto ress = op.execute({&x, &y, &z}, {}, {nd4j::DataType::INT64}, {}); + + ASSERT_EQ(ND4J_STATUS_OK, ress->status()); +// ress->at(0)->printIndexedBuffer("MergeMaxIndex2 Result is "); +// ress->at(0)->printShapeInfo("Shape info for MergeMaxIdex2"); +// x.printIndexedBuffer("Input is"); + ASSERT_TRUE(ress->at(0)->equalsTo(exp)); + delete ress; +} + //////////////////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests6, TestDropout_1) { @@ -752,8 +790,60 @@ TEST_F(DeclarableOpsTests6, TestDropout_1) { delete ress; } 
+//////////////////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests6, TestMod_1) { + auto x = NDArrayFactory::create('c', {2, 2, 2}, {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f}); + auto y = NDArrayFactory::create('c', {2, 2, 2}, {10.f, 2.f, 30.f, 4.f, 50.f, 6.f, 70.f, 8.f}); + auto exp = NDArrayFactory::create('c', {2, 2, 2}, {1, 0, 3, 0, 5, 0, 7, 0}); + nd4j::ops::mod op; + auto ress = op.execute({&x, &y}, {}, {}, {}); + + ASSERT_EQ(ND4J_STATUS_OK, ress->status()); +// ress->at(0)->printIndexedBuffer("MOD Result is "); +// x.printIndexedBuffer("Input is"); + ASSERT_TRUE(ress->at(0)->equalsTo(exp)); + delete ress; +} + +//////////////////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests6, TestMod_BP_1) { + + auto x = NDArrayFactory::create('c', {2, 2, 2}, {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f}); + auto y = NDArrayFactory::create('c', {2, 2, 2}, {10.f, 2.f, 30.f, 4.f, 50.f, 6.f, 70.f, 8.f}); + auto eps = NDArrayFactory::create('c', {2, 2, 2}, {10.f, 2.f, 30.f, 4.f, 50.f, 6.f, 70.f, 8.f}); + auto exp = NDArrayFactory::create('c', {2, 2, 2}); + nd4j::ops::mod_bp op; + + auto ress = op.execute({&x, &y, &eps}, {}, {}, {}); + + ASSERT_EQ(ND4J_STATUS_OK, ress->status()); +// ress->at(0)->printIndexedBuffer("MOD_BP Result is "); + + // x.printIndexedBuffer("Input is"); + ASSERT_TRUE(ress->at(0)->equalsTo(exp)); + delete ress; +} + +/////////////////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests6, TestRank_1) { + + auto x = NDArrayFactory::create('c', {2, 2, 2}, {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f}); + auto y = NDArrayFactory::create('c', {2, 2, 2}, {10.f, 2.f, 30.f, 4.f, 50.f, 6.f, 70.f, 8.f}); + auto eps = NDArrayFactory::create('c', {2, 2, 2}, {10.f, 2.f, 30.f, 4.f, 50.f, 6.f, 70.f, 8.f}); + auto exp = NDArrayFactory::create(3); + nd4j::ops::rank op; + + auto ress = op.execute({&x}, {}, {}, {}); + + ASSERT_EQ(ND4J_STATUS_OK, ress->status()); + ress->at(0)->printIndexedBuffer("RANK Result is "); + + // x.printIndexedBuffer("Input is"); + ASSERT_TRUE(ress->at(0)->equalsTo(exp)); + delete ress; +} TEST_F(DeclarableOpsTests6, TestDropout_2) { // auto x0 = NDArrayFactory::create('c', {10, 10}); // auto x1 = NDArrayFactory::create('c', {10, 10}); diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests7.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests7.cpp index febb65c21..996dd4f23 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests7.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests7.cpp @@ -24,6 +24,7 @@ #include #include #include +#include using namespace nd4j; @@ -3605,6 +3606,289 @@ TEST_F(DeclarableOpsTests7, transpose_test3) { delete result; } +//////////////////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests7, rationaltanh_test1) { + + auto input = NDArrayFactory::create('c', {8}, {0, 1, 2, 3, 4, 5, 6, 7}); + NDArray exp = NDArrayFactory::create({0.000000, 0.998222, 1.516093, 1.658054, 1.695077, 1.706884, 1.711427, 1.713446}); + + nd4j::ops::rationaltanh op; + auto result = op.execute({&input}, {}, {}); + auto output = result->at(0); +// output->printIndexedBuffer("Output rationaltanh"); + ASSERT_TRUE(exp.isSameShape(output)); + ASSERT_TRUE(exp.equalsTo(output)); + + delete result; +} + +//////////////////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests7, rationaltanh_test2) { + + auto input = NDArrayFactory::create('c', {2,2,2}, {0, 1, 2, 3, 4, 5, 6, 
7}); + NDArray exp = NDArrayFactory::create('c', {2,2,2}, {0.000000, 0.998222, 1.516093, 1.658054, 1.695077, 1.706884, 1.711427, 1.713446}); + + nd4j::ops::rationaltanh op; + auto result = op.execute({&input}, {}, {}); + auto output = result->at(0); +// output->printIndexedBuffer("Output rationaltanh"); + ASSERT_TRUE(exp.isSameShape(output)); + ASSERT_TRUE(exp.equalsTo(output)); + + delete result; +} + +//////////////////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests7, rationaltanh_test3) { + + auto input = NDArrayFactory::create('c', {2,2,2}, {0, 1, 2, 3, 4, 5, 6, 7}); + auto eps = NDArrayFactory::create('c', {2,2,2}, {1, 2, 3, 4, 5, 6, 7, 8}); + NDArray exp = NDArrayFactory::create('c', {2,2,2}, {1.143933, 1.605747, 0.795557, 0.261710, 0.095832, 0.041218, 0.020221, 0.010971}); + + nd4j::ops::rationaltanh_bp op; + auto result = op.execute({&input, &eps}, {}, {}); + auto output = result->at(0); +// output->printBuffer("Output rationaltanh BP"); + ASSERT_TRUE(exp.isSameShape(output)); + ASSERT_TRUE(exp.equalsTo(output)); + + delete result; +} + +//////////////////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests7, rectifiedtanh_test1) { + + auto input = NDArrayFactory::create('c', {2,2,2}, {0, 1, 2, 3, 4, 5, 6, 7}); + NDArray exp = NDArrayFactory::create('c', {2,2,2}, {0.000000, 0.761594, 0.964028, 0.995055, 0.999329, 0.999909, 0.999988, 0.999998}); + + nd4j::ops::rectifiedtanh op; + auto result = op.execute({&input}, {}, {}); + auto output = result->at(0); +// output->printIndexedBuffer("Output rectifiedtanh"); + ASSERT_TRUE(exp.isSameShape(output)); + ASSERT_TRUE(exp.equalsTo(output)); + + delete result; +} + +//////////////////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests7, rectifiedtanh_test2) { + + auto input = NDArrayFactory::create('c', {2,2,2}, {0, 1, 2, 3, 4, 5, 6, 7}); + auto eps = NDArrayFactory::create('c', {2,2,2}, {1, 2, 3, 4, 5, 6, 7, 8}); + NDArray exp = NDArrayFactory::create('c', {2,2,2}, {0.000000, 0.839949, 0.211952, 0.039464, 0.006705, 0.001089, 0.000172, 0.000027}); + + nd4j::ops::rectifiedtanh_bp op; + auto result = op.execute({&input, &eps}, {}, {}); + auto output = result->at(0); +// output->printBuffer("Output rectifiedtanh BP"); + ASSERT_TRUE(exp.isSameShape(output)); + ASSERT_TRUE(exp.equalsTo(output)); + + delete result; +} + +TEST_F(DeclarableOpsTests7, RealDiv_1) { + + NDArray x = NDArrayFactory::create('c', {1, 2, 1}, {2, 4}); + NDArray y = NDArrayFactory::create('c', {1, 2}, {1,2}); + NDArray e = NDArrayFactory::create('c', {1, 2, 2}, {2, 1, 4, 2}); + + nd4j::ops::realdiv op; + auto result = op.execute({&x, &y}, {}, {}); + + ASSERT_EQ(Status::OK(), result->status()); + + auto z = result->at(0); +// z->printIndexedBuffer("OUtput RealDiv"); + ASSERT_TRUE(e.isSameShape(z)); + ASSERT_TRUE(e.equalsTo(*z)); + + delete result; +} + +//////////////////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests7, RealDiv_BP_1) { + + NDArray x = NDArrayFactory::create('c', {1, 2, 1}, {2, 4}); + NDArray y = NDArrayFactory::create('c', {1, 2}, {1,2}); + NDArray e0 = NDArrayFactory::create('c', {1, 2, 1}, {2, 5}); + NDArray e1 = NDArrayFactory::create('c', {1, 2}, {-14, -5}); + NDArray eps = NDArrayFactory::create('c', {1, 2, 2}, {1, 2, 3, 4}); + + nd4j::ops::realdiv_bp op; + auto result = op.execute({&x, &y, &eps}, {}, {}); + + ASSERT_EQ(Status::OK(), result->status()); + + auto z0 = result->at(0); + auto z1 = 
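The rectifiedtanh_test1 expectations above coincide with tanh(x) for these non-negative inputs (tanh 1 = 0.761594, tanh 2 = 0.964028, and so on), which is consistent with a rectified tanh defined as max(0, tanh x). A small check of the forward values under that assumed definition:

```cpp
#include <cmath>
#include <cstdio>

// Assumed definition: rectifiedtanh(x) = max(0, tanh(x)). The printed values reproduce
// the expected buffer of rectifiedtanh_test1 for inputs 0..7.
int main() {
    for (int x = 0; x < 8; x++)
        std::printf("%f\n", std::fmax(0.0, std::tanh((double) x)));
    return 0;
}
```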
result->at(1); +// z0->printShapeInfo("OUtput RealDiv BP0 shape"); +// z1->printShapeInfo("OUtput RealDiv BP1 shape"); +// z0->printIndexedBuffer("OUtput RealDiv BP0"); +// z1->printIndexedBuffer("OUtput RealDiv BP1"); +// ASSERT_TRUE(e.isSameShape(z)); + ASSERT_TRUE(e0.equalsTo(z0)); + ASSERT_TRUE(e1.equalsTo(z1)); + + delete result; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests7, ShapesOf_1) { + + NDArray x = NDArrayFactory::create('c', {1, 2, 1}, {2, 4}); +// NDArray y = NDArrayFactory::create('c', {1, 2}, {1,2}); + NDArray e = NDArrayFactory::create({1, 2, 1}); + + nd4j::ops::shapes_of op; + auto result = op.execute({&x}, {}, {}); + + ASSERT_EQ(Status::OK(), result->status()); + + auto z = result->at(0); +// z->printIndexedBuffer("OUtput RealDiv"); +// ASSERT_TRUE(e.isSameShape(z)); + ASSERT_TRUE(e.equalsTo(*z)); + + delete result; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests7, ShapesOf_2) { + + NDArray x = NDArrayFactory::create('c', {1, 2, 1}, {2, 4}); + NDArray y = NDArrayFactory::create('c', {1, 2}, {1,2}); + NDArray e0 = NDArrayFactory::create({1, 2, 1}); + NDArray e1 = NDArrayFactory::create({1, 2}); + + nd4j::ops::shapes_of op; + auto result = op.execute({&x, &y}, {}, {}); + + ASSERT_EQ(Status::OK(), result->status()); + + auto z0 = result->at(0); + auto z1 = result->at(1); +// z0->printIndexedBuffer("OUtput shapes2"); +// z1->printIndexedBuffer("OUtput shapes2"); +// ASSERT_TRUE(e.isSameShape(z)); + ASSERT_TRUE(e0.equalsTo(z0)); + ASSERT_TRUE(e1.equalsTo(z1)); + + delete result; +} + +TEST_F(DeclarableOpsTests7, Size_1) { + + NDArray x = NDArrayFactory::create('c', {1, 2, 1}, {2, 4}); + NDArray y = NDArrayFactory::create('c', {5, 2}, {1,2,3,4,5,7,9,10, 10, 11}); + NDArray e = NDArrayFactory::create(2); + + nd4j::ops::size op; + auto result = op.execute({&x}, {}, {}); + + ASSERT_EQ(Status::OK(), result->status()); + + auto z = result->at(0); +// z->printIndexedBuffer("OUtput SIZE"); +/// ASSERT_TRUE(e.isSameShape(z)); + ASSERT_TRUE(e.equalsTo(*z)); + + delete result; +} + +TEST_F(DeclarableOpsTests7, Size_2) { + + NDArray x = NDArrayFactory::create('c', {1, 2, 1}, {2, 4}); + NDArray y = NDArrayFactory::create('c', {5, 2}, {1,2,3,4,5,7,9,10, 10, 11}); + NDArray e = NDArrayFactory::create(10); + + nd4j::ops::size op; + auto result = op.execute({&y}, {}, {}); + + ASSERT_EQ(Status::OK(), result->status()); + + auto z = result->at(0); +// z->printIndexedBuffer("OUtput SIZE"); +/// ASSERT_TRUE(e.isSameShape(z)); + ASSERT_TRUE(e.equalsTo(*z)); + + delete result; +} + +TEST_F(DeclarableOpsTests7, Softplus_1) { + + NDArray x = NDArrayFactory::create('c', {5, 2}, {1,2,3,4,5,7,9,10, 10, 11}); + NDArray e = NDArrayFactory::create('c', {5, 2}, {1.3132616, 2.126928, 3.0485873, 4.01815, 5.0067153, 7.0009117, 9.000123, 10.000046, 10.000046, 11.000016}); + + nd4j::ops::softplus op; + auto result = op.execute({&x}, {}, {}); + + ASSERT_EQ(Status::OK(), result->status()); + + auto z = result->at(0); +// z->printIndexedBuffer("OUtput Softplus"); +/// ASSERT_TRUE(e.isSameShape(z)); + ASSERT_TRUE(e.equalsTo(*z)); + + delete result; +} + +TEST_F(DeclarableOpsTests7, Softplus_BP_1) { + + NDArray x = NDArrayFactory::create('c', {5, 2}, {1,2,3,4,5,7,9,10, 10, 11}); +// NDArray e = NDArrayFactory::create('c', {5, 2}, {1.3132616, 2.126928, 3.0485873, 4.01815, 5.0067153, 7.0009117, 9.000123, 
+TEST_F(DeclarableOpsTests7, Softplus_BP_1) {
+
+    NDArray x = NDArrayFactory::create('c', {5, 2}, {1,2,3,4,5,7,9,10, 10, 11});
+//    NDArray e = NDArrayFactory::create('c', {5, 2}, {1.3132616, 2.126928, 3.0485873, 4.01815, 5.0067153, 7.0009117, 9.000123, 10.000046, 10.000046, 11.000016});
+    NDArray eps = NDArrayFactory::create('c', {5, 2}, {1,2,3,4,5,6,7,8, 9, 10});
+    nd4j::ops::softplus ffOP;
+    nd4j::ops::softplus_bp bpOp;
+    const OpArgsHolder argsHolderFF({&x}, {}, {});
+    const OpArgsHolder argsHolderBP({&x, &eps}, {}, {});
+
+    bool gradOK = GradCheck::checkGrad(ffOP, bpOp, argsHolderFF, argsHolderBP);
+
+    ASSERT_TRUE(gradOK);
+//
+//    auto z = result->at(0);
+//    z->printIndexedBuffer("Output Softplus");
+/////    ASSERT_TRUE(e.isSameShape(z));
+//    ASSERT_TRUE(e.equalsTo(*z));
+//
+//    delete result;
+}
+
+TEST_F(DeclarableOpsTests7, Softsign_1) {
+
+    NDArray x = NDArrayFactory::create('c', {5, 2}, {1,2,3,4,5,7,9,10, 10, 11});
+    NDArray e = NDArrayFactory::create('c', {5, 2}, {0.5, 0.6666667, 0.75, 0.8, 0.8333333, 0.875, 0.9, 0.90909094, 0.90909094, 0.9166667});
+
+    nd4j::ops::softsign op;
+    auto result = op.execute({&x}, {}, {});
+
+    ASSERT_EQ(Status::OK(), result->status());
+
+    auto z = result->at(0);
+//    z->printIndexedBuffer("Output Softsign");
+///    ASSERT_TRUE(e.isSameShape(z));
+    ASSERT_TRUE(e.equalsTo(*z));
+
+    delete result;
+}
+
+TEST_F(DeclarableOpsTests7, Softsign_BP_1) {
+
+    NDArray x = NDArrayFactory::create('c', {5, 2}, {1,2,3,4,5,7,9,10, 10, 11});
+//    NDArray e = NDArrayFactory::create('c', {5, 2}, {1.3132616, 2.126928, 3.0485873, 4.01815, 5.0067153, 7.0009117, 9.000123, 10.000046, 10.000046, 11.000016});
+    NDArray eps = NDArrayFactory::create('c', {5, 2}, {1,2,3,4,5,6,7,8, 9, 10});
+    nd4j::ops::softsign ffOP;
+    nd4j::ops::softsign_bp bpOp;
+    const OpArgsHolder argsHolderFF({&x}, {}, {});
+    const OpArgsHolder argsHolderBP({&x, &eps}, {}, {});
+
+    bool gradOK = GradCheck::checkGrad(ffOP, bpOp, argsHolderFF, argsHolderBP);
+
+    ASSERT_TRUE(gradOK);
+}
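+// Closed forms behind the expected buffers in the softplus/softsign tests above, assumed
+// from the values rather than from the op implementations: softplus(x) = ln(1 + e^x),
+// e.g. softplus(1) ~ 1.3132616; softsign(x) = x / (1 + |x|), e.g. softsign(1) = 0.5, with
+// softsign'(x) = 1 / (1 + |x|)^2. The *_BP_1 tests do not hard-code expected gradients:
+// GradCheck::checkGrad compares the bp op against a numerical gradient of the forward op.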
+
 ////////////////////////////////////////////////////////////////////////////////
 TEST_F(DeclarableOpsTests7, fill_test2) {
@@ -3644,6 +3928,185 @@ TEST_F(DeclarableOpsTests7, fill_test3) {
     delete result;
 }
+////////////////////////////////////////////////////////////////////////////////
+TEST_F(DeclarableOpsTests7, ToggleBits_test1) {
+
+    auto x = NDArrayFactory::create('c', {2}, {2, 2});
+    auto exp = NDArrayFactory::create('c', {2}, {-3, -3});
+
+    nd4j::ops::toggle_bits op;
+    auto result = op.execute({&x}, {}, {}, {}, false, nd4j::DataType::INT32);
+    auto output = result->at(0);
+
+    ASSERT_EQ(ND4J_STATUS_OK, result->status());
+//    output->printIndexedBuffer("Toggled");
+    ASSERT_TRUE(exp.isSameShape(output));
+    ASSERT_TRUE(exp.equalsTo(output));
+
+    delete result;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+TEST_F(DeclarableOpsTests7, ToggleBits_test2) {
+
+    auto x = NDArrayFactory::create('c', {2}, {2, 2});
+    auto y = NDArrayFactory::create('c', {2}, {1, 1});
+    auto exp0 = NDArrayFactory::create('c', {2}, {-3, -3});
+    auto exp1 = NDArrayFactory::create('c', {2}, {-2, -2});
+
+    nd4j::ops::toggle_bits op;
+    auto result = op.execute({&x, &y}, {}, {}, {}, false, nd4j::DataType::INT32);
+    auto output = result->at(0);
+    auto z = result->at(1);
+
+    ASSERT_EQ(ND4J_STATUS_OK, result->status());
+//    output->printIndexedBuffer("Toggled");
+    ASSERT_TRUE(exp0.isSameShape(output));
+    ASSERT_TRUE(exp0.equalsTo(output));
+    ASSERT_TRUE(exp1.isSameShape(z));
+    ASSERT_TRUE(exp1.equalsTo(z));
+
+    delete result;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+TEST_F(DeclarableOpsTests7, Truncatediv_test1) {
+    NDArray x = NDArrayFactory::create('c', {5, 2}, {1,2,3,4,5,7,9,10, 10, 11});
+    NDArray y = NDArrayFactory::create('c', {5, 2}, {2,2,2,2,2,2,2,2, 2, 2});
+    NDArray exp = NDArrayFactory::create('c', {5, 2}, {0.5, 1., 1.5, 2., 2.5, 3.5, 4.5, 5., 5., 5.5});
+
+    nd4j::ops::truncatediv op;
+    auto result = op.execute({&x, &y}, {}, {});
+    ASSERT_EQ(ND4J_STATUS_OK, result->status());
+    auto output = result->at(0);
+//    output->printIndexedBuffer("Toggled");
+    ASSERT_TRUE(exp.isSameShape(output));
+
+    delete result;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+TEST_F(DeclarableOpsTests7, Truncatediv_test2) {
+    NDArray x = NDArrayFactory::create('c', {5, 2}, {1,2,3,4,5,7,9,10, 10, 11});
+    NDArray y = NDArrayFactory::create('c', {1, 2}, {2,2});
+    NDArray exp = NDArrayFactory::create('c', {5, 2}, {0.5, 1., 1.5, 2., 2.5, 3.5, 4.5, 5., 5., 5.5});
+
+    nd4j::ops::truncatediv op;
+    auto result = op.execute({&x, &y}, {}, {});
+    ASSERT_EQ(ND4J_STATUS_OK, result->status());
+    auto output = result->at(0);
+//    output->printIndexedBuffer("Toggled");
+    ASSERT_TRUE(exp.isSameShape(output));
+
+    delete result;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+TEST_F(DeclarableOpsTests7, TypesConversion_test1) {
+    NDArray x = NDArrayFactory::create('c', {5, 2}, {1,2,3,4,5,7,9,10, 10, 11});
+    NDArray expI = NDArrayFactory::create('c', {5, 2}, {1,2,3,4,5,7,9,10, 10, 11});
+    NDArray expL = NDArrayFactory::create('c', {5, 2}, {1,2,3,4,5,7,9,10, 10, 11});
+    NDArray expF = NDArrayFactory::create('c', {5, 2}, {1,2,3,4,5,7,9,10, 10, 11});
+    NDArray expF16 = NDArrayFactory::create('c', {5, 2}, {1.f,2.f,3.f,4.f,5.f,7.f,9.f,10.f, 10.f, 11.f});
+
+    nd4j::ops::to_int32 op32;
+    nd4j::ops::to_int64 op64;
+    auto result32 = op32.execute({&x}, {}, {});
+    auto result64 = op64.execute({&x}, {}, {});
+
+    ASSERT_EQ(ND4J_STATUS_OK, result32->status());
+    ASSERT_EQ(ND4J_STATUS_OK, result64->status());
+    auto out1 = result32->at(0);
+//    out1->printIndexedBuffer("OUT_I");
+    auto out2 = result64->at(0);
+//    out2->printIndexedBuffer("OUT_L");
+
+//    output->printIndexedBuffer("Toggled");
+    ASSERT_TRUE(expI.equalsTo(out1));
+    ASSERT_TRUE(expL.equalsTo(out2));
+
+    delete result32;
+    delete result64;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+TEST_F(DeclarableOpsTests7, TypesConversion_test2) {
+    NDArray x = NDArrayFactory::create('c', {5, 2}, {1,2,3,4,5,7,9,10, 10, 11});
+    NDArray expF = NDArrayFactory::create('c', {5, 2}, {1,2,3,4,5,7,9,10, 10, 11});
+    NDArray expH = NDArrayFactory::create('c', {5, 2}, {1.f,2.f,3.f,4.f,5.f,7.f,9.f,10.f, 10.f, 11.f});
+
+    nd4j::ops::to_float32 op32;
+    nd4j::ops::to_float16 op16;
+    auto result32 = op32.execute({&x}, {}, {});
+    auto result16 = op16.execute({&x}, {}, {});
+
+    ASSERT_EQ(ND4J_STATUS_OK, result32->status());
+    ASSERT_EQ(ND4J_STATUS_OK, result16->status());
+    auto out1 = result32->at(0);
+//    out1->printIndexedBuffer("OUT_F");
+    auto out2 = result16->at(0);
+//    out2->printIndexedBuffer("OUT_H");
+
+//    output->printIndexedBuffer("Toggled");
+    ASSERT_TRUE(expF.equalsTo(out1));
+    ASSERT_TRUE(expH.equalsTo(out2));
+
+    delete result32;
+    delete result16;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+TEST_F(DeclarableOpsTests7, TypesConversion_test3) {
+    NDArray x = NDArrayFactory::create('c', {5, 2}, {1,2,3,4,5,7,9,10, 10, 11});
+    NDArray exp32 = NDArrayFactory::create('c', {5, 2}, {1,2,3,4,5,7,9,10, 10, 11});
+    NDArray exp64 = NDArrayFactory::create('c', {5, 2}, {1,2,3,4,5,7,9,10, 10, 11});
+
+    nd4j::ops::to_uint32 op32;
+    nd4j::ops::to_uint64 op64;
+    auto result32 = op32.execute({&x}, {}, {});
+    auto result64 = op64.execute({&x}, {}, {});
+
+    ASSERT_EQ(ND4J_STATUS_OK, result32->status());
+    ASSERT_EQ(ND4J_STATUS_OK, result64->status());
+    auto out1 = result32->at(0);
+//    out1->printIndexedBuffer("OUT_U32");
+    auto out2 = result64->at(0);
+//    out2->printIndexedBuffer("OUT_U64");
+
+//    output->printIndexedBuffer("Toggled");
+    ASSERT_TRUE(exp32.equalsTo(out1));
+    ASSERT_TRUE(exp64.equalsTo(out2));
+
+    delete result32;
+    delete result64;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+TEST_F(DeclarableOpsTests7, TypesConversion_test4) {
+    NDArray x = NDArrayFactory::create('c', {5, 2}, {1,2,3,4,5,7,9,10, 10, 11});
+    NDArray exp32 = NDArrayFactory::create('c', {5, 2}, {1,2,3,4,5,7,9,10, 10, 11});
+    NDArray exp64 = NDArrayFactory::create('c', {5, 2}, {1,2,3,4,5,7,9,10, 10, 11});
+
+    nd4j::ops::to_float32 op32;
+    nd4j::ops::to_double op64;
+    auto result32 = op32.execute({&x}, {}, {});
+    auto result64 = op64.execute({&x}, {}, {});
+
+    ASSERT_EQ(ND4J_STATUS_OK, result32->status());
+    ASSERT_EQ(ND4J_STATUS_OK, result64->status());
+    auto out1 = result32->at(0);
+//    out1->printIndexedBuffer("OUT_F");
+    auto out2 = result64->at(0);
+//    out2->printIndexedBuffer("OUT_D");
+
+//    output->printIndexedBuffer("Toggled");
+    ASSERT_TRUE(exp32.equalsTo(out1));
+    ASSERT_TRUE(exp64.equalsTo(out2));
+
+    delete result32;
+    delete result64;
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 TEST_F(DeclarableOpsTests7, mirrorPad_test1) {
diff --git a/libnd4j/tests_cpu/layers_tests/ListOperationsTests.cpp b/libnd4j/tests_cpu/layers_tests/ListOperationsTests.cpp
index ac778d971..2890c9012 100644
--- a/libnd4j/tests_cpu/layers_tests/ListOperationsTests.cpp
+++ b/libnd4j/tests_cpu/layers_tests/ListOperationsTests.cpp
@@ -78,6 +78,71 @@ TEST_F(ListOperationsTests, BasicTest_Stack_1) {
     delete tads;
 }
+TEST_F(ListOperationsTests, BasicTest_UnStackList_1) {
+    NDArrayList list(0, true);
+    auto x = NDArrayFactory::create('c', {10, 100});
+    auto tads = x.allTensorsAlongDimension({1});
+    for (int e = 0; e < 10; e++) {
+        auto row = NDArrayFactory::create_('c', {100});
+        row->assign((double) e);
+        //list.write(e, row);
+        tads->at(e)->assign(row);
+        delete row;
+    }
+
+    nd4j::ops::unstack_list op;
+
+    auto result = op.execute(&list, {&x}, {}, {0});
+
+    ASSERT_EQ(ND4J_STATUS_OK, result->status());
+    ASSERT_EQ(list.elements(), 10);
+
+//    auto z = result->at(0);
+//    z->printShapeInfo("The first of");
+//    ASSERT_TRUE(exp.isSameShape(z));
+//    ASSERT_TRUE(exp.equalsTo(z));
+    for (int e = 0; e < 10; e++) {
+        auto row = list.read(e);
+        ASSERT_TRUE(row->equalsTo(tads->at(e)));
+        //list.write(e, row);
+    }
+
+    delete result;
+    delete tads;
+}
+
+//TEST_F(ListOperationsTests, BasicTest_UnStackList_2) {
+////    NDArrayList list(0, true);
+//    auto x = NDArrayFactory::create('c', {10, 100});
+//    auto tads = x.allTensorsAlongDimension({1});
+//    for (int e = 0; e < 10; e++) {
+//        auto row = NDArrayFactory::create_('c', {100});
+//        row->assign((double) e);
+//        //list.write(e, row);
+//        tads->at(e)->assign(row);
+//        delete row;
+//    }
+//
+//    nd4j::ops::unstack_list op;
+//
+//    auto result = op.execute(nullptr, {&x}, {}, {0});
+//
+//    ASSERT_EQ(ND4J_STATUS_OK, result->status());
+//    ASSERT_EQ(result->size(), 10);
+//
+//    // auto z = result->at(0);
+////    z->printShapeInfo("The first of");
+////    ASSERT_TRUE(exp.isSameShape(z));
+////    ASSERT_TRUE(exp.equalsTo(z));
+//    for (int e = 0; e < 10; e++) {
+//        auto row = result->at(e);
+//        ASSERT_TRUE(row->equalsTo(tads->at(e)));
+//        //list.write(e, row);
+//    }
+//
+//    delete result;
+//    delete tads;
+//}
 TEST_F(ListOperationsTests, BasicTest_Read_1) {
     NDArrayList list(10);