/*******************************************************************************
 * Copyright (c) 2015-2018 Skymind, Inc.
 * Copyright (c) 2019 Konduit K.K.
 *
 * This program and the accompanying materials are made available under the
 * terms of the Apache License, Version 2.0 which is available at
 * https://www.apache.org/licenses/LICENSE-2.0.
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 *
 * SPDX-License-Identifier: Apache-2.0
 ******************************************************************************/

//
// Created by raver119 on 20.11.17.
//

// NOTE(review): in this copy of the file every piece of text that would sit
// between angle brackets appears to be missing: the header names after the
// `#include` directives below, and template argument lists (e.g. on
// `std::vector`, `NDArrayFactory::create`, `std::chrono::duration_cast`,
// `template` in front of fill_random). Restore them from version control
// before attempting to build; nothing below has been guessed back in.

#include "testlayers.h"
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

using namespace sd;
using namespace sd::graph;

// Test fixture shared by every TEST_F in this file.
// The two members are plain defaults; none of the tests visible in this part
// of the file reads them.
class PlaygroundTests : public testing::Test {
public:
    int numIterations = 3;
    int poolSize = 10;

    PlaygroundTests() {
    }
};

// Prints the values returned by ::optimalLevel() and ::binaryLevel().
// There are no assertions; the test only produces log output.
TEST_F(PlaygroundTests, test_avx) {
    nd4j_printf("Optimal level: %i; Binary level: %i;\n", ::optimalLevel(), ::binaryLevel());
}

// Micro-benchmark of the biasadd op: executes it 100 times, records the
// elapsed time of each call and prints the middle element of the sorted
// timings (labelled "us" in the message). No correctness assertions.
TEST_F(PlaygroundTests, test_biasAdd_1) {
    auto x = NDArrayFactory::create('c', {512, 3072});
    auto y = NDArrayFactory::create('c', {3072});

    std::vector values;

    sd::ops::biasadd op;

    for (int e = 0; e < 100; e++) {
        auto timeStart = std::chrono::system_clock::now();

        // x is passed both as the first input and as the output array
        op.execute({&x, &y}, {&x});

        auto timeEnd = std::chrono::system_clock::now();
        auto outerTime = std::chrono::duration_cast(timeEnd - timeStart).count();
        values.emplace_back(outerTime);
    }

    // sort so that the element at size()/2 is the median sample
    std::sort(values.begin(), values.end());

    nd4j_printf("Time: %lld us;\n", values[values.size() / 2]);
}

// Profiles one run of a graph imported from a FlatBuffers file.
// The body is compiled only when _RELEASE is defined, and it returns early
// when getFileSize() reports a negative size for the hard-coded model path.
// NOTE(review): all paths point into one developer's home directory, so on
// any other machine this test is expected to exit through the early return.
TEST_F(PlaygroundTests, test_bert_full_1) {
#ifdef _RELEASE

    // this test will run ONLY if this model exists
    if (sd::graph::getFileSize("/home/raver119/Downloads/BertFull/model.fb") < 0)
        return;

    auto graph = GraphExecutioner::importFromFlatBuffers("/home/raver119/Downloads/BertFull/model.fb");

    // t, u, v are stored into the graph's variable space below;
    // z is only referenced by the commented-out validation block
    auto t = NDArrayFactory::fromNpyFile("/home/raver119/Downloads/BertFull/in0_IteratorGetNext.npy");
    auto u = NDArrayFactory::fromNpyFile("/home/raver119/Downloads/BertFull/in1_IteratorGetNext_1.npy");
    auto v = NDArrayFactory::fromNpyFile("/home/raver119/Downloads/BertFull/in2_IteratorGetNext_4.npy");
    auto z = NDArrayFactory::fromNpyFile("/home/raver119/Downloads/BertFull/out_loss-Softmax.npy");

    //graph->printOut();

    graph->tagInplaceNodes();

    // variable ids 658..660 are hard-coded for this particular model file
    graph->getVariableSpace()->putVariable(658,0, t);
    graph->getVariableSpace()->putVariable(659,0, u);
    graph->getVariableSpace()->putVariable(660,0, v);

/*
    // validating graph now
    auto status = GraphExecutioner::execute(graph);
    ASSERT_EQ(Status::OK(), status);
    ASSERT_TRUE(graph->getVariableSpace()->hasVariable(1620));

    auto array = graph->getVariableSpace()->getVariable(1620)->getNDArray();
    ASSERT_EQ(z, *array);
*/

    // profiling is switched on only around the profile() call and switched
    // off again right after the report has been printed
    sd::Environment::getInstance().setProfiling(true);
    auto profile = GraphProfilingHelper::profile(graph, 1);

    profile->printOut();

    sd::Environment::getInstance().setProfiling(false);
    delete profile;

/*
    std::vector values;

    for (int e = 0; e < 1; e++) {
        auto timeStart = std::chrono::system_clock::now();

        GraphExecutioner::execute(graph);

        auto timeEnd = std::chrono::system_clock::now();
        auto outerTime = std::chrono::duration_cast(timeEnd - timeStart).count();
        values.emplace_back(outerTime);
    }

    std::sort(values.begin(), values.end());

    nd4j_printf("Time: %lld us;\n", values[values.size() / 2]);
*/
    delete graph;
#endif
}

// Same flow as test_bert_full_1, but for the "Bert_minimal_model" files:
// import the graph, store three loaded arrays as variables 85..87, profile
// one run and print the report. Compiled only when _RELEASE is defined.
TEST_F(PlaygroundTests, test_bert_1) {
#ifdef _RELEASE

    // this test will run ONLY if this model exists
    if (sd::graph::getFileSize("/home/raver119/Downloads/Bert_minimal_model/bert_minimal_model.fb") < 0)
        return;

    auto graph = GraphExecutioner::importFromFlatBuffers("/home/raver119/Downloads/Bert_minimal_model/bert_minimal_model.fb");

    // z is only referenced by the commented-out validation block
    auto t = NDArrayFactory::fromNpyFile("/home/raver119/Downloads/Bert_minimal_model/bert_minimal_input_IteratorGetNext.numpy");
    auto u = NDArrayFactory::fromNpyFile("/home/raver119/Downloads/Bert_minimal_model/bert_minimal_input_IteratorGetNext_1.numpy");
    auto v = NDArrayFactory::fromNpyFile("/home/raver119/Downloads/Bert_minimal_model/bert_minimal_input_IteratorGetNext_4.numpy");
    auto z = NDArrayFactory::fromNpyFile("/home/raver119/Downloads/Bert_minimal_model/bert_minimal_model_output.numpy");

    //graph->printOut();

    graph->tagInplaceNodes();

    graph->getVariableSpace()->putVariable(85,0, t);
    graph->getVariableSpace()->putVariable(86,0, u);
    graph->getVariableSpace()->putVariable(87,0, v);

/*
    // validating graph now
    auto status = GraphExecutioner::execute(graph);
    ASSERT_EQ(Status::OK(), status);
    ASSERT_TRUE(graph->getVariableSpace()->hasVariable(198));

    auto array = graph->getVariableSpace()->getVariable(198)->getNDArray();
    ASSERT_EQ(z, *array);
*/

    sd::Environment::getInstance().setProfiling(true);
    auto profile = GraphProfilingHelper::profile(graph, 1);

    profile->printOut();

    sd::Environment::getInstance().setProfiling(false);
    delete profile;

/*
    std::vector values;

    for (int e = 0; e < 1; e++) {
        auto timeStart = std::chrono::system_clock::now();

        GraphExecutioner::execute(graph);

        auto timeEnd = std::chrono::system_clock::now();
        auto outerTime = std::chrono::duration_cast(timeEnd - timeStart).count();
        values.emplace_back(outerTime);
    }

    std::sort(values.begin(), values.end());

    nd4j_printf("Time: %lld us;\n", values[values.size() / 2]);
*/
    delete graph;
#endif
}

// Profiles one run of the "bert_like_ops.fb" graph. Unlike the two tests
// above, no input arrays are loaded or stored as variables here.
// Compiled only when _RELEASE is defined.
TEST_F(PlaygroundTests, test_bert_2) {
#ifdef _RELEASE

    // this test will run ONLY if this model exists
    if (sd::graph::getFileSize("/home/raver119/Downloads/Bert_minimal_model/bert_like_ops.fb") < 0)
        return;

    auto graph = GraphExecutioner::importFromFlatBuffers("/home/raver119/Downloads/Bert_minimal_model/bert_like_ops.fb");

    //graph->printOut();

    graph->tagInplaceNodes();

/*
    // validating graph now
    auto status = GraphExecutioner::execute(graph);
    ASSERT_EQ(Status::OK(), status);
    ASSERT_TRUE(graph->getVariableSpace()->hasVariable(198));

    auto array = graph->getVariableSpace()->getVariable(198)->getNDArray();
    ASSERT_EQ(z, *array);
*/

    sd::Environment::getInstance().setProfiling(true);
    auto profile = GraphProfilingHelper::profile(graph, 1);

    profile->printOut();

    sd::Environment::getInstance().setProfiling(false);
    delete profile;

/*
    std::vector values;

    for (int e = 0; e < 1; e++) {
        auto timeStart = std::chrono::system_clock::now();

        GraphExecutioner::execute(graph);

        auto timeEnd = std::chrono::system_clock::now();
        auto outerTime = std::chrono::duration_cast(timeEnd - timeStart).count();
        values.emplace_back(outerTime);
    }

    std::sort(values.begin(), values.end());

    nd4j_printf("Time: %lld us;\n", values[values.size() / 2]);
*/
    delete graph;
#endif
}

// Executes squaredsubtract once on a {4, 128, 768} array and a {4, 128, 1}
// array, writing into z (created with x.ulike()). The result is not checked.
TEST_F(PlaygroundTests, test_one_off_ops_1) {
    auto x = NDArrayFactory::create('c', {4, 128, 768});
    auto y = NDArrayFactory::create('c', {4, 128, 1});
    auto z = x.ulike();

    sd::ops::squaredsubtract op;
    op.execute({&x, &y}, {&z});
}

// Everything from here down to the matching #endif is compiled only when
// INDEX_REDUCTIONS_BENCH_TESTS is defined.
#if defined(INDEX_REDUCTIONS_BENCH_TESTS)

// temporarily, testing against the original one
//
// Reference arg-max: passes `axis` through adjustAxis() together with the
// input rank, then applies the IndexMax index-reduce of `input` along `axis`
// into `output`. `axis` is taken by non-const reference and handed to
// adjustAxis(), so callers should expect it may be modified.
void original_argmax(const NDArray& input, std::vector& axis, NDArray& output) {
    sd::ops::helpers::adjustAxis(input.rankOf(), axis);
    input.applyIndexReduce(sd::indexreduce::IndexMax, output, axis);
}

// Fills every element of `arr` with a value drawn from a uniform real
// distribution constructed with the bounds -10.0 and 22.9 (cast to T).
// The generator is seeded from std::random_device, so the contents differ
// from run to run.
template
void fill_random(sd::NDArray& arr) {
    Nd4jLong coords[MAX_RANK] = {};
    std::random_device rd;
    std::mt19937 gen(rd());
    //for floats
    std::uniform_real_distribution dis((T)-10.0, (T)22.9);
    T* x = arr.bufferAsT();
    Nd4jLong* shapeInfo = arr.getShapeInfo();
    Nd4jLong* strides = arr.stridesOf();
    // element 0 of shapeInfo is read as the rank, the entries that follow as
    // the per-dimension sizes
    Nd4jLong rank = shapeInfo[0];
    Nd4jLong* bases = &(shapeInfo[1]);
    // t = product of all dimension sizes = number of elements to write
    size_t t = 1;
    for (size_t i = 0; i < rank ; i++) {
        t *= bases[i];
    }
    size_t offset = 0;
    // NOTE(review): the two branches below are textually identical in this
    // copy of the file. Possibly a template argument on inc_coords that told
    // them apart has been lost — confirm against version control.
    if (arr.ordering() == 'c') {

        for (size_t i = 0; i < t; i++) {
            x[offset] = dis(gen) ;
            // advance the coordinates and take the next buffer offset
            offset = sd::inc_coords(bases, strides, coords, offset, rank);
        }

    }
    else {

        for (size_t i = 0; i < t; i++) {
            x[offset] = dis(gen) ;
            offset = sd::inc_coords(bases, strides, coords, offset, rank);
        }

    }
}

// Benchmarks original_argmax() on a 5-dimensional 'c'-ordered array.
// With COMBINATIONS enabled it walks every non-empty subset of the N axes
// (largest subsets first), reduces along that subset `Loop` times and prints
// the median timing for each subset.
//   random == false -> x is filled with linspace(1)
//   random == true  -> x is filled by fill_random()
void testLegacy(bool random) {
#if 0
    int bases[] = { 3, 2, 4, 5, 7 };
    constexpr int Loop = 1;
#else
    int bases[] = { 8, 32, 64, 32, 64 };
    constexpr int Loop = 10;
#endif
    constexpr int N = 5;

    auto x = NDArrayFactory::create('c', { bases[0], bases[1], bases[2], bases[3], bases[4] });
    if (!random) {
        x.linspace(1);
    }
    else{
        fill_random(x);
    }

#define COMBINATIONS 1
#if COMBINATIONS
    //https://www.rosettacode.org/wiki/Combinations#C.2B.2B
    for (int k = N; k >= 1; k--) {

        std::string bitmask(k, 1); // K leading 1's
        bitmask.resize(N, 0); // N-K trailing 0's

        // each pass of the do/while handles one k-element subset of axes;
        // prev_permutation at the bottom moves on to the next subset
        do {

            // axes whose bitmask entry is set are reduced over ...
            std::vector dimension;

            // ... and the sizes of the remaining axes form the output shape
            std::vector output_bases;

            for (int i = 0; i < N; ++i) // [0..N-1] integers
            {
                if (bitmask[i]) dimension.push_back(i);
                else {
                    output_bases.push_back(bases[i]);
                }
            }
#else
    // NOTE(review): this branch declares no output_bases although the code
    // after the #endif uses it, so COMBINATIONS == 0 does not look buildable
    // as written — confirm.
    std::vector dimension = { 0,1,2,3 };
    int k = 4;
#endif
            auto dim = NDArrayFactory::create(dimension);

#if 1
            // log which axes (and their sizes) are being reduced
            nd4j_printf("C(N:%d K:%d) \n", N, k);
            dim.printIndexedBuffer("Dimension");
            for (int xind : dimension) {
                nd4j_printf(" %d ,", bases[xind]);
            }
            nd4j_printf("%s", "\n");
#endif

            std::vector values;
            // NOTE(review): `result` is never used in this function
            sd::ResultSet result;
            for (int e = 0; e < Loop; e++) {
                auto timeStart = std::chrono::system_clock::now();
                // the output array is created inside the timed region, so its
                // construction is part of every sample; an empty output_bases
                // (all axes reduced) selects the create(0) form instead
                NDArray exp = output_bases.size() > 0 ? NDArrayFactory::create('c', output_bases) : NDArrayFactory::create(0);
                original_argmax(x, dimension, exp);
                auto timeEnd = std::chrono::system_clock::now();
                auto outerTime = std::chrono::duration_cast(timeEnd - timeStart).count();
                values.emplace_back(outerTime);
            }

            // sort so that the element at size()/2 is the median sample
            std::sort(values.begin(), values.end());

            nd4j_printf("Time: %lld us;\n", values[values.size() / 2]);
#if COMBINATIONS

        } while (std::prev_permutation(bitmask.begin(), bitmask.end()));

    }
#endif
}

// Defined unconditionally right here, so every `defined(DEBUG)` check below
// this line is true.
#define DEBUG 1

// Benchmarks the argmax op over every non-empty subset of axes and,
// optionally, compares each result with original_argmax().
//   random           - false: fill x with linspace(1); true: fill_random()
//   checkCorrectness - when true, assert that the op output has the same
//                      shape and contents as the reference result
//   order            - ordering character passed to NDArrayFactory::create
// Uses ASSERT_* macros, so a failed check returns from this function only.
void testNewReduction(bool random, bool checkCorrectness = false , char order ='c') {
    std::vector arr_dimensions;
#if defined(DEBUG)
    // DEBUG configuration: 10 small dimensions, one timing sample per subset
    int bases[] = { 3, 2, 3, 3, 5 ,4,7,4,7,7 };
    constexpr int Loop = 1;
    constexpr int N = 10;
#else
    int bases[] = { 8, 32, 64, 32, 64 };
    constexpr int Loop = 10;
    constexpr int N = 5;
#endif

    for (int i = 0; i < N; i++) {
        arr_dimensions.push_back(bases[i]);
    }
    auto x = NDArrayFactory::create(order,arr_dimensions);
    if (!random) {
        x.linspace(1);
    }
    else {
        fill_random(x);
    }

#define COMBINATIONS 1
#if COMBINATIONS
    //https://www.rosettacode.org/wiki/Combinations#C.2B.2B
    for (int k = N; k >= 1; k--) {

        std::string bitmask(k, 1); // K leading 1's
        bitmask.resize(N, 0); // N-K trailing 0's

        // each pass of the do/while handles one k-element subset of axes
        do {

            // axes to reduce over
            std::vector dimension;

            // sizes of the axes that are kept (shape of the expected output)
            std::vector output_bases;

            for (int i = 0; i < N; ++i) // [0..N-1] integers
            {
                if (bitmask[i]) dimension.push_back(i);
                else {
                    output_bases.push_back(bases[i]);
                }
            }
#else
    // NOTE(review): as in testLegacy, output_bases is not declared on this
    // branch although later code uses it — confirm.
    std::vector dimension = { 0,1,2,3 };
    int k = 4;
#endif
            auto dim = NDArrayFactory::create(dimension);

#if 1
            nd4j_printf("C(N:%d K:%d) \n", N, k);
            dim.printIndexedBuffer("Dimension");
            for (int xind : dimension) {
                nd4j_printf(" %d ,", bases[xind]);
            }
            nd4j_printf("%s", "\n");
#endif

            sd::ops::argmax op;
            std::vector values;
            sd::ResultSet result;
            for (int e = 0; e < Loop; e++) {
                auto timeStart = std::chrono::system_clock::now();
                // the axes are passed to the op as a second input array
                result = op.evaluate({ &x, &dim }, {}, {});
                auto timeEnd = std::chrono::system_clock::now();
                auto outerTime = std::chrono::duration_cast(timeEnd - timeStart).count();
                values.emplace_back(outerTime);
            }
            // output of the last timed evaluation
            auto z = result.at(0);

            if (checkCorrectness) {
                //check for the correctness
                NDArray exp = output_bases.size() > 0 ? NDArrayFactory::create('c', output_bases) : NDArrayFactory::create(0);
                original_argmax(x, dimension, exp);

#if 0// defined(DEBUG)
                x.printIndexedBuffer("X");
                exp.printIndexedBuffer("Expected");
                z->printIndexedBuffer("Z");
#endif

                ASSERT_TRUE(exp.isSameShape(z));
                ASSERT_TRUE(exp.equalsTo(z));
            }

            // sort so that the element at size()/2 is the median sample
            std::sort(values.begin(), values.end());

            nd4j_printf("Time: %lld us;\n", values[values.size() / 2]);
#if COMBINATIONS

        } while (std::prev_permutation(bitmask.begin(), bitmask.end()));

    }
#endif
}

// Passed as checkCorrectness by the ArgMaxPerf* tests that call
// testNewReduction().
constexpr bool test_corr = true;

// The tests wrapped in `#if !defined(DEBUG)` are compiled out while the
// `#define DEBUG 1` above is present.
#if !defined(DEBUG)
TEST_F(PlaygroundTests, ArgMaxPerfLinspace) {
    testNewReduction(false, test_corr);
}
#endif

// argmax on randomly filled data, default 'c' ordering
TEST_F(PlaygroundTests, ArgMaxPerfRandom) {
    testNewReduction(true, test_corr);
}

// argmax on randomly filled data, 'f' ordering
TEST_F(PlaygroundTests, ArgMaxPerfRandomOrderF) {
    testNewReduction(true, test_corr, 'f');
}

#if !defined(DEBUG)
TEST_F(PlaygroundTests, ArgMaxPerfLegacyLinspace) {
    testLegacy(false);
}

TEST_F(PlaygroundTests, ArgMaxPerfLegacyRandom) {
    testLegacy(true);
}

#endif

#endif

/*
TEST_F(PlaygroundTests, test_broadcast_1) {
    int pool = 1000;
    std::vector aX(pool);
    std::vector aY(pool);
    std::vector aZ(pool);

    for (int e = 0; e < pool; e++) {
        aX[e] = NDArrayFactory::create_('c', {512, 3072});
        aY[e] = NDArrayFactory::create_('c', {3072});
        aZ[e] = NDArrayFactory::create_('c', {512, 3072});

        aX[e]->assign(119 * (e+1));
        aY[e]->assign(119 * (e+3));
    }

    std::vector values;
    Context ctx(1);

    sd::ops::biasadd op;

    for (int e = 0; e < 1000; e++) {
        auto x = aX[e < pool ? e : e % pool];
        auto y = aY[e < pool ? e : e % pool];
        auto z = aZ[e < pool ?
e : e % pool]; auto timeStart = std::chrono::system_clock::now(); //op.execute({x, y}, {z}); sd::ops::helpers::addBias(ctx, *x, *y, *z, false); auto timeEnd = std::chrono::system_clock::now(); auto outerTime = std::chrono::duration_cast(timeEnd - timeStart).count(); values.emplace_back(outerTime); } std::sort(values.begin(), values.end()); nd4j_printf("Time: %lld us;\n", values[values.size() / 2]); for (int e = 0; e < pool; e++) { delete aX[e]; delete aY[e]; delete aZ[e]; } } /* TEST_F(PlaygroundTests, test_broadcast_1) { int pool = 500; std::vector aX(pool); std::vector aY(pool); std::vector aZ(pool); for (int e = 0; e < pool; e++) { aX[e] = NDArrayFactory::create_('c', {512, 3072}); aY[e] = NDArrayFactory::create_('c', {768}); aZ[e] = NDArrayFactory::create_('c', {512, 3072}); aX[e]->assign( (e+1) / 119); aY[e]->assign( (e+3) / 119); } std::vector values; for (int e = 0; e < 1000; e++) { auto x = aX[e < pool ? e : e % pool]; auto y = aY[e < pool ? e : e % pool]; auto z = aZ[e < pool ? 
e : e % pool]; auto timeStart = std::chrono::system_clock::now(); //x->applyTrueBroadcast(BroadcastOpsTuple::Multiply(), *y, *z); x->applyTransform(transform::Tanh, *z, nullptr); auto timeEnd = std::chrono::system_clock::now(); auto outerTime = std::chrono::duration_cast(timeEnd - timeStart).count(); values.emplace_back(outerTime); } std::sort(values.begin(), values.end()); nd4j_printf("Time: %lld us;\n", values[values.size() / 2]); for (int e = 0; e < pool; e++) { delete aX[e]; delete aY[e]; delete aZ[e]; } } */ /* TEST_F(PlaygroundTests, test_s_0) { std::vector> shapes = {{32, 224, 224, 3}, {32, 56, 56, 64}, {32, 7, 7, 512}}; std::vector threads = {1, 2, 4, 8, 16}; for (auto shape: shapes) { for (auto t: threads) { sd::Environment::getInstance().setMaxMasterThreads(t); auto x = NDArrayFactory::create('c', shape); auto y = NDArrayFactory::create('c', {shape[3]}); auto z = x.ulike(); std::vector values; Context ctx(1); ctx.setInputArray(0, &x); ctx.setInputArray(1, &y); ctx.setOutputArray(0, &z); sd::ops::biasadd op; for (int e = 0; e < 10000; e++) { auto timeStart = std::chrono::system_clock::now(); op.execute(&ctx); sd::ops::helpers::addBias(ctx, x, y, z, false); auto timeEnd = std::chrono::system_clock::now(); auto outerTime = std::chrono::duration_cast(timeEnd - timeStart).count(); values.emplace_back(outerTime); } std::sort(values.begin(), values.end()); nd4j_printf("Shape: [%lld, %lld, %lld, %lld]; Threads: [%i]; Time: %lld us;\n", shape[0], shape[1], shape[2], shape[3], t, values[values.size() / 2]); } } } TEST_F(PlaygroundTests, test_s_1) { std::vector> shapes = {{32, 3, 224, 224}, {32, 64, 56, 56}, {32, 512, 7, 7}}; std::vector threads = {1, 2, 4, 8, 16}; for (auto shape: shapes) { for (auto t: threads) { sd::Environment::getInstance().setMaxMasterThreads(t); auto x = NDArrayFactory::create('c', shape); auto y = NDArrayFactory::create('c', {shape[1]}); auto z = x.ulike(); std::vector values; Context ctx(1); ctx.setInputArray(0, &x); ctx.setInputArray(1, 
&y); ctx.setOutputArray(0, &z); sd::ops::biasadd op; for (int e = 0; e < 10000; e++) { auto timeStart = std::chrono::system_clock::now(); //op.execute({&x, &y}, {&z}, {true}); sd::ops::helpers::addBias(ctx, x, y, z, true); auto timeEnd = std::chrono::system_clock::now(); auto outerTime = std::chrono::duration_cast(timeEnd - timeStart).count(); values.emplace_back(outerTime); } std::sort(values.begin(), values.end()); nd4j_printf("Shape: [%lld, %lld, %lld, %lld]; Threads: [%i]; Time: %lld us;\n", shape[0], shape[1], shape[2], shape[3], t, values[values.size() / 2]); } } } */ /* TEST_F(PlaygroundTests, test_s_0) { auto x = NDArrayFactory::create('c', {32, 112, 112, 16}); auto y = NDArrayFactory::create('c', {16}); auto z = x.ulike(); std::vector values; Context ctx(1); ctx.setInputArray(0, &x); ctx.setInputArray(1, &y); ctx.setOutputArray(0, &z); sd::ops::biasadd op; for (int e = 0; e < 10000; e++) { auto timeStart = std::chrono::system_clock::now(); op.execute(&ctx); auto timeEnd = std::chrono::system_clock::now(); auto outerTime = std::chrono::duration_cast (timeEnd - timeStart).count(); values.emplace_back(outerTime); } std::sort(values.begin(), values.end()); nd4j_printf("Time: %lld us;\n", values[values.size() / 2]); } */ /* TEST_F(PlaygroundTests, test_s_1) { auto x0 = NDArrayFactory::create('c', {32, 7, 7, 176}); auto x1 = x0.ulike(); auto x2 = x0.ulike(); auto x3 = x0.ulike(); auto x4 = x0.ulike(); auto x5 = x0.ulike(); auto y = NDArrayFactory::create(3); auto z = NDArrayFactory::create('c', {32, 7, 7, 1056}); Context ctx(1); ctx.setInputArray(0, &x0); ctx.setInputArray(1, &x1); ctx.setInputArray(2, &x2); ctx.setInputArray(3, &x3); ctx.setInputArray(4, &x4); ctx.setInputArray(5, &x5); ctx.setInputArray(6, &y); ctx.setOutputArray(0, &z); ctx.setBArguments({true}); std::vector values; sd::ops::concat op; op.execute(&ctx); for (int e = 0; e < 1000; e++) { auto timeStart = std::chrono::system_clock::now(); op.execute(&ctx); auto timeEnd = 
std::chrono::system_clock::now(); auto outerTime = std::chrono::duration_cast (timeEnd - timeStart).count(); values.emplace_back(outerTime); } std::sort(values.begin(), values.end()); nd4j_printf("Time: %lld us;\n", values[values.size() / 2]); } */ /* TEST_F(PlaygroundTests, test_s_1) { auto t = ::runLightBenchmarkSuit(true); delete[] t; } TEST_F(PlaygroundTests, test_s_2) { std::atomic s; s = 0; auto func = PRAGMA_THREADS_FOR { s++; }; samediff::Threads::parallel_for(func, 0, 8192, 1, 4); std::vector values; for (int e = 0; e < 100000; e++) { s = 0; auto timeStart = std::chrono::system_clock::now(); //samediff::Threads::parallel_for(func, 0, 8192, 1, 4); PRAGMA_OMP_PARALLEL_THREADS(4) { s++; } auto timeEnd = std::chrono::system_clock::now(); auto outerTime = std::chrono::duration_cast (timeEnd - timeStart).count(); values.emplace_back(outerTime); }; std::sort(values.begin(), values.end()); nd4j_printf("Time: %lld;\n", values[values.size() / 2]); } */ /* TEST_F(PlaygroundTests, test_s_4) { std::atomic f; std::atomic s; std::vector valuesX, valuesY; int iterations = 1000; s = 0; auto func = PRAGMA_THREADS_FOR { s++; }; samediff::Threads::parallel_for(func, 0, 8192, 1, 4); //////// auto x = NDArrayFactory::create('c', {32, 3, 256, 256}); auto z = NDArrayFactory::create('c', {32, 3, 256, 256}); x.linspace(1.0); auto xs0 = x.sizeAt(0); auto xs1 = x.sizeAt(1); auto xs2 = x.sizeAt(2); auto xs3 = x.sizeAt(3); auto buffer = x.bufferAsT(); auto zbuffer = z.bufferAsT(); for (int e = 0; e < iterations; e++) { auto timeStart = std::chrono::system_clock::now(); PRAGMA_OMP_PARALLEL_FOR_COLLAPSE(2) for (int i = 0; i < xs0; i++) { for (int j = 0; j < xs1; j++) { auto thread_id = omp_get_thread_num(); for (int k = 0; k < xs2; k++) { for (int l = 0; l < xs3; l++) { zbuffer[thread_id] += buffer[i * j + (k*l)] * 2.5f; } } } } auto timeEnd = std::chrono::system_clock::now(); auto outerTime = std::chrono::duration_cast(timeEnd - timeStart).count(); valuesX.emplace_back(outerTime); } for 
(int e = 0; e < iterations; e++) { auto timeStart = std::chrono::system_clock::now(); auto f2d = PRAGMA_THREADS_FOR_2D { for (auto i = start_x; i < stop_x; i++) { for (auto j = start_y; j < stop_y; j++) { for (auto k = 0; k < xs2; k++) { for (auto l = 0; l < xs3; l++) { zbuffer[thread_id] += buffer[i * j + (k * l)] * 2.5f; } } } } }; samediff::Threads::parallel_for(f2d, 0, xs0, 1, 0, xs1, 1); auto timeEnd = std::chrono::system_clock::now(); auto outerTime = std::chrono::duration_cast(timeEnd - timeStart).count(); valuesY.emplace_back(outerTime); } if (valuesX.size() > 0) { std::sort(valuesX.begin(), valuesX.end()); nd4j_printf("OpenMP time: %lld; Min: %lld; Max: %lld;\n", valuesX[valuesX.size() / 2], valuesX[0], valuesX[valuesX.size() - 1]); } if (valuesY.size() > 0) { std::sort(valuesY.begin(), valuesY.end()); nd4j_printf("Threads time: %lld; Min: %lld; Max: %lld;\n", valuesY[valuesY.size() / 2], valuesY[0], valuesY[valuesY.size() - 1]); } nd4j_printf("Sum: %f\n", z.sumNumber().e(0)); } TEST_F(PlaygroundTests, test_s_5) { auto x = NDArrayFactory::create('c', {32, 1, 28, 28}); std::vector values; auto iterations = 100; auto startX = 0; auto stopX = x.sizeAt(0); auto incX = 1; auto startY = 0; auto stopY = x.sizeAt(1); auto incY = 1; auto numThreads = 4; // number of elements per loop auto delta_x = (stopX - startX); auto delta_y = (stopY - startY); // number of iterations per loop auto itersX = delta_x / incX; auto itersY = delta_y / incY; for (int e = 0; e < iterations; e++) { auto timeStart = std::chrono::system_clock::now(); // picking best fit here auto splitLoop = samediff::ThreadsHelper::pickLoop2d(numThreads, itersX, itersY); auto span = samediff::Span2::build(splitLoop, 0, numThreads, startX, stopX, incX, startY, stopY, incY); auto timeEnd = std::chrono::system_clock::now(); auto outerTime = std::chrono::duration_cast(timeEnd - timeStart).count(); values.emplace_back(outerTime); } std::sort(values.begin(), values.end()); nd4j_printf("Calculations time: 
[Median: %lld; Min: %lld; Max: %lld;]\n", values[values.size() / 2], values[0], values[values.size()-1]); } TEST_F(PlaygroundTests, test_s_6) { auto x = NDArrayFactory::create('c', {1024 * 1024 * 64}); auto buffer = x.bufferAsT(); auto len = x.lengthOf(); std::vector values; auto iterations = 1000; for (int i = 0; i < iterations; i++) { auto timeStart = std::chrono::system_clock::now(); // picking best fit here for (int e = 0; e < len; e++) { buffer[e] = (buffer[e] + 1.72f) * 3.17f - 0.0012f; } auto timeEnd = std::chrono::system_clock::now(); auto outerTime = std::chrono::duration_cast(timeEnd - timeStart).count(); values.emplace_back(outerTime); } std::sort(values.begin(), values.end()); nd4j_printf("Calculations time: [Median: %lld; Min: %lld; Max: %lld;]\n", values[values.size() / 2], values[0], values[values.size()-1]); } TEST_F(PlaygroundTests, test_s_3) { std::atomic s; s = 0; auto func = PRAGMA_THREADS_FOR { s++; }; for (int e = 0; e < 10000; e++) { samediff::Threads::parallel_for(func, 0, 8192, 1, 4); } } */ /* TEST_F(PlaygroundTests, test_relubp_1) { auto x = NDArrayFactory::create('c', {128, 64, 224, 224}); auto y = x.ulike(); auto z = x.ulike(); RandomGenerator rng(119, 120); RandomLauncher::fillUniform(LaunchContext::defaultContext(), rng, &x, -1.0, 1.0); RandomLauncher::fillUniform(LaunchContext::defaultContext(), rng, &y, -1.0, 1.0); int iterations = 10; auto timeStart = std::chrono::system_clock::now(); for (int e = 0; e < iterations; e++) ops::helpers::reluDerivative(LaunchContext::defaultContext(), &x, &y, &z); auto timeEnd = std::chrono::system_clock::now(); auto outerTime = std::chrono::duration_cast (timeEnd - timeStart).count(); auto time = (Nd4jLong) outerTime / iterations; auto bw = (1000000L * (float) (x.lengthOf() * x.sizeOfT()) / time) / 1024 / 1024 / 1024; nd4j_printf("Time: %lld; BW: %f GB/s\n", time, bw); } ////////////////////////////////////////////////////////////////////// TEST_F(PlaygroundTests, my) { int bS=8, iD=32,iH=32,iW=32, 
iC=128, kD=2,kH=2,kW=2, sD=1,sH=1,sW=1, pD=0,pH=0,pW=0, dD=2,dH=2,dW=2; int oD,oH,oW; sd::ops::ConvolutionUtils::calcOutSizeDeconv3D(oD, oH, oW, kD, kH, kW, sD, sH, sW, pD, pH, pW, dD, dH, dW, iD, iH, iW, 0); printf("!!%i, %i, %i\n", oD,oH,oW); NDArray col('c', {bS, iC, kD, kH, kW, iD, iH, iW}, sd::DataType::DOUBLE); NDArray vol('c', {bS, iC, oD, oH, oW}, sd::DataType::DOUBLE); col = 3.77; vol = -10.33; auto variableSpace = new VariableSpace(); auto block = new Context(1, variableSpace, false); // not-in-place auto timeStart = std::chrono::system_clock::now(); sd::ops::ConvolutionUtils::col2vol(*block, col, vol, sD, sH, sW, pD, pH, pW, dD, dH, dW); auto timeEnd = std::chrono::system_clock::now(); auto time = std::chrono::duration_cast (timeEnd - timeStart).count(); printf("time: %i \n", time); delete block; delete variableSpace; } TEST_F(PlaygroundTests, my) { int bS=32, iD=32,iH=64,iW=64, iC=128, kD=2,kH=2,kW=2, sD=1,sH=1,sW=1, pD=0,pH=0,pW=0, dD=2,dH=2,dW=2; int oD,oH,oW; // sd::ops::ConvolutionUtils::calcOutSizeDeconv3D(oD, oH, oW, kD, kH, kW, sD, sH, sW, pD, pH, pW, dD, dH, dW, iD, iH, iW, 0); sd::ops::ConvolutionUtils::calcOutSizeDeconv2D(oH, oW, kH, kW, sH, sW, pH, pW,dH, dW, iH, iW, 0); printf("!!%i, %i, %i\n", oD,oH,oW); // NDArray col('c', {bS, iC, kD, kH, kW, iD, iH, iW}, sd::DataType::DOUBLE); // NDArray vol('c', {bS, iC, oD, oH, oW}, sd::DataType::DOUBLE); NDArray col('c', {bS, iC, kH, kW, iH, iW}, sd::DataType::DOUBLE); NDArray im('c', {bS, iC, oH, oW}, sd::DataType::DOUBLE); col = 3.77; // vol = -10.33; im = -10.33; auto variableSpace = new VariableSpace(); auto block = new Context(1, variableSpace, false); // not-in-place auto timeStart = std::chrono::system_clock::now(); // sd::ops::ConvolutionUtils::col2vol(*block, col, vol, sD, sH, sW, pD, pH, pW, dD, dH, dW); sd::ops::helpers::col2im(*col.getContext(), col, im, sH, sW, pH, pW, iH, iW, dH, dW); auto timeEnd = std::chrono::system_clock::now(); auto time = std::chrono::duration_cast (timeEnd - 
timeStart).count(); printf("time: %i \n", time); delete block; delete variableSpace; } TEST_F(PlaygroundTests, my) { int N = 100; int bS=16, iH=128,iW=128, iC=32,oC=64, kH=4,kW=4, sH=1,sW=1, pH=0,pW=0, dH=1,dW=1; int oH=128,oW=128; int paddingMode = 1; // 1-SAME, 0-VALID; int dataFormat = 1; // 1-NHWC, 0-NCHW // NDArray input('c', {bS, iC, iH, iW}, sd::DataType::FLOAT32); // NDArray output('c', {bS, oC, oH, oW}, sd::DataType::FLOAT32); NDArray input('c', {bS, iH, iW, iC}, sd::DataType::FLOAT32); NDArray output('c', {bS, oH, oW, oC}, sd::DataType::FLOAT32); // NDArray weights('c', {kH, kW, iC, oC}, sd::DataType::FLOAT32); // permute [kH, kW, iC, oC] -> [oC, iC, kH, kW] NDArray weights('c', {oC, iC, kH, kW}, sd::DataType::FLOAT32); NDArray bias('c', {oC}, sd::DataType::FLOAT32); input = 5.; weights = 3.; bias = 1.; sd::ops::conv2d op; auto err = op.execute({&input, &weights, &bias}, {&output}, {kH,kW, sH,sW, pH,pW, dH,dW, paddingMode, dataFormat}); auto timeStart = std::chrono::system_clock::now(); for (int i = 0; i < N; ++i) err = op.execute({&input, &weights, &bias}, {&output}, {kH,kW, sH,sW, pH,pW, dH,dW, paddingMode, dataFormat}); auto timeEnd = std::chrono::system_clock::now(); auto time = std::chrono::duration_cast ((timeEnd - timeStart) / N).count(); printf("time: %i \n", time); } /////////////////////////////////////////////////////////////////// TEST_F(PlaygroundTests, lstmLayerCellBp_1) { const int bS = 2; const int nIn = 4; const int nOut = 3; // const int nIn = 8; // const int nOut = 6; const float cellClip = 1.1; // clipping value const Nd4jLong gateAct = 2; // sigmoid activation for input (i), forget (f) and output (o) gates const float gateAlpha = 0; // alpha value for activation for gates, not required for sigmoid const float gateBeta = 0; // beta value for activation for gates, not required for sigmoid const Nd4jLong cellAct = 0; // tanh activation for cell state const float cellAlpha = 0; // alpha value for cell state activation, not required for 
tanh const float cellBeta = 0; // beta value for cell state activation, not required for tanh const Nd4jLong outAct = 0; // tanh activation for output const float outAlpha = 0; // alpha value for output activation, not required for tanh const float outBeta = 0; // beta value for output activation, not required for tanh NDArray x ('c', {bS, nIn}, sd::DataType::DOUBLE); NDArray hI('c', {bS, nOut}, sd::DataType::DOUBLE); NDArray cI('c', {bS, nOut}, sd::DataType::DOUBLE); NDArray dLdh('c', {bS, nOut}, sd::DataType::DOUBLE); NDArray dLdc('c', {bS, nOut}, sd::DataType::DOUBLE); // NDArray x ('c', {nIn}, sd::DataType::DOUBLE); // NDArray hI('c', {nOut}, sd::DataType::DOUBLE); // NDArray cI('c', {nOut}, sd::DataType::DOUBLE); // NDArray dLdh('c', {nOut}, sd::DataType::DOUBLE); // NDArray dLdc('c', {nOut}, sd::DataType::DOUBLE); NDArray Wx('c', {nIn, 4*nOut}, sd::DataType::DOUBLE); NDArray Wr('c', {nOut, 4*nOut}, sd::DataType::DOUBLE); NDArray b ('c', {4*nOut}, sd::DataType::DOUBLE); NDArray Wp('c', {3*nOut}, sd::DataType::DOUBLE); x.linspace(-4,1); hI.linspace(-2.5,0.5); cI.linspace(-3,0.5); Wx.linspace(0,0.1); Wr.linspace(3,-0.1); Wp.linspace(0.2,0.2); b.linspace(1,-0.15); // x.assign(1.); // hI.assign(2.); // cI.assign(3.); // Wx.assign(0.5); // Wr.assign(0.5); // Wp.assign(0.75); // b.assign(0.7); std::vector tArgs = {cellClip}; std::vector iArgs = {gateAct, cellAct, outAct}; // std::vector bArgs = {false, false}; // const OpArgsHolder argsHolderFF({&x, &Wx, &Wr, &hI, &cI}, tArgs, iArgs, bArgs); // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &hI, &cI, &dLdh}, tArgs, iArgs, bArgs); std::vector bArgs = {true, true}; const OpArgsHolder argsHolderFF({&x, &Wx, &Wr, &b, &hI, &cI, &Wp}, tArgs, iArgs, bArgs); const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &hI, &cI, &Wp, &dLdh}, tArgs, iArgs, bArgs); sd::ops::lstmLayerCell opFF; sd::ops::lstmLayerCellBp opBP; const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP, {true, true, true, true, 
true, true, true}); } /////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests13, lstmLayer_bp_1) { const int sL = 3; const int bS = 2; const int nIn = 2; const int nOut = 3; const int dataFormat = 0; // [sL,bS,nIn] const int directionMode = 0; // forward const int gateAct = 2; // sigmoid activation for input (i), forget (f) and output (o) gates const int cellAct = 0; // tanh activation for cell state const int outAct = 0; // tanh activation for output const bool hasBiases = true; // biases array is provided const bool hasSeqLen = false; // seqLen array is not provided const auto hasInitH = true; // initial output is provided const auto hasInitC = true; // initial cell state is provided const auto hasPH = true; // peephole connections are absent const auto retFullSeq = true; // dLdh per each time step const auto retLastH = true; // output at last time step const auto retLastC = true; // cells state at last time step const double cellClip = 0.5; // clipping NDArray x('c', {sL, bS, nIn}, sd::DataType::DOUBLE); NDArray Wx('c', {nIn, 4*nOut}, sd::DataType::DOUBLE); NDArray Wr('c', {nOut, 4*nOut}, sd::DataType::DOUBLE); NDArray b('c', {4*nOut}, sd::DataType::DOUBLE); NDArray hI('c', {bS, nOut}, sd::DataType::DOUBLE); NDArray cI('c', {bS, nOut}, sd::DataType::DOUBLE); NDArray Wp('c', {3*nOut}, sd::DataType::DOUBLE); NDArray dLdh('c', {sL, bS, nOut}, sd::DataType::DOUBLE); NDArray dLdhL('c', {bS, nOut}, sd::DataType::DOUBLE); NDArray dLdcL('c', {bS, nOut}, sd::DataType::DOUBLE); x.linspace(-2,0.1); hI.linspace(-1.5,0.1); cI.linspace(0.7,-0.1); Wx.linspace(1,-0.1); Wr.linspace(-1,0.1); Wp.linspace(0.2,0.2); b.linspace(1,-0.15); std::vector tArgs = {cellClip}; std::vector iArgs = {dataFormat, directionMode, gateAct, cellAct, outAct}; std::vector bArgs = {hasBiases, hasSeqLen, hasInitH, hasInitC, hasPH, retFullSeq, retLastH, retLastC}; const OpArgsHolder argsHolderFF({&x, &Wx, &Wr, &b, &hI, &cI, &Wp}, tArgs, iArgs, bArgs); const 
OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &hI, &cI, &Wp, &dLdh, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); sd::ops::lstmLayer opFF; sd::ops::lstmLayer_bp opBP; const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP); ASSERT_TRUE(isGradCorrect); } /////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests13, lstmLayer_bp_2) { const int sL = 3; const int bS = 2; const int nIn = 2; const int nOut = 3; const int dataFormat = 1; // [bS,sL,nIn] const int directionMode = 0; // forward const int gateAct = 2; // sigmoid activation for input (i), forget (f) and output (o) gates const int cellAct = 0; // tanh activation for cell state const int outAct = 0; // tanh activation for output const bool hasBiases = true; // biases array is provided const bool hasSeqLen = false; // seqLen array is not provided const auto hasInitH = true; // initial output is provided const auto hasInitC = true; // initial cell state is provided const auto hasPH = true; // peephole connections are absent const auto retFullSeq = true; // return whole h {h_0, h_1, ... 
, h_sL-1}, [sL,bS,nOut] const auto retLastH = false; // output at last time step const auto retLastC = true; // cells state at last time step const double cellClip = 0.5; // clipping NDArray x('c', {bS, sL, nIn}, sd::DataType::DOUBLE); NDArray Wx('c', {nIn, 4*nOut}, sd::DataType::DOUBLE); NDArray Wr('c', {nOut, 4*nOut}, sd::DataType::DOUBLE); NDArray b('c', {4*nOut}, sd::DataType::DOUBLE); NDArray hI('c', {bS, nOut}, sd::DataType::DOUBLE); NDArray cI('c', {bS, nOut}, sd::DataType::DOUBLE); NDArray Wp('c', {3*nOut}, sd::DataType::DOUBLE); NDArray dLdh('c', {bS, sL, nOut}, sd::DataType::DOUBLE); NDArray dLdcL('c', {bS, nOut}, sd::DataType::DOUBLE); x.linspace(-2,0.1); hI.linspace(-1.5,0.1); cI.linspace(0.7,-0.1); Wx.linspace(1,-0.1); Wr.linspace(-1,0.1); Wp.linspace(0.2,0.2); b.linspace(1,-0.15); std::vector tArgs = {cellClip}; std::vector iArgs = {dataFormat, directionMode, gateAct, cellAct, outAct}; std::vector bArgs = {hasBiases, hasSeqLen, hasInitH, hasInitC, hasPH, retFullSeq, retLastH, retLastC}; const OpArgsHolder argsHolderFF({&x, &Wx, &Wr, &b, &hI, &cI, &Wp}, tArgs, iArgs, bArgs); const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &hI, &cI, &Wp, &dLdh, &dLdcL}, tArgs, iArgs, bArgs); sd::ops::lstmLayer opFF; sd::ops::lstmLayer_bp opBP; const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP, std::vector(), {0., 1.}, GradCheck::LossFunc::MEAN); ASSERT_TRUE(isGradCorrect); } /////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests13, lstmLayer_bp_3) { const int sL = 4; const int bS = 3; const int nIn = 3; const int nOut = 2; const int dataFormat = 2; // [bS, nIn, sL] const int directionMode = 0; // forward const int gateAct = 2; // sigmoid activation for input (i), forget (f) and output (o) gates const int cellAct = 0; // tanh activation for cell state const int outAct = 0; // tanh activation for output const bool hasBiases = true; // biases array is provided const bool hasSeqLen = true; // 
seqLen array is not provided const auto hasInitH = true; // initial output is provided const auto hasInitC = true; // initial cell state is provided const auto hasPH = true; // peephole connections are absent const auto retFullSeq = true; // dLdh per each time step const auto retLastH = true; // output at last time step const auto retLastC = true; // cells state at last time step const double cellClip = 0.5; // clipping NDArray x('c', {bS, nIn, sL}, sd::DataType::DOUBLE); NDArray Wx('c', {nIn, 4*nOut}, sd::DataType::DOUBLE); NDArray Wr('c', {nOut, 4*nOut}, sd::DataType::DOUBLE); NDArray b('c', {4*nOut}, sd::DataType::DOUBLE); NDArray seqLen('c', {bS}, {2,0,4}, sd::DataType::DOUBLE); NDArray hI('c', {bS, nOut}, sd::DataType::DOUBLE); NDArray cI('c', {bS, nOut}, sd::DataType::DOUBLE); NDArray Wp('c', {3*nOut}, sd::DataType::DOUBLE); NDArray dLdh('c', {bS, nOut, sL}, sd::DataType::DOUBLE); NDArray dLdhL('c', {bS, nOut}, sd::DataType::DOUBLE); NDArray dLdcL('c', {bS, nOut}, sd::DataType::DOUBLE); x.linspace(-2,0.1); hI.linspace(-1.5,0.1); cI.linspace(0.7,-0.1); Wx.linspace(1,-0.1); Wr.linspace(-1,0.1); Wp.linspace(0.2,0.2); b.linspace(1,-0.15); std::vector tArgs = {cellClip}; std::vector iArgs = {dataFormat, directionMode, gateAct, cellAct, outAct}; std::vector bArgs = {hasBiases, hasSeqLen, hasInitH, hasInitC, hasPH, retFullSeq, retLastH, retLastC}; const OpArgsHolder argsHolderFF({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp}, tArgs, iArgs, bArgs); const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); sd::ops::lstmLayer opFF; sd::ops::lstmLayer_bp opBP; const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP, {true, true, true, true, false, true, true, true}); ASSERT_TRUE(isGradCorrect); } /////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests13, lstmLayer_bp_4) { const int sL = 3; const int bS = 2; const int nIn = 2; const int nOut = 
3; const int dataFormat = 1; // [bS,sL,nIn] const int directionMode = 1; // backward const int gateAct = 2; // sigmoid activation for input (i), forget (f) and output (o) gates const int cellAct = 0; // tanh activation for cell state const int outAct = 0; // tanh activation for output const bool hasBiases = true; // biases array is provided const bool hasSeqLen = false; // seqLen array is not provided const auto hasInitH = true; // initial output is provided const auto hasInitC = true; // initial cell state is provided const auto hasPH = true; // peephole connections are absent const auto retFullSeq = true; // dLdh per each time step const auto retLastH = true; // output at last time step const auto retLastC = true; // cells state at last time step const double cellClip = 0.5; // clipping NDArray x('c', {bS, sL, nIn}, sd::DataType::DOUBLE); NDArray Wx('c', {nIn, 4*nOut}, sd::DataType::DOUBLE); NDArray Wr('c', {nOut, 4*nOut}, sd::DataType::DOUBLE); NDArray b('c', {4*nOut}, sd::DataType::DOUBLE); NDArray hI('c', {bS, nOut}, sd::DataType::DOUBLE); NDArray cI('c', {bS, nOut}, sd::DataType::DOUBLE); NDArray Wp('c', {3*nOut}, sd::DataType::DOUBLE); NDArray dLdh('c', {bS, sL, nOut}, sd::DataType::DOUBLE); NDArray dLdhL('c', {bS, nOut}, sd::DataType::DOUBLE); NDArray dLdcL('c', {bS, nOut}, sd::DataType::DOUBLE); x.linspace(-2,0.1); hI.linspace(-1.5,0.1); cI.linspace(0.7,-0.1); Wx.linspace(1,-0.1); Wr.linspace(-1,0.1); Wp.linspace(0.2,0.2); b.linspace(1,-0.15); std::vector tArgs = {cellClip}; std::vector iArgs = {dataFormat, directionMode, gateAct, cellAct, outAct}; std::vector bArgs = {hasBiases, hasSeqLen, hasInitH, hasInitC, hasPH, retFullSeq, retLastH, retLastC}; const OpArgsHolder argsHolderFF({&x, &Wx, &Wr, &b, &hI, &cI, &Wp}, tArgs, iArgs, bArgs); const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &hI, &cI, &Wp, &dLdh, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); sd::ops::lstmLayer opFF; sd::ops::lstmLayer_bp opBP; const bool isGradCorrect = GradCheck::checkGrad(opFF, 
opBP, argsHolderFF, argsHolderBP); ASSERT_TRUE(isGradCorrect); } /////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests13, lstmLayer_bp_5) { const int sL = 3; const int bS = 2; const int nIn = 2; const int nOut = 2; const int dataFormat = 2; // [bS, nIn, sL] const int directionMode = 1; // backward const int gateAct = 2; // sigmoid activation for input (i), forget (f) and output (o) gates const int cellAct = 0; // tanh activation for cell state const int outAct = 0; // tanh activation for output const bool hasBiases = true; // biases array is provided const bool hasSeqLen = true; // seqLen array is not provided const auto hasInitH = true; // initial output is provided const auto hasInitC = true; // initial cell state is provided const auto hasPH = true; // peephole connections are absent const auto retFullSeq = true; // dLdh per each time step const auto retLastH = true; // output at last time step const auto retLastC = true; // cells state at last time step const double cellClip = 0.5; // clipping NDArray x('c', {bS, nIn, sL}, sd::DataType::DOUBLE); NDArray Wx('c', {nIn, 4*nOut}, sd::DataType::DOUBLE); NDArray Wr('c', {nOut, 4*nOut}, sd::DataType::DOUBLE); NDArray b('c', {4*nOut}, sd::DataType::DOUBLE); NDArray seqLen('c', {bS}, {0,2}, sd::DataType::DOUBLE); NDArray hI('c', {bS, nOut}, sd::DataType::DOUBLE); NDArray cI('c', {bS, nOut}, sd::DataType::DOUBLE); NDArray Wp('c', {3*nOut}, sd::DataType::DOUBLE); NDArray dLdh('c', {bS, nOut, sL}, sd::DataType::DOUBLE); NDArray dLdhL('c', {bS, nOut}, sd::DataType::DOUBLE); NDArray dLdcL('c', {bS, nOut}, sd::DataType::DOUBLE); x.linspace(-2,0.1); hI.linspace(-1.5,0.1); cI.linspace(0.7,-0.1); Wx.linspace(1,-0.1); Wr.linspace(-1,0.1); Wp.linspace(0.2,0.2); b.linspace(1,-0.15); std::vector tArgs = {cellClip}; std::vector iArgs = {dataFormat, directionMode, gateAct, cellAct, outAct}; std::vector bArgs = {hasBiases, hasSeqLen, hasInitH, hasInitC, hasPH, retFullSeq, retLastH, retLastC}; 
const OpArgsHolder argsHolderFF({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp}, tArgs, iArgs, bArgs); const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); sd::ops::lstmLayer opFF; sd::ops::lstmLayer_bp opBP; const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP, {true, true, true, true, false, true, true, true}); ASSERT_TRUE(isGradCorrect); } /////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests13, lstmLayer_bp_6) { const int sL = 3; const int bS = 2; const int nIn = 2; const int nOut = 2; const int dataFormat = 2; // [bS, nIn, sL] const int directionMode = 2; // bidirectional sum const int gateAct = 2; // sigmoid activation for input (i), forget (f) and output (o) gates const int cellAct = 0; // tanh activation for cell state const int outAct = 0; // tanh activation for output const bool hasBiases = true; // biases array is provided const bool hasSeqLen = true; // seqLen array is not provided const auto hasInitH = true; // initial output is provided const auto hasInitC = true; // initial cell state is provided const auto hasPH = true; // peephole connections are absent const auto retFullSeq = true; // dLdh per each time step const auto retLastH = true; // output at last time step const auto retLastC = true; // cells state at last time step const double cellClip = 0.5; // clipping NDArray x('c', {bS, nIn, sL}, sd::DataType::DOUBLE); NDArray Wx('c', {2, nIn, 4*nOut}, sd::DataType::DOUBLE); NDArray Wr('c', {2, nOut, 4*nOut}, sd::DataType::DOUBLE); NDArray b('c', {2, 4*nOut}, sd::DataType::DOUBLE); NDArray seqLen('c', {bS}, {0,2}, sd::DataType::DOUBLE); NDArray hI('c', {2, bS, nOut}, sd::DataType::DOUBLE); NDArray cI('c', {2, bS, nOut}, sd::DataType::DOUBLE); NDArray Wp('c', {2, 3*nOut}, sd::DataType::DOUBLE); NDArray dLdh('c', {bS, nOut, sL}, sd::DataType::DOUBLE); NDArray dLdhL('c', {2, bS, nOut}, sd::DataType::DOUBLE); NDArray 
dLdcL('c', {2, bS, nOut}, sd::DataType::DOUBLE); x.linspace(-2,0.1); hI.linspace(-1.5,0.1); cI.linspace(0.7,-0.1); Wx.linspace(1,-0.1); Wr.linspace(-1,0.1); Wp.linspace(0.2,0.2); b.linspace(1,-0.15); std::vector tArgs = {cellClip}; std::vector iArgs = {dataFormat, directionMode, gateAct, cellAct, outAct}; std::vector bArgs = {hasBiases, hasSeqLen, hasInitH, hasInitC, hasPH, retFullSeq, retLastH, retLastC}; const OpArgsHolder argsHolderFF({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp}, tArgs, iArgs, bArgs); const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdhL}, tArgs, iArgs, bArgs); // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdcL}, tArgs, iArgs, bArgs); // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh}, tArgs, iArgs, bArgs); // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdhL}, tArgs, iArgs, bArgs); // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdcL}, tArgs, iArgs, bArgs); sd::ops::lstmLayer opFF; sd::ops::lstmLayer_bp opBP; const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP, {true, true, true, true, false, true, true, true}); ASSERT_TRUE(isGradCorrect); } /////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests13, lstmLayer_bp_7) { const int sL = 3; const int bS = 2; const int nIn = 2; const int nOut = 2; const int dataFormat = 1; // [bS,sL,nIn] const int directionMode = 3; // bidirectional concat const int gateAct = 2; // sigmoid activation for input (i), forget (f) and output (o) gates const int cellAct = 0; // tanh activation for cell state const int outAct = 0; // tanh 
activation for output const bool hasBiases = true; // biases array is provided const bool hasSeqLen = true; // seqLen array is not provided const auto hasInitH = true; // initial output is provided const auto hasInitC = true; // initial cell state is provided const auto hasPH = true; // peephole connections are absent const auto retFullSeq = true; // dLdh per each time step const auto retLastH = true; // output at last time step const auto retLastC = true; // cells state at last time step const double cellClip = 0.5; // clipping NDArray x('c', {bS,sL,nIn}, sd::DataType::DOUBLE); NDArray Wx('c', {2, nIn, 4*nOut}, sd::DataType::DOUBLE); NDArray Wr('c', {2, nOut, 4*nOut}, sd::DataType::DOUBLE); NDArray b('c', {2, 4*nOut}, sd::DataType::DOUBLE); NDArray seqLen('c', {bS}, {0,2}, sd::DataType::DOUBLE); NDArray hI('c', {2, bS, nOut}, sd::DataType::DOUBLE); NDArray cI('c', {2, bS, nOut}, sd::DataType::DOUBLE); NDArray Wp('c', {2, 3*nOut}, sd::DataType::DOUBLE); NDArray dLdh('c', {bS,sL,2*nOut}, sd::DataType::DOUBLE); NDArray dLdhL('c', {2, bS, nOut}, sd::DataType::DOUBLE); NDArray dLdcL('c', {2, bS, nOut}, sd::DataType::DOUBLE); x.linspace(-2,0.1); hI.linspace(-1.5,0.1); cI.linspace(0.7,-0.1); Wx.linspace(1,-0.1); Wr.linspace(-1,0.1); Wp.linspace(0.2,0.2); b.linspace(1,-0.15); std::vector tArgs = {cellClip}; std::vector iArgs = {dataFormat, directionMode, gateAct, cellAct, outAct}; std::vector bArgs = {hasBiases, hasSeqLen, hasInitH, hasInitC, hasPH, retFullSeq, retLastH, retLastC}; const OpArgsHolder argsHolderFF({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp}, tArgs, iArgs, bArgs); const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdhL}, tArgs, iArgs, bArgs); // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdcL}, tArgs, iArgs, bArgs); // const OpArgsHolder argsHolderBP({&x, &Wx, 
&Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh}, tArgs, iArgs, bArgs); // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdhL}, tArgs, iArgs, bArgs); // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdcL}, tArgs, iArgs, bArgs); sd::ops::lstmLayer opFF; sd::ops::lstmLayer_bp opBP; const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP, {true, true, true, true, false, true, true, true}); ASSERT_TRUE(isGradCorrect); } /////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests13, lstmLayer_bp_8) { const int sL = 3; const int bS = 2; const int nIn = 2; const int nOut = 2; const int dataFormat = 3; // [sL, bS, nIn] const int directionMode = 4; // bidirectional extra output dim const int gateAct = 2; // sigmoid activation for input (i), forget (f) and output (o) gates const int cellAct = 0; // tanh activation for cell state const int outAct = 0; // tanh activation for output const bool hasBiases = true; // biases array is provided const bool hasSeqLen = true; // seqLen array is not provided const auto hasInitH = true; // initial output is provided const auto hasInitC = true; // initial cell state is provided const auto hasPH = true; // peephole connections are absent const auto retFullSeq = true; // dLdh per each time step const auto retLastH = true; // output at last time step const auto retLastC = true; // cells state at last time step const double cellClip = 0.5; // clipping NDArray x('c', {sL, bS, nIn}, sd::DataType::DOUBLE); NDArray Wx('c', {2, nIn, 4*nOut}, sd::DataType::DOUBLE); NDArray Wr('c', {2, nOut, 4*nOut}, sd::DataType::DOUBLE); NDArray b('c', {2, 4*nOut}, sd::DataType::DOUBLE); NDArray seqLen('c', {bS}, {0,2}, sd::DataType::DOUBLE); NDArray hI('c', {2, bS, nOut}, sd::DataType::DOUBLE); NDArray cI('c', {2, bS, nOut}, 
sd::DataType::DOUBLE); NDArray Wp('c', {2, 3*nOut}, sd::DataType::DOUBLE); NDArray dLdh('c', {sL, 2, bS, nOut}, sd::DataType::DOUBLE); NDArray dLdhL('c', {2, bS, nOut}, sd::DataType::DOUBLE); NDArray dLdcL('c', {2, bS, nOut}, sd::DataType::DOUBLE); x.linspace(-2,0.1); hI.linspace(-1.5,0.1); cI.linspace(0.7,-0.1); Wx.linspace(1,-0.1); Wr.linspace(-1,0.1); Wp.linspace(0.2,0.2); b.linspace(1,-0.15); std::vector tArgs = {cellClip}; std::vector iArgs = {dataFormat, directionMode, gateAct, cellAct, outAct}; std::vector bArgs = {hasBiases, hasSeqLen, hasInitH, hasInitC, hasPH, retFullSeq, retLastH, retLastC}; const OpArgsHolder argsHolderFF({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp}, tArgs, iArgs, bArgs); const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdhL}, tArgs, iArgs, bArgs); // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdcL}, tArgs, iArgs, bArgs); // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh}, tArgs, iArgs, bArgs); // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdhL}, tArgs, iArgs, bArgs); // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdcL}, tArgs, iArgs, bArgs); sd::ops::lstmLayer opFF; sd::ops::lstmLayer_bp opBP; const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP, {true, true, true, true, false, true, true, true}); ASSERT_TRUE(isGradCorrect); } ////////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests15, gru_bp_1) { const int sL = 3; const int bS = 2; const int nIn = 5; const int nOut = 4; NDArray x('c', {sL, bS, nIn}, {0.5, 1. , 1.5, 2. , 2.5, 3. , 3.5, 4. , 4.5, 5. 
, 5.5, 6. , 6.5, 7. , 7.5, 8. , 8.5, 9. , 9.5, 10. , 10.5, 11. , 11.5, 12. , 12.5, 13. , 13.5, 14. , 14.5, 15.}, sd::DataType::DOUBLE); NDArray hI('c', {bS, nOut}, {-3,-2,-1,0,1,2,3,4}, sd::DataType::DOUBLE); NDArray Wx('c', {nIn, 3*nOut}, sd::DataType::DOUBLE); NDArray Wh('c', {nOut, 3*nOut}, sd::DataType::DOUBLE); NDArray b('c', {3*nOut}, sd::DataType::DOUBLE); NDArray dLdh('c', {sL, bS, nOut}, sd::DataType::DOUBLE); Wx.linspace(1,-0.1); Wh.linspace(0.2,0.2); b.linspace(1,-0.15); const OpArgsHolder argsHolderFF({&x, &hI, &Wx, &Wh, &b}, {}, {}); const OpArgsHolder argsHolderBP({&x, &hI, &Wx, &Wh, &b, &dLdh}, {}, {}); sd::ops::gru opFF; sd::ops::gru_bp opBP; const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP); } */