// cavis/libnd4j/tests_cpu/layers_tests/OmpLaunchHelperTests.cpp

/*******************************************************************************
 * Copyright (c) 2015-2018 Skymind, Inc.
 *
 * This program and the accompanying materials are made available under the
 * terms of the Apache License, Version 2.0 which is available at
 * https://www.apache.org/licenses/LICENSE-2.0.
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 *
 * SPDX-License-Identifier: Apache-2.0
 ******************************************************************************/

//
// Created by raver119 on 30.06.18.
//

#include "testlayers.h"
#include <NDArray.h>
#include <OmpLaunchHelper.h>


using namespace nd4j;
using namespace nd4j::graph;

class OmpLaunchHelperTests : public testing::Test {
private:
    int ewt = 0;
public:
    OmpLaunchHelperTests() {
        this->ewt = Environment::getInstance()->elementwiseThreshold();
        Environment::getInstance()->setElementwiseThreshold(1000);
    };

    ~OmpLaunchHelperTests() {
        Environment::getInstance()->setElementwiseThreshold(this->ewt);
    }
};

TEST_F(OmpLaunchHelperTests, Test_BetterSpan_1) {
    // 1000 is an exact multiple of 4, so the expected span is exactly 250.
    ASSERT_EQ(250, OmpLaunchHelper::betterSpan(1000, 4));
}

TEST_F(OmpLaunchHelperTests, Test_BetterSpan_2) {
    // 1001 / 4 leaves a remainder of 1; the expected span is 251,
    // i.e. the quotient rounded up.
    ASSERT_EQ(251, OmpLaunchHelper::betterSpan(1001, 4));
}

TEST_F(OmpLaunchHelperTests, Test_BetterSpan_3) {
    // 1002 / 4 leaves a remainder of 2; the expected span is still 251.
    const auto spanSize = OmpLaunchHelper::betterSpan(1002, 4);
    ASSERT_EQ(251, spanSize);
}

TEST_F(OmpLaunchHelperTests, Test_BetterSpan_5) {
    // 1003 / 4 leaves a remainder of 3; the expected span is still 251.
    const auto spanSize = OmpLaunchHelper::betterSpan(1003, 4);
    ASSERT_EQ(251, spanSize);
}

TEST_F(OmpLaunchHelperTests, Test_BetterSpan_6) {
    // 1004 is an exact multiple of 4 again, giving an expected span of 251.
    ASSERT_EQ(251, OmpLaunchHelper::betterSpan(1004, 4));
}


TEST_F(OmpLaunchHelperTests, Test_BetterThreads_1) {
    // The fixture sets the elementwise threshold to 1000. For arguments (4000, 6)
    // the expected thread count is 4 -- presumably 4000 / 1000 with 6 acting as
    // the upper bound; confirm against OmpLaunchHelper::betterThreads.
    const auto threadCount = OmpLaunchHelper::betterThreads(4000, 6);
    ASSERT_EQ(4, threadCount);
}

TEST_F(OmpLaunchHelperTests, Test_BetterThreads_2) {
    // For arguments (12000, 6) the expected thread count equals the second
    // argument, 6 -- presumably the result is capped there; confirm against
    // OmpLaunchHelper::betterThreads.
    const auto threadCount = OmpLaunchHelper::betterThreads(12000, 6);
    ASSERT_EQ(6, threadCount);
}

TEST_F(OmpLaunchHelperTests, Test_BetterThreads_3) {
    // 899 is below the threshold of 1000 installed by the fixture;
    // a single thread is expected.
    ASSERT_EQ(1, OmpLaunchHelper::betterThreads(899, 6));
}

//////////////////////////////////////////////////////////////////////
TEST_F(OmpLaunchHelperTests, loop_test1) {
    // Builds an OmpLaunchHelper for N elements and 2 requested threads, lets every
    // thread of the parallel region increment the slice the helper assigns to it,
    // and finally checks the thread count stored in the helper.
    const Nd4jLong N = 20010;
    Nd4jLong desiredNumThreads = 2;
    int buffer[N] = {};

    OmpLaunchHelper info(N, desiredNumThreads);
    PRAGMA_OMP_PARALLEL_THREADS(info._numThreads)
    {
        const auto tid = omp_get_thread_num();

        // Start of this thread's slice and its length, both as reported by the helper.
        int* slice = buffer + info.getThreadOffset(tid);
        const auto sliceLength = static_cast<unsigned int>(info.getItersPerThread(tid));

        PRAGMA_OMP_SIMD
        for (Nd4jLong i = 0; i < sliceLength; i++)
            slice[i] += 1;
    }

#ifdef _OPENMP
    // Built with OpenMP: the helper is expected to keep the requested thread count.
    ASSERT_EQ(desiredNumThreads, info._numThreads);
#else
    // Built without OpenMP: exactly one thread is expected.
    ASSERT_EQ(1, info._numThreads);
#endif
}

TEST_F(OmpLaunchHelperTests, test_tad_threads_1) {
    // 16 TADs of length 16 each: a single thread is expected.
    Nd4jLong lengthPerTad = 16;
    Nd4jLong tadCount = 16;

    // Debug aid, kept disabled:
    //    nd4j_printf("TT: [%i]; ET: [%i];\n", Environment::getInstance()->tadThreshold(), Environment::getInstance()->elementwiseThreshold());
    ASSERT_EQ(1, OmpLaunchHelper::tadThreads(lengthPerTad, tadCount));
}

TEST_F(OmpLaunchHelperTests, test_tad_threads_2) {
    // Only checked when OpenMP reports more than one available thread;
    // otherwise the test finishes without asserting anything.
    if (omp_get_max_threads() > 1) {
        // Two TADs whose length equals the current elementwise threshold:
        // two threads are expected.
        Nd4jLong lengthPerTad = Environment::getInstance()->elementwiseThreshold();
        Nd4jLong tadCount = 2;

        ASSERT_EQ(2, OmpLaunchHelper::tadThreads(lengthPerTad, tadCount));
    }
}

TEST_F(OmpLaunchHelperTests, test_tad_threads_3) {
    // 2 TADs of length 128 each: a single thread is expected.
    Nd4jLong lengthPerTad = 128;
    Nd4jLong tadCount = 2;

    ASSERT_EQ(1, OmpLaunchHelper::tadThreads(lengthPerTad, tadCount));
}

TEST_F(OmpLaunchHelperTests, test_tad_threads_4) {
    // 4 TADs of length 64 each: a single thread is expected.
    Nd4jLong lengthPerTad = 64;
    Nd4jLong tadCount = 4;

    ASSERT_EQ(1, OmpLaunchHelper::tadThreads(lengthPerTad, tadCount));
}

TEST_F(OmpLaunchHelperTests, test_tad_threads_5) {
    // As many TADs as OpenMP reports available threads, each as long as the
    // current elementwise threshold: every available thread is expected to be used.
    auto maxThreads = omp_get_max_threads();

    Nd4jLong lengthPerTad = Environment::getInstance()->elementwiseThreshold();
    Nd4jLong tadCount = maxThreads;

    ASSERT_EQ(maxThreads, OmpLaunchHelper::tadThreads(lengthPerTad, tadCount));
}
Eclipse Migration Initial Commit 2019-06-06 14:21:15 +02:00			`/*******************************************************************************`
			`* Copyright (c) 2015-2018 Skymind, Inc.`
			`*`
			`* This program and the accompanying materials are made available under the`
			`* terms of the Apache License, Version 2.0 which is available at`
			`* https://www.apache.org/licenses/LICENSE-2.0.`
			`*`
			`* Unless required by applicable law or agreed to in writing, software`
			`* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT`
			`* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the`
			`* License for the specific language governing permissions and limitations`
			`* under the License.`
			`*`
			`* SPDX-License-Identifier: Apache-2.0`
			`******************************************************************************/`

			`//`
			`// Created by raver119 on 30.06.18.`
			`//`

			`#include "testlayers.h"`
			`#include <NDArray.h>`
			`#include <OmpLaunchHelper.h>`


			`using namespace nd4j;`
			`using namespace nd4j::graph;`

			`class OmpLaunchHelperTests : public testing::Test {`
			`private:`
			`int ewt = 0;`
			`public:`
			`OmpLaunchHelperTests() {`
			`this->ewt = Environment::getInstance()->elementwiseThreshold();`
			`Environment::getInstance()->setElementwiseThreshold(1000);`
			`};`

			`~OmpLaunchHelperTests() {`
			`Environment::getInstance()->setElementwiseThreshold(this->ewt);`
			`}`
			`};`

			`TEST_F(OmpLaunchHelperTests, Test_BetterSpan_1) {`
			`auto span = OmpLaunchHelper::betterSpan(1000, 4);`
			`ASSERT_EQ(250, span);`
			`}`

			`TEST_F(OmpLaunchHelperTests, Test_BetterSpan_2) {`
			`auto span = OmpLaunchHelper::betterSpan(1001, 4);`
			`ASSERT_EQ(251, span);`
			`}`

			`TEST_F(OmpLaunchHelperTests, Test_BetterSpan_3) {`
			`auto span = OmpLaunchHelper::betterSpan(1002, 4);`
			`ASSERT_EQ(251, span);`
			`}`

			`TEST_F(OmpLaunchHelperTests, Test_BetterSpan_5) {`
			`auto span = OmpLaunchHelper::betterSpan(1003, 4);`
			`ASSERT_EQ(251, span);`
			`}`

			`TEST_F(OmpLaunchHelperTests, Test_BetterSpan_6) {`
			`auto span = OmpLaunchHelper::betterSpan(1004, 4);`
			`ASSERT_EQ(251, span);`
			`}`


			`TEST_F(OmpLaunchHelperTests, Test_BetterThreads_1) {`
			`auto n = OmpLaunchHelper::betterThreads(4000, 6);`
			`ASSERT_EQ(4, n);`
			`}`

			`TEST_F(OmpLaunchHelperTests, Test_BetterThreads_2) {`
			`auto n = OmpLaunchHelper::betterThreads(12000, 6);`
			`ASSERT_EQ(6, n);`
			`}`

			`TEST_F(OmpLaunchHelperTests, Test_BetterThreads_3) {`
			`auto n = OmpLaunchHelper::betterThreads(899, 6);`
			`ASSERT_EQ(1, n);`
			`}`

			`//////////////////////////////////////////////////////////////////////`
			`TEST_F(OmpLaunchHelperTests, loop_test1) {`

			`const Nd4jLong N = 20010;`
			`Nd4jLong desiredNumThreads = 2;`
			`int x[N] = {0};`

			`OmpLaunchHelper info(N, desiredNumThreads);`
			`PRAGMA_OMP_PARALLEL_THREADS(info._numThreads)`
			`{`
			`auto threadNum = omp_get_thread_num();`
			`auto xi = x + info.getThreadOffset(threadNum);`

			`auto ulen = static_cast<unsigned int>(info.getItersPerThread(threadNum));`

			`PRAGMA_OMP_SIMD`
			`for (Nd4jLong i = 0; i < ulen; i++)`
			`xi[i] = xi[i] + 1;`
			`}`

			`#ifdef _OPENMP`
[WIP] More of CUDA (#95) * initial commit Signed-off-by: raver119 <raver119@gmail.com> * Implementation of hashcode cuda helper. Working edition. * Fixed parallel test input arangements. * Fixed tests for hashcode op. * Fixed shape calculation for image:crop_and_resize op and test. * NativeOps tests. Initial test suite. * Added tests for indexReduce methods. * Added test on execBroadcast with NDArray as dimensions. * Added test on execBroadcastBool with NDArray as dimensions. * Added tests on execPairwiseTransform and execPairwiseTransofrmBool. * Added tests for execReduce with scalar results. * Added reduce tests for non-empty dims array. * Added tests for reduce3. * Added tests for execScalar. * Added tests for execSummaryStats. * - provide cpu/cuda code for batch_to_space - testing it Signed-off-by: Yurii <yurii@skymind.io> * - remove old test for batch_to_space (had wrong format and numbers were not checked) Signed-off-by: Yurii <yurii@skymind.io> * Fixed complilation errors with test. * Added test for execTransformFloat. * Added test for execTransformSame. * Added test for execTransformBool. * Added test for execTransformStrict. * Added tests for execScalar/execScalarBool with TADs. * Added test for flatten. * - provide cpu/cuda code for space_to_Batch operaion Signed-off-by: Yurii <yurii@skymind.io> * Added test for concat. * comment unnecessary stuff in s_t_b Signed-off-by: Yurii <yurii@skymind.io> * Added test for specialConcat. * Added tests for memcpy/set routines. * Fixed pullRow cuda test. * Added pullRow test. * Added average test. * - correct typo in NDArray::applyPairwiseTransform(nd4j::pairwise::BoolOps op...) Signed-off-by: Yurii <yurii@skymind.io> * - debugging and fixing cuda tests in JavaInteropTests file Signed-off-by: Yurii <yurii@skymind.io> * - correct some tests Signed-off-by: Yurii <yurii@skymind.io> * Added test for shuffle. * Fixed ops declarations. * Restored omp and added shuffle test. * Added convertTypes test. 
* Added tests for execRandom. Eliminated usage of RandomBuffer with NativeOps. * Added sort tests. * Added tests for execCustomOp. * - further debuging and fixing tests terminated with crash Signed-off-by: Yurii <yurii@skymind.io> * Added tests for calculateOutputShapes. * Addded Benchmarks test. * Commented benchmark tests. * change assertion Signed-off-by: raver119 <raver119@gmail.com> * Added tests for apply_sgd op. Added cpu helper for that op. * Implement cuda helper for aplly_sgd op. Fixed tests for NativeOps. * Added test for assign broadcastable. * Added tests for assign_bp op. * Added tests for axpy op. * - assign/execScalar/execTransformAny signature change - minor test fix Signed-off-by: raver119 <raver119@gmail.com> * Fixed axpy op. * meh Signed-off-by: raver119 <raver119@gmail.com> * - fix tests for nativeOps::concat Signed-off-by: Yurii <yurii@skymind.io> * sequential transform/scalar Signed-off-by: raver119 <raver119@gmail.com> * allow nested parallelism Signed-off-by: raver119 <raver119@gmail.com> * assign_bp leak fix Signed-off-by: raver119 <raver119@gmail.com> * block setRNG fix Signed-off-by: raver119 <raver119@gmail.com> * enable parallelism by default Signed-off-by: raver119 <raver119@gmail.com> * enable nested parallelism by default Signed-off-by: raver119 <raver119@gmail.com> * Added cuda implementation for row_count helper. * Added implementation for tnse gains op helper. * - take into account possible situations when input arrays are empty in reduce_ cuda stuff Signed-off-by: Yurii <yurii@skymind.io> * Implemented tsne/edge_forces op cuda-based helper. Parallelized cpu-based helper for edge_forces. * Added kernel for tsne/symmetrized op heleper. * Implementation of tsne/symmetrized op cuda helper. Working edition. * Eliminated waste printfs. * Added test for broadcastgradientargs op. 
* host-only fallback for empty reduce float Signed-off-by: raver119 <raver119@gmail.com> * - some tests fixes Signed-off-by: Yurii <yurii@skymind.io> * - correct the rest of reduce_ stuff Signed-off-by: Yurii <yurii@skymind.io> * - further correction of reduce_ stuff Signed-off-by: Yurii <yurii@skymind.io> * Added test for Cbow op. Also added cuda implementation for cbow helpers. * - improve code of stack operation for scalar case Signed-off-by: Yurii <yurii@skymind.io> * - provide cuda kernel for gatherND operation Signed-off-by: Yurii <yurii@skymind.io> * Implementation of cbow helpers with cuda kernels. * minor tests tweaks Signed-off-by: raver119 <raver119@gmail.com> * minor tests tweaks Signed-off-by: raver119 <raver119@gmail.com> * - further correction of cuda stuff Signed-off-by: Yurii <yurii@skymind.io> * Implementatation of cbow op helper with cuda kernels. Working edition. * Skip random testing for cudablas case. * lstmBlockCell context fix Signed-off-by: raver119 <raver119@gmail.com> * Added tests for ELU and ELU_BP ops. * Added tests for eq_scalar, gt_scalar, gte_scalar and lte_scalar ops. * Added tests for neq_scalar. * Added test for noop. * - further work on clipbynorm_bp Signed-off-by: Yurii <yurii@skymind.io> * - get rid of concat op call, use instead direct concat helper call Signed-off-by: Yurii <yurii@skymind.io> * lstmBlockCell context fix Signed-off-by: raver119 <raver119@gmail.com> * Added tests for lrelu and lrelu_bp. * Added tests for selu and selu_bp. * Fixed lrelu derivative helpers. 
* - some corrections in lstm Signed-off-by: Yurii <yurii@skymind.io> * operator * result shape fix Signed-off-by: raver119 <raver119@gmail.com> * - correct typo in lstmCell Signed-off-by: Yurii <yurii@skymind.io> * few tests fixed Signed-off-by: raver119 <raver119@gmail.com> * CUDA inverse broadcast bool fix Signed-off-by: raver119 <raver119@gmail.com> * disable MMAP test for CUDA Signed-off-by: raver119 <raver119@gmail.com> * BooleanOp syncToDevice Signed-off-by: raver119 <raver119@gmail.com> * meh Signed-off-by: raver119 <raver119@gmail.com> * additional data types for im2col/col2im Signed-off-by: raver119 <raver119@gmail.com> * Added test for firas_sparse op. * one more RandomBuffer test excluded Signed-off-by: raver119 <raver119@gmail.com> * Added tests for flatten op. * Added test for Floor op. * bunch of tests fixed Signed-off-by: raver119 <raver119@gmail.com> * mmulDot tests fixed Signed-off-by: raver119 <raver119@gmail.com> * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * Implemented floordiv_bp op and tests. * Fixed scalar case with cuda implementation for bds. * - work on cuda kernel for clip_by_norm backprop op is completed Signed-off-by: Yurii <yurii@skymind.io> * Eliminate cbow crach. * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * Eliminated abortion with batched nlp test. * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * Fixed shared flag initializing. * disabled bunch of cpu workspaces tests Signed-off-by: raver119 <raver119@gmail.com> * scalar operators fix: missing registerSpecialUse call Signed-off-by: raver119 <raver119@gmail.com> * Fixed logdet for cuda and tests. * - correct clipBynorm_bp Signed-off-by: Yurii <yurii@skymind.io> * Fixed crop_and_resize shape datatype. * - correct some mmul tests Signed-off-by: Yurii <yurii@skymind.io> 2019-08-02 19:01:03 +02:00			`ASSERT_EQ(desiredNumThreads, info._numThreads);`
Eclipse Migration Initial Commit 2019-06-06 14:21:15 +02:00			`#else`
[WIP] More of CUDA (#95) * initial commit Signed-off-by: raver119 <raver119@gmail.com> * Implementation of hashcode cuda helper. Working edition. * Fixed parallel test input arangements. * Fixed tests for hashcode op. * Fixed shape calculation for image:crop_and_resize op and test. * NativeOps tests. Initial test suite. * Added tests for indexReduce methods. * Added test on execBroadcast with NDArray as dimensions. * Added test on execBroadcastBool with NDArray as dimensions. * Added tests on execPairwiseTransform and execPairwiseTransofrmBool. * Added tests for execReduce with scalar results. * Added reduce tests for non-empty dims array. * Added tests for reduce3. * Added tests for execScalar. * Added tests for execSummaryStats. * - provide cpu/cuda code for batch_to_space - testing it Signed-off-by: Yurii <yurii@skymind.io> * - remove old test for batch_to_space (had wrong format and numbers were not checked) Signed-off-by: Yurii <yurii@skymind.io> * Fixed complilation errors with test. * Added test for execTransformFloat. * Added test for execTransformSame. * Added test for execTransformBool. * Added test for execTransformStrict. * Added tests for execScalar/execScalarBool with TADs. * Added test for flatten. * - provide cpu/cuda code for space_to_Batch operaion Signed-off-by: Yurii <yurii@skymind.io> * Added test for concat. * comment unnecessary stuff in s_t_b Signed-off-by: Yurii <yurii@skymind.io> * Added test for specialConcat. * Added tests for memcpy/set routines. * Fixed pullRow cuda test. * Added pullRow test. * Added average test. * - correct typo in NDArray::applyPairwiseTransform(nd4j::pairwise::BoolOps op...) Signed-off-by: Yurii <yurii@skymind.io> * - debugging and fixing cuda tests in JavaInteropTests file Signed-off-by: Yurii <yurii@skymind.io> * - correct some tests Signed-off-by: Yurii <yurii@skymind.io> * Added test for shuffle. * Fixed ops declarations. * Restored omp and added shuffle test. * Added convertTypes test. 
* Added tests for execRandom. Eliminated usage of RandomBuffer with NativeOps. * Added sort tests. * Added tests for execCustomOp. * - further debuging and fixing tests terminated with crash Signed-off-by: Yurii <yurii@skymind.io> * Added tests for calculateOutputShapes. * Addded Benchmarks test. * Commented benchmark tests. * change assertion Signed-off-by: raver119 <raver119@gmail.com> * Added tests for apply_sgd op. Added cpu helper for that op. * Implement cuda helper for aplly_sgd op. Fixed tests for NativeOps. * Added test for assign broadcastable. * Added tests for assign_bp op. * Added tests for axpy op. * - assign/execScalar/execTransformAny signature change - minor test fix Signed-off-by: raver119 <raver119@gmail.com> * Fixed axpy op. * meh Signed-off-by: raver119 <raver119@gmail.com> * - fix tests for nativeOps::concat Signed-off-by: Yurii <yurii@skymind.io> * sequential transform/scalar Signed-off-by: raver119 <raver119@gmail.com> * allow nested parallelism Signed-off-by: raver119 <raver119@gmail.com> * assign_bp leak fix Signed-off-by: raver119 <raver119@gmail.com> * block setRNG fix Signed-off-by: raver119 <raver119@gmail.com> * enable parallelism by default Signed-off-by: raver119 <raver119@gmail.com> * enable nested parallelism by default Signed-off-by: raver119 <raver119@gmail.com> * Added cuda implementation for row_count helper. * Added implementation for tnse gains op helper. * - take into account possible situations when input arrays are empty in reduce_ cuda stuff Signed-off-by: Yurii <yurii@skymind.io> * Implemented tsne/edge_forces op cuda-based helper. Parallelized cpu-based helper for edge_forces. * Added kernel for tsne/symmetrized op heleper. * Implementation of tsne/symmetrized op cuda helper. Working edition. * Eliminated waste printfs. * Added test for broadcastgradientargs op. 
* host-only fallback for empty reduce float Signed-off-by: raver119 <raver119@gmail.com> * - some tests fixes Signed-off-by: Yurii <yurii@skymind.io> * - correct the rest of reduce_ stuff Signed-off-by: Yurii <yurii@skymind.io> * - further correction of reduce_ stuff Signed-off-by: Yurii <yurii@skymind.io> * Added test for Cbow op. Also added cuda implementation for cbow helpers. * - improve code of stack operation for scalar case Signed-off-by: Yurii <yurii@skymind.io> * - provide cuda kernel for gatherND operation Signed-off-by: Yurii <yurii@skymind.io> * Implementation of cbow helpers with cuda kernels. * minor tests tweaks Signed-off-by: raver119 <raver119@gmail.com> * minor tests tweaks Signed-off-by: raver119 <raver119@gmail.com> * - further correction of cuda stuff Signed-off-by: Yurii <yurii@skymind.io> * Implementatation of cbow op helper with cuda kernels. Working edition. * Skip random testing for cudablas case. * lstmBlockCell context fix Signed-off-by: raver119 <raver119@gmail.com> * Added tests for ELU and ELU_BP ops. * Added tests for eq_scalar, gt_scalar, gte_scalar and lte_scalar ops. * Added tests for neq_scalar. * Added test for noop. * - further work on clipbynorm_bp Signed-off-by: Yurii <yurii@skymind.io> * - get rid of concat op call, use instead direct concat helper call Signed-off-by: Yurii <yurii@skymind.io> * lstmBlockCell context fix Signed-off-by: raver119 <raver119@gmail.com> * Added tests for lrelu and lrelu_bp. * Added tests for selu and selu_bp. * Fixed lrelu derivative helpers. 
* - some corrections in lstm Signed-off-by: Yurii <yurii@skymind.io> * operator * result shape fix Signed-off-by: raver119 <raver119@gmail.com> * - correct typo in lstmCell Signed-off-by: Yurii <yurii@skymind.io> * few tests fixed Signed-off-by: raver119 <raver119@gmail.com> * CUDA inverse broadcast bool fix Signed-off-by: raver119 <raver119@gmail.com> * disable MMAP test for CUDA Signed-off-by: raver119 <raver119@gmail.com> * BooleanOp syncToDevice Signed-off-by: raver119 <raver119@gmail.com> * meh Signed-off-by: raver119 <raver119@gmail.com> * additional data types for im2col/col2im Signed-off-by: raver119 <raver119@gmail.com> * Added test for firas_sparse op. * one more RandomBuffer test excluded Signed-off-by: raver119 <raver119@gmail.com> * Added tests for flatten op. * Added test for Floor op. * bunch of tests fixed Signed-off-by: raver119 <raver119@gmail.com> * mmulDot tests fixed Signed-off-by: raver119 <raver119@gmail.com> * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * Implemented floordiv_bp op and tests. * Fixed scalar case with cuda implementation for bds. * - work on cuda kernel for clip_by_norm backprop op is completed Signed-off-by: Yurii <yurii@skymind.io> * Eliminate cbow crach. * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * Eliminated abortion with batched nlp test. * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * Fixed shared flag initializing. * disabled bunch of cpu workspaces tests Signed-off-by: raver119 <raver119@gmail.com> * scalar operators fix: missing registerSpecialUse call Signed-off-by: raver119 <raver119@gmail.com> * Fixed logdet for cuda and tests. * - correct clipBynorm_bp Signed-off-by: Yurii <yurii@skymind.io> * Fixed crop_and_resize shape datatype. * - correct some mmul tests Signed-off-by: Yurii <yurii@skymind.io> 2019-08-02 19:01:03 +02:00			`ASSERT_EQ(1, info._numThreads);`
Eclipse Migration Initial Commit 2019-06-06 14:21:15 +02:00			`#endif`

			`}`

			`TEST_F(OmpLaunchHelperTests, test_tad_threads_1) {`
			`Nd4jLong numTads = 16;`
			`Nd4jLong tadLength = 16;`

			`// nd4j_printf("TT: [%i]; ET: [%i];\n", Environment::getInstance()->tadThreshold(), Environment::getInstance()->elementwiseThreshold());`
			`ASSERT_EQ(1, OmpLaunchHelper::tadThreads(tadLength, numTads));`
			`}`

			`TEST_F(OmpLaunchHelperTests, test_tad_threads_2) {`
			`if (omp_get_max_threads() <= 1)`
			`return;`

			`Nd4jLong numTads = 2;`
			`Nd4jLong tadLength = Environment::getInstance()->elementwiseThreshold();`

			`ASSERT_EQ(2, OmpLaunchHelper::tadThreads(tadLength, numTads));`
			`}`

			`TEST_F(OmpLaunchHelperTests, test_tad_threads_3) {`
			`Nd4jLong numTads = 2;`
			`Nd4jLong tadLength = 128;`

			`ASSERT_EQ(1, OmpLaunchHelper::tadThreads(tadLength, numTads));`
			`}`

			`TEST_F(OmpLaunchHelperTests, test_tad_threads_4) {`
			`Nd4jLong numTads = 4;`
			`Nd4jLong tadLength = 64;`

			`ASSERT_EQ(1, OmpLaunchHelper::tadThreads(tadLength, numTads));`
			`}`

			`TEST_F(OmpLaunchHelperTests, test_tad_threads_5) {`
			`auto exp = omp_get_max_threads();`

			`Nd4jLong numTads = exp;`
			`Nd4jLong tadLength = Environment::getInstance()->elementwiseThreshold();`

			`ASSERT_EQ(exp, OmpLaunchHelper::tadThreads(tadLength, numTads));`
			`}`