1621 lines
61 KiB
C++
1621 lines
61 KiB
C++
|
/*******************************************************************************
 * Copyright (c) 2015-2018 Skymind, Inc.
 *
 * This program and the accompanying materials are made available under the
 * terms of the Apache License, Version 2.0 which is available at
 * https://www.apache.org/licenses/LICENSE-2.0.
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 *
 * SPDX-License-Identifier: Apache-2.0
 ******************************************************************************/
|
||
|
|
||
|
//
|
||
|
// Created by GS <sgazeos@gmail.com> on 22.07.2019.
|
||
|
//
|
||
|
|
||
|
#include "testlayers.h"
|
||
|
#include <NDArray.h>
|
||
|
#include <ShapeUtils.h>
|
||
|
#include <reduce3.h>
|
||
|
#include <ops/declarable/LegacyTransformOp.h>
|
||
|
#include <ops/declarable/LegacyPairwiseTransformOp.h>
|
||
|
#include <ops/declarable/LegacyScalarOp.h>
|
||
|
#include <ops/declarable/LegacyReduceSameOp.h>
|
||
|
#include <ops/declarable/LegacyReduceFloatOp.h>
|
||
|
#include <ops/declarable/LegacyIndexReduceOp.h>
|
||
|
#include <ops/declarable/LegacyBroadcastOp.h>
|
||
|
#include <helpers/TAD.h>
|
||
|
#include <helpers/ConstantTadHelper.h>
|
||
|
#include <type_conversions.h>
|
||
|
#include <ops/declarable/CustomOperations.h>
|
||
|
using namespace nd4j;
|
||
|
using namespace nd4j::ops;
|
||
|
|
||
|
/// GoogleTest fixture shared by every TEST_F in this file.
/// It declares no members and no SetUp/TearDown logic.
class NativeOpsTests : public testing::Test {
public:
};
|
||
|
|
||
|
|
||
|
// Asserts that ::createContext() returns a null pointer.
TEST_F(NativeOpsTests, CreateContextTests_1) {
    auto ctx = ::createContext();
    ASSERT_TRUE(ctx == nullptr);
}
|
||
|
|
||
|
// Asserts that two consecutive ::createContext() calls return equal values.
TEST_F(NativeOpsTests, CreateContextTests_2) {
    auto firstCtx = ::createContext();
    auto secondCtx = ::createContext();
    ASSERT_TRUE(firstCtx == secondCtx);
}
|
||
|
|
||
|
// Smoke test: calls ::tryPointer on the buffer of a 5-element float array.
// There is no assertion; the test passes as long as the call returns.
TEST_F(NativeOpsTests, PointerTests_1) {
    auto data = NDArrayFactory::create<float>('c', {5}, {1,2,3,4,5});
#ifdef __CUDABLAS__
    printf("Unsupported for cuda now.\n");
#else
    ::tryPointer(nullptr, data.getBuffer(), 4);
#endif
}
|
||
|
|
||
|
// Sets the element threshold through the C API and checks that
// Environment::elementwiseThreshold() reports the same value.
TEST_F(NativeOpsTests, ThresholdTests_1) {
#ifdef __CUDABLAS__
    printf("Unsupported for cuda now.\n");
#else
    ::setElementThreshold(4);
    auto env = nd4j::Environment::getInstance();
    ASSERT_TRUE(4 == env->elementwiseThreshold());
#endif
}
|
||
|
|
||
|
// Sets the TAD threshold through the C API and checks that
// Environment::tadThreshold() reports the same value.
TEST_F(NativeOpsTests, ThresholdTests_2) {
#ifdef __CUDABLAS__
    printf("Unsupported for cuda now.\n");
#else
    ::setTADThreshold(4);
    auto env = nd4j::Environment::getInstance();
    ASSERT_TRUE(4 == env->tadThreshold());
#endif
}
|
||
|
|
||
|
// IndexMax over the vector [1..5] via execIndexReduceScalar; the test expects index 4.
TEST_F(NativeOpsTests, ExecIndexReduce_1) {
    auto input = NDArrayFactory::create<float>('c', {5}, {1,2,3,4,5});
    auto result = NDArrayFactory::create<Nd4jLong>(120);  // placeholder, overwritten by the op
    input.linspace(1.0);
#ifdef __CUDABLAS__
    printf("Unsupported for cuda now.\n");
#else
    ::execIndexReduceScalar(nullptr, indexreduce::IndexMax,
                            input.buffer(), input.shapeInfo(), nullptr, nullptr,   // input (host / special)
                            nullptr,                                               // extra params
                            result.buffer(), result.shapeInfo(), nullptr, nullptr);  // output (host / special)

    ASSERT_TRUE(result.e<Nd4jLong>(0) == 4LL);
#endif
}
|
||
|
|
||
|
// IndexMax over a 5x5 linspace matrix via execIndexReduce with an empty
// dimension array; the test expects index 24.
TEST_F(NativeOpsTests, ExecIndexReduce_2) {
    auto input = NDArrayFactory::create<float>('c', {5, 5});
    auto result = NDArrayFactory::create<Nd4jLong>(120);  // placeholder, overwritten by the op
    input.linspace(1.0);
#ifdef __CUDABLAS__
    printf("Unsupported for cuda now.\n");
#else
    NDArray dims = NDArrayFactory::create<int>({});
    ::execIndexReduce(nullptr, indexreduce::IndexMax,
                      input.buffer(), input.shapeInfo(), nullptr, nullptr,    // input (host / special)
                      nullptr,                                                // extra params
                      result.buffer(), result.shapeInfo(), nullptr, nullptr,  // output (host / special)
                      dims.buffer(), dims.shapeInfo(), nullptr, nullptr);     // dimensions (host / special)

    ASSERT_TRUE(result.e<Nd4jLong>(0) == 24LL);
#endif
}
|
||
|
|
||
|
// Broadcast Add of a {5,1} array (2,4,...,10) onto a 5x5 linspace matrix
// along dimension 1; only the first output element (1 + 2 == 3) is checked.
TEST_F(NativeOpsTests, ExecBroadcast_1) {
    auto lhs = NDArrayFactory::create<float>('c', {5, 5});
    auto rhs = NDArrayFactory::create<float>('c', {5, 1});
    auto out = NDArrayFactory::create<float>('c', {5, 5});
    lhs.linspace(1.0);
    rhs.linspace(2,2);
#ifdef __CUDABLAS__
    printf("Unsupported for cuda now.\n");
#else
    auto dims = NDArrayFactory::create<int>('c', {1}, {1});

    ::execBroadcast(nullptr, broadcast::Add,
                    lhs.buffer(), lhs.shapeInfo(), nullptr, nullptr,
                    rhs.buffer(), rhs.shapeInfo(), nullptr, nullptr,
                    out.buffer(), out.shapeInfo(), nullptr, nullptr,
                    dims.buffer(), dims.shapeInfo(), nullptr, nullptr);

    ASSERT_TRUE(out.e<float>(0) == 3.);
#endif
}
|
||
|
|
||
|
// Broadcast EqualTo of a {5,1} array (2,4,...,10) against a 5x5 linspace
// matrix along dimension 0; checks that element 1 is true and element 0 false.
TEST_F(NativeOpsTests, ExecBroadcast_2) {
    auto lhs = NDArrayFactory::create<float>('c', {5, 5});
    auto rhs = NDArrayFactory::create<float>('c', {5, 1});
    auto out = NDArrayFactory::create<bool>('c', {5, 5});
    lhs.linspace(1.0);
    rhs.linspace(2,2);
#ifdef __CUDABLAS__
    printf("Unsupported for cuda now.\n");
#else
    auto dims = NDArrayFactory::create<int>('c', {1}, {static_cast<int>(0)});

    ::execBroadcastBool(nullptr, broadcast::EqualTo,
                        lhs.buffer(), lhs.shapeInfo(), nullptr, nullptr,
                        rhs.buffer(), rhs.shapeInfo(), nullptr, nullptr,
                        out.buffer(), out.shapeInfo(), nullptr, nullptr,
                        dims.buffer(), dims.shapeInfo(), nullptr, nullptr);

    ASSERT_TRUE(out.e<bool>(1) && !out.e<bool>(0));
#endif
}
|
||
|
|
||
|
// Pairwise Add of a 5x5 linspace matrix and a matrix of 2s;
// element 5 of the result is expected to be 6 + 2 == 8.
TEST_F(NativeOpsTests, ExecPairwise_1) {
    auto lhs = NDArrayFactory::create<float>('c', {5, 5});
    auto rhs = NDArrayFactory::create<float>('c', {5, 5});
    auto out = NDArrayFactory::create<float>('c', {5, 5});
    lhs.linspace(1.0);
    rhs.assign(2.);
#ifdef __CUDABLAS__
    printf("Unsupported for cuda now.\n");
#else
    ::execPairwiseTransform(nullptr, pairwise::Add,
                            lhs.buffer(), lhs.shapeInfo(), nullptr, nullptr,
                            rhs.buffer(), rhs.shapeInfo(), nullptr, nullptr,
                            out.buffer(), out.shapeInfo(), nullptr, nullptr,
                            nullptr);  // extra params

    ASSERT_TRUE(out.e<float>(5) == 8.);
#endif
}
|
||
|
|
||
|
// Pairwise boolean And: lhs is all true, rhs is false everywhere except
// index 5, so only index 5 of the result should be true.
TEST_F(NativeOpsTests, ExecPairwise_2) {
    auto lhs = NDArrayFactory::create<bool>('c', {5, 5});
    auto rhs = NDArrayFactory::create<bool>('c', {5, 5});
    auto out = NDArrayFactory::create<bool>('c', {5, 5});
    lhs.assign(true);
    rhs.assign(false);
    rhs.t<bool>(5) = true;
#ifdef __CUDABLAS__
    printf("Unsupported for cuda now.\n");
#else
    ::execPairwiseTransformBool(nullptr, pairwise::And,
                                lhs.buffer(), lhs.shapeInfo(), nullptr, nullptr,
                                rhs.buffer(), rhs.shapeInfo(), nullptr, nullptr,
                                out.buffer(), out.shapeInfo(), nullptr, nullptr,
                                nullptr);  // extra params

    ASSERT_TRUE(out.e<bool>(5) && !out.e<bool>(4));
#endif
}
|
||
|
|
||
|
// Full-array Mean reduction via execReduceFloat on a 5x5 linspace matrix
// (values 1..25); the expected mean is 13.
//
// The previously declared `dimension` array was never passed to the call
// and has been removed.
TEST_F(NativeOpsTests, ReduceTest_1) {
    auto x = NDArrayFactory::create<float>('c', {5, 5});
    auto output = NDArrayFactory::create<float>(120.);  // placeholder, overwritten by the op
    x.linspace(1.0);

#ifdef __CUDABLAS__
    printf("Unsupported for cuda now.\n");
#else
    ::execReduceFloat(nullptr, reduce::Mean,
                      x.buffer(), x.shapeInfo(), nullptr, nullptr,             // input (host / special)
                      nullptr,                                                 // extra params
                      output.buffer(), output.shapeInfo(), nullptr, nullptr);  // output (host / special)

    ASSERT_TRUE(output.e<float>(0) == 13.);
#endif
}
|
||
|
|
||
|
// Full-array Sum reduction via execReduceSame on a 5x5 linspace matrix
// (values 1..25); the expected sum is 325.
TEST_F(NativeOpsTests, ReduceTest_2) {
    auto input = NDArrayFactory::create<float>('c', {5, 5});
    auto result = NDArrayFactory::create<float>(120.);  // placeholder, overwritten by the op
    input.linspace(1.0);

#ifdef __CUDABLAS__
    printf("Unsupported for cuda now.\n");
#else
    ::execReduceSame(nullptr, reduce::Sum,
                     input.buffer(), input.shapeInfo(), nullptr, nullptr,
                     nullptr,
                     result.buffer(), result.shapeInfo(), nullptr, nullptr);

    ASSERT_TRUE(result.e<float>(0) == 325.);
#endif
}
|
||
|
|
||
|
// Full-array All reduction via execReduceBool on a 5x5 linspace matrix;
// every element is non-zero, so the test expects true.
TEST_F(NativeOpsTests, ReduceTest_3) {
    auto input = NDArrayFactory::create<float>('c', {5, 5});
    auto result = NDArrayFactory::create<bool>(false);  // starts false so a no-op would fail
    input.linspace(1.0);

#ifdef __CUDABLAS__
    printf("Unsupported for cuda now.\n");
#else
    ::execReduceBool(nullptr, reduce::All,
                     input.buffer(), input.shapeInfo(), nullptr, nullptr,
                     nullptr,
                     result.buffer(), result.shapeInfo(), nullptr, nullptr);

    ASSERT_TRUE(result.e<bool>(0) == true);
#endif
}
|
||
|
|
||
|
// Full-array CountNonZero via execReduceLong on a 5x5 linspace matrix;
// all 25 elements are non-zero.
TEST_F(NativeOpsTests, ReduceTest_4) {
    auto input = NDArrayFactory::create<float>('c', {5, 5});
    auto result = NDArrayFactory::create<Nd4jLong>(120LL);  // placeholder, overwritten by the op
    input.linspace(1.0);

#ifdef __CUDABLAS__
    printf("Unsupported for cuda now.\n");
#else
    ::execReduceLong(nullptr, reduce::CountNonZero,
                     input.buffer(), input.shapeInfo(), nullptr, nullptr,
                     nullptr,
                     result.buffer(), result.shapeInfo(), nullptr, nullptr);

    ASSERT_TRUE(result.e<Nd4jLong>(0) == 25LL);
#endif
}
|
||
|
|
||
|
// CountNonZero via execReduceLong2 over dimensions {0, 1} of a 5x5 linspace
// matrix; all 25 elements are non-zero.
TEST_F(NativeOpsTests, ReduceTest_5) {
    auto input = NDArrayFactory::create<float>('c', {5, 5});
    auto result = NDArrayFactory::create<Nd4jLong>(120LL);  // placeholder, overwritten by the op
    input.linspace(1.0);

#ifdef __CUDABLAS__
    printf("Unsupported for cuda now.\n");
#else
    auto dims = NDArrayFactory::create<int>({0, 1});
    ::execReduceLong2(nullptr, reduce::CountNonZero,
                      input.buffer(), input.shapeInfo(), nullptr, nullptr,
                      nullptr,
                      result.buffer(), result.shapeInfo(), nullptr, nullptr,
                      dims.buffer(), dims.shapeInfo(),
                      dims.specialBuffer(), dims.specialShapeInfo());

    ASSERT_TRUE(result.e<Nd4jLong>(0) == 25LL);
#endif
}
|
||
|
|
||
|
// CountNonZero via execReduceLong2 along dimension 1 of a 5x5 matrix in which
// row r has its first r entries zeroed; expected per-row counts are 5,4,3,2,1.
TEST_F(NativeOpsTests, ReduceTest_6) {
    auto input = NDArrayFactory::create<float>('c', {5, 5});
    auto expected = NDArrayFactory::create<Nd4jLong>({5, 4, 3, 2, 1});
    auto counts = NDArrayFactory::create<Nd4jLong>({1,2,3,4,6});  // placeholder, overwritten by the op
    input.linspace(1.0);

#ifdef __CUDABLAS__
    printf("Unsupported for cuda now.\n");
#else
    auto dims = NDArrayFactory::create<int>('c', {1}, {1});

    // Flat indices to zero: one in row 1, two in row 2, three in row 3, four in row 4.
    for (int flatIdx : {5, 10, 11, 15, 16, 17, 20, 21, 22, 23}) {
        input.p(flatIdx, 0);
    }

    ::execReduceLong2(nullptr, reduce::CountNonZero,
                      input.buffer(), input.shapeInfo(), nullptr, nullptr,
                      nullptr,
                      counts.buffer(), counts.shapeInfo(), nullptr, nullptr,
                      dims.buffer(), dims.shapeInfo(),
                      dims.specialBuffer(), dims.specialShapeInfo());

    ASSERT_TRUE(counts.equalsTo(expected));
#endif
}
|
||
|
|
||
|
// Mean reduction via execReduceFloat2 over dimensions {0, 1} of a 5x5 linspace
// matrix (values 1..25); the expected mean is 13.
//
// `extra` is value-initialised so that non-CUDA builds never hand an array of
// indeterminate pointers to the native call; CUDA builds only fill slot 1
// with the stream.
TEST_F(NativeOpsTests, ReduceTest_7) {
    auto x = NDArrayFactory::create<float>('c', {5, 5});
    auto output = NDArrayFactory::create<float>(120.);   // placeholder, overwritten by the op
    auto expected = NDArrayFactory::create<float>(13.);

    auto dimension = NDArrayFactory::create<int>('c', {2}, {0, 1});
    Nd4jPointer extra[6] = {};
#ifdef __CUDABLAS__
    x.syncToHost();
    extra[1] = x.getContext()->getCudaStream();
#endif
    x.linspace(1.0);
    x.syncToDevice();
    dimension.syncToHost();

    ::execReduceFloat2(extra, reduce::Mean,
                       x.buffer(), x.shapeInfo(),
                       x.specialBuffer(), x.specialShapeInfo(),
                       nullptr,  // extra params
                       output.buffer(), output.shapeInfo(),
                       output.specialBuffer(), output.specialShapeInfo(),
                       dimension.buffer(), dimension.shapeInfo(),
                       dimension.specialBuffer(), dimension.specialShapeInfo());

    ASSERT_TRUE(output.equalsTo(expected));
}
|
||
|
|
||
|
// Sum reduction via execReduceSame2 over dimensions {0, 1} of a 5x5 linspace
// matrix (values 1..25); the expected sum is 325.
//
// `extra` is value-initialised so that non-CUDA builds never hand an array of
// indeterminate pointers to the native call; CUDA builds only fill slot 1
// with the stream.
TEST_F(NativeOpsTests, ReduceTest_8) {
    auto x = NDArrayFactory::create<float>('c', {5, 5});
    auto z = NDArrayFactory::create<float>(120.);    // output; placeholder value is overwritten
    auto exp = NDArrayFactory::create<float>(325.);  // expected result

    auto dimension = NDArrayFactory::create<int>('c', {2}, {0, 1});
    Nd4jPointer extra[6] = {};
#ifdef __CUDABLAS__
    extra[1] = x.getContext()->getCudaStream();
    x.syncToHost();
#endif
    x.linspace(1.0);
    x.syncToDevice();

    dimension.syncToHost();

    ::execReduceSame2(extra, reduce::Sum,
                      x.buffer(), x.shapeInfo(),
                      x.specialBuffer(), x.specialShapeInfo(),
                      nullptr,  // extra params
                      z.buffer(), z.shapeInfo(),
                      z.specialBuffer(), z.specialShapeInfo(),
                      dimension.buffer(), dimension.shapeInfo(),
                      dimension.specialBuffer(), dimension.specialShapeInfo());

    ASSERT_TRUE(exp.equalsTo(z));
}
|
||
|
|
||
|
// All reduction via execReduceBool2 over dimensions {0, 1} of a 5x5 linspace
// matrix; every element is non-zero, so the expected result is true.
//
// `extra` is value-initialised so that non-CUDA builds never hand an array of
// indeterminate pointers to the native call; CUDA builds only fill slot 1
// with the stream.
TEST_F(NativeOpsTests, ReduceTest_9) {
    auto x = NDArrayFactory::create<float>('c', {5, 5});
    auto output = NDArrayFactory::create<bool>(false);   // starts false so a no-op would fail
    auto expected = NDArrayFactory::create<bool>(true);

    auto dimension = NDArrayFactory::create<int>('c', {2}, {0, 1});
    Nd4jPointer extra[6] = {};
#ifdef __CUDABLAS__
    extra[1] = x.getContext()->getCudaStream();
    x.syncToHost();
#endif
    x.linspace(1.0);
    x.syncToDevice();

    dimension.syncToHost();

    ::execReduceBool2(extra, reduce::All,
                      x.buffer(), x.shapeInfo(),
                      x.specialBuffer(), x.specialShapeInfo(),
                      nullptr,  // extra params
                      output.buffer(), output.shapeInfo(),
                      output.specialBuffer(), output.specialShapeInfo(),
                      dimension.buffer(), dimension.shapeInfo(),
                      dimension.specialBuffer(), dimension.specialShapeInfo());

    ASSERT_TRUE(output.equalsTo(expected));
}
|
||
|
|
||
|
// reduce3 Dot via execReduce3 of a 5x5 linspace matrix (1..25) with a matrix
// of 2s; the expected value is 650 (2 * 325).
//
// Changes from the previous version: `extra` is value-initialised so non-CUDA
// builds never pass indeterminate pointers, and the unused `dimension` array
// (never handed to execReduce3) has been removed.
TEST_F(NativeOpsTests, Reduce3Test_1) {
    auto x = NDArrayFactory::create<float>('c', {5, 5});
    auto y = NDArrayFactory::create<float>('c', {5, 5});
    auto output = NDArrayFactory::create<float>(120.);   // placeholder, overwritten by the op
    auto expected = NDArrayFactory::create<float>(650.);

    Nd4jPointer extra[6] = {};
#ifdef __CUDABLAS__
    extra[1] = x.getContext()->getCudaStream();
    x.syncToHost();
    printf("Unsupported for CUDA platform yet.\n");
    return;
#endif
    x.linspace(1.0);
    y.assign(2.);
    x.syncToDevice();

    ::execReduce3(extra, reduce3::Dot,
                  x.buffer(), x.shapeInfo(),
                  x.specialBuffer(), x.specialShapeInfo(),
                  nullptr,  // extra params
                  y.buffer(), y.shapeInfo(),
                  y.specialBuffer(), y.specialShapeInfo(),
                  output.buffer(), output.shapeInfo(),
                  output.specialBuffer(), output.specialShapeInfo());

    ASSERT_TRUE(output.equalsTo(expected));
}
|
||
|
|
||
|
// reduce3 Dot via execReduce3Scalar of a 5x5 linspace matrix (1..25) with a
// matrix of 2s; the expected value is 650 (2 * 325).
//
// Changes from the previous version: `extra` is value-initialised so non-CUDA
// builds never pass indeterminate pointers, and the unused `dimension` array
// (never handed to execReduce3Scalar) has been removed.
TEST_F(NativeOpsTests, Reduce3Test_2) {
    auto x = NDArrayFactory::create<float>('c', {5, 5});
    auto y = NDArrayFactory::create<float>('c', {5, 5});
    auto output = NDArrayFactory::create<float>(120.);   // placeholder, overwritten by the op
    auto expected = NDArrayFactory::create<float>(650.);

    Nd4jPointer extra[6] = {};
#ifdef __CUDABLAS__
    extra[1] = x.getContext()->getCudaStream();
    x.syncToHost();
    printf("Unsupported for CUDA platform yet.\n");
    return;
#endif
    x.linspace(1.0);
    y.assign(2.);
    x.syncToDevice();

    ::execReduce3Scalar(extra, reduce3::Dot,
                        x.buffer(), x.shapeInfo(),
                        x.specialBuffer(), x.specialShapeInfo(),
                        nullptr,  // extra params
                        y.buffer(), y.shapeInfo(),
                        y.specialBuffer(), y.specialShapeInfo(),
                        output.buffer(), output.shapeInfo(),
                        output.specialBuffer(), output.specialShapeInfo());

    ASSERT_TRUE(output.equalsTo(expected));
}
|
||
|
|
||
|
// reduce3 Dot via execReduce3Tad over dimensions {0, 1} of a 5x5 linspace
// matrix (1..25) and a matrix of 2s; the expected value is 650 (2 * 325).
// No TAD shape/offset buffers are supplied (the four trailing arguments are null).
//
// `extra` is value-initialised so that non-CUDA builds never hand an array of
// indeterminate pointers to the native call.
TEST_F(NativeOpsTests, Reduce3Test_3) {
    auto x = NDArrayFactory::create<float>('c', {5, 5});
    auto y = NDArrayFactory::create<float>('c', {5, 5});
    auto output = NDArrayFactory::create<float>(120.);   // placeholder, overwritten by the op
    auto expected = NDArrayFactory::create<float>(650.);

    auto dimension = NDArrayFactory::create<int>('c', {2}, {0, 1});
    Nd4jPointer extra[6] = {};
#ifdef __CUDABLAS__
    extra[1] = x.getContext()->getCudaStream();
    x.syncToHost();
    printf("Unsupported for CUDA platform yet.\n");
    return;
#endif
    x.linspace(1.0);
    y.assign(2.);
    x.syncToDevice();
    dimension.syncToHost();

    ::execReduce3Tad(extra, reduce3::Dot,
                     x.buffer(), x.shapeInfo(),
                     x.specialBuffer(), x.specialShapeInfo(),
                     nullptr,  // extra params
                     y.buffer(), y.shapeInfo(),
                     y.specialBuffer(), y.specialShapeInfo(),
                     output.buffer(), output.shapeInfo(),
                     output.specialBuffer(), output.specialShapeInfo(),
                     dimension.buffer(), dimension.shapeInfo(),
                     dimension.specialBuffer(), dimension.specialShapeInfo(),
                     nullptr, nullptr, nullptr, nullptr);

    ASSERT_TRUE(output.equalsTo(expected));
}
|
||
|
|
||
|
// reduce3 Dot via execReduce3All over dimensions {0, 1} of a 5x5 linspace
// matrix (1..25) and a matrix of 2s; the expected value is 650 (2 * 325).
// Host-side TAD shape info and offsets for both operands come from
// ConstantTadHelper.
//
// `extra` is value-initialised so that non-CUDA builds never hand an array of
// indeterminate pointers to the native call.
TEST_F(NativeOpsTests, Reduce3Test_4) {
    auto x = NDArrayFactory::create<float>('c', {5, 5});
    auto y = NDArrayFactory::create<float>('c', {5, 5});
    auto output = NDArrayFactory::create<float>(120.);   // placeholder, overwritten by the op
    auto expected = NDArrayFactory::create<float>(650.);

    auto dimension = NDArrayFactory::create<int>('c', {2}, {0, 1});
    Nd4jPointer extra[6] = {};
#ifdef __CUDABLAS__
    extra[1] = x.getContext()->getCudaStream();
    x.syncToHost();
    printf("Unsupported for CUDA platform yet.\n");
    return;
#endif
    x.linspace(1.0);
    y.assign(2.);
    x.syncToDevice();
    dimension.syncToHost();

    int* dimensions = reinterpret_cast<int*>(dimension.buffer());
    auto tadPackX = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), dimensions, dimension.lengthOf());
    auto tadPackY = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(y.shapeInfo(), dimensions, dimension.lengthOf());

    auto hTADShapeInfoX = tadPackX.primaryShapeInfo();
    auto hTADOffsetsX = tadPackX.primaryOffsets();
    auto hTADShapeInfoY = tadPackY.primaryShapeInfo();
    auto hTADOffsetsY = tadPackY.primaryOffsets();

    ::execReduce3All(extra, reduce3::Dot,
                     x.buffer(), x.shapeInfo(),
                     x.specialBuffer(), x.specialShapeInfo(),
                     nullptr,  // extra params
                     y.buffer(), y.shapeInfo(),
                     y.specialBuffer(), y.specialShapeInfo(),
                     output.buffer(), output.shapeInfo(),
                     output.specialBuffer(), output.specialShapeInfo(),
                     dimension.buffer(), dimension.shapeInfo(),
                     dimension.specialBuffer(), dimension.specialShapeInfo(),
                     hTADShapeInfoX, hTADOffsetsX, hTADShapeInfoY, hTADOffsetsY);

    ASSERT_TRUE(output.equalsTo(expected));
}
|
||
|
|
||
|
// Scalar Multiply via execScalar: a 5x5 linspace matrix (1..25) times the
// scalar 10 should equal linspace(10, step 10).
//
// `extra` is value-initialised so that non-CUDA builds never hand an array of
// indeterminate pointers to the native call.
TEST_F(NativeOpsTests, ScalarTest_1) {
    auto x = NDArrayFactory::create<float>('c', {5, 5});
    auto y = NDArrayFactory::create<float>(10.);                 // scalar operand
    auto output = NDArrayFactory::create<float>('c', {5,5});
    auto expected = NDArrayFactory::create<float>('c', {5,5});

    Nd4jPointer extra[6] = {};
#ifdef __CUDABLAS__
    extra[1] = x.getContext()->getCudaStream();
    x.syncToHost();
    y.syncToHost();
    printf("Unsupported for CUDA platform yet.\n");
    return;
#endif
    x.linspace(1.0);
    expected.linspace(10., 10.);
    x.syncToDevice();
    expected.syncToDevice();

    ::execScalar(extra, scalar::Multiply,
                 x.buffer(), x.shapeInfo(),
                 x.specialBuffer(), x.specialShapeInfo(),
                 output.buffer(), output.shapeInfo(),
                 output.specialBuffer(), output.specialShapeInfo(),
                 y.buffer(), y.shapeInfo(),
                 y.specialBuffer(), y.specialShapeInfo(),
                 nullptr);  // extra params

    ASSERT_TRUE(output.equalsTo(expected));
}
|
||
|
|
||
|
// Scalar GreaterThan via execScalarBool: a 5x5 linspace matrix (1..25)
// compared with 10. Index 5 (value 6) must match the all-false reference,
// index 15 (value 16) must differ from it.
//
// `extra` is value-initialised so that non-CUDA builds never hand an array of
// indeterminate pointers to the native call.
TEST_F(NativeOpsTests, ScalarTest_2) {
    auto x = NDArrayFactory::create<float>('c', {5, 5});
    auto y = NDArrayFactory::create<float>(10.);                // scalar operand
    auto output = NDArrayFactory::create<bool>('c', {5,5});
    auto allFalse = NDArrayFactory::create<bool>('c', {5,5});   // reference array

    Nd4jPointer extra[6] = {};
#ifdef __CUDABLAS__
    extra[1] = x.getContext()->getCudaStream();
    x.syncToHost();
    y.syncToHost();
    printf("Unsupported for CUDA platform yet.\n");
    return;
#endif
    x.linspace(1.0);
    allFalse.assign(false);
    x.syncToDevice();
    allFalse.syncToDevice();

    ::execScalarBool(extra, scalar::GreaterThan,
                     x.buffer(), x.shapeInfo(),
                     x.specialBuffer(), x.specialShapeInfo(),
                     output.buffer(), output.shapeInfo(),
                     output.specialBuffer(), output.specialShapeInfo(),
                     y.buffer(), y.shapeInfo(),
                     y.specialBuffer(), y.specialShapeInfo(),
                     nullptr);  // extra params

    ASSERT_TRUE(output.e<bool>(5) == allFalse.e<bool>(5) && output.e<bool>(15) != allFalse.e<bool>(15));
}
|
||
|
|
||
|
// SummaryStatsVariance via execSummaryStatsScalar over a fixed 5x5 matrix,
// with the trailing flag set to false; the reference value is 0.21587136.
//
// `extra` is value-initialised so that non-CUDA builds never hand an array of
// indeterminate pointers to the native call.
TEST_F(NativeOpsTests, SummaryStatsScalarTest_1) {
    auto x = NDArrayFactory::create<float>('c', {5, 5}, {0.1, 0.2, 0.3, -0.3, -0.5, 0.5, 0.7, 0.9, 0.8, 0.1, 0.11, 0.12, 0.5, -0.8, -0.9, 0.4, 0.1, 0.2, 0.3, -0.3, -0.5, 0.2, 0.3, -0.3, -0.5});
    auto output = NDArrayFactory::create<float>(0.9);            // placeholder, overwritten by the op
    auto expected = NDArrayFactory::create<float>(0.21587136);

    Nd4jPointer extra[6] = {};
#ifdef __CUDABLAS__
    extra[1] = x.getContext()->getCudaStream();
    x.syncToHost();
    printf("Unsupported for CUDA platform yet.\n");
    return;
#endif
    ::execSummaryStatsScalar(extra, variance::SummaryStatsVariance,
                             x.buffer(), x.shapeInfo(),
                             x.specialBuffer(), x.specialShapeInfo(),
                             nullptr,  // extra params
                             output.buffer(), output.shapeInfo(),
                             output.specialBuffer(), output.specialShapeInfo(),
                             false);

    ASSERT_TRUE(output.equalsTo(expected));
}
|
||
|
|
||
|
// SummaryStatsVariance via execSummaryStats (the non-"Scalar" entry point)
// over a fixed 5x5 matrix, with the trailing flag set to false; the reference
// value is 0.21587136.
//
// `extra` is value-initialised so that non-CUDA builds never hand an array of
// indeterminate pointers to the native call.
TEST_F(NativeOpsTests, SummaryStatsScalarTest_2) {
    auto x = NDArrayFactory::create<float>('c', {5, 5}, {0.1, 0.2, 0.3, -0.3, -0.5, 0.5, 0.7, 0.9, 0.8, 0.1, 0.11, 0.12, 0.5, -0.8, -0.9, 0.4, 0.1, 0.2, 0.3, -0.3, -0.5, 0.2, 0.3, -0.3, -0.5});
    auto output = NDArrayFactory::create<float>(0.9);            // placeholder, overwritten by the op
    auto expected = NDArrayFactory::create<float>(0.21587136);

    Nd4jPointer extra[6] = {};
#ifdef __CUDABLAS__
    extra[1] = x.getContext()->getCudaStream();
    x.syncToHost();
    printf("Unsupported for CUDA platform yet.\n");
    return;
#endif
    ::execSummaryStats(extra, variance::SummaryStatsVariance,
                       x.buffer(), x.shapeInfo(),
                       x.specialBuffer(), x.specialShapeInfo(),
                       nullptr,  // extra params
                       output.buffer(), output.shapeInfo(),
                       output.specialBuffer(), output.specialShapeInfo(),
                       false);

    ASSERT_TRUE(output.equalsTo(expected));
}
|
||
|
|
||
|
// SummaryStatsVariance via execSummaryStatsTad over dimensions {0, 1} of a
// fixed 5x5 matrix, with the boolean flag set to false and no TAD buffers
// supplied; the reference value is 0.21587136.
//
// `extra` is value-initialised so that non-CUDA builds never hand an array of
// indeterminate pointers to the native call.
TEST_F(NativeOpsTests, SummaryStatsScalarTest_3) {
    auto x = NDArrayFactory::create<float>('c', {5, 5}, {0.1, 0.2, 0.3, -0.3, -0.5, 0.5, 0.7, 0.9, 0.8, 0.1, 0.11, 0.12, 0.5, -0.8, -0.9, 0.4, 0.1, 0.2, 0.3, -0.3, -0.5, 0.2, 0.3, -0.3, -0.5});
    auto output = NDArrayFactory::create<float>(0.9);            // placeholder, overwritten by the op
    auto expected = NDArrayFactory::create<float>(0.21587136);

    Nd4jPointer extra[6] = {};
#ifdef __CUDABLAS__
    extra[1] = x.getContext()->getCudaStream();
    x.syncToHost();
    printf("Unsupported for CUDA platform yet.\n");
    return;
#endif
    auto dimensions = NDArrayFactory::create<int>({0, 1});
    ::execSummaryStatsTad(extra, variance::SummaryStatsVariance,
                          x.buffer(), x.shapeInfo(),
                          x.specialBuffer(), x.specialShapeInfo(),
                          nullptr,  // extra params
                          output.buffer(), output.shapeInfo(),
                          output.specialBuffer(), output.specialShapeInfo(),
                          dimensions.buffer(), dimensions.shapeInfo(),
                          dimensions.specialBuffer(), dimensions.specialShapeInfo(),
                          false,
                          nullptr, nullptr);

    ASSERT_TRUE(output.equalsTo(expected));
}
|
||
|
|
||
|
// Sqrt via execTransformFloat: the input holds the squares 1^2..25^2, so the
// result should equal linspace(1) over a 5x5 array.
//
// `extra` is value-initialised so that non-CUDA builds never hand an array of
// indeterminate pointers to the native call.
TEST_F(NativeOpsTests, TransformTest_1) {
    auto x = NDArrayFactory::create<float>('c', {5, 5}, {1, 4, 9, 16, 25, 36, 49, 64, 81, 100, 121, 144, 169, 196, 225, 256, 289, 324, 361, 400, 441, 484, 529, 576, 625});
    auto output = NDArrayFactory::create<float>('c', {5, 5});
    auto expected = NDArrayFactory::create<float>('c', {5,5});

    Nd4jPointer extra[6] = {};
#ifdef __CUDABLAS__
    extra[1] = x.getContext()->getCudaStream();
    x.syncToHost();
    printf("Unsupported for CUDA platform yet.\n");
    return;
#endif
    expected.linspace(1.);
    ::execTransformFloat(extra, transform::Sqrt,
                         x.buffer(), x.shapeInfo(),
                         x.specialBuffer(), x.specialShapeInfo(),
                         output.buffer(), output.shapeInfo(),
                         output.specialBuffer(), output.specialShapeInfo(),
                         nullptr);  // extra params

    ASSERT_TRUE(output.equalsTo(expected));
}
|
||
|
|
||
|
// Square via execTransformSame: the input is linspace(1) over a 5x5 array,
// so the result should equal the table of squares 1^2..25^2.
//
// `extra` is value-initialised so that non-CUDA builds never hand an array of
// indeterminate pointers to the native call.
TEST_F(NativeOpsTests, TransformTest_2) {
    // Expected squares; also used for the stream lookup on CUDA builds.
    auto x = NDArrayFactory::create<float>('c', {5, 5}, {1, 4, 9, 16, 25, 36, 49, 64, 81, 100, 121, 144, 169, 196, 225, 256, 289, 324, 361, 400, 441, 484, 529, 576, 625});
    auto output = NDArrayFactory::create<float>('c', {5, 5});
    auto input = NDArrayFactory::create<float>('c', {5,5});

    Nd4jPointer extra[6] = {};
#ifdef __CUDABLAS__
    extra[1] = x.getContext()->getCudaStream();
    x.syncToHost();
    printf("Unsupported for CUDA platform yet.\n");
    return;
#endif
    input.linspace(1.);
    ::execTransformSame(extra, transform::Square,
                        input.buffer(), input.shapeInfo(),
                        input.specialBuffer(), input.specialShapeInfo(),
                        output.buffer(), output.shapeInfo(),
                        output.specialBuffer(), output.specialShapeInfo(),
                        nullptr);  // extra params

    ASSERT_TRUE(output.equalsTo(x));
}
|
||
|
|
||
|
// IsPositive via execTransformBool: a 5x5 linspace matrix whose last element
// is replaced by -25 should produce all-true except for index 24.
//
// `extra` is value-initialised so that non-CUDA builds never hand an array of
// indeterminate pointers to the native call.
TEST_F(NativeOpsTests, TransformTest_3) {
    auto x = NDArrayFactory::create<float>('c', {5, 5});
    auto output = NDArrayFactory::create<bool>('c', {5, 5});
    auto expected = NDArrayFactory::create<bool>('c', {5,5});

    Nd4jPointer extra[6] = {};
#ifdef __CUDABLAS__
    extra[1] = x.getContext()->getCudaStream();
    x.syncToHost();
    printf("Unsupported for CUDA platform yet.\n");
    return;
#endif
    x.linspace(1.);
    expected.assign(true);
    x.p(24, -25);           // the only non-positive input element
    expected.p(24, false);
    ::execTransformBool(extra, transform::IsPositive,
                        x.buffer(), x.shapeInfo(),
                        x.specialBuffer(), x.specialShapeInfo(),
                        output.buffer(), output.shapeInfo(),
                        output.specialBuffer(), output.specialShapeInfo(),
                        nullptr);  // extra params

    ASSERT_TRUE(output.equalsTo(expected));
}
|
||
|
|
||
|
// Cosine via execTransformStrict over a fixed 5x5 input; the result is
// compared against a table of precomputed reference values.
//
// `extra` is value-initialised so that non-CUDA builds never hand an array of
// indeterminate pointers to the native call.
TEST_F(NativeOpsTests, TransformTest_4) {
    auto x = NDArrayFactory::create<float>('c', {5, 5}, {0, 1, 2, 3, 2, 1, 0, 1.57, 1.57, 1.57, 3.141592, 3.141592,
                                                         3.141592, 0, 0, 0, 0, 1, 1, 2, 2, 2, 1, 0, 0});
    auto output = NDArrayFactory::create<float>('c', {5, 5});
    auto expected = NDArrayFactory::create<float>('c', {5,5}, {1., 0.540302, -0.416147, -0.989992, -0.416147, 0.540302, 1.0,
                                                               0.000796, 0.000796, 0.000796, -1, -1, -1, 1., 1., 1.0, 1.0,
                                                               0.540302, 0.540302, -0.416147, -0.416147, -0.416147, 0.540302, 1., 1.});

    Nd4jPointer extra[6] = {};
#ifdef __CUDABLAS__
    extra[1] = x.getContext()->getCudaStream();
    x.syncToHost();
    printf("Unsupported for CUDA platform yet.\n");
    return;
#endif
    ::execTransformStrict(extra, transform::Cosine,
                          x.buffer(), x.shapeInfo(),
                          x.specialBuffer(), x.specialShapeInfo(),
                          output.buffer(), output.shapeInfo(),
                          output.specialBuffer(), output.specialShapeInfo(),
                          nullptr);  // extra params

    ASSERT_TRUE(output.equalsTo(expected));
}
|
||
|
|
||
|
// Runs ::execScalarTad with scalar::Multiply: a 5x5 array holding 1..25 is
// multiplied by the scalar 10. The reference `z` is 10, 20, ..., 250; the
// native result is written into `exp` and compared against `z`.
TEST_F(NativeOpsTests, ScalarTadTest_1) {
    auto x = NDArrayFactory::create<float>('c', {5, 5});
    auto y = NDArrayFactory::create<float>(10.);
    auto exp = NDArrayFactory::create<float>('c', {5,5});
    auto z = NDArrayFactory::create<float>('c', {5,5});

    // Null every slot up front so the native call never receives
    // indeterminate pointer values on the non-CUDA path.
    Nd4jPointer extra[6] = {nullptr, nullptr, nullptr, nullptr, nullptr, nullptr};
#ifdef __CUDABLAS__
    extra[1] = x.getContext()->getCudaStream();
    x.syncToHost();
    y.syncToHost();
    printf("Unsupported for CUDA platform yet.\n");
    return;
#endif
    x.linspace(1.0);
    z.linspace(10., 10.);
    x.syncToDevice();
    z.syncToDevice();

    // TAD descriptors for both dimensions of the 2D arrays.
    auto dimension = NDArrayFactory::create<int>({0, 1});
    auto dimensions = reinterpret_cast<int*>(dimension.buffer());
    auto tadPackX = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), dimensions, dimension.lengthOf());
    auto tadPackZ = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(z.shapeInfo(), dimensions, dimension.lengthOf());

    ::execScalarTad(extra,
                    scalar::Multiply,
                    x.buffer(), x.shapeInfo(),
                    x.specialBuffer(), x.specialShapeInfo(),
                    exp.buffer(), exp.shapeInfo(),
                    exp.specialBuffer(), exp.specialShapeInfo(),
                    y.buffer(), y.shapeInfo(),
                    y.specialBuffer(), y.specialShapeInfo(),
                    nullptr,
                    dimension.buffer(), dimension.shapeInfo(),
                    dimension.specialBuffer(), dimension.specialShapeInfo(),
                    tadPackX.primaryShapeInfo(), tadPackX.primaryOffsets(), tadPackZ.primaryShapeInfo(), tadPackZ.primaryOffsets());

    ASSERT_TRUE(exp.equalsTo(z));
}
|
||
|
|
||
|
// Runs ::execScalarBoolTad with scalar::And: a 5x5 bool array that is true only
// at flat indices 5 and 15 is AND-ed with the scalar `true`. The result is
// written into `exp`; positions 5 and 15 are checked against the all-true `z`.
TEST_F(NativeOpsTests, ScalarTadTest_2) {
    auto x = NDArrayFactory::create<bool>('c', {5, 5});
    auto y = NDArrayFactory::create<bool>(true);
    auto exp = NDArrayFactory::create<bool>('c', {5,5});
    auto z = NDArrayFactory::create<bool>('c', {5, 5});

    // Null every slot up front so the native call never receives
    // indeterminate pointer values on the non-CUDA path.
    Nd4jPointer extra[6] = {nullptr, nullptr, nullptr, nullptr, nullptr, nullptr};
#ifdef __CUDABLAS__
    extra[1] = x.getContext()->getCudaStream();
    x.syncToHost();
    y.syncToHost();
    printf("Unsupported for CUDA platform yet.\n");
    return;
#endif
    x.assign(false);
    x.p(5, true);
    x.p(15, true);
    x.syncToDevice();
    z.syncToDevice();

    // TAD descriptors for both dimensions of the 2D arrays.
    auto dimension = NDArrayFactory::create<int>({0, 1});
    auto dimensions = reinterpret_cast<int*>(dimension.buffer());
    auto tadPackX = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), dimensions, dimension.lengthOf());
    auto tadPackZ = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(z.shapeInfo(), dimensions, dimension.lengthOf());
    z.assign(true);

    ::execScalarBoolTad(extra,
                        scalar::And,
                        x.buffer(), x.shapeInfo(),
                        x.specialBuffer(), x.specialShapeInfo(),
                        exp.buffer(), exp.shapeInfo(),
                        exp.specialBuffer(), exp.specialShapeInfo(),
                        y.buffer(), y.shapeInfo(),
                        y.specialBuffer(), y.specialShapeInfo(),
                        nullptr,
                        dimension.buffer(), dimension.shapeInfo(),
                        dimension.specialBuffer(), dimension.specialShapeInfo(),
                        tadPackX.primaryShapeInfo(), tadPackX.primaryOffsets(), tadPackZ.primaryShapeInfo(), tadPackZ.primaryOffsets());

    // Both positions that were true in the input are compared with the
    // reference; the original only truth-tested index 15.
    ASSERT_TRUE(exp.e<bool>(5) == z.e<bool>(5));
    ASSERT_TRUE(exp.e<bool>(15) == z.e<bool>(15));
}
|
||
|
|
||
|
// Runs ::flatten with offset 25 and order 'c', copying the 5x5 array `x`
// (1, 3, 5, ...) into the 2x5x5 array `z`. The reference `exp` has the same
// sequence in its second 5x5 slice and is compared against `z`.
TEST_F(NativeOpsTests, FlattenTest_1) {
    auto x = NDArrayFactory::create<float>('c', {5, 5});
    auto y = NDArrayFactory::create<float>('c', {5, 5});
    auto exp = NDArrayFactory::create<float>('c', {2, 5,5});
    auto z = NDArrayFactory::create<float>('c', {2, 5,5});

    // Null every slot up front so the native call never receives
    // indeterminate pointer values on the non-CUDA path.
    Nd4jPointer extra[6] = {nullptr, nullptr, nullptr, nullptr, nullptr, nullptr};
#ifdef __CUDABLAS__
    extra[1] = x.getContext()->getCudaStream();
    x.syncToHost();
    y.syncToHost();
    printf("Unsupported for CUDA platform yet.\n");
    return;
#endif
    x.linspace(1.0,2);
    y.linspace(2,2);

    x.syncToDevice();
    z.syncToDevice();
    auto dimension = NDArrayFactory::create<int>({0, 1});
    auto dimensions = reinterpret_cast<int*>(dimension.buffer());
    auto tadPackX = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), dimensions, dimension.lengthOf());
    auto tadPackZ = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(z.shapeInfo(), dimensions, dimension.lengthOf());

    // Fill only the second slice along dimension 0 of the reference.
    exp(1, {0}).linspace(1,2);

    ::flatten(extra,
              25, 'c', z.buffer(), z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(),
              x.buffer(), x.shapeInfo(),
              x.specialBuffer(), x.specialShapeInfo());

    ASSERT_TRUE(exp.equalsTo(z));
}
|
||
|
|
||
|
// Runs ::concat on two 5x5 arrays (1..25 and 26..50) along dimension 0.
// The 10x5 result `z` is compared with `exp`, which holds 1..50.
TEST_F(NativeOpsTests, ConcatTest_1) {
    auto x = NDArrayFactory::create<float>('c', {5, 5});
    auto y = NDArrayFactory::create<float>('c', {5, 5});
    auto exp = NDArrayFactory::create<float>('c', {10,5});
    auto z = NDArrayFactory::create<float>('c', {10,5});

    // Null every slot up front so the native call never receives
    // indeterminate pointer values on the non-CUDA path.
    Nd4jPointer extra[6] = {nullptr, nullptr, nullptr, nullptr, nullptr, nullptr};
#ifdef __CUDABLAS__
    extra[1] = x.getContext()->getCudaStream();
    x.syncToHost();
    y.syncToHost();
    printf("Unsupported for CUDA platform yet.\n");
    return;
#endif
    x.linspace(1.0);
    y.linspace(26);

    x.syncToDevice();
    z.syncToDevice();
    auto dimension = NDArrayFactory::create<int>('c', {1}, {(int)0});
    auto dimensions = reinterpret_cast<int*>(dimension.buffer());
    auto tadPackZ = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(z.shapeInfo(), dimensions, dimension.lengthOf());
    exp.linspace(1);

    // Host buffers and shape descriptors of the two inputs, in concat order.
    Nd4jPointer datas[] = {x.buffer(), y.buffer()};
    Nd4jPointer shapes[] = {x.shapeInfo(), y.shapeInfo()};

    ::concat(extra,
             0, 2, datas, shapes, nullptr, nullptr, z.buffer(), z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(),
             nullptr, nullptr);

    ASSERT_TRUE(exp.equalsTo(z));
}
|
||
|
|
||
|
// Runs ::specialConcat on two 5x5 arrays (1..25 and 26..50) along dimension 0.
// The 10x5 result `z` is compared with `exp`, which holds 1..50.
TEST_F(NativeOpsTests, ConcatTest_2) {
    auto x = NDArrayFactory::create<float>('c', {5, 5});
    auto y = NDArrayFactory::create<float>('c', {5, 5});
    auto exp = NDArrayFactory::create<float>('c', {10,5});
    auto z = NDArrayFactory::create<float>('c', {10,5});

    // Null every slot up front so the native call never receives
    // indeterminate pointer values on the non-CUDA path.
    Nd4jPointer extra[6] = {nullptr, nullptr, nullptr, nullptr, nullptr, nullptr};
#ifdef __CUDABLAS__
    extra[1] = x.getContext()->getCudaStream();
    x.syncToHost();
    y.syncToHost();
    printf("Unsupported for CUDA platform yet.\n");
    return;
#endif
    x.linspace(1.0);
    y.linspace(26);

    x.syncToDevice();
    z.syncToDevice();
    auto dimension = NDArrayFactory::create<int>('c', {1}, {(int)0});
    auto dimensions = reinterpret_cast<int*>(dimension.buffer());
    auto tadPackZ = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(z.shapeInfo(), dimensions, dimension.lengthOf());
    exp.linspace(1);

    // Host buffers and shape descriptors of the two inputs, in concat order.
    Nd4jPointer datas[] = {x.buffer(), y.buffer()};
    Nd4jPointer shapes[] = {x.shapeInfo(), y.shapeInfo()};

    ::specialConcat(extra,
                    0, 2, datas, shapes, z.buffer(), z.shapeInfo(), nullptr, nullptr);

    ASSERT_TRUE(exp.equalsTo(z));
}
|
||
|
|
||
|
// Placeholder test: the only statement is disabled, so the body is a no-op.
TEST_F(NativeOpsTests, InitializeTest_1) {
    // Disabled call kept for reference:
    // ::initializeDevicesAndFunctions();
}
|
||
|
|
||
|
// Smoke test: requests 16 bytes through ::mallocHost and ::mallocDevice and
// releases each block right away. There are no assertions; the test passes
// as long as the calls return.
TEST_F(NativeOpsTests, MallocTest_1) {
    auto hostBlock = ::mallocHost(16, 0);
    ::freeHost(hostBlock);

    auto deviceBlock = ::mallocDevice(16, 0, 0);
    ::freeDevice(deviceBlock, 0);
}
|
||
|
|
||
|
// Smoke test: queries the OpenMP thread counters. The values are not checked
// and the matching setters stay disabled.
TEST_F(NativeOpsTests, OMPTest_1) {
    auto threadLimit = ::ompGetMaxThreads();
    auto threadCount = ::ompGetNumThreads();

    // Disabled setters kept for reference:
    //::setOmpMinThreads(threadLimit);
    //::setOmpNumThreads(threadCount);
}
|
||
|
|
||
|
// Smoke test: creates a context, a stream and an event through the native API.
// The event is destroyed, the context is deleted as a LaunchContext, and a
// non-null stream only triggers a diagnostic message.
TEST_F(NativeOpsTests, CreateTest_1) {
    auto context = ::createContext();
    auto stream = ::createStream();
    auto event = ::createEvent();

    ::destroyEvent(event);

    if (context) {
        delete (LaunchContext*)context;
    }

    if (stream) {
        printf("Stream should be destoyed before.");
    }
}
|
||
|
|
||
|
// Checks that ::memcpyAsync and ::memsetAsync both return 0 when applied to the
// host buffer of a 5-element double array. Skipped on CUDA builds.
TEST_F(NativeOpsTests, MemTest_1) {
    auto x = NDArrayFactory::create<double>({10, 20, 30, 40, 50});
    auto y = NDArrayFactory::create<double>({20, 20, 20, 20, 20});

#ifdef __CUDABLAS__
    return ;
#endif

    // The synchronous ::memcpy / ::memset variants are intentionally not exercised here.
    auto copyStatus = ::memcpyAsync(x.buffer(), y.buffer(), x.lengthOf() * sizeof(double), 0, nullptr);
    ASSERT_TRUE(0 == copyStatus);

    auto fillStatus = ::memsetAsync(x.buffer(), 119, x.lengthOf() * sizeof(double), 0, nullptr);
    ASSERT_TRUE(0 == fillStatus);
}
|
||
|
|
||
|
// Runs pullRows to gather rows 0, 2, 3 and 4 of a 5x1 array into a 4x1 array
// and compares the result `z` with the hand-built `exp`.
TEST_F(NativeOpsTests, PullRowsTest_1) {
    NDArray x('c', {5, 1}, {0,1,2,3,4});
    NDArray z('c', {4, 1}, nd4j::DataType::DOUBLE);
    NDArray exp('c', {4, 1}, {0,2,3,4});

    // Row indices to pull, replicated through the PointersManager.
    Nd4jLong indexes[] = {0,2,3,4};
    PointersManager pm(LaunchContext::defaultContext(), "NativeOpsTests::pullRows");
    auto pidx = reinterpret_cast<Nd4jLong *>(pm.replicatePointer(indexes, 4 * sizeof(Nd4jLong)));

    std::vector<int> dims = {1};

    auto xTadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(x.getShapeInfo(), dims);
    auto zTadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(z.getShapeInfo(), dims);

    // Null both slots so no indeterminate pointer value is handed to the
    // native call; only the CUDA build fills in the stream.
    Nd4jPointer nativeStart[2] = {nullptr, nullptr};

#ifdef __CUDABLAS__
    nativeStart[1] = (x.getContext()->getCudaStream());
#endif

    pullRows(nativeStart, x.buffer(), x.getShapeInfo(), x.getSpecialBuffer(), x.getSpecialShapeInfo(),
             z.buffer(), z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(),
             4, pidx,
             xTadPack.platformShapeInfo(), xTadPack.platformOffsets(),
             zTadPack.platformShapeInfo(), zTadPack.platformOffsets());

    // Synchronize before the fatal assertion: ASSERT_TRUE returns from the
    // test on failure, which previously skipped this call.
    pm.synchronize();
    ASSERT_TRUE(z.equalsTo(exp));
}
|
||
|
|
||
|
// Checks that ::tadOnlyShapeInfo returns a non-null TadPack for dimension 1 of
// a 2x3x4 int array. The test deletes the returned pack itself.
TEST_F(NativeOpsTests, TadPackTest_1) {
    auto x = NDArrayFactory::create<int>('c', {2,3,4});

    int const dimensionLength = 1;
    int dimension[] = {1};

    nd4j::TadPack* pack = ::tadOnlyShapeInfo(x.shapeInfo(), dimension, dimensionLength);

    ASSERT_TRUE(pack != nullptr);
    delete pack;
}
|
||
|
|
||
|
// Runs ::average over two references to the same 5x5 array (1..25). The result
// `z` is expected to equal the input, which is what `exp` holds.
// Skipped on CUDA builds.
TEST_F(NativeOpsTests, AverageTest_1) {
    auto x = NDArrayFactory::create<float>('c', {5, 5});
    auto y = NDArrayFactory::create<float>('c', {5, 5});
    auto exp = NDArrayFactory::create<float>('c', {5,5});
    auto z = NDArrayFactory::create<float>('c', {5,5});
#ifdef __CUDABLAS__
    return;
#endif
    x.linspace(1);
    exp.linspace(1);

    // The same array is supplied twice, as host and as special buffers.
    Nd4jPointer xList[] = {x.buffer(), x.buffer()};
    Nd4jPointer dxList[] = {x.specialBuffer(), x.specialBuffer()};

    ::average(nullptr,
              xList, x.shapeInfo(),
              dxList, x.specialShapeInfo(),
              z.buffer(), z.shapeInfo(),
              z.specialBuffer(), z.specialShapeInfo(),
              2,
              x.lengthOf(),
              true);

    ASSERT_TRUE(z.equalsTo(exp));
}
|
||
|
|
||
|
// Runs ::accumulate over two references to the same 5x5 array (1..25). The
// result `z` is expected to be 2, 4, ..., 50, which is what `exp` holds.
// Skipped on CUDA builds.
TEST_F(NativeOpsTests, AccumulateTest_1) {
    auto x = NDArrayFactory::create<float>('c', {5, 5});
    auto y = NDArrayFactory::create<float>('c', {5, 5});
    auto exp = NDArrayFactory::create<float>('c', {5,5});
    auto z = NDArrayFactory::create<float>('c', {5,5});
#ifdef __CUDABLAS__
    return;
#endif
    x.linspace(1);
    exp.linspace(2,2);

    // The same array is supplied twice, as host and as special buffers.
    Nd4jPointer xList[] = {x.buffer(), x.buffer()};
    Nd4jPointer dxList[] = {x.specialBuffer(), x.specialBuffer()};

    ::accumulate(nullptr,
                 xList, x.shapeInfo(),
                 dxList, x.specialShapeInfo(),
                 z.buffer(), z.shapeInfo(),
                 z.specialBuffer(), z.specialShapeInfo(),
                 2,
                 x.lengthOf());

    ASSERT_TRUE(z.equalsTo(exp));
}
|
||
|
|
||
|
// Smoke test for the peer-to-peer entry points: enables P2P, then calls the
// check and availability queries. Return values are ignored.
TEST_F(NativeOpsTests, P2PTest_1) {
    ::enableP2P(true);

    ::checkP2P();
    ::isP2PAvailable();
}
|
||
|
|
||
|
// Smoke test for ::shuffle with two array slots and the row map
// {1, 0, 4, 3, 2}. The result check is disabled, so the test only verifies
// that the call completes. Skipped on CUDA builds.
TEST_F(NativeOpsTests, ShuffleTest_1) {
    auto x = NDArrayFactory::create<float>('c', {5, 5});
    auto y = NDArrayFactory::create<float>('c', {5, 5});
    auto exp = NDArrayFactory::create<float>('c', {5,5});
    auto z = NDArrayFactory::create<float>('c', {5,5});
#ifdef __CUDABLAS__
    return;
#endif
    x.linspace(1);
    y.linspace(34);
    exp.linspace(2,2);

    // Input buffers and shapes (host and special).
    Nd4jPointer xList[] = {x.buffer(), x.buffer()};
    Nd4jPointer dxList[] = {x.specialBuffer(), y.specialBuffer()};
    Nd4jPointer xShapeList[] = {x.shapeInfo(), y.shapeInfo()};
    Nd4jPointer dxShapeList[] = {x.specialShapeInfo(), y.specialShapeInfo()};

    // Output buffers and shapes (host and special); both slots point at z.
    Nd4jPointer zList[] = {z.buffer(), z.buffer()};
    Nd4jPointer dzList[] = {z.specialBuffer(), z.specialBuffer()};
    Nd4jPointer zShapeList[] = {z.shapeInfo(), z.shapeInfo()};
    Nd4jPointer dzShapeList[] = {z.specialShapeInfo(), z.specialShapeInfo()};

    int shuffleMap[] = {1, 0, 4, 3, 2};

    // One TAD pack (built from x's shape) is shared by both slots.
    auto zTadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(x.getShapeInfo(), {1});
    Nd4jPointer zListOffset[] = {zTadPack.platformOffsets(), zTadPack.platformOffsets()};
    Nd4jPointer zListTADs[] = {zTadPack.platformShapeInfo(), zTadPack.platformShapeInfo()};

    ::shuffle(nullptr,
              xList, xShapeList,
              dxList, dxShapeList,
              zList, zShapeList,
              dzList, dzShapeList,
              2,
              shuffleMap, zListTADs, zListOffset);

    // Result verification is currently disabled:
    // ASSERT_TRUE(z.equalsTo(exp));
}
|
||
|
|
||
|
// Runs ::convertTypes to convert a 5x5 float array (2, 4, ..., 50) to double
// and compares the converted buffer `z` with a double array holding the same
// sequence. Skipped on CUDA builds.
TEST_F(NativeOpsTests, ConvertTypesTest_1) {
    auto x = NDArrayFactory::create<float>('c', {5, 5});

    auto exp = NDArrayFactory::create<double>('c', {5, 5});
    auto z = NDArrayFactory::create<double>('c', {5, 5});

#ifdef __CUDABLAS__
    return;
#endif
    x.linspace(2, 2);
    exp.linspace(2, 2);

    ::convertTypes(nullptr,
                   ND4J_FLOAT32, x.buffer(), x.lengthOf(),
                   ND4J_DOUBLE, z.buffer());

    ASSERT_TRUE(z.equalsTo(exp));
}
|
||
|
|
||
|
//TEST_F(NativeOpsTests, Test_Aggregations_1) {
|
||
|
// NativeOps ops;
|
||
|
// auto x = NDArrayFactory::create<float>('c', {5,5});
|
||
|
// auto y = NDArrayFactory::create<float>('c', {5,5});
|
||
|
//
|
||
|
//
|
||
|
// ops.execAggregate(nullptr, 0, maxArgs, maxShapes, maxIntArrays, maxIntArraySize, maxIndexArguments, maxRealArguments, pointer.data(), nd4j::DataType::FLOAT32);
|
||
|
// void **arguments,
|
||
|
// int numArguments,
|
||
|
// Nd4jLong **shapeArguments,
|
||
|
// int numShapeArguments,
|
||
|
// int *indexArguments,
|
||
|
// int numIndexArguments,
|
||
|
// int **intArrays,
|
||
|
// int numIntArrays,
|
||
|
// void *realArguments,
|
||
|
// int numRealArguments,
|
||
|
// nd4j::DataType dtype
|
||
|
//}
|
||
|
|
||
|
// Smoke test for ::execRandom with random::BernoulliDistribution (p = 0.5) on
// a 100-element double array. The output is not checked.
// Skipped on CUDA builds.
TEST_F(NativeOpsTests, RandomTest_1) {
    auto z = NDArrayFactory::create<double>('c', {100});
    Nd4jPointer extra[] = {nullptr, nullptr};
#ifdef __CUDABLAS__
    return;
    // Unreachable while the early return above is in place.
    extra[1] = z.getContext()->getCudaStream();
#endif
    double p = 0.5;
    graph::RandomGenerator rng(1023, 119);

    ::execRandom(extra, random::BernoulliDistribution, &rng,
                 z.buffer(), z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(),
                 &p);
}
|
||
|
|
||
|
// Smoke test for ::execRandom2 with random::DropOut (p = 0.5): input `x` is
// 0, 0.01, 0.02, ... and the result goes to `z`. The output is not checked.
// Skipped on CUDA builds.
TEST_F(NativeOpsTests, RandomTest_2) {
    auto x = NDArrayFactory::create<double>('c', {100});
    auto z = NDArrayFactory::create<double>('c', {100});
    Nd4jPointer extra[] = {nullptr, nullptr};
#ifdef __CUDABLAS__
    return;
    // Unreachable while the early return above is in place.
    extra[1] = z.getContext()->getCudaStream();
#endif
    x.linspace(0, 0.01);

    graph::RandomGenerator rng(1023, 119);
    double p = 0.5;

    ::execRandom2(extra, random::DropOut, &rng,
                  x.buffer(), x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(),
                  z.buffer(), z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(),
                  &p);
}
|
||
|
|
||
|
// Smoke test for ::execRandom3 with random::ProbablisticMerge (p = 0.5):
// `x` rises from 0 in steps of 0.01, `y` falls from 1 in steps of 0.01, and the
// result goes to `z`. The output is not checked. Skipped on CUDA builds.
TEST_F(NativeOpsTests, RandomTest_3) {
    auto x = NDArrayFactory::create<double>('c', {100});
    auto y = NDArrayFactory::create<double>('c', {100});
    auto z = NDArrayFactory::create<double>('c', {100});
    Nd4jPointer extra[] = {nullptr, nullptr};
#ifdef __CUDABLAS__
    return;
    // Unreachable while the early return above is in place.
    extra[1] = z.getContext()->getCudaStream();
#endif
    x.linspace(0, 0.01);
    // The second fill belongs to y: the original filled x twice and left the
    // second operand of the merge untouched.
    y.linspace(1, -0.01);

    graph::RandomGenerator rng(1023, 119);
    double p = 0.5;

    ::execRandom3(extra, random::ProbablisticMerge, &rng,
                  x.buffer(), x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(),
                  y.buffer(), y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(),
                  z.buffer(), z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(),
                  &p);
}
|
||
|
|
||
|
// Smoke test for the random generator lifecycle: create it with ::initRandom,
// call ::refreshBuffer and ::reSeedBuffer, then release it with
// ::destroyRandom. Skipped on CUDA builds.
TEST_F(NativeOpsTests, RandomTest_4) {
#ifdef __CUDABLAS__
    return ;
#endif
    auto generator = (graph::RandomGenerator*)::initRandom(nullptr, 1023, 0, nullptr);

    ::refreshBuffer(nullptr, 1203L, generator);
    ::reSeedBuffer(nullptr, 3113L, generator);

    ::destroyRandom(generator);
}
|
||
|
|
||
|
// Runs ::sort in place on an 18-element int vector and compares it with the
// ascending reference. Skipped on CUDA builds.
TEST_F(NativeOpsTests, SortTest_1) {
#ifdef __CUDABLAS__
    return ;
#endif
    auto sortedVals = NDArrayFactory::create<int>(
            {10, 1, 5, 120, 34, 5, 78, 138, 3, 111, 331, 29, 91, 71, 73, 50, 56, 4});
    auto exp = NDArrayFactory::create<int>({1, 3, 4, 5, 5, 10, 29, 34, 50, 56, 71, 73, 78, 91, 111, 120, 138, 331});

    ::sort(nullptr,
           sortedVals.buffer(), sortedVals.shapeInfo(),
           sortedVals.specialBuffer(), sortedVals.specialShapeInfo(),
           false);

    ASSERT_TRUE(sortedVals.equalsTo(exp));
}
|
||
|
|
||
|
// Runs ::sortByKey on a 10-element key vector and its paired value vector,
// then checks both against the ascending references `ek` and `ev`.
TEST_F(NativeOpsTests, SortTests_2) {
    auto k = NDArrayFactory::create<Nd4jLong>('c', {10}, {1, 3, 5, 9, 0, 2, 4, 6, 7, 8});
    auto v = NDArrayFactory::create<double>('c', {10}, {1.5, 3.5, 5.5, 9.5, 0.5, 2.5, 4.5, 6.5, 7.5, 8.5});

    auto ek = NDArrayFactory::create<Nd4jLong>('c', {10}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9});
    auto ev = NDArrayFactory::create<double>('c', {10}, {0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5});

    // Null both slots so no indeterminate pointer value is handed to the
    // native call; only the CUDA build fills in the stream.
    Nd4jPointer extras[2] = {nullptr, nullptr};
#ifdef __CUDABLAS__
    extras[1] = LaunchContext::defaultContext()->getCudaStream();
#endif

    ::sortByKey(extras,
                k.buffer(), k.shapeInfo(), k.specialBuffer(), k.specialShapeInfo(),
                v.buffer(), v.shapeInfo(), v.specialBuffer(), v.specialShapeInfo(),
                false);
    k.tickWriteDevice();
    v.tickWriteDevice();

    ASSERT_EQ(ek, k);
    ASSERT_EQ(ev, v);
}
|
||
|
|
||
|
// Runs ::sortByKey on a 10-element key vector and its paired value vector,
// then checks both against the ascending references `ek` and `ev`.
// The extras array is fully initialized at declaration in both build flavours.
TEST_F(NativeOpsTests, SortTest_3) {
    auto k = NDArrayFactory::create<Nd4jLong>('c', {10}, {1, 3, 5, 9, 0, 2, 4, 6, 7, 8});
    auto v = NDArrayFactory::create<double>('c', {10}, {1.5, 3.5, 5.5, 9.5, 0.5, 2.5, 4.5, 6.5, 7.5, 8.5});

    auto ek = NDArrayFactory::create<Nd4jLong>('c', {10}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9});
    auto ev = NDArrayFactory::create<double>('c', {10}, {0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5});

#ifdef __CUDABLAS__
    Nd4jPointer extras[2] = {nullptr, LaunchContext::defaultContext()->getCudaStream()};
#else
    // Previously left uninitialized on this path.
    Nd4jPointer extras[2] = {nullptr, nullptr};
#endif

    ::sortByKey(extras,
                k.buffer(), k.shapeInfo(), k.specialBuffer(), k.specialShapeInfo(),
                v.buffer(), v.shapeInfo(), v.specialBuffer(), v.specialShapeInfo(),
                false);
    k.tickWriteDevice();
    v.tickWriteDevice();

    ASSERT_EQ(ek, k);
    ASSERT_EQ(ev, v);
}
|
||
|
|
||
|
// Runs ::sortTad along dimension 1 of a 3x6 int matrix and checks that every
// row ends up in ascending order. Skipped on CUDA builds.
TEST_F(NativeOpsTests, SortTest_4) {
#ifdef __CUDABLAS__
    return ;
#endif
    auto sortedVals = NDArrayFactory::create<int>('c', {3, 6},
                                                  { 10,   1,  5, 120,  34,  5,
                                                    78, 138,  3, 111, 331, 29,
                                                    91,  71, 73,  50,  56,  4});
    auto exp = NDArrayFactory::create<int>('c', {3, 6}, {1, 5, 5, 10, 34, 120, 3, 29, 78, 111, 138, 331, 4, 50, 56, 71, 73, 91});

    std::vector<int> dims({1});
    auto packX = ConstantTadHelper::getInstance()->tadForDimensions(sortedVals.shapeInfo(), {1});

    ::sortTad(nullptr,
              sortedVals.buffer(), sortedVals.shapeInfo(),
              sortedVals.specialBuffer(), sortedVals.specialShapeInfo(),
              dims.data(), dims.size(),
              packX.platformShapeInfo(), packX.platformOffsets(),
              false);

    ASSERT_TRUE(sortedVals.equalsTo(exp));
}
|
||
|
|
||
|
// Runs ::sortTadByKey along axis 1 of 2x10 key and value matrices, then checks
// both against the row-wise ascending references `ek` and `ev`.
TEST_F(NativeOpsTests, SortTests_5) {
    auto k = NDArrayFactory::create<Nd4jLong>('c', {2, 10}, {1, 3, 5, 9, 0, 2, 4, 6, 7, 8, 1, 3, 5, 9, 0, 2, 4, 6, 7, 8});
    auto v = NDArrayFactory::create<double>('c', {2, 10}, {1.5, 3.5, 5.5, 9.5, 0.5, 2.5, 4.5, 6.5, 7.5, 8.5, 1.5, 3.5, 5.5, 9.5, 0.5, 2.5, 4.5, 6.5, 7.5, 8.5});

    auto ek = NDArrayFactory::create<Nd4jLong>('c', {2, 10}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9});
    auto ev = NDArrayFactory::create<double>('c', {2, 10}, {0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5, 0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5});

    // Null both slots so no indeterminate pointer value is handed to the
    // native call; only the CUDA build fills in the stream.
    Nd4jPointer extras[2] = {nullptr, nullptr};
#ifdef __CUDABLAS__
    extras[1] = LaunchContext::defaultContext()->getCudaStream();
#endif

    int axis = 1;

    ::sortTadByKey(extras,
                   k.buffer(), k.shapeInfo(), k.specialBuffer(), k.specialShapeInfo(),
                   v.buffer(), v.shapeInfo(), v.specialBuffer(), v.specialShapeInfo(),
                   &axis, 1, false);
    k.tickWriteDevice();
    v.tickWriteDevice();

    ASSERT_EQ(ek, k);
    ASSERT_EQ(ev, v);
}
|
||
|
|
||
|
// Runs ::sortTadByValue along axis 1 of 2x10 key and value matrices, then
// checks both against the row-wise ascending references `ek` and `ev`.
TEST_F(NativeOpsTests, SortTests_6) {
    auto k = NDArrayFactory::create<Nd4jLong>('c', {2, 10}, {1, 3, 5, 9, 0, 2, 4, 6, 7, 8, 1, 3, 5, 9, 0, 2, 4, 6, 7, 8});
    auto v = NDArrayFactory::create<double>('c', {2, 10}, {1.5, 3.5, 5.5, 9.5, 0.5, 2.5, 4.5, 6.5, 7.5, 8.5, 1.5, 3.5, 5.5, 9.5, 0.5, 2.5, 4.5, 6.5, 7.5, 8.5});

    auto ek = NDArrayFactory::create<Nd4jLong>('c', {2, 10}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9});
    auto ev = NDArrayFactory::create<double>('c', {2, 10}, {0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5, 0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5});

    // Null both slots so no indeterminate pointer value is handed to the
    // native call; only the CUDA build fills in the stream.
    Nd4jPointer extras[2] = {nullptr, nullptr};
#ifdef __CUDABLAS__
    extras[1] = LaunchContext::defaultContext()->getCudaStream();
#endif

    int axis = 1;

    ::sortTadByValue(extras,
                     k.buffer(), k.shapeInfo(), k.specialBuffer(), k.specialShapeInfo(),
                     v.buffer(), v.shapeInfo(), v.specialBuffer(), v.specialShapeInfo(),
                     &axis, 1, false);
    k.tickWriteDevice();
    v.tickWriteDevice();

    ASSERT_EQ(ek, k);
    ASSERT_EQ(ev, v);
}
|
||
|
|
||
|
//TEST_F(NativeOpsTests, MapTests_1) {
|
||
|
//#ifdef __CUDABLAS__
|
||
|
// return ;
|
||
|
//#endif
|
||
|
//#ifdef GTEST_OS_LINUX
|
||
|
// auto ptrMap = ::mmapFile(nullptr, "/tmp/maptest.$$$", 100LL);
|
||
|
//
|
||
|
// ::munmapFile(nullptr, ptrMap, 100LL);
|
||
|
//#endif
|
||
|
//
|
||
|
//}
|
||
|
|
||
|
// Smoke test: calls the two op-listing entry points and discards what they
// return. The printing variants stay disabled.
TEST_F(NativeOpsTests, MapTests_1) {
    // Disabled diagnostics kept for reference:
    //printf("Custom ops: %s\n", ::getAllCustomOps());
    //printf("All ops: %s\n", ::getAllOperations());

    ::getAllCustomOps();
    ::getAllOperations();
}
|
||
|
|
||
|
// Runs the squeeze op through ::execCustomOp on a 1x6 input and expects a
// 6-element vector with the same values, plus an OK status.
TEST_F(NativeOpsTests, CustomOpTest_1) {
    auto x = NDArrayFactory::create<float>('c', {1, 6}, {1, 2, 3, 4, 5, 6});
    auto z = NDArrayFactory::create<float>('c', {6});
    auto e = NDArrayFactory::create<float>('c', {6}, {1, 2, 3, 4, 5, 6});

    nd4j::ops::squeeze op;

    // Each table lists the host pointer followed by the special pointer.
    Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) x.getBuffer(), x.getSpecialBuffer()};
    Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) x.getShapeInfo(), x.getSpecialShapeInfo()};

    Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) z.getBuffer(), z.getSpecialBuffer()};
    Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) z.getShapeInfo(), z.getSpecialShapeInfo()};

    auto status = ::execCustomOp(nullptr, op.getOpHash(),
                                 ptrsInBuffer, ptrsInShapes, 1,
                                 ptrsOutBuffers, ptrsOutShapes, 1,
                                 nullptr, 0, nullptr, 0, nullptr, 0, false);

    ASSERT_EQ(Status::OK(), status);
    ASSERT_EQ(e, z);
}
|
||
|
// Runs the add op through ::execCustomOp2 with a manually populated Context:
// two identical 3x2 inputs are expected to sum to {2, 4, 6, 8, 10, 12}.
TEST_F(NativeOpsTests, CustomOpTests_2) {
    auto array0 = NDArrayFactory::create<float>('c', {3, 2}, {1.f, 2.f, 3.f, 4.f, 5.f, 6.f});
    auto array1 = NDArrayFactory::create<float>('c', {3, 2}, {1.f, 2.f, 3.f, 4.f, 5.f, 6.f});
    auto z = NDArrayFactory::create<float>('c', {3, 2});

    auto exp = NDArrayFactory::create<float>('c', {3, 2}, {2.f, 4.f, 6.f, 8.f, 10.f, 12.f});
    Context ctx(1);

    NDArray::prepareSpecialUse({&z}, {&array0, &array1});

    // Register both inputs and the single output with the context.
    ctx.setInputArray(0, array0.buffer(), array0.shapeInfo(), array0.getSpecialBuffer(), array0.getSpecialShapeInfo());
    ctx.setInputArray(1, array1.buffer(), array1.shapeInfo(), array1.getSpecialBuffer(), array1.getSpecialShapeInfo());
    ctx.setOutputArray(0, z.buffer(), z.shapeInfo(), z.getSpecialBuffer(), z.getSpecialShapeInfo());

    ASSERT_EQ(2, ctx.width());

    nd4j::ops::add op;
    ::execCustomOp2(nullptr, op.getOpHash(), &ctx);

    NDArray::registerSpecialUse({&z}, {&array0, &array1});

    ASSERT_EQ(exp, z);
}
|
||
|
// Asks ::calculateOutputShapes for the conv2d output shape given a
// {1, 2, 5, 4} input and {2, 2, 2, 3} weights, and expects exactly one shape
// of rank 4 with dimensions {1, 3, 5, 4}. Skipped on CUDA builds.
TEST_F(NativeOpsTests, CalculateOutputShapeTests_1) {
    auto input = NDArrayFactory::create<float>('c', {1, 2, 5, 4});
    auto weights = NDArrayFactory::create<float>('c', {2, 2, 2, 3});
    auto exp = NDArrayFactory::create<float>('c', {1, 3, 5, 4});

    nd4j::ops::conv2d op;

    std::vector<double> tArgs({});
    std::vector<Nd4jLong> iArgs({2, 2, 1, 1, 0, 0, 1, 1, 1});

    Nd4jPointer ptrs[] = {(Nd4jPointer) input.getShapeInfo(), (Nd4jPointer) weights.getShapeInfo()};
#ifdef __CUDABLAS__
    return;
#endif

    auto shapeList = ::calculateOutputShapes(nullptr, op.getOpHash(), ptrs, 2, tArgs.data(), tArgs.size(), iArgs.data(), iArgs.size());

    ASSERT_EQ(1, shapeList->size());

    // Compare rank first, then each of the four dimensions in order.
    ASSERT_EQ(exp.rankOf(), shape::rank((Nd4jLong *)shapeList->at(0)));
    for (int dim = 0; dim < 4; ++dim) {
        ASSERT_EQ(exp.sizeAt(dim), shape::shapeOf((Nd4jLong *)shapeList->at(0))[dim]);
    }

    // The list is released through the native API rather than deleted directly.
    ::deleteShapeList((Nd4jPointer) shapeList);
}
|
||
|
|
||
|
// Same conv2d shape check as the previous test, but through
// ::calculateOutputShapes2, which also receives the input data buffers and
// boolean arguments (none here). Skipped on CUDA builds.
TEST_F(NativeOpsTests, CalculateOutputShapeTests_2) {
    auto input = NDArrayFactory::create<float>('c', {1, 2, 5, 4});
    auto weights = NDArrayFactory::create<float>('c', {2, 2, 2, 3});
    auto exp = NDArrayFactory::create<float>('c', {1, 3, 5, 4});

    nd4j::ops::conv2d op;

    std::vector<double> tArgs({});
    std::vector<bool> bArgsF({});
    std::vector<Nd4jLong> iArgs({2, 2, 1, 1, 0, 0, 1, 1, 1});

    Nd4jPointer shapePtrs[] = {(Nd4jPointer) input.getShapeInfo(), (Nd4jPointer) weights.getShapeInfo()};
    Nd4jPointer dataPtrs[] = {(Nd4jPointer)input.buffer(), (Nd4jPointer)weights.buffer()};
#ifdef __CUDABLAS__
    return;
#endif

    // Argument order: extraPointers, hash, inputBuffers, inputShapes,
    // numInputShapes, tArgs, numTArgs, iArgs, numIArgs, bArgs, numBArgs.
    auto shapeList = ::calculateOutputShapes2(nullptr, op.getOpHash(), dataPtrs, shapePtrs, 2,
                                              const_cast<double*>(tArgs.data()), tArgs.size(),
                                              const_cast<Nd4jLong*>(iArgs.data()), iArgs.size(),
                                              nullptr, bArgsF.size());

    ASSERT_EQ(1, shapeList->size());

    // Compare rank first, then each of the four dimensions in order.
    ASSERT_EQ(exp.rankOf(), shape::rank((Nd4jLong *)shapeList->at(0)));
    for (int dim = 0; dim < 4; ++dim) {
        ASSERT_EQ(exp.sizeAt(dim), shape::shapeOf((Nd4jLong *)shapeList->at(0))[dim]);
    }

    // The list is released through the native API rather than deleted directly.
    ::deleteShapeList((Nd4jPointer) shapeList);
}
|
||
|
|
||
|
//Uncomment when needed only - massive calculations
|
||
|
//TEST_F(NativeOpsTests, BenchmarkTests_1) {
|
||
|
//
|
||
|
// printf("%s\n", ::runLightBenchmarkSuit(true));
|
||
|
// printf("%s\n", ::runFullBenchmarkSuit(true));
|
||
|
//}
|