/*******************************************************************************
 * Copyright (c) 2015-2018 Skymind, Inc.
 *
 * This program and the accompanying materials are made available under the
 * terms of the Apache License, Version 2.0 which is available at
 * https://www.apache.org/licenses/LICENSE-2.0.
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 *
 * SPDX-License-Identifier: Apache-2.0
 ******************************************************************************/

//
// Created by GS <sgazeos@gmail.com> on 22.07.2019.
//

#include "testlayers.h"
#include <NDArray.h>
#include <ShapeUtils.h>
#include <reduce3.h>
#include <ops/declarable/LegacyTransformOp.h>
#include <ops/declarable/LegacyPairwiseTransformOp.h>
#include <ops/declarable/LegacyScalarOp.h>
#include <ops/declarable/LegacyReduceSameOp.h>
#include <ops/declarable/LegacyReduceFloatOp.h>
#include <ops/declarable/LegacyIndexReduceOp.h>
#include <ops/declarable/LegacyBroadcastOp.h>
#include <helpers/TAD.h>
#include <helpers/ConstantTadHelper.h>
#include <type_conversions.h>
#include <ops/declarable/CustomOperations.h>

using namespace nd4j;
using namespace nd4j::ops;

class NativeOpsTests : public testing::Test {
public:

};

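// These tests exercise the exported NativeOps C API (::createContext, ::execIndexReduce*,
// ::execBroadcast*, ::execReduce*, ::execScalar*, ::execTransform*, ::flatten, ::concat, ...)
// directly against raw NDArray buffers and shape infos. Most of them are CPU-only and either
// skip the body or return early when __CUDABLAS__ is defined.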
TEST_F(NativeOpsTests, CreateContextTests_1) {
//    auto x = NDArrayFactory::create<float>('c', {5, 5});
//    x.assign(1.0);
//    auto z = NDArrayFactory::create<float>('c', {5,5});
//    auto exp = NDArrayFactory::create<float>('c', {5, 5});
    auto context = ::createContext();
    ASSERT_TRUE(context == nullptr);
    //delete context;
}

TEST_F(NativeOpsTests, CreateContextTests_2) {
//    auto x = NDArrayFactory::create<float>('c', {5, 5});
//    x.assign(1.0);
//    auto z = NDArrayFactory::create<float>('c', {5,5});
//    auto exp = NDArrayFactory::create<float>('c', {5, 5});
    auto context1 = ::createContext();
    auto context2 = ::createContext();
    ASSERT_TRUE(context1 == context2);
    //delete context1;
    //delete context2;
}

TEST_F(NativeOpsTests, PointerTests_1) {
    auto x = NDArrayFactory::create<float>('c', {5}, {1,2,3,4,5});
//    x.linspace(1.0);
#ifdef __CUDABLAS__
    printf("Unsupported for cuda now.\n");
#else
    ::tryPointer(nullptr, x.getBuffer(), 4);
#endif

//    auto exp = NDArrayFactory::create<float>('c', {5, 5});
//    exp.assign(-1.0);
//
//    nd4j::ops::LegacyTransformSameOp op(transform::Neg); // Neg
//    auto result = op.execute({&x}, {}, {});
//
//    ASSERT_EQ(1, result->size());
//
//    auto z = result->at(0);
//
//    ASSERT_TRUE(exp.equalsTo(z));
//
//    delete result;
}

TEST_F(NativeOpsTests, ThresholdTests_1) {
//    auto x = NDArrayFactory::create<float>('c', {5}, {1,2,3,4,5});
//    x.linspace(1.0);
#ifdef __CUDABLAS__
    printf("Unsupported for cuda now.\n");
#else
    ::setElementThreshold(4);
    ASSERT_TRUE(4 == nd4j::Environment::getInstance()->elementwiseThreshold());
#endif
}

TEST_F(NativeOpsTests, ThresholdTests_2) {
//    auto x = NDArrayFactory::create<float>('c', {5}, {1,2,3,4,5});
//    x.linspace(1.0);
#ifdef __CUDABLAS__
    printf("Unsupported for cuda now.\n");
#else
    ::setTADThreshold(4);
    ASSERT_TRUE(4 == nd4j::Environment::getInstance()->tadThreshold());
#endif
}

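// execIndexReduceScalar with indexreduce::IndexMax over the vector {1,...,5} should return
// the position of the maximum, i.e. index 4; the {5,5} variant below reduces over all
// elements and should return 24 for a linspaced 1..25 matrix.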
TEST_F(NativeOpsTests, ExecIndexReduce_1) {
    auto x = NDArrayFactory::create<float>('c', {5}, {1,2,3,4,5});
    auto exp = NDArrayFactory::create<Nd4jLong>(120);
    x.linspace(1.0);
#ifdef __CUDABLAS__
    printf("Unsupported for cuda now.\n");
#else
    ::execIndexReduceScalar(nullptr, indexreduce::IndexMax,
                            x.buffer(), x.shapeInfo(), nullptr, nullptr,
                            nullptr,
                            exp.buffer(), exp.shapeInfo(), nullptr, nullptr);

    ASSERT_TRUE(exp.e<Nd4jLong>(0) == 4LL);
#endif
}

TEST_F(NativeOpsTests, ExecIndexReduce_2) {
    auto x = NDArrayFactory::create<float>('c', {5, 5});
    auto exp = NDArrayFactory::create<Nd4jLong>(120);
    x.linspace(1.0);
#ifdef __CUDABLAS__
    printf("Unsupported for cuda now.\n");
#else
    NDArray dimension = NDArrayFactory::create<int>({});
    ::execIndexReduce(nullptr, indexreduce::IndexMax,
                      x.buffer(), x.shapeInfo(), nullptr, nullptr,
                      nullptr,
                      exp.buffer(), exp.shapeInfo(), nullptr, nullptr,
                      dimension.buffer(), dimension.shapeInfo(), nullptr, nullptr);

    ASSERT_TRUE(exp.e<Nd4jLong>(0) == 24LL);
#endif
}

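// execBroadcast adds the 5-element vector y along dimension 1 of x; with x.linspace(1)
// and y.linspace(2,2), the first output element is 1 + 2 = 3. The Bool variant compares
// x against the broadcast y along dimension 0.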
TEST_F(NativeOpsTests, ExecBroadcast_1) {
    auto x = NDArrayFactory::create<float>('c', {5, 5});
    auto y = NDArrayFactory::create<float>('c', {5, 1});
    auto exp = NDArrayFactory::create<float>('c', {5, 5});
    x.linspace(1.0);
    y.linspace(2,2);
#ifdef __CUDABLAS__
    printf("Unsupported for cuda now.\n");
#else
    auto dimension = NDArrayFactory::create<int>('c', {1}, {1});

    ::execBroadcast(nullptr, broadcast::Add,
                    x.buffer(), x.shapeInfo(), nullptr, nullptr,
                    y.buffer(), y.shapeInfo(), nullptr, nullptr,
                    exp.buffer(), exp.shapeInfo(), nullptr, nullptr,
                    dimension.buffer(), dimension.shapeInfo(), nullptr, nullptr);

    ASSERT_TRUE(exp.e<float>(0) == 3.);
#endif
}

TEST_F(NativeOpsTests, ExecBroadcast_2) {
    auto x = NDArrayFactory::create<float>('c', {5, 5});
    auto y = NDArrayFactory::create<float>('c', {5, 1});
    auto exp = NDArrayFactory::create<bool>('c', {5, 5});
    x.linspace(1.0);
    y.linspace(2,2);
#ifdef __CUDABLAS__
    printf("Unsupported for cuda now.\n");
#else
    auto dimension = NDArrayFactory::create<int>('c', {1}, {(int)0});

    ::execBroadcastBool(nullptr, broadcast::EqualTo,
                        x.buffer(), x.shapeInfo(), nullptr, nullptr,
                        y.buffer(), y.shapeInfo(), nullptr, nullptr,
                        exp.buffer(), exp.shapeInfo(), nullptr, nullptr,
                        dimension.buffer(), dimension.shapeInfo(), nullptr, nullptr);
    ASSERT_TRUE(exp.e<bool>(1) && !exp.e<bool>(0));
#endif
}

TEST_F(NativeOpsTests, ExecPairwise_1) {
    auto x = NDArrayFactory::create<float>('c', {5, 5});
    auto y = NDArrayFactory::create<float>('c', {5, 5});
    auto exp = NDArrayFactory::create<float>('c', {5, 5});
    x.linspace(1.0);
    y.assign(2.);
#ifdef __CUDABLAS__
    printf("Unsupported for cuda now.\n");
#else
    ::execPairwiseTransform(nullptr, pairwise::Add,
                            x.buffer(), x.shapeInfo(), nullptr, nullptr,
                            y.buffer(), y.shapeInfo(), nullptr, nullptr,
                            exp.buffer(), exp.shapeInfo(), nullptr, nullptr,
                            nullptr);
    ASSERT_TRUE(exp.e<float>(5) == 8.);
#endif
}

TEST_F(NativeOpsTests, ExecPairwise_2) {
    auto x = NDArrayFactory::create<bool>('c', {5, 5});
    auto y = NDArrayFactory::create<bool>('c', {5, 5});
    auto exp = NDArrayFactory::create<bool>('c', {5, 5});
    x.assign(true);
    y.assign(false);
    y.t<bool>(5) = true;
#ifdef __CUDABLAS__
    printf("Unsupported for cuda now.\n");
#else
    ::execPairwiseTransformBool(nullptr, pairwise::And,
                                x.buffer(), x.shapeInfo(), nullptr, nullptr,
                                y.buffer(), y.shapeInfo(), nullptr, nullptr,
                                exp.buffer(), exp.shapeInfo(), nullptr, nullptr,
                                nullptr);
    ASSERT_TRUE(exp.e<bool>(5) && !exp.e<bool>(4));
#endif
}

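// Full reductions over the linspaced 1..25 matrix: Mean yields (1 + 25) / 2 = 13,
// Sum yields 325, All over non-zero floats is true, and CountNonZero is 25.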
TEST_F(NativeOpsTests, ReduceTest_1) {
    auto x = NDArrayFactory::create<float>('c', {5, 5});
    auto exp = NDArrayFactory::create<float>(120.);
    x.linspace(1.0);

#ifdef __CUDABLAS__
    printf("Unsupported for cuda now.\n");
#else
    auto dimension = NDArrayFactory::create<int>('c', {1}, {1});

    ::execReduceFloat(nullptr, reduce::Mean,
                      x.buffer(), x.shapeInfo(), nullptr, nullptr,
                      nullptr,
                      exp.buffer(), exp.shapeInfo(), nullptr, nullptr);
//    x.printIndexedBuffer("Input");
//    exp.printIndexedBuffer("Reduce Mean");
    ASSERT_TRUE(exp.e<float>(0) == 13.);
#endif
}

TEST_F(NativeOpsTests, ReduceTest_2) {
    auto x = NDArrayFactory::create<float>('c', {5, 5});
    auto exp = NDArrayFactory::create<float>(120.);
    x.linspace(1.0);

#ifdef __CUDABLAS__
    printf("Unsupported for cuda now.\n");
#else
    ::execReduceSame(nullptr, reduce::Sum,
                     x.buffer(), x.shapeInfo(), nullptr, nullptr,
                     nullptr,
                     exp.buffer(), exp.shapeInfo(), nullptr, nullptr);
//    x.printIndexedBuffer("Input");
//    exp.printIndexedBuffer("Reduce Sum");
    ASSERT_TRUE(exp.e<float>(0) == 325.);
#endif
}

TEST_F(NativeOpsTests, ReduceTest_3) {
    auto x = NDArrayFactory::create<float>('c', {5, 5});
    auto exp = NDArrayFactory::create<bool>(false);
    x.linspace(1.0);

#ifdef __CUDABLAS__
    printf("Unsupported for cuda now.\n");
#else
    ::execReduceBool(nullptr, reduce::All,
                     x.buffer(), x.shapeInfo(), nullptr, nullptr,
                     nullptr,
                     exp.buffer(), exp.shapeInfo(), nullptr, nullptr);
//    x.printIndexedBuffer("Input");
//    exp.printIndexedBuffer("Reduce All");
    ASSERT_TRUE(exp.e<bool>(0) == true);
#endif
}

TEST_F(NativeOpsTests, ReduceTest_4) {
    auto x = NDArrayFactory::create<float>('c', {5, 5});
    auto exp = NDArrayFactory::create<Nd4jLong>(120LL);
    x.linspace(1.0);

#ifdef __CUDABLAS__
    printf("Unsupported for cuda now.\n");
#else
    ::execReduceLong(nullptr, reduce::CountNonZero,
                     x.buffer(), x.shapeInfo(), nullptr, nullptr,
                     nullptr,
                     exp.buffer(), exp.shapeInfo(), nullptr, nullptr);
//    x.printIndexedBuffer("Input");
//    exp.printIndexedBuffer("Reduce CountNonZero");
    ASSERT_TRUE(exp.e<Nd4jLong>(0) == 25LL);
#endif
}

TEST_F(NativeOpsTests, ReduceTest_5) {
    auto x = NDArrayFactory::create<float>('c', {5, 5});
    auto exp = NDArrayFactory::create<Nd4jLong>(120LL);
    x.linspace(1.0);

#ifdef __CUDABLAS__
    printf("Unsupported for cuda now.\n");
#else
    auto dimension = NDArrayFactory::create<int>({0, 1});
    ::execReduceLong2(nullptr, reduce::CountNonZero,
                      x.buffer(), x.shapeInfo(), nullptr, nullptr,
                      nullptr,
                      exp.buffer(), exp.shapeInfo(), nullptr, nullptr,
                      dimension.buffer(), dimension.shapeInfo(),
                      dimension.specialBuffer(), dimension.specialShapeInfo());
//    x.printIndexedBuffer("Input");
//    exp.printIndexedBuffer("Reduce CountNonZero");
    ASSERT_TRUE(exp.e<Nd4jLong>(0) == 25LL);
#endif
}

TEST_F(NativeOpsTests, ReduceTest_6) {
    auto x = NDArrayFactory::create<float>('c', {5, 5});
    auto z = NDArrayFactory::create<Nd4jLong>({5, 4, 3, 2, 1});
    auto exp = NDArrayFactory::create<Nd4jLong>({1,2,3,4,6});
    x.linspace(1.0);

#ifdef __CUDABLAS__
    printf("Unsupported for cuda now.\n");
#else
    auto dimension = NDArrayFactory::create<int>('c', {1}, {1});
    x.p(5, 0);
    x.p(10, 0); x.p(11, 0);
    x.p(15, 0); x.p(16, 0); x.p(17, 0);
    x.p(20, 0); x.p(21, 0); x.p(22, 0); x.p(23, 0);
    ::execReduceLong2(nullptr, reduce::CountNonZero,
                      x.buffer(), x.shapeInfo(), nullptr, nullptr,
                      nullptr,
                      exp.buffer(), exp.shapeInfo(), nullptr, nullptr,
                      dimension.buffer(), dimension.shapeInfo(),
                      dimension.specialBuffer(), dimension.specialShapeInfo());
//    x.printIndexedBuffer("Input");
//    exp.printIndexedBuffer("Reduce CountNonZero");
    ASSERT_TRUE(exp.equalsTo(z));
#endif
}

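// From here on the tests pass an Nd4jPointer extra[6] block to the *2 reduce variants,
// which take an explicit dimension array. The extras are only populated on the CUDA
// backend, where extra[1] carries the CUDA stream and the rest are left null.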
TEST_F(NativeOpsTests, ReduceTest_7) {
    auto x = NDArrayFactory::create<float>('c', {5, 5});
    auto exp = NDArrayFactory::create<float>(120.);
    auto z = NDArrayFactory::create<float>(13.);

    auto dimension = NDArrayFactory::create<int>('c', {2}, {0, 1});
    Nd4jPointer extra[6];
#ifdef __CUDABLAS__
    x.syncToHost();
    extra[1] = x.getContext()->getCudaStream();
    extra[0] = extra[2] = extra[3] = extra[4] = extra[5] = nullptr;
#endif
    x.linspace(1.0);
    x.syncToDevice();
    dimension.syncToHost();
    ::execReduceFloat2(extra, reduce::Mean,
                       x.buffer(), x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(),
                       nullptr,
                       exp.buffer(), exp.shapeInfo(), exp.specialBuffer(), exp.specialShapeInfo(),
                       dimension.buffer(), dimension.shapeInfo(),
                       dimension.specialBuffer(), dimension.specialShapeInfo());
//    x.printIndexedBuffer("Input");
//    exp.printIndexedBuffer("Reduce Mean");
    ASSERT_TRUE(exp.equalsTo(z));
}

TEST_F(NativeOpsTests, ReduceTest_8) {
    auto x = NDArrayFactory::create<float>('c', {5, 5});
    auto z = NDArrayFactory::create<float>(120.);
    auto exp = NDArrayFactory::create<float>(325.);

    auto dimension = NDArrayFactory::create<int>('c', {2}, {0, 1});
    Nd4jPointer extra[6];
#ifdef __CUDABLAS__
    extra[1] = x.getContext()->getCudaStream();
    extra[0] = extra[2] = extra[3] = extra[4] = extra[5] = nullptr;
    x.syncToHost();
#endif
    x.linspace(1.0);
    x.syncToDevice();

    dimension.syncToHost();

    ::execReduceSame2(extra, reduce::Sum,
                      x.buffer(), x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(),
                      nullptr,
                      z.buffer(), z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(),
                      dimension.buffer(), dimension.shapeInfo(),
                      dimension.specialBuffer(), dimension.specialShapeInfo());
//    x.printIndexedBuffer("Input");
//    exp.printIndexedBuffer("Reduce Sum");
    ASSERT_TRUE(exp.equalsTo(z));
}

TEST_F(NativeOpsTests, ReduceTest_9) {
    auto x = NDArrayFactory::create<float>('c', {5, 5});
    auto exp = NDArrayFactory::create<bool>(false);
    auto z = NDArrayFactory::create<bool>(true);

    auto dimension = NDArrayFactory::create<int>('c', {2}, {0, 1});
    Nd4jPointer extra[6];
#ifdef __CUDABLAS__
    extra[1] = x.getContext()->getCudaStream();
    extra[0] = extra[2] = extra[3] = extra[4] = extra[5] = nullptr;
    x.syncToHost();
#endif
    x.linspace(1.0);
    x.syncToDevice();

    dimension.syncToHost();
    ::execReduceBool2(extra, reduce::All,
                      x.buffer(), x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(),
                      nullptr,
                      exp.buffer(), exp.shapeInfo(), exp.specialBuffer(), exp.specialShapeInfo(),
                      dimension.buffer(), dimension.shapeInfo(),
                      dimension.specialBuffer(), dimension.specialShapeInfo());
//    x.printIndexedBuffer("Input");
//    exp.printIndexedBuffer("Reduce All");
    ASSERT_TRUE(exp.equalsTo(z));
}

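// reduce3::Dot of x = 1..25 with y filled with 2 gives 2 * (1 + ... + 25) = 650,
// which is the value pre-stored in z for all four reduce3 variants below.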
TEST_F(NativeOpsTests, Reduce3Test_1) {
    auto x = NDArrayFactory::create<float>('c', {5, 5});
    auto y = NDArrayFactory::create<float>('c', {5, 5});
    auto exp = NDArrayFactory::create<float>(120.);
    auto z = NDArrayFactory::create<float>(650.);

    auto dimension = NDArrayFactory::create<int>('c', {2}, {0, 1});
    Nd4jPointer extra[6];
#ifdef __CUDABLAS__
    extra[1] = x.getContext()->getCudaStream();
    extra[0] = extra[2] = extra[3] = extra[4] = extra[5] = nullptr;
    x.syncToHost();
    printf("Unsupported for CUDA platform yet.\n");
    return;
#endif
    x.linspace(1.0);
    y.assign(2.);
    x.syncToDevice();

    ::execReduce3(extra, reduce3::Dot,
                  x.buffer(), x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(),
                  nullptr,
                  y.buffer(), y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(),
                  exp.buffer(), exp.shapeInfo(), exp.specialBuffer(), exp.specialShapeInfo());
//    x.printIndexedBuffer("Input");
//    exp.printIndexedBuffer("Reduce3 Dot");
    ASSERT_TRUE(exp.equalsTo(z));
}

TEST_F(NativeOpsTests, Reduce3Test_2) {
    auto x = NDArrayFactory::create<float>('c', {5, 5});
    auto y = NDArrayFactory::create<float>('c', {5, 5});
    auto exp = NDArrayFactory::create<float>(120.);
    auto z = NDArrayFactory::create<float>(650.);

    auto dimension = NDArrayFactory::create<int>('c', {2}, {0, 1});
    Nd4jPointer extra[6];
#ifdef __CUDABLAS__
    extra[1] = x.getContext()->getCudaStream();
    extra[0] = extra[2] = extra[3] = extra[4] = extra[5] = nullptr;
    x.syncToHost();
    printf("Unsupported for CUDA platform yet.\n");
    return;
#endif
    x.linspace(1.0);
    y.assign(2.);
    x.syncToDevice();

    ::execReduce3Scalar(extra, reduce3::Dot,
                        x.buffer(), x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(),
                        nullptr,
                        y.buffer(), y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(),
                        exp.buffer(), exp.shapeInfo(), exp.specialBuffer(), exp.specialShapeInfo());
//    x.printIndexedBuffer("Input");
//    exp.printIndexedBuffer("Reduce3 Dot");
    ASSERT_TRUE(exp.equalsTo(z));
}

TEST_F(NativeOpsTests, Reduce3Test_3) {
    auto x = NDArrayFactory::create<float>('c', {5, 5});
    auto y = NDArrayFactory::create<float>('c', {5, 5});
    auto exp = NDArrayFactory::create<float>(120.);
    auto z = NDArrayFactory::create<float>(650.);

    auto dimension = NDArrayFactory::create<int>('c', {2}, {0, 1});
    Nd4jPointer extra[6];
#ifdef __CUDABLAS__
    extra[1] = x.getContext()->getCudaStream();
    extra[0] = extra[2] = extra[3] = extra[4] = extra[5] = nullptr;
    x.syncToHost();
    printf("Unsupported for CUDA platform yet.\n");
    return;
#endif
    x.linspace(1.0);
    y.assign(2.);
    x.syncToDevice();
    dimension.syncToHost();

    ::execReduce3Tad(extra, reduce3::Dot,
                     x.buffer(), x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(),
                     nullptr,
                     y.buffer(), y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(),
                     exp.buffer(), exp.shapeInfo(), exp.specialBuffer(), exp.specialShapeInfo(),
                     dimension.buffer(), dimension.shapeInfo(),
                     dimension.specialBuffer(), dimension.specialShapeInfo(),
                     nullptr, nullptr, nullptr, nullptr);
//    x.printIndexedBuffer("Input");
//    exp.printIndexedBuffer("Reduce All");
    ASSERT_TRUE(exp.equalsTo(z));
}

TEST_F(NativeOpsTests, Reduce3Test_4) {
    auto x = NDArrayFactory::create<float>('c', {5, 5});
    auto y = NDArrayFactory::create<float>('c', {5, 5});
    auto exp = NDArrayFactory::create<float>(120.);
    auto z = NDArrayFactory::create<float>(650.);

    auto dimension = NDArrayFactory::create<int>('c', {2}, {0, 1});
    Nd4jPointer extra[6];
#ifdef __CUDABLAS__
    extra[1] = x.getContext()->getCudaStream();
    extra[0] = extra[2] = extra[3] = extra[4] = extra[5] = nullptr;
    x.syncToHost();
    printf("Unsupported for CUDA platform yet.\n");
    return;
#endif
    x.linspace(1.0);
    y.assign(2.);
    x.syncToDevice();
    dimension.syncToHost();
    int* dimensions = reinterpret_cast<int*>(dimension.buffer());
    auto tadPackX = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), dimensions, dimension.lengthOf());
    auto tadPackY = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(y.shapeInfo(), dimensions, dimension.lengthOf());

    auto hTADShapeInfoX = tadPackX.primaryShapeInfo();
    auto hTADOffsetsX = tadPackX.primaryOffsets();
    auto hTADShapeInfoY = tadPackY.primaryShapeInfo();
    auto hTADOffsetsY = tadPackY.primaryOffsets();

    ::execReduce3All(extra, reduce3::Dot,
                     x.buffer(), x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(),
                     nullptr,
                     y.buffer(), y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(),
                     exp.buffer(), exp.shapeInfo(), exp.specialBuffer(), exp.specialShapeInfo(),
                     dimension.buffer(), dimension.shapeInfo(),
                     dimension.specialBuffer(), dimension.specialShapeInfo(),
                     hTADShapeInfoX, hTADOffsetsX, hTADShapeInfoY, hTADOffsetsY);
//    x.printIndexedBuffer("Input");
//    exp.printIndexedBuffer("Reduce All");
    ASSERT_TRUE(exp.equalsTo(z));
}

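// execScalar multiplies every element of x (1..25) by the scalar y = 10, so the result
// should match z = 10, 20, ..., 250; the Bool variant compares x against the scalar.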
TEST_F(NativeOpsTests, ScalarTest_1) {
    auto x = NDArrayFactory::create<float>('c', {5, 5});
    auto y = NDArrayFactory::create<float>(10.);
    auto exp = NDArrayFactory::create<float>('c', {5,5});
    auto z = NDArrayFactory::create<float>('c', {5,5});

    Nd4jPointer extra[6];
#ifdef __CUDABLAS__
    extra[1] = x.getContext()->getCudaStream();
    extra[0] = extra[2] = extra[3] = extra[4] = extra[5] = nullptr;
    x.syncToHost();
    y.syncToHost();
    printf("Unsupported for CUDA platform yet.\n");
    return;
#endif
    x.linspace(1.0);
    z.linspace(10., 10.);
    //y.assign(2.);
    x.syncToDevice();
    z.syncToDevice();
    ::execScalar(extra, scalar::Multiply,
                 x.buffer(), x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(),
                 exp.buffer(), exp.shapeInfo(), exp.specialBuffer(), exp.specialShapeInfo(),
                 y.buffer(), y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), nullptr);
//    x.printIndexedBuffer("Input");
//    exp.printIndexedBuffer("Reduce All");
    ASSERT_TRUE(exp.equalsTo(z));
}

TEST_F(NativeOpsTests, ScalarTest_2) {
    auto x = NDArrayFactory::create<float>('c', {5, 5});
    auto y = NDArrayFactory::create<float>(10.);
    auto exp = NDArrayFactory::create<bool>('c', {5,5});
    auto z = NDArrayFactory::create<bool>('c', {5,5});

    Nd4jPointer extra[6];
#ifdef __CUDABLAS__
    extra[1] = x.getContext()->getCudaStream();
    extra[0] = extra[2] = extra[3] = extra[4] = extra[5] = nullptr;
    x.syncToHost();
    y.syncToHost();
    printf("Unsupported for CUDA platform yet.\n");
    return;
#endif
    x.linspace(1.0);
    z.assign(false);
    //y.assign(2.);
    x.syncToDevice();
    z.syncToDevice();
    ::execScalarBool(extra, scalar::GreaterThan,
                     x.buffer(), x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(),
                     exp.buffer(), exp.shapeInfo(), exp.specialBuffer(), exp.specialShapeInfo(),
                     y.buffer(), y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), nullptr);
//    x.printIndexedBuffer("Input");
//    exp.printIndexedBuffer("Reduce All");
    ASSERT_TRUE(exp.e<bool>(5) == z.e<bool>(5) && exp.e<bool>(15) != z.e<bool>(15));
}

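// The summary-stats tests compute the (unbiased) variance of the 25 values below,
// which is roughly 0.21587 -- the value pre-stored in z.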
TEST_F(NativeOpsTests, SummaryStatsScalarTest_1) {
    auto x = NDArrayFactory::create<float>('c', {5, 5}, {0.1, 0.2, 0.3, -0.3, -0.5, 0.5, 0.7, 0.9, 0.8, 0.1, 0.11, 0.12, 0.5, -0.8, -0.9, 0.4, 0.1, 0.2, 0.3, -0.3, -0.5, 0.2, 0.3, -0.3, -0.5});
    auto exp = NDArrayFactory::create<float>(0.9);
    auto z = NDArrayFactory::create<float>(0.21587136);

    Nd4jPointer extra[6];
#ifdef __CUDABLAS__
    extra[1] = x.getContext()->getCudaStream();
    extra[0] = extra[2] = extra[3] = extra[4] = extra[5] = nullptr;
    x.syncToHost();
    printf("Unsupported for CUDA platform yet.\n");
    return;
#endif
    ::execSummaryStatsScalar(extra, variance::SummaryStatsVariance,
                             x.buffer(), x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(),
                             nullptr,
                             exp.buffer(), exp.shapeInfo(), exp.specialBuffer(), exp.specialShapeInfo(), false);
//    x.printIndexedBuffer("Input");
//    exp.printIndexedBuffer("Standard Variance");
    ASSERT_TRUE(exp.equalsTo(z));
}

TEST_F(NativeOpsTests, SummaryStatsScalarTest_2) {
    auto x = NDArrayFactory::create<float>('c', {5, 5}, {0.1, 0.2, 0.3, -0.3, -0.5, 0.5, 0.7, 0.9, 0.8, 0.1, 0.11, 0.12, 0.5, -0.8, -0.9, 0.4, 0.1, 0.2, 0.3, -0.3, -0.5, 0.2, 0.3, -0.3, -0.5});
    auto exp = NDArrayFactory::create<float>(0.9);
    auto z = NDArrayFactory::create<float>(0.21587136);

    Nd4jPointer extra[6];
#ifdef __CUDABLAS__
    extra[1] = x.getContext()->getCudaStream();
    extra[0] = extra[2] = extra[3] = extra[4] = extra[5] = nullptr;
    x.syncToHost();
    printf("Unsupported for CUDA platform yet.\n");
    return;
#endif
    ::execSummaryStats(extra, variance::SummaryStatsVariance,
                       x.buffer(), x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(),
                       nullptr,
                       exp.buffer(), exp.shapeInfo(), exp.specialBuffer(), exp.specialShapeInfo(), false);
//    x.printIndexedBuffer("Input");
//    exp.printIndexedBuffer("Standard Variance");
    ASSERT_TRUE(exp.equalsTo(z));
}

TEST_F(NativeOpsTests, SummaryStatsScalarTest_3) {
    auto x = NDArrayFactory::create<float>('c', {5, 5}, {0.1, 0.2, 0.3, -0.3, -0.5, 0.5, 0.7, 0.9, 0.8, 0.1, 0.11, 0.12, 0.5, -0.8, -0.9, 0.4, 0.1, 0.2, 0.3, -0.3, -0.5, 0.2, 0.3, -0.3, -0.5});
    auto exp = NDArrayFactory::create<float>(0.9);
    auto z = NDArrayFactory::create<float>(0.21587136);

    Nd4jPointer extra[6];
#ifdef __CUDABLAS__
    extra[1] = x.getContext()->getCudaStream();
    extra[0] = extra[2] = extra[3] = extra[4] = extra[5] = nullptr;
    x.syncToHost();
    printf("Unsupported for CUDA platform yet.\n");
    return;
#endif
    auto dimensions = NDArrayFactory::create<int>({0, 1});
    ::execSummaryStatsTad(extra, variance::SummaryStatsVariance,
                          x.buffer(), x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(),
                          nullptr,
                          exp.buffer(), exp.shapeInfo(), exp.specialBuffer(), exp.specialShapeInfo(),
                          dimensions.buffer(), dimensions.shapeInfo(),
                          dimensions.specialBuffer(), dimensions.specialShapeInfo(),
                          false,
                          nullptr, nullptr);
//    x.printIndexedBuffer("Input");
//    exp.printIndexedBuffer("Standard Variance");
    ASSERT_TRUE(exp.equalsTo(z));
}

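// execTransformFloat with transform::Sqrt maps the perfect squares 1, 4, 9, ..., 625 back
// to 1..25, matching z.linspace(1); the Same/Bool/Strict variants below cover Square,
// IsPositive and Cosine respectively.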
TEST_F(NativeOpsTests, TransformTest_1) {
    auto x = NDArrayFactory::create<float>('c', {5, 5}, {1, 4, 9, 16, 25, 36, 49, 64, 81, 100, 121, 144, 169, 196, 225, 256, 289, 324, 361, 400, 441, 484, 529, 576, 625});
    auto exp = NDArrayFactory::create<float>('c', {5, 5});
    auto z = NDArrayFactory::create<float>('c', {5,5});

    Nd4jPointer extra[6];
#ifdef __CUDABLAS__
    extra[1] = x.getContext()->getCudaStream();
    extra[0] = extra[2] = extra[3] = extra[4] = extra[5] = nullptr;
    x.syncToHost();
    printf("Unsupported for CUDA platform yet.\n");
    return;
#endif
    z.linspace(1.);
    ::execTransformFloat(extra, transform::Sqrt,
                         x.buffer(), x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(),
                         exp.buffer(), exp.shapeInfo(), exp.specialBuffer(), exp.specialShapeInfo(),
                         nullptr);
//    x.printIndexedBuffer("Input");
//    exp.printIndexedBuffer("Sqrt is");
    ASSERT_TRUE(exp.equalsTo(z));
}

TEST_F(NativeOpsTests, TransformTest_2) {
    auto x = NDArrayFactory::create<float>('c', {5, 5}, {1, 4, 9, 16, 25, 36, 49, 64, 81, 100, 121, 144, 169, 196, 225, 256, 289, 324, 361, 400, 441, 484, 529, 576, 625});
    auto exp = NDArrayFactory::create<float>('c', {5, 5});
    auto z = NDArrayFactory::create<float>('c', {5,5});

    Nd4jPointer extra[6];
#ifdef __CUDABLAS__
    extra[1] = x.getContext()->getCudaStream();
    extra[0] = extra[2] = extra[3] = extra[4] = extra[5] = nullptr;
    x.syncToHost();
    printf("Unsupported for CUDA platform yet.\n");
    return;
#endif
    z.linspace(1.);
    ::execTransformSame(extra, transform::Square,
                        z.buffer(), z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(),
                        exp.buffer(), exp.shapeInfo(), exp.specialBuffer(), exp.specialShapeInfo(),
                        nullptr);
//    x.printIndexedBuffer("Input");
//    exp.printIndexedBuffer("Square is");
    ASSERT_TRUE(exp.equalsTo(x));
}

TEST_F(NativeOpsTests, TransformTest_3) {
    auto x = NDArrayFactory::create<float>('c', {5, 5});
    auto exp = NDArrayFactory::create<bool>('c', {5, 5});
    auto z = NDArrayFactory::create<bool>('c', {5,5});

    Nd4jPointer extra[6];
#ifdef __CUDABLAS__
    extra[1] = x.getContext()->getCudaStream();
    extra[0] = extra[2] = extra[3] = extra[4] = extra[5] = nullptr;
    x.syncToHost();
    printf("Unsupported for CUDA platform yet.\n");
    return;
#endif
    x.linspace(1.);
    z.assign(true);
    x.p(24, -25);
    z.p(24, false);
    ::execTransformBool(extra, transform::IsPositive,
                        x.buffer(), x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(),
                        exp.buffer(), exp.shapeInfo(), exp.specialBuffer(), exp.specialShapeInfo(),
                        nullptr);
//    x.printIndexedBuffer("Input");
//    exp.printIndexedBuffer("IsPositive");
    ASSERT_TRUE(exp.equalsTo(z));
}

TEST_F(NativeOpsTests, TransformTest_4) {
    auto x = NDArrayFactory::create<float>('c', {5, 5}, {0, 1, 2, 3, 2, 1, 0, 1.57, 1.57, 1.57, 3.141592, 3.141592,
                                                         3.141592, 0, 0, 0, 0, 1, 1, 2, 2, 2, 1, 0, 0});
    auto exp = NDArrayFactory::create<float>('c', {5, 5});
    auto z = NDArrayFactory::create<float>('c', {5,5}, {1., 0.540302, -0.416147, -0.989992, -0.416147, 0.540302, 1.0,
                                                        0.000796, 0.000796, 0.000796, -1, -1, -1, 1., 1., 1.0, 1.0,
                                                        0.540302, 0.540302, -0.416147, -0.416147, -0.416147, 0.540302, 1., 1.});

    Nd4jPointer extra[6];
#ifdef __CUDABLAS__
    extra[1] = x.getContext()->getCudaStream();
    extra[0] = extra[2] = extra[3] = extra[4] = extra[5] = nullptr;
    x.syncToHost();
    printf("Unsupported for CUDA platform yet.\n");
    return;
#endif
    //z.linspace(1.);
    ::execTransformStrict(extra, transform::Cosine,
                          x.buffer(), x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(),
                          exp.buffer(), exp.shapeInfo(), exp.specialBuffer(), exp.specialShapeInfo(),
                          nullptr);
//    x.printIndexedBuffer("Input");
//    exp.printIndexedBuffer("Cosine");
    ASSERT_TRUE(exp.equalsTo(z));
}

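// The TAD variants take precomputed TAD packs (shape info + offsets) for the input and
// output arrays, obtained from ConstantTadHelper for the reduction dimensions {0, 1}.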
TEST_F(NativeOpsTests, ScalarTadTest_1) {
    auto x = NDArrayFactory::create<float>('c', {5, 5});
    auto y = NDArrayFactory::create<float>(10.);
    auto exp = NDArrayFactory::create<float>('c', {5,5});
    auto z = NDArrayFactory::create<float>('c', {5,5});

    Nd4jPointer extra[6];
#ifdef __CUDABLAS__
    extra[1] = x.getContext()->getCudaStream();
    extra[0] = extra[2] = extra[3] = extra[4] = extra[5] = nullptr;
    x.syncToHost();
    y.syncToHost();
    printf("Unsupported for CUDA platform yet.\n");
    return;
#endif
    x.linspace(1.0);
    z.linspace(10., 10.);
    //y.assign(2.);
    x.syncToDevice();
    z.syncToDevice();
    auto dimension = NDArrayFactory::create<int>({0, 1});
    auto dimensions = reinterpret_cast<int*>(dimension.buffer());
    auto tadPackX = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), dimensions, dimension.lengthOf());
    auto tadPackZ = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(z.shapeInfo(), dimensions, dimension.lengthOf());

    ::execScalarTad(extra, scalar::Multiply,
                    x.buffer(), x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(),
                    exp.buffer(), exp.shapeInfo(), exp.specialBuffer(), exp.specialShapeInfo(),
                    y.buffer(), y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(),
                    nullptr,
                    dimension.buffer(), dimension.shapeInfo(),
                    dimension.specialBuffer(), dimension.specialShapeInfo(),
                    tadPackX.primaryShapeInfo(), tadPackX.primaryOffsets(), tadPackZ.primaryShapeInfo(), tadPackZ.primaryOffsets());
//    x.printIndexedBuffer("Input");
//    exp.printIndexedBuffer("Reduce All");
    ASSERT_TRUE(exp.equalsTo(z));
}

TEST_F(NativeOpsTests, ScalarTadTest_2) {
    auto x = NDArrayFactory::create<bool>('c', {5, 5});
    auto y = NDArrayFactory::create<bool>(true);
    auto exp = NDArrayFactory::create<bool>('c', {5,5});
    auto z = NDArrayFactory::create<bool>('c', {5, 5});

    Nd4jPointer extra[6];
#ifdef __CUDABLAS__
    extra[1] = x.getContext()->getCudaStream();
    extra[0] = extra[2] = extra[3] = extra[4] = extra[5] = nullptr;
    x.syncToHost();
    y.syncToHost();
    printf("Unsupported for CUDA platform yet.\n");
    return;
#endif
    x.assign(false);
    x.p(5, true);
    x.p(15, true);
    //z.linspace(10., 10.);
    //y.assign(2.);
    x.syncToDevice();
    z.syncToDevice();
    auto dimension = NDArrayFactory::create<int>({0, 1});
    auto dimensions = reinterpret_cast<int*>(dimension.buffer());
    auto tadPackX = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), dimensions, dimension.lengthOf());
    auto tadPackZ = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(z.shapeInfo(), dimensions, dimension.lengthOf());
    z.assign(true);

    ::execScalarBoolTad(extra, scalar::And,
                        x.buffer(), x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(),
                        exp.buffer(), exp.shapeInfo(), exp.specialBuffer(), exp.specialShapeInfo(),
                        y.buffer(), y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(),
                        nullptr,
                        dimension.buffer(), dimension.shapeInfo(),
                        dimension.specialBuffer(), dimension.specialShapeInfo(),
                        tadPackX.primaryShapeInfo(), tadPackX.primaryOffsets(), tadPackZ.primaryShapeInfo(), tadPackZ.primaryOffsets());
//    x.printIndexedBuffer("Input");
//    exp.printIndexedBuffer("And");
    ASSERT_TRUE(exp.e<bool>(5) == z.e<bool>(5) && exp.e<bool>(15));
}

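// ::flatten copies x in 'c' order into z starting at element offset 25, i.e. into the
// second {5,5} slice of the {2,5,5} output, which is what exp(1, {0}) is prepared to hold.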
TEST_F(NativeOpsTests, FlattenTest_1) {
    auto x = NDArrayFactory::create<float>('c', {5, 5});
    auto y = NDArrayFactory::create<float>('c', {5, 5});
    auto exp = NDArrayFactory::create<float>('c', {2, 5,5});
    auto z = NDArrayFactory::create<float>('c', {2, 5,5});

    Nd4jPointer extra[6];
#ifdef __CUDABLAS__
    extra[1] = x.getContext()->getCudaStream();
    extra[0] = extra[2] = extra[3] = extra[4] = extra[5] = nullptr;
    x.syncToHost();
    y.syncToHost();
    printf("Unsupported for CUDA platform yet.\n");
    return;
#endif
    x.linspace(1.0,2);
    y.linspace(2,2);

    //y.assign(2.);
    x.syncToDevice();
    z.syncToDevice();
    auto dimension = NDArrayFactory::create<int>({0, 1});
    auto dimensions = reinterpret_cast<int*>(dimension.buffer());
    auto tadPackX = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), dimensions, dimension.lengthOf());
    auto tadPackZ = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(z.shapeInfo(), dimensions, dimension.lengthOf());
    exp(1, {0}).linspace(1,2);
    ::flatten(extra,
              25, 'c', z.buffer(), z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(),
              x.buffer(), x.shapeInfo(),
              x.specialBuffer(), x.specialShapeInfo());

//    exp.printIndexedBuffer("Exp");
//    z.printIndexedBuffer("Flatten");
    ASSERT_TRUE(exp.equalsTo(z));
}

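// ::concat joins x and y along dimension 0 into the {10,5} output z; x holds 1..25 and
// y continues at 26..50, so the expected result is exp.linspace(1).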
TEST_F(NativeOpsTests, ConcatTest_1) {
    auto x = NDArrayFactory::create<float>('c', {5, 5});
    auto y = NDArrayFactory::create<float>('c', {5, 5});
    auto exp = NDArrayFactory::create<float>('c', {10,5});
    auto z = NDArrayFactory::create<float>('c', {10,5});

    Nd4jPointer extra[6];
#ifdef __CUDABLAS__
    extra[1] = x.getContext()->getCudaStream();
    extra[0] = extra[2] = extra[3] = extra[4] = extra[5] = nullptr;
    x.syncToHost();
    y.syncToHost();
    printf("Unsupported for CUDA platform yet.\n");
    return;
#endif
    x.linspace(1.0);
    y.linspace(26);

    //y.assign(2.);
    x.syncToDevice();
    z.syncToDevice();
    int d = 0;
    auto dimension = NDArrayFactory::create<int>('c', {1}, {d});
    auto dimensions = reinterpret_cast<int*>(dimension.buffer());
    //auto tadPackX = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), dimensions, dimension.lengthOf());
    auto tadPackZ = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(z.shapeInfo(), dimensions, dimension.lengthOf());
    exp.linspace(1);
    Nd4jPointer datas[] = {x.buffer(), y.buffer()};
    Nd4jPointer shapes[] = {x.shapeInfo(), y.shapeInfo()};

    ::concat(extra,
             0, 2, datas, shapes, nullptr, nullptr, z.buffer(), z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(),
             nullptr, nullptr);

//    exp.printIndexedBuffer("Exp");
//    z.printIndexedBuffer("Concat");
    ASSERT_TRUE(exp.equalsTo(z));
}

TEST_F(NativeOpsTests, ConcatTest_2) {
    auto x = NDArrayFactory::create<float>('c', {5, 5});
    auto y = NDArrayFactory::create<float>('c', {5, 5});
    auto exp = NDArrayFactory::create<float>('c', {10,5});
    auto z = NDArrayFactory::create<float>('c', {10,5});

    Nd4jPointer extra[6];
#ifdef __CUDABLAS__
    extra[1] = x.getContext()->getCudaStream();
    extra[0] = extra[2] = extra[3] = extra[4] = extra[5] = nullptr;
    x.syncToHost();
    y.syncToHost();
    printf("Unsupported for CUDA platform yet.\n");
    return;
#endif
    x.linspace(1.0);
    y.linspace(26);

    //y.assign(2.);
    x.syncToDevice();
    z.syncToDevice();
    int d = 0;
    auto dimension = NDArrayFactory::create<int>('c', {1}, {d});
    auto dimensions = reinterpret_cast<int*>(dimension.buffer());
    //auto tadPackX = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), dimensions, dimension.lengthOf());
    auto tadPackZ = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(z.shapeInfo(), dimensions, dimension.lengthOf());
    exp.linspace(1);
    Nd4jPointer datas[] = {x.buffer(), y.buffer()};
    Nd4jPointer shapes[] = {x.shapeInfo(), y.shapeInfo()};

    ::specialConcat(extra,
                    0, 2, datas, shapes, z.buffer(), z.shapeInfo(), nullptr, nullptr);

    // exp.printIndexedBuffer("Exp");
    // z.printIndexedBuffer("Concat");
    ASSERT_TRUE(exp.equalsTo(z));
}

TEST_F(NativeOpsTests, InitializeTest_1) {
    // ::initializeDevicesAndFunctions();
}

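// Smoke test for the raw allocation entry points: a 16-byte host allocation and a 16-byte
// device allocation, each released right away. There are no asserts - the test only checks
// that the calls complete without crashing.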
TEST_F(NativeOpsTests, MallocTest_1) {
    auto a = ::mallocHost(16, 0);
    ::freeHost(a);

    auto dA = ::mallocDevice(16, 0, 0);
    ::freeDevice(dA, 0);
}

TEST_F(NativeOpsTests, OMPTest_1) {
    auto maxThreads = ::ompGetMaxThreads();
    auto numThreads = ::ompGetNumThreads();
    //::setOmpMinThreads(maxThreads);
    //::setOmpNumThreads(numThreads);
}

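// Exercises createContext/createStream/createEvent; the event is destroyed explicitly, while
// the context and stream handles are checked before any further cleanup.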
TEST_F(NativeOpsTests, CreateTest_1) {
    auto xx = ::createContext();
    auto yy = ::createStream();
    auto zz = ::createEvent();
    ::destroyEvent(zz);
    if (xx)
        delete (LaunchContext*)xx;
    if (yy)
        printf("Stream should be destroyed before.");
}

TEST_F(NativeOpsTests, MemTest_1) {
    auto x = NDArrayFactory::create<double>({10, 20, 30, 40, 50});
    auto y = NDArrayFactory::create<double>({20, 20, 20, 20, 20});

#ifdef __CUDABLAS__
    return;
#endif
    //ASSERT_TRUE(0 == ::memcpy(x.buffer(), y.buffer(), x.lengthOf() * sizeof(double), 0, nullptr));
    ASSERT_TRUE(0 == ::memcpyAsync(x.buffer(), y.buffer(), x.lengthOf() * sizeof(double), 0, nullptr));
    //ASSERT_TRUE(0 == ::memset(x.buffer(), 119, x.lengthOf() * sizeof(double), 0, nullptr));
    ASSERT_TRUE(0 == ::memsetAsync(x.buffer(), 119, x.lengthOf() * sizeof(double), 0, nullptr));
}

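// pullRows copies the TADs selected by the index buffer into z: picking indexes {0, 2, 3, 4}
// from the {5, 1} column {0, 1, 2, 3, 4} must produce {0, 2, 3, 4}.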
TEST_F(NativeOpsTests, PullRowsTest_1) {
    NDArray x('c', {5, 1}, {0, 1, 2, 3, 4});
    NDArray z('c', {4, 1}, nd4j::DataType::DOUBLE);
    NDArray exp('c', {4, 1}, {0, 2, 3, 4});

    Nd4jLong indexes[] = {0, 2, 3, 4};
    PointersManager pm(LaunchContext::defaultContext(), "NativeOpsTests::pullRows");
    auto pidx = reinterpret_cast<Nd4jLong *>(pm.replicatePointer(indexes, 4 * sizeof(Nd4jLong)));

    std::vector<int> dims = {1};

    auto xTadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(x.getShapeInfo(), dims);
    auto zTadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(z.getShapeInfo(), dims);

    Nd4jPointer nativeStart[2];

#ifdef __CUDABLAS__
    nativeStart[1] = (x.getContext()->getCudaStream());
#endif

    pullRows(nativeStart, x.buffer(), x.getShapeInfo(), x.getSpecialBuffer(), x.getSpecialShapeInfo(),
             z.buffer(), z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(),
             4, pidx,
             xTadPack.platformShapeInfo(), xTadPack.platformOffsets(),
             zTadPack.platformShapeInfo(), zTadPack.platformOffsets());

    ASSERT_TRUE(z.equalsTo(exp));
    pm.synchronize();
}

TEST_F(NativeOpsTests, TadPackTest_1) {
    int dimension[] = {1};
    int const dimensionLength = 1;
    auto x = NDArrayFactory::create<int>('c', {2, 3, 4});
    nd4j::TadPack* pack = ::tadOnlyShapeInfo(x.shapeInfo(),
                                             dimension,
                                             dimensionLength);
    ASSERT_TRUE(pack != nullptr);
    delete pack;
}

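// ::average is expected to write the element-wise mean of the n input buffers into z. Both
// entries of xList point at the same linspace(1) data, so the result must equal that data.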
TEST_F(NativeOpsTests, AverageTest_1) {
    auto x = NDArrayFactory::create<float>('c', {5, 5});
    auto y = NDArrayFactory::create<float>('c', {5, 5});
    auto exp = NDArrayFactory::create<float>('c', {5, 5});
    auto z = NDArrayFactory::create<float>('c', {5, 5});
#ifdef __CUDABLAS__
    return;
#endif
    x.linspace(1);
    exp.linspace(1);
    Nd4jPointer xList[] = {x.buffer(), x.buffer()};
    Nd4jPointer dxList[] = {x.specialBuffer(), x.specialBuffer()};
    ::average(nullptr,
              xList, x.shapeInfo(),
              dxList, x.specialShapeInfo(),
              z.buffer(), z.shapeInfo(),
              z.specialBuffer(), z.specialShapeInfo(),
              2,
              x.lengthOf(),
              true);
    // z.printIndexedBuffer("RES");
    ASSERT_TRUE(z.equalsTo(exp));
}

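// ::accumulate is expected to sum the n input buffers into z. Two copies of linspace(1) are
// passed, so the expectation is linspace(2, 2), i.e. every element doubled.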
TEST_F(NativeOpsTests, AccumulateTest_1) {
    auto x = NDArrayFactory::create<float>('c', {5, 5});
    auto y = NDArrayFactory::create<float>('c', {5, 5});
    auto exp = NDArrayFactory::create<float>('c', {5, 5});
    auto z = NDArrayFactory::create<float>('c', {5, 5});
#ifdef __CUDABLAS__
    return;
#endif
    x.linspace(1);
    exp.linspace(2, 2);
    Nd4jPointer xList[] = {x.buffer(), x.buffer()};
    Nd4jPointer dxList[] = {x.specialBuffer(), x.specialBuffer()};
    ::accumulate(nullptr,
                 xList, x.shapeInfo(),
                 dxList, x.specialShapeInfo(),
                 z.buffer(), z.shapeInfo(),
                 z.specialBuffer(), z.specialShapeInfo(),
                 2,
                 x.lengthOf());
    // z.printIndexedBuffer("RES");
    ASSERT_TRUE(z.equalsTo(exp));
}

TEST_F(NativeOpsTests, P2PTest_1) {
    ::enableP2P(true);
    ::checkP2P();
    ::isP2PAvailable();
}

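// ::shuffle permutes TADs of the inputs according to shuffleMap. The comparison against exp
// stays commented out; the test only verifies that the call completes.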
TEST_F(NativeOpsTests, ShuffleTest_1) {
    auto x = NDArrayFactory::create<float>('c', {5, 5});
    auto y = NDArrayFactory::create<float>('c', {5, 5});
    auto exp = NDArrayFactory::create<float>('c', {5, 5});
    auto z = NDArrayFactory::create<float>('c', {5, 5});
#ifdef __CUDABLAS__
    return;
#endif
    x.linspace(1);
    y.linspace(34);
    exp.linspace(2, 2);
    Nd4jPointer xList[] = {x.buffer(), x.buffer()};
    Nd4jPointer dxList[] = {x.specialBuffer(), y.specialBuffer()};
    Nd4jPointer xShapeList[] = {x.shapeInfo(), y.shapeInfo()};
    Nd4jPointer dxShapeList[] = {x.specialShapeInfo(), y.specialShapeInfo()};
    Nd4jPointer zList[] = {z.buffer(), z.buffer()};
    Nd4jPointer dzList[] = {z.specialBuffer(), z.specialBuffer()};
    Nd4jPointer zShapeList[] = {z.shapeInfo(), z.shapeInfo()};
    Nd4jPointer dzShapeList[] = {z.specialShapeInfo(), z.specialShapeInfo()};
    int shuffleMap[] = {1, 0, 4, 3, 2};
    auto zTadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(x.getShapeInfo(), {1});
    Nd4jPointer zListOffset[] = {zTadPack.platformOffsets(), zTadPack.platformOffsets()};
    Nd4jPointer zListTADs[] = {zTadPack.platformShapeInfo(), zTadPack.platformShapeInfo()};
    ::shuffle(nullptr,
              xList, xShapeList,
              dxList, dxShapeList,
              zList, zShapeList,
              dzList, dzShapeList,
              2,
              shuffleMap, zListTADs, zListOffset);
    // z.printIndexedBuffer("RES");
    // x.printIndexedBuffer("INPUT shuffled");
    // y.printIndexedBuffer("INPUT 2 shuffled");
    // ASSERT_TRUE(z.equalsTo(exp));
}

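// convertTypes copies x into z while converting the element type (float32 -> double here),
// so the double expectation holds the same linspace(2, 2) values.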
TEST_F(NativeOpsTests, ConvertTypesTest_1) {
    auto x = NDArrayFactory::create<float>('c', {5, 5});

    auto exp = NDArrayFactory::create<double>('c', {5, 5});
    auto z = NDArrayFactory::create<double>('c', {5, 5});

#ifdef __CUDABLAS__
    return;
#endif
    x.linspace(2, 2);
    exp.linspace(2, 2);
    ::convertTypes(nullptr, ND4J_FLOAT32, x.buffer(), x.lengthOf(), ND4J_DOUBLE, z.buffer());
    ASSERT_TRUE(z.equalsTo(exp));
}

//TEST_F(NativeOpsTests, Test_Aggregations_1) {
//    NativeOps ops;
//    auto x = NDArrayFactory::create<float>('c', {5,5});
//    auto y = NDArrayFactory::create<float>('c', {5,5});
//
//
//    ops.execAggregate(nullptr, 0, maxArgs, maxShapes, maxIntArrays, maxIntArraySize, maxIndexArguments, maxRealArguments, pointer.data(), nd4j::DataType::FLOAT32);
//    void **arguments,
//    int numArguments,
//    Nd4jLong **shapeArguments,
//    int numShapeArguments,
//    int *indexArguments,
//    int numIndexArguments,
//    int **intArrays,
//    int numIntArrays,
//    void *realArguments,
//    int numRealArguments,
//    nd4j::DataType dtype
//}

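// The RandomTest cases below cover the legacy random entry points: execRandom fills z from a
// Bernoulli(p) distribution, execRandom2 applies DropOut to x writing into z, and execRandom3
// merges x and y probabilistically. None of them assert on the generated values; they only
// verify the calls run.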
TEST_F(NativeOpsTests, RandomTest_1) {
    auto z = NDArrayFactory::create<double>('c', {100});
    Nd4jPointer extra[] = {nullptr, nullptr};
#ifdef __CUDABLAS__
    return;
    extra[1] = z.getContext()->getCudaStream();
#endif
    graph::RandomGenerator rng(1023, 119);
    double p = 0.5;
    ::execRandom(extra, random::BernoulliDistribution, &rng, z.buffer(), z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), &p);
}

TEST_F(NativeOpsTests, RandomTest_2) {
    auto x = NDArrayFactory::create<double>('c', {100});
    auto z = NDArrayFactory::create<double>('c', {100});
    Nd4jPointer extra[] = {nullptr, nullptr};
#ifdef __CUDABLAS__
    return;
    extra[1] = z.getContext()->getCudaStream();
#endif
    x.linspace(0, 0.01);
    graph::RandomGenerator rng(1023, 119);
    double p = 0.5;
    ::execRandom2(extra, random::DropOut, &rng, x.buffer(), x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), z.buffer(), z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), &p);
}

TEST_F(NativeOpsTests, RandomTest_3) {
    auto x = NDArrayFactory::create<double>('c', {100});
    auto y = NDArrayFactory::create<double>('c', {100});
    auto z = NDArrayFactory::create<double>('c', {100});
    Nd4jPointer extra[] = {nullptr, nullptr};
#ifdef __CUDABLAS__
    return;
    extra[1] = z.getContext()->getCudaStream();
#endif
    x.linspace(0, 0.01);
    y.linspace(1, -0.01);
    graph::RandomGenerator rng(1023, 119);
    double p = 0.5;
    ::execRandom3(extra, random::ProbablisticMerge, &rng, x.buffer(), x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), y.buffer(), y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), z.buffer(), z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), &p);
}

TEST_F(NativeOpsTests, RandomTest_4) {
#ifdef __CUDABLAS__
    return;
#endif
    graph::RandomGenerator* rng = (graph::RandomGenerator*)::initRandom(nullptr, 1023, 0, nullptr);
    ::refreshBuffer(nullptr, 1203L, rng);
    ::reSeedBuffer(nullptr, 3113L, rng);
    ::destroyRandom(rng);
}

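// ::sort orders the whole flat buffer in place; with the trailing flag set to false the
// result is expected in ascending order.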
TEST_F(NativeOpsTests, SortTest_1) {
#ifdef __CUDABLAS__
    return;
#endif
    auto sortedVals = NDArrayFactory::create<int>(
            {10, 1, 5, 120, 34, 5, 78, 138, 3, 111, 331, 29, 91, 71, 73, 50, 56, 4});
    auto exp = NDArrayFactory::create<int>({1, 3, 4, 5, 5, 10, 29, 34, 50, 56, 71, 73, 78, 91, 111, 120, 138, 331});

    ::sort(nullptr, sortedVals.buffer(), sortedVals.shapeInfo(), sortedVals.specialBuffer(),
           sortedVals.specialShapeInfo(), false);
    ASSERT_TRUE(sortedVals.equalsTo(exp));
}

TEST_F(NativeOpsTests, SortTests_2) {
    auto k = NDArrayFactory::create<Nd4jLong>('c', {10}, {1, 3, 5, 9, 0, 2, 4, 6, 7, 8});
    auto v = NDArrayFactory::create<double>('c', {10}, {1.5, 3.5, 5.5, 9.5, 0.5, 2.5, 4.5, 6.5, 7.5, 8.5});

    auto ek = NDArrayFactory::create<Nd4jLong>('c', {10}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9});
    auto ev = NDArrayFactory::create<double>('c', {10}, {0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5});
    Nd4jPointer extras[2];
#ifdef __CUDABLAS__
    extras[1] = LaunchContext::defaultContext()->getCudaStream();
#endif

    ::sortByKey(extras, k.buffer(), k.shapeInfo(), k.specialBuffer(), k.specialShapeInfo(), v.buffer(), v.shapeInfo(), v.specialBuffer(), v.specialShapeInfo(), false);
    k.tickWriteDevice();
    v.tickWriteDevice();

    ASSERT_EQ(ek, k);
    ASSERT_EQ(ev, v);
}

TEST_F(NativeOpsTests, SortTest_3) {
    auto k = NDArrayFactory::create<Nd4jLong>('c', {10}, {1, 3, 5, 9, 0, 2, 4, 6, 7, 8});
    auto v = NDArrayFactory::create<double>('c', {10}, {1.5, 3.5, 5.5, 9.5, 0.5, 2.5, 4.5, 6.5, 7.5, 8.5});

    auto ek = NDArrayFactory::create<Nd4jLong>('c', {10}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9});
    auto ev = NDArrayFactory::create<double>('c', {10}, {0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5});

#ifdef __CUDABLAS__
    Nd4jPointer extras[2] = {nullptr, LaunchContext::defaultContext()->getCudaStream()};
#else
    Nd4jPointer extras[2];
#endif

    ::sortByKey(extras, k.buffer(), k.shapeInfo(), k.specialBuffer(), k.specialShapeInfo(), v.buffer(), v.shapeInfo(), v.specialBuffer(), v.specialShapeInfo(), false);
    k.tickWriteDevice();
    v.tickWriteDevice();

    ASSERT_EQ(ek, k);
    ASSERT_EQ(ev, v);
}

TEST_F(NativeOpsTests, SortTest_4) {
#ifdef __CUDABLAS__
    return;
#endif
    auto sortedVals = NDArrayFactory::create<int>('c', {3, 6},
            { 10, 1, 5, 120, 34, 5,
              78, 138, 3, 111, 331, 29,
              91, 71, 73, 50, 56, 4});
    auto exp = NDArrayFactory::create<int>('c', {3, 6}, {1, 5, 5, 10, 34, 120, 3, 29, 78, 111, 138, 331, 4, 50, 56, 71, 73, 91});

    std::vector<int> dims({1});
    auto packX = ConstantTadHelper::getInstance()->tadForDimensions(sortedVals.shapeInfo(), {1});
    ::sortTad(nullptr, sortedVals.buffer(), sortedVals.shapeInfo(), sortedVals.specialBuffer(),
              sortedVals.specialShapeInfo(), dims.data(), dims.size(), packX.platformShapeInfo(), packX.platformOffsets(), false);
    // sortedVals.printBuffer("OUT");
    // exp.printIndexedBuffer("EXP");
    ASSERT_TRUE(sortedVals.equalsTo(exp));
}

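// sortTadByKey (and sortTadByValue below) sort key/value pairs along the given axis: each row
// of the {2, 10} inputs is rearranged so the keys (or values) end up ascending, with the
// companion array permuted the same way.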
TEST_F(NativeOpsTests, SortTests_5) {
    auto k = NDArrayFactory::create<Nd4jLong>('c', {2, 10}, {1, 3, 5, 9, 0, 2, 4, 6, 7, 8, 1, 3, 5, 9, 0, 2, 4, 6, 7, 8});
    auto v = NDArrayFactory::create<double>('c', {2, 10}, {1.5, 3.5, 5.5, 9.5, 0.5, 2.5, 4.5, 6.5, 7.5, 8.5, 1.5, 3.5, 5.5, 9.5, 0.5, 2.5, 4.5, 6.5, 7.5, 8.5});

    auto ek = NDArrayFactory::create<Nd4jLong>('c', {2, 10}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9});
    auto ev = NDArrayFactory::create<double>('c', {2, 10}, {0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5, 0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5});

    Nd4jPointer extras[2];
#ifdef __CUDABLAS__
    extras[1] = LaunchContext::defaultContext()->getCudaStream();
#endif

    int axis = 1;

    ::sortTadByKey(extras, k.buffer(), k.shapeInfo(), k.specialBuffer(), k.specialShapeInfo(), v.buffer(), v.shapeInfo(), v.specialBuffer(), v.specialShapeInfo(), &axis, 1, false);
    k.tickWriteDevice();
    v.tickWriteDevice();

    // k.printIndexedBuffer("k");
    // v.printIndexedBuffer("v");

    ASSERT_EQ(ek, k);
    ASSERT_EQ(ev, v);
}

TEST_F(NativeOpsTests, SortTests_6) {
    auto k = NDArrayFactory::create<Nd4jLong>('c', {2, 10}, {1, 3, 5, 9, 0, 2, 4, 6, 7, 8, 1, 3, 5, 9, 0, 2, 4, 6, 7, 8});
    auto v = NDArrayFactory::create<double>('c', {2, 10}, {1.5, 3.5, 5.5, 9.5, 0.5, 2.5, 4.5, 6.5, 7.5, 8.5, 1.5, 3.5, 5.5, 9.5, 0.5, 2.5, 4.5, 6.5, 7.5, 8.5});

    auto ek = NDArrayFactory::create<Nd4jLong>('c', {2, 10}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9});
    auto ev = NDArrayFactory::create<double>('c', {2, 10}, {0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5, 0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5});

    Nd4jPointer extras[2];
#ifdef __CUDABLAS__
    extras[1] = LaunchContext::defaultContext()->getCudaStream();
#endif

    int axis = 1;

    ::sortTadByValue(extras, k.buffer(), k.shapeInfo(), k.specialBuffer(), k.specialShapeInfo(), v.buffer(), v.shapeInfo(), v.specialBuffer(), v.specialShapeInfo(), &axis, 1, false);
    k.tickWriteDevice();
    v.tickWriteDevice();

    ASSERT_EQ(ek, k);
    ASSERT_EQ(ev, v);
}

//TEST_F(NativeOpsTests, MapTests_1) {
//#ifdef __CUDABLAS__
//    return;
//#endif
//#ifdef GTEST_OS_LINUX
//    auto ptrMap = ::mmapFile(nullptr, "/tmp/maptest.$$$", 100LL);
//
//    ::munmapFile(nullptr, ptrMap, 100LL);
//#endif
//
//}

TEST_F(NativeOpsTests, MapTests_1) {
    //printf("Custom ops: %s\n", ::getAllCustomOps());
    //printf("All ops: %s\n", ::getAllOperations());

    ::getAllCustomOps();
    ::getAllOperations();
}

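// execCustomOp runs a declarable op through the C API using raw buffer/shape pointer arrays;
// here the squeeze op collapses the {1, 6} input into the preallocated {6} output.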
TEST_F(NativeOpsTests, CustomOpTest_1) {
    auto x = NDArrayFactory::create<float>('c', {1, 6}, {1, 2, 3, 4, 5, 6});
    auto z = NDArrayFactory::create<float>('c', {6});
    auto e = NDArrayFactory::create<float>('c', {6}, {1, 2, 3, 4, 5, 6});

    nd4j::ops::squeeze op;

    Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) x.getBuffer(), x.getSpecialBuffer()};
    Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) x.getShapeInfo(), x.getSpecialShapeInfo()};

    Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) z.getBuffer(), z.getSpecialBuffer()};
    Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) z.getShapeInfo(), z.getSpecialShapeInfo()};

    auto status = ::execCustomOp(nullptr, op.getOpHash(), ptrsInBuffer, ptrsInShapes, 1, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, nullptr, 0, nullptr, 0, false);
    ASSERT_EQ(Status::OK(), status);

    ASSERT_EQ(e, z);
}

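// execCustomOp2 takes a prebuilt Context instead of raw pointer arrays: inputs and the output
// are attached to ctx, then the add op is executed via its hash.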
TEST_F(NativeOpsTests, CustomOpTests_2) {
    auto array0 = NDArrayFactory::create<float>('c', {3, 2}, {1.f, 2.f, 3.f, 4.f, 5.f, 6.f});
    auto array1 = NDArrayFactory::create<float>('c', {3, 2}, {1.f, 2.f, 3.f, 4.f, 5.f, 6.f});
    auto z = NDArrayFactory::create<float>('c', {3, 2});

    auto exp = NDArrayFactory::create<float>('c', {3, 2}, {2.f, 4.f, 6.f, 8.f, 10.f, 12.f});
    Context ctx(1);

    NDArray::prepareSpecialUse({&z}, {&array0, &array1});

    ctx.setInputArray(0, array0.buffer(), array0.shapeInfo(), array0.getSpecialBuffer(), array0.getSpecialShapeInfo());
    ctx.setInputArray(1, array1.buffer(), array1.shapeInfo(), array1.getSpecialBuffer(), array1.getSpecialShapeInfo());
    ctx.setOutputArray(0, z.buffer(), z.shapeInfo(), z.getSpecialBuffer(), z.getSpecialShapeInfo());

    ASSERT_EQ(2, ctx.width());

    nd4j::ops::add op;
    ::execCustomOp2(nullptr, op.getOpHash(), &ctx);

    NDArray::registerSpecialUse({&z}, {&array0, &array1});

    ASSERT_EQ(exp, z);
}

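// calculateOutputShapes asks an op for its output shape(s) without executing it: given the
// conv2d input/weight shapes and the integer args (2x2 kernel, unit stride, same-mode padding),
// the single returned shape must match the expected {1, 3, 5, 4}.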
TEST_F(NativeOpsTests, CalculateOutputShapeTests_1) {
    auto input = NDArrayFactory::create<float>('c', {1, 2, 5, 4});
    auto weights = NDArrayFactory::create<float>('c', {2, 2, 2, 3});
    auto exp = NDArrayFactory::create<float>('c', {1, 3, 5, 4});

    nd4j::ops::conv2d op;

    std::vector<double> tArgs({});
    std::vector<Nd4jLong> iArgs({2, 2, 1, 1, 0, 0, 1, 1, 1});

    Nd4jPointer ptrs[] = {(Nd4jPointer) input.getShapeInfo(), (Nd4jPointer) weights.getShapeInfo()};
#ifdef __CUDABLAS__
    return;
#endif

    auto shapeList = ::calculateOutputShapes(nullptr, op.getOpHash(), ptrs, 2, tArgs.data(), tArgs.size(), iArgs.data(), iArgs.size());

    ASSERT_EQ(1, shapeList->size());

    ASSERT_EQ(exp.rankOf(), shape::rank((Nd4jLong *)shapeList->at(0)));
    ASSERT_EQ(exp.sizeAt(0), shape::shapeOf((Nd4jLong *)shapeList->at(0))[0]);
    ASSERT_EQ(exp.sizeAt(1), shape::shapeOf((Nd4jLong *)shapeList->at(0))[1]);
    ASSERT_EQ(exp.sizeAt(2), shape::shapeOf((Nd4jLong *)shapeList->at(0))[2]);
    ASSERT_EQ(exp.sizeAt(3), shape::shapeOf((Nd4jLong *)shapeList->at(0))[3]);

    //int *ptr = (int *) shapeList[0];
    //delete[] ptr;
    //delete shapeList;

    ::deleteShapeList((Nd4jPointer) shapeList);
}

TEST_F(NativeOpsTests, CalculateOutputShapeTests_2) {
    auto input = NDArrayFactory::create<float>('c', {1, 2, 5, 4});
    auto weights = NDArrayFactory::create<float>('c', {2, 2, 2, 3});
    auto exp = NDArrayFactory::create<float>('c', {1, 3, 5, 4});

    nd4j::ops::conv2d op;

    std::vector<double> tArgs({});
    std::vector<bool> bArgsF({});
    std::vector<Nd4jLong> iArgs({2, 2, 1, 1, 0, 0, 1, 1, 1});

    Nd4jPointer shapePtrs[] = {(Nd4jPointer) input.getShapeInfo(), (Nd4jPointer) weights.getShapeInfo()};
    Nd4jPointer dataPtrs[] = {(Nd4jPointer) input.buffer(), (Nd4jPointer) weights.buffer()};
#ifdef __CUDABLAS__
    return;
#endif

    // Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer* inputBuffers, Nd4jPointer* inputShapes, int numInputShapes, double* tArgs, int numTArgs, Nd4jLong *iArgs, int numIArgs, bool *bArgs, int numBArgs
    auto shapeList = ::calculateOutputShapes2(nullptr, op.getOpHash(), dataPtrs, shapePtrs, 2, const_cast<double*>(tArgs.data()), tArgs.size(),
                                              const_cast<Nd4jLong*>(iArgs.data()), iArgs.size(), nullptr, bArgsF.size());

    ASSERT_EQ(1, shapeList->size());

    ASSERT_EQ(exp.rankOf(), shape::rank((Nd4jLong *)shapeList->at(0)));
    ASSERT_EQ(exp.sizeAt(0), shape::shapeOf((Nd4jLong *)shapeList->at(0))[0]);
    ASSERT_EQ(exp.sizeAt(1), shape::shapeOf((Nd4jLong *)shapeList->at(0))[1]);
    ASSERT_EQ(exp.sizeAt(2), shape::shapeOf((Nd4jLong *)shapeList->at(0))[2]);
    ASSERT_EQ(exp.sizeAt(3), shape::shapeOf((Nd4jLong *)shapeList->at(0))[3]);

    //int *ptr = (int *) shapeList[0];
    //delete[] ptr;
    //delete shapeList;

    ::deleteShapeList((Nd4jPointer) shapeList);
}

// Uncomment only when needed - these benchmark suites run massive calculations
//TEST_F(NativeOpsTests, BenchmarkTests_1) {
//
//    printf("%s\n", ::runLightBenchmarkSuit(true));
//    printf("%s\n", ::runFullBenchmarkSuit(true));
//}