cavis/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests4.cpp

2420 lines
99 KiB
C++
Raw Normal View History

2019-06-06 14:21:15 +02:00
/*******************************************************************************
* Copyright (c) 2015-2018 Skymind, Inc.
*
* This program and the accompanying materials are made available under the
* terms of the Apache License, Version 2.0 which is available at
* https://www.apache.org/licenses/LICENSE-2.0.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
* SPDX-License-Identifier: Apache-2.0
******************************************************************************/
//
// @author raver119@gmail.com
//
#include "testlayers.h"
#include <ops/declarable/CustomOperations.h>
#include <helpers/helper_hash.h>
#include <array/NDArray.h>
2019-06-06 14:21:15 +02:00
#include <array/NDArrayList.h>
using namespace sd;
using namespace sd::graph;
2019-06-06 14:21:15 +02:00
class DeclarableOpsTests4 : public testing::Test {
public:
DeclarableOpsTests4() {
printf("\n");
fflush(stdout);
sd::ops::adjust_hue op0;
sd::ops::adjust_saturation op1;
2019-06-06 14:21:15 +02:00
}
};
template <typename T>
class TypedDeclarableOpsTests4 : public testing::Test {
public:
TypedDeclarableOpsTests4() {
printf("\n");
fflush(stdout);
sd::ops::adjust_hue op0;
sd::ops::adjust_saturation op1;
2019-06-06 14:21:15 +02:00
}
};
typedef ::testing::Types<double, float> TestingTypes;
TYPED_TEST_CASE(TypedDeclarableOpsTests4, TestingTypes);
//////////////////////////////////////////////////////////////////////
TYPED_TEST(TypedDeclarableOpsTests4, avgpool2d_1) {
2019-06-06 14:21:15 +02:00
auto x = NDArrayFactory::create<TypeParam>('c', {2, 4, 4, 2});
auto exp = NDArrayFactory::create<TypeParam>('c', {2, 2, 2, 2}, {6.f, 7.f, 10.f, 11.f, 22.f, 23.f, 26.f, 27.f, 38.f, 39.f, 42.f, 43.f, 54.f, 55.f, 58.f, 59.f});
x.linspace(1);
sd::ops::avgpool2d op;
auto result = op.evaluate({&x}, {}, {2, 2, 2, 2, 0, 0, 1, 1, 1, 1, 1});
2019-06-06 14:21:15 +02:00
ASSERT_EQ(ND4J_STATUS_OK, result.status());
2019-06-06 14:21:15 +02:00
auto z = result.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(exp.isSameShape(z));
ASSERT_TRUE(exp.equalsTo(z));
2019-06-06 14:21:15 +02:00
}
//////////////////////////////////////////////////////////////////////
TYPED_TEST(TypedDeclarableOpsTests4, avgpool2d_2) {
2019-06-06 14:21:15 +02:00
auto x = NDArrayFactory::create<TypeParam>('c', {2, 4, 4, 2});
auto exp = NDArrayFactory::create<TypeParam>('c', {2, 2, 2, 2}, {6.f, 7.f, 10.f, 11.f, 22.f, 23.f, 26.f, 27.f, 38.f, 39.f, 42.f, 43.f, 54.f, 55.f, 58.f, 59.f});
x.linspace(1);
sd::ops::avgpool2d op;
auto result = op.evaluate({&x}, {}, {2, 2, 2, 2, 0, 0, 1, 1, 0, 1, 1});
2019-06-06 14:21:15 +02:00
ASSERT_EQ(ND4J_STATUS_OK, result.status());
2019-06-06 14:21:15 +02:00
auto z = result.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(exp.isSameShape(z));
ASSERT_TRUE(exp.equalsTo(z));
2019-06-06 14:21:15 +02:00
}
//////////////////////////////////////////////////////////////////////
TYPED_TEST(TypedDeclarableOpsTests4, avgpool2d_3) {
2019-06-06 14:21:15 +02:00
auto x = NDArrayFactory::create<TypeParam>('c', {2, 5, 5, 2});
auto exp = NDArrayFactory::create<TypeParam>('c', {2, 3, 3, 2}, {7.f, 8.f, 11.f, 12.f, 14.f, 15.f, 27.f, 28.f, 31.f, 32.f, 34.f, 35.f, 42.f, 43.f, 46.f, 47.f, 49.f, 50.f, 57.f, 58.f, 61.f, 62.f, 64.f, 65.f, 77.f, 78.f, 81.f, 82.f, 84.f, 85.f, 92.f, 93.f, 96.f, 97.f, 99.f, 100.f,});
x.linspace(1);
sd::ops::avgpool2d op;
auto result = op.evaluate({&x}, {}, {2, 2, 2, 2, 0, 0, 1, 1, 1, 0, 1});
2019-06-06 14:21:15 +02:00
ASSERT_EQ(ND4J_STATUS_OK, result.status());
2019-06-06 14:21:15 +02:00
auto z = result.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(exp.isSameShape(z));
ASSERT_TRUE(exp.equalsTo(z));
2019-06-06 14:21:15 +02:00
}
//////////////////////////////////////////////////////////////////////
TYPED_TEST(TypedDeclarableOpsTests4, avgpool2d_4) {
2019-06-06 14:21:15 +02:00
auto x = NDArrayFactory::create<TypeParam>('c', {2, 5, 5, 2});
auto exp = NDArrayFactory::create<TypeParam>('c', {2, 2, 2, 2}, {7.f, 8.f, 11.f, 12.f, 27.f, 28.f, 31.f, 32.f, 57.f, 58.f, 61.f, 62.f, 77.f, 78.f, 81.f, 82.f});
x.linspace(1);
sd::ops::avgpool2d op;
auto result = op.evaluate({&x}, {}, {2, 2, 2, 2, 0, 0, 1, 1, 0, 1, 1});
2019-06-06 14:21:15 +02:00
ASSERT_EQ(ND4J_STATUS_OK, result.status());
2019-06-06 14:21:15 +02:00
auto z = result.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(exp.isSameShape(z));
ASSERT_TRUE(exp.equalsTo(z));
2019-06-06 14:21:15 +02:00
}
//////////////////////////////////////////////////////////////////////
TYPED_TEST(TypedDeclarableOpsTests4, avgpool2d_5) {
2019-06-06 14:21:15 +02:00
auto x = NDArrayFactory::create<TypeParam>('c', {2, 2, 5, 5});
auto exp = NDArrayFactory::create<TypeParam>('c', {2, 2, 3, 3}, {1.f, 2.5f, 4.5f, 8.5f, 10.f, 12.f, 18.5f, 20.f, 22.f, 26.f, 27.5f, 29.5f, 33.5f, 35.f, 37.f, 43.5f, 45.f, 47.f, 51.f, 52.5f, 54.5f, 58.5f, 60.f, 62.f, 68.5f, 70.f, 72.f, 76.f, 77.5f, 79.5f, 83.5f, 85.f, 87.f, 93.5f, 95.f, 97.f});
x.linspace(1);
sd::ops::avgpool2d op;
auto result = op.evaluate({&x}, {}, {2, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0});
2019-06-06 14:21:15 +02:00
ASSERT_EQ(ND4J_STATUS_OK, result.status());
2019-06-06 14:21:15 +02:00
auto z = result.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(exp.isSameShape(z));
ASSERT_TRUE(exp.equalsTo(z));
2019-06-06 14:21:15 +02:00
}
//////////////////////////////////////////////////////////////////////
TYPED_TEST(TypedDeclarableOpsTests4, avgpool2d_6) {
2019-06-06 14:21:15 +02:00
auto x = NDArrayFactory::create<TypeParam>('c', {2, 2, 5, 5});
auto exp = NDArrayFactory::create<TypeParam>('c', {2, 2, 3, 3}, {0.25f, 1.25f, 2.25f, 4.25f, 10.f, 12.f, 9.25f, 20.f, 22.f, 6.5f, 13.75f, 14.75, 16.75f, 35.f, 37.f, 21.75f, 45.f, 47.f, 12.75f, 26.25f, 27.25f, 29.25f, 60.f, 62.f, 34.25f, 70.f, 72.f, 19.f, 38.75f, 39.75f, 41.75f, 85.f, 87.f, 46.75f, 95.f, 97.f});
x.linspace(1);
sd::ops::avgpool2d op;
auto result = op.evaluate({&x}, {}, {2, 2, 2, 2, 1, 1, 1, 1, 0, 1, 0});
2019-06-06 14:21:15 +02:00
ASSERT_EQ(ND4J_STATUS_OK, result.status());
2019-06-06 14:21:15 +02:00
auto z = result.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(exp.isSameShape(z));
ASSERT_TRUE(exp.equalsTo(z));
2019-06-06 14:21:15 +02:00
}
//////////////////////////////////////////////////////////////////////
TYPED_TEST(TypedDeclarableOpsTests4, avgpool2d_7) {
2019-06-06 14:21:15 +02:00
auto x = NDArrayFactory::create<TypeParam>('c', {2, 2, 5, 5});
auto exp = NDArrayFactory::create<TypeParam>('c', {2, 2, 3, 3}, {4.f, 6.f, 7.5f, 14.f, 16.f, 17.5f, 21.5f, 23.5f, 25.f, 29.f, 31.f, 32.5f, 39.f, 41.f, 42.5f, 46.5f, 48.5f, 50.f, 54.f, 56.f, 57.5f, 64.f, 66.f, 67.5f, 71.5f, 73.5f, 75.f, 79.f, 81.f, 82.5f, 89.f, 91.f, 92.5f, 96.5f, 98.5f, 100.f});
x.linspace(1);
sd::ops::avgpool2d op;
auto result = op.evaluate({&x}, {}, {2, 2, 2, 2, 0, 0, 1, 1, 1, 0, 0});
2019-06-06 14:21:15 +02:00
ASSERT_EQ(ND4J_STATUS_OK, result.status());
2019-06-06 14:21:15 +02:00
auto z = result.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(exp.isSameShape(z));
ASSERT_TRUE(exp.equalsTo(z));
2019-06-06 14:21:15 +02:00
}
//////////////////////////////////////////////////////////////////////
TYPED_TEST(TypedDeclarableOpsTests4, avgpool2d_8) {
2019-06-06 14:21:15 +02:00
auto x = NDArrayFactory::create<TypeParam>('c', {1, 1, 3, 3});
auto exp = NDArrayFactory::create<TypeParam>('c', {1, 1, 2, 2}, {3.f, 4.f, 6.f, 7.f});
2019-06-06 14:21:15 +02:00
x.linspace(1);
sd::ops::avgpool2d op;
auto result = op.evaluate({&x}, {}, {2, 2, 1, 1, 0, 0, 1, 1, 0, 0, 0});
2019-06-06 14:21:15 +02:00
ASSERT_EQ(ND4J_STATUS_OK, result.status());
2019-06-06 14:21:15 +02:00
auto z = result.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(exp.isSameShape(z));
ASSERT_TRUE(exp.equalsTo(z));
2019-06-06 14:21:15 +02:00
}
//////////////////////////////////////////////////////////////////////
TYPED_TEST(TypedDeclarableOpsTests4, avgpool2d_9) {
2019-06-06 14:21:15 +02:00
auto x = NDArrayFactory::create<TypeParam>('c', {1, 1, 3, 3});
auto exp = NDArrayFactory::create<TypeParam>('c', {1, 1, 3, 3}, {3.f, 4.f, 4.5f, 6.f, 7.f, 7.5f, 7.5f, 8.5f, 9.f});
x.linspace(1);
sd::ops::avgpool2d op;
auto result = op.evaluate({&x}, {}, {2, 2, 1, 1, 0, 0, 1, 1, 1, 0, 0});
2019-06-06 14:21:15 +02:00
ASSERT_EQ(ND4J_STATUS_OK, result.status());
2019-06-06 14:21:15 +02:00
auto z = result.at(0);
2019-06-06 14:21:15 +02:00
//z->printShapeInfo("z shape:");
//z->printBuffer("z buffer:");
ASSERT_TRUE(exp.isSameShape(z));
ASSERT_TRUE(exp.equalsTo(z));
2019-06-06 14:21:15 +02:00
}
//////////////////////////////////////////////////////////////////////
TYPED_TEST(TypedDeclarableOpsTests4, avgpool2d_10) {
auto input = NDArrayFactory::create<TypeParam>('c', {4, 10, 10, 3}, {9.37125111f, 2.20166993f, 2.91434479f, 5.43639755f, -2.10573769f, 4.08528662f, 5.86908436f, -4.46203756f, 2.21057916f, 5.35849190f, 0.01394637f, 4.40566349f, 7.07982206f, -0.09633455f, 2.42429352f, 3.97301817f, -1.89553940f, 1.99690318f, 6.33141708f, 0.55401880f, 1.70707977f,
5.55204201f, -0.03513752f, 1.60011971f, 2.62700319f, -2.74582434f, 3.06697464f, 1.06277943f, -1.16075921f, -0.78095782f, 9.72352791f, -1.22686064f, 1.99644792f, 7.35571337f, 1.40607321f, 0.11390255f, 9.53334427f, 2.28303599f, -1.66728830f, 6.16678810f, -0.04532295f, -1.97708666f, 9.74906158f, 1.46223176f, -1.46734393f, 4.30761862f,
-1.23790228f, 1.24823606f, 6.13938427f, -3.83689475f, -1.19625473f, 7.91535568f, 6.05868721f, -3.22946382f, 8.81633949f, -0.19967777f, 0.66053957f, 2.30919123f, 0.74543846f, -0.39347672f, 11.11058044f, 0.53720862f, 1.52645731f, 5.70012379f, -1.15213466f, 1.16451406f, 7.00526333f, 1.57362783f, -2.44384766f, 5.54213285f, -1.98828590f,
-0.70483637f, 7.88281822f, -3.59875536f, 0.80745387f, 13.41578484f, -1.55507684f, -0.65855008f, 9.32583523f, -0.14544789f, 0.73436141f, 3.61176538f, -1.71268058f, -2.58490300f, 9.09280205f, -3.27405524f, -2.04569697f, 4.44761324f, -0.62955856f, -2.61917663f, 8.04890442f, 0.54579324f, 0.85929775f, 9.82259560f, -1.93825579f, 0.77703512f,
4.67090321f, -4.79267597f, -2.38906908f, 9.31265545f, 0.96026313f, -1.14109385f, 11.54231834f, -0.01417295f, -0.39500344f, 8.49191666f, 0.55300158f, 2.79490185f, 6.92466164f, 1.72254205f, 2.82222271f, 8.83112717f, 2.95033407f, 2.18054962f, 6.73509789f, -2.22272944f, 0.51127720f, -1.04563558f, 2.15747333f, -2.30959272f, 9.55441570f,
1.50396204f, 1.77370787f, 7.38146257f, -1.79076433f, 3.20961165f, 7.18864202f, 2.91217351f, 0.43018937f, 7.11078024f, -1.17386127f, -0.16817921f, 6.12327290f, -2.82205725f, 3.30696845f, 13.51291752f, -1.30856836f, -2.38332748f, 11.09487438f, -1.47190213f, -0.53050828f, 4.38285351f, -5.07309771f, 1.50714362f, 5.72274446f, -2.85825086f,
-0.89673209f, 3.73791552f, -0.67708802f, -4.13149452f, -0.00671843f, -0.26566532f, 0.32961160f, 7.14501762f, -1.41608179f, -4.96590328f, 12.26205540f, -0.65158135f, -0.88641000f, 6.95777559f, -0.79058206f, -0.10260171f, 7.87169170f, 1.35921454f, 1.11759663f, 5.46187401f, -2.57214499f, 2.48484039f, 4.04043484f, -2.07137156f, -1.42709637f,
9.25487137f, -0.12605135f, -2.66949964f, 2.89412403f, 0.74451172f, -2.96250391f, 3.99258423f, 0.27084303f, 0.32213116f, 5.42332172f, -0.44414216f, 1.70881832f, 6.69346905f, 0.53058422f, -4.73146200f, 4.22051668f, 2.24834967f, 0.66996074f, 4.30173683f, 0.11849818f, -4.07520294f, 8.27318478f, -2.54398274f, -2.86705542f, 10.11775303f,
-0.99382895f, 0.65881538f, 7.93556786f, -1.27934420f, -1.69343162f, 9.68042564f, -1.02609646f, -1.18189347f, 5.75370646f, -1.67888868f, -4.48871994f, 4.79537392f, -0.79212248f, -0.19855022f, 6.15060997f, -0.01081491f, 3.64454579f, 10.82562447f, 1.58859253f, -2.65847278f, 8.60093212f, -1.59196103f, 0.07635692f, 11.76175690f, -1.17453325f,
0.10122013f, 6.86458445f, -2.18891335f, -2.74004745f, 8.07066154f, 0.71818852f, -2.03035975f, 6.31053686f, 0.51509416f, 1.39789927f, 9.43515587f, 2.04256630f, 0.13985133f, 4.65010691f, 2.40911126f, -0.36255789f, -3.06867862f, -0.45225358f, -1.56778407f, 6.05917358f, -1.09891272f, 1.77184200f, 6.46248102f, 0.96042323f, -0.24346280f,
4.63436460f, -4.69907761f, 1.25187206f, 11.46173859f, -2.21917558f, 1.28007793f, 6.92173195f, 2.11268163f, -3.47389889f, 5.08722782f, -3.03950930f, -4.17154264f, 11.30568314f, 0.80361372f, 2.53214502f, 7.18707085f, -4.49114513f, 2.85449266f, 10.14906883f, -0.31974933f, -0.84472644f, -0.52459574f, 0.12921631f, -1.81390119f, 2.76170087f, 1.03982210f, 2.91744232f, -0.29048753f, 5.87453508f, -1.53684759f, 1.85800636f, -0.91404629f, 1.28954852f, 5.11354685f, -2.47475505f, -1.33179152f, 2.58552408f, 1.37316465f, -3.32339454f, 1.54122913f, 3.24953628f, -0.29758382f, 2.82391763f, -1.51142192f, -1.22699404f, 6.75745535f, 0.65452754f, -3.29385471f, 2.06008053f, 2.53172946f, -4.23532820f, -1.53909743f, -0.07010663f, -1.42173731f, 7.29031610f, -0.18448229f, 4.59496164f, 6.73027277f, 0.73441899f, 0.14426160f, 4.14915276f, -2.97010231f, 6.05851364f, 4.95218086f, -2.39145470f, 2.40494704f, 2.10288811f, 0.53503096f, 1.44511235f, 6.66344261f, -3.05803776f, 7.21418667f, 3.30303526f, -0.24163735f, 3.47409391f, 3.64520788f, 2.15189481f, -3.11243272f, 3.62310791f, 0.37379482f, 0.40865007f, -0.83132005f, -4.78246069f, 2.07030797f, 6.51765442f, 3.16178989f, 5.06180477f, 3.78434467f, -0.96689719f, 0.35965276f, 5.89967585f, 1.40294051f, 1.11952639f, 10.59778214f, 0.26739889f, -1.61297631f, 6.24801159f, -0.93914318f, -0.57812452f, 9.92604542f, -0.73025000f, -3.38530874f, 2.45646000f, -2.47949195f, 0.51638460f, 10.65636063f, 1.97816694f, -3.00407791f, 2.66914415f, -0.81951088f, -0.23316640f, 2.40737987f, -2.70007610f, 1.51531935f, 4.08860207f, -0.27552786f, -1.31721711f, 7.11568260f, -3.33498216f, -4.02545023f, 7.22675610f, -0.81690705f, -2.52689576f, 1.04016697f, -0.79291463f, -0.34875512f, 10.00498390f, -4.24167728f, 1.46162593f, 11.82569408f, -1.70359993f, -0.30161047f, 16.44085884f, -0.82253462f, -0.09435523f, 6.13080597f, -0.20259480f, 0.68308711f, 6.15663004f, -6.61776876f, 0.33295766f, 2.55449438f, -0.17819691f, -1.14892209f, 5.56776142f, 1.99279118f, 1.33035934f, 4.45823956f, 3.34916544f, -2.59905386f, 6.16164446f, -2.03881931f, -2.45273542f, 12.46793365f, -2.22743297f, 2.83738565f, 8.48628139f, -1.39347959f, -1.30867767f, 11.08041477f, -4.00363779f, 2.09183025f, 11.30395889f, -2.20504737f, 1.37426853f, 8.98735619f, 1.04676604f, -0.72757077f, 8.28050232f, -6.70741081f, -0.65798020f, 5.68592072f, -0.60760021f, 0.35854483f, 6.26852131f, 1.94100165f, 1.32112014f, 0.80987954f, -1.74617672f, -0.25434083f, 7.16045523f, 1.58884013f, -2.64847064f, 13.14820385f, 1.21393633f, -2.47258949f, 9.41650105f, -0.79384226f, 2.48954105f, 10.95629311f, 0.47723705f, 4.02126694f, 8.02593136f, -2.20726371f, -1.18794477f, 1.50836647f, 0.93118095f, -1.73513174f, 8.85493565f, -2.99670315f, -0.79055870f, 2.39473820f, 2.05046916f, -2.38055134f, 11.82299423f, 0.15609655f, 0.68744308f, 5.66401434f, -0.69281673f, 2.09855556f, 7.74626589f, -0.34283102f, 1.00542057f, 9.95838642f, 0.80161905f, 2.33455157f, 9.80057335f, -0.93561798f, 2.56991577f, 8.29711342f, 0.94213426f, 0.44209945f, 11.70259857f, 0.92710167f, 2.60957146f, 0.24971688f, -0.86529571f, 3.78628922f, 6.80884457f, -0.68178189f, 2.21103406f, 3.18895817f, 0.60283208f, -2.92716241f, 6.72060776f, -1.06625068f, 2.56543374f, 9.97404480f, 3.58080721f, -0.94936347f, 10.16736984f, -1.38464379f, 1.18191063f, 6.66179037f, -3.56115270f, 0.32329530f, 10.90870762f, 2.20638227f, 0.19653285f, 7.34650040f, -3.63859272f, -1.03027737f, 5.98829985f, -3.66606474f, -3.89746714f, 8.63469028f, 1.22569811f, 1.63240814f, 3.74385309f, 0.58243257f, -0.56981975f, 3.69260955f, 1.00979900f, -1.44030499f, 8.57058144f, -1.10648811f, 1.20474911f, 5.43133020f, -2.14822555f, -0.07928789f, 11.25825310f, 0.19645604f, -5.49546146f, 10.41917038f, -0.68178523f, -2.99639869f, 6.50054455f, 0.46488351f, -5.42328453f, 9.09500027f, -2.82107449f, 0.05601966f, 15.34610748f, -0.06820253f, 3.86699796f, 10.73316956f, -3.04795432f, -0.14702171f, 5.64813185f, 1.44028485f, -2.47596145f, 0.07280898f, -3.03187990f, -1.35183525f, 9.35835648f, 2.72966957f, 1.88199532f, 10.36187744f, -0.22834805f, -3.26738238f, 6.92025137f, -2.34061313f, 4.77379704f, 5.28559113
auto exp = NDArrayFactory::create<TypeParam>('c', {4, 4, 4, 3}, {7.97172260f, 0.06878620f, 2.27749538f, 7.29276514f, -0.14074677f, 0.65480286f, 5.70313978f, -0.06546132f, 0.35443667f, 3.70382833f, -0.84020567f, 0.63826996f, 8.60301399f, -0.38236514f, 1.55177069f, 7.37542057f, -0.99374938f, -0.29971302f, 8.84352493f, -0.67121059f, 0.43132120f, 4.78175592f, -1.25070143f, -1.91523600f, 6.03855371f, -0.00292124f, -1.11214364f, 7.90158176f, -0.57949901f, -0.96735370f, 7.81192017f, -0.53255427f, -0.48009714f, 3.16953635f, 0.08353355f, -1.54299748f, 3.74821687f, 1.69396687f, 0.72724354f, 5.42915201f, -1.13686812f, -0.71793109f, 5.78376389f, -0.72239977f, -0.60055625f, 2.53636408f, 0.56777251f, -2.07892323f, 6.08064651f, 0.68620735f, 2.54017019f, 5.65828180f, -0.68255502f, 1.47283304f, 6.10842514f, -0.39655915f, 0.28380761f, 1.96707797f, -1.98206317f, 0.94027776f, 4.71811438f, 0.32104525f, -0.92409706f, 8.34588146f, -1.05581069f, -0.55217457f, 9.58440876f, -0.96549922f, 0.45820439f, 5.65453672f, -2.50953507f, -0.71441835f, 8.03059578f, -0.21281289f, 0.92125505f, 9.26900673f, -0.35963219f, -0.70039093f, 8.59924412f, -1.22358346f, 0.81318003f, 3.85920119f, -0.01305223f, -1.09234154f, 6.33158875f, 1.28094780f, -1.48926139f, 4.94969177f, -0.77126902f, -1.97033751f, 5.64381838f, -0.16285487f, -1.31277227f, 2.39893222f, -1.32902908f, -1.39609122f, 6.47572327f, -0.45267010f, 1.55727172f, 6.70965624f, -1.68735468f, -0.05672536f, 7.25092363f, -0.64613032f, 0.67050058f, 3.60789680f, -2.05948973f, 2.22687531f, 8.15202713f, -0.70148355f, 1.28314006f, 8.14842319f, -1.88807654f, -1.04808438f, 8.45500565f, -0.76425624f, 0.94542569f, 4.56179953f, -0.28786001f, -2.04502511f, 8.46278095f, -0.31019822f, 0.07339200f, 9.34214592f, -0.61948007f, 0.52481830f, 8.32515621f, -1.52418160f, 0.49678251f, 5.11082315f, -1.09908783f, -0.52969611f, 5.27806664f, 0.88632923f, 0.66754371f, 4.75839233f, 0.48928693f, -0.68036932f, 6.56925392f, -0.02949905f, -2.99189186f, 4.46320581f, -0.64534980f, -0.29516968f, 8.60809517f, -1.13120568f, 3.41720533f, 5.84243155f, -1.24109328f, 0.89566326f, 5.99578333f, -0.42496428f, 2.07076764f,
sd::ops::avgpool2d op;
auto result = op.evaluate({&input}, {3,3, 3,3, 0,0, 1,1,1, 0,1});
ASSERT_EQ(Status::OK(), result.status());
auto z = result.at(0);
// z->printIndexedBuffer("z");
// exp.printIndexedBuffer("e");
ASSERT_TRUE(exp.isSameShape(z));
ASSERT_TRUE(exp.equalsTo(z));
}
//////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests4, avgpool2d_11) {
int inOutH = 5;// 35;
int inOutW = 5;// 35;
int inOutC = 10;// 192;
auto x = NDArrayFactory::create<double>('c', {1, inOutH, inOutW, inOutC});
x.linspace(1.0);
sd::ops::avgpool2d op;
auto result = op.evaluate({&x}, {3,3, 1,1, 0,0, 1,1, 1, 0, 1});
ASSERT_EQ(Status::OK(), result.status());
auto z = result.at(0);
int totalPadHeight = (inOutH - 1) * 1 + 3 - inOutH;
int padTop = totalPadHeight / 2;
int padBottom = totalPadHeight - totalPadHeight / 2;
int k = 3;
auto m = NDArrayFactory::create<double>('c', {1, inOutH, inOutW, inOutC});
auto c = NDArrayFactory::create<double>('c', {1, inOutH, inOutW, inOutC});
for (int h = 0; h < inOutH; h++) {
for (int w = 0; w < inOutW; w++) {
int hFrom = h - padTop;
int wFrom = w - padBottom;
int hTo = hFrom + k;
int wTo = wFrom + k;
hFrom = sd::math::nd4j_max<int>(0, hFrom);
wFrom = sd::math::nd4j_max<int>(0, wFrom);
hTo = sd::math::nd4j_min<int>(inOutH, hTo);
wTo = sd::math::nd4j_min<int>(inOutW, wTo);
int idxOut[4];
int idxIn[4];
for (int ch = 0; ch < inOutC; ch++) {
idxOut[1] = h;
idxOut[2] = w;
idxOut[3] = ch;
idxIn[3] = ch;
for (int kh = hFrom; kh < hTo; kh++) {
for (int kw = wFrom; kw < wTo; kw++) {
idxIn[1] = kh;
idxIn[2] = kw;
auto inVal = x.e<double>(0, kh, kw, ch);
m.p(0, h, w, ch, inVal + m.e<double>(0, h, w, ch));
c.p(0, h, w, ch, 1 + c.e<int>(0, h, w, ch));
}
}
}
}
}
m /= c;
ASSERT_EQ(m, *z);
}
//////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests4, avgpool2d_12) {
int bS=4, iH=10,iW=10, iC=3, kH=3,kW=3, sH=3,sW=3, pH=0,pW=0, dH=1,dW=1;
int oH=4, oW=4;
int paddingMode = 1; // 1-SAME, 0-VALID
int dataFormat = 1; // 1-NHWC, 0-NDHW
auto input = NDArrayFactory::create<double>('c', {bS, iH, iW, iC});
auto expected = NDArrayFactory::create<double>('c', {bS, oH, oW, iC}, { 17.5, 18.5, 19.5, 25. , 26. , 27. , 34. , 35. , 36. , 41.5, 42.5, 43.5, 92.5, 93.5, 94.5, 100. , 101. , 102. , 109. , 110. , 111. , 116.5, 117.5, 118.5,
182.5, 183.5, 184.5, 190. , 191. , 192. , 199. , 200. , 201. , 206.5, 207.5, 208.5, 257.5, 258.5, 259.5, 265. , 266. , 267. , 274. , 275. , 276. , 281.5, 282.5, 283.5,
317.5, 318.5, 319.5, 325. , 326. , 327. , 334. , 335. , 336. , 341.5, 342.5, 343.5, 392.5, 393.5, 394.5, 400. , 401. , 402. , 409. , 410. , 411. , 416.5, 417.5, 418.5,
482.5, 483.5, 484.5, 490. , 491. , 492. , 499. , 500. , 501. , 506.5, 507.5, 508.5, 557.5, 558.5, 559.5, 565. , 566. , 567. , 574. , 575. , 576. , 581.5, 582.5, 583.5,
617.5, 618.5, 619.5, 625. , 626. , 627. , 634. , 635. , 636. , 641.5, 642.5, 643.5, 692.5, 693.5, 694.5, 700. , 701. , 702. , 709. , 710. , 711. , 716.5, 717.5, 718.5,
782.5, 783.5, 784.5, 790. , 791. , 792. , 799. , 800. , 801. , 806.5, 807.5, 808.5, 857.5, 858.5, 859.5, 865. , 866. , 867. , 874. , 875. , 876. , 881.5, 882.5, 883.5,
917.5, 918.5, 919.5, 925. , 926. , 927. , 934. , 935. , 936. , 941.5, 942.5, 943.5, 992.5, 993.5, 994.5,1000. , 1001. , 1002. ,1009. , 1010. , 1011. ,1016.5, 1017.5, 1018.5,
1082.5, 1083.5, 1084.5,1090. , 1091. , 1092. ,1099. , 1100. , 1101. ,1106.5, 1107.5, 1108.5,1157.5, 1158.5, 1159.5,1165. , 1166. , 1167. ,1174. , 1175. , 1176. ,1181.5, 1182.5, 1183.5});
input.linspace(1.);
sd::ops::avgpool2d op;
auto results = op.evaluate({&input}, {kH,kW, sH,sW, pH,pW, dH,dW, paddingMode, 0, dataFormat});
auto output = results.at(0);
ASSERT_EQ(Status::OK(), results.status());
//output->printIndexedBuffer("output");
//expected.printIndexedBuffer("expected");
ASSERT_TRUE(expected.isSameShape(output));
ASSERT_TRUE(expected.equalsTo(output));
2019-06-06 14:21:15 +02:00
}
//////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests4, avgpool2d_13) {
const int bS = 2; // batch size
const int iD = 1; // input depth (number of picture channels, for example rgb=3)
const int iH = 28; // picture height in pixels
const int iW = 28; // picture width in pixels
const int kH = 5; // kernel height in pixels
const int kW = 5; // kernel width in pixels
const int sH = 1; // stride step in horizontal direction
const int sW = 1; // stride step in vertical direction
const int pH = 0; // padding height
const int pW = 0; // padding width
const int dH = 2; // dilation height
const int dW = 2; // dilation width
const int oH = (iH - kH - (kH-1)*(dH-1) + 2*pH)/sH + 1; // output height
const int oW = (iW - kW - (kW-1)*(dW-1) + 2*pW)/sW + 1; // output width
auto x = NDArrayFactory::create_<float>('c', {bS,iD,iH,iW});
auto exp = NDArrayFactory::create<float>('c',{bS,iD,oH,oW});
// auto z('c',{bS,iD,oH,oW});
auto variableSpace = new VariableSpace();
variableSpace->putVariable(-1, x);
// variableSpace->putVariable(1, &z);
auto block = new Context(1, variableSpace, false);
block->fillInputs({-1});
std::vector<int>* argI = block->getIArguments();
*argI = {kH,kW, sH,sW, pH,pW, dW,dH, 0, 0, 0}; // 0,1 - kernel Height/Width; 2,3 - stride Height/Width; 4,5 - pad Height/Width; 6,7 - dilation Height/Width; 8 - same mode;
sd::ops::avgpool2d pooling;
Nd4jStatus status = pooling.execute(block);
ASSERT_EQ(ND4J_STATUS_OK, status);
auto result = variableSpace->getVariable(block->getNodeId())->getNDArray();
ASSERT_TRUE(exp.isSameShape(result));
delete variableSpace;
delete block;
}
//////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests4, avgpool2d_14) {
const int bS = 2;
const int iD = 1;
const int iH = 28;
const int iW = 28;
const int kH = 5;
const int kW = 5;
const int sH = 1;
const int sW = 1;
const int pH = 0;
const int pW = 0;
const int dH = 1;
const int dW = 1;
const int oH = (iH - kH - (kH-1)*(dH-1) + 2*pH)/sH + 1; // output height
const int oW = (iW - kW - (kW-1)*(dW-1) + 2*pW)/sW + 1; // output width
auto x = NDArrayFactory::create_<float>('c', {bS,iD,iH,iW});
auto exp = NDArrayFactory::create<float>('c',{bS,iD,oH,oW});
// auto z('c',{bS,iD,oH,oW});
auto variableSpace = new VariableSpace();
variableSpace->putVariable(-1, x);
// variableSpace->putVariable(1, &z);
auto block = new Context(1, variableSpace, false);
block->fillInputs({-1});
std::vector<int>* argI = block->getIArguments();
*argI = {kH,kW, sH,sW, pH,pW, dW,dH, 0, 0, 0}; // 0,1 - kernel Height/Width; 2,3 - stride Height/Width; 4,5 - pad Height/Width; 6,7 - dilation Height/Width; 8 - same mode;
sd::ops::avgpool2d pooling;
Nd4jStatus status = pooling.execute(block);
ASSERT_EQ(ND4J_STATUS_OK, status);
auto result = variableSpace->getVariable(block->getNodeId())->getNDArray();
// result->printShapeInfo();
ASSERT_TRUE(exp.isSameShape(result));
delete variableSpace;
delete block;
}
//////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests4, Avgpool2d_test15) {
const int bS = 2;
const int iD = 1;
const int iH = 28;
const int iW = 28;
const int kH = 5;
const int kW = 5;
const int sH = 1;
const int sW = 1;
const int pH = 0;
const int pW = 0;
const int dH = 1;
const int dW = 1;
const int oH = (int) sd::math::nd4j_ceil<float, int>(iH * 1.f / sH);
const int oW = (int) sd::math::nd4j_ceil<float, int>(iW * 1.f / sW);
auto x = NDArrayFactory::create_<float>('c', {bS,iD,iH,iW});
auto exp = NDArrayFactory::create<float>('c',{bS,iD,oH,oW});
// auto z('c',{bS,iD,oH,oW});
auto variableSpace = new VariableSpace();
variableSpace->putVariable(-1, x);
// variableSpace->putVariable(1, &z);
auto block = new Context(1, variableSpace, false);
block->fillInputs({-1});
std::vector<int>* argI = block->getIArguments();
*argI = {kH,kW, sH,sW, pH,pW, dW,dH, 1, 0, 0}; // 0,1 - kernel Height/Width; 2,3 - stride Height/Width; 4,5 - pad Height/Width; 6,7 - dilation Height/Width; 8 - same mode;
sd::ops::avgpool2d pooling;
Nd4jStatus status = pooling.execute(block);
ASSERT_EQ(ND4J_STATUS_OK, status);
auto result = variableSpace->getVariable(block->getNodeId())->getNDArray();
// result->printShapeInfo();
ASSERT_TRUE(exp.isSameShape(result));
delete variableSpace;
delete block;
}
//////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests4, avgpool2d_16) {
int bS=2, iH=4,iW=4, iC=2, kH=2,kW=2, sH=2,sW=2, pH=0,pW=0, dH=1,dW=1;
int oH=2,oW=2;
int paddingMode = 1; // 1-SAME, 0-VALID
int dataFormat = 1; // 1-NHWC, 0-NDHW
NDArray input('c', {bS, iH, iW, iC}, sd::DataType::FLOAT32);
NDArray output('f', {bS, oH, oW, iC}, sd::DataType::FLOAT32);
NDArray expected('c', {bS, oH, oW, iC}, {6.f, 7.f, 10.f, 11.f, 22.f, 23.f, 26.f, 27.f, 38.f, 39.f, 42.f, 43.f, 54.f, 55.f, 58.f, 59.f}, sd::DataType::FLOAT32);
input.linspace(1.);
sd::ops::avgpool2d op;
auto status = op.execute({&input}, {&output}, {}, {kH,kW, sH,sW, pH,pW, dH,dW, paddingMode, 0, dataFormat}, {});
ASSERT_EQ(Status::OK(), status);
// output.printBuffer();
//expected.printIndexedBuffer("expected");
ASSERT_TRUE(expected.equalsTo(output));
}
//////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests4, biasadd_1) {
2019-06-06 14:21:15 +02:00
auto x = NDArrayFactory::create<double>('c', {2, 3, 3, 2});
auto bias = NDArrayFactory::create<double>('c', {2}, {1, 2});
2019-06-06 14:21:15 +02:00
auto exp = NDArrayFactory::create<double>('c', {2, 3, 3, 2}, {1.f, 2.f, 1.f, 2.f, 1.f, 2.f, 1.f, 2.f, 1.f, 2.f, 1.f, 2.f, 1.f, 2.f, 1.f, 2.f, 1.f, 2.f, 1.f, 2.f, 1.f, 2.f, 1.f, 2.f, 1.f, 2.f, 1.f, 2.f, 1.f, 2.f, 1.f, 2.f, 1.f, 2.f, 1.f, 2.f});
sd::ops::biasadd op;
auto result = op.evaluate({&x, &bias}, {}, {}, {});
2019-06-06 14:21:15 +02:00
ASSERT_EQ(ND4J_STATUS_OK, result.status());
2019-06-06 14:21:15 +02:00
auto z = result.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(exp.isSameShape(z));
ASSERT_TRUE(exp.equalsTo(z));
2019-06-06 14:21:15 +02:00
}
TEST_F(DeclarableOpsTests4, biasadd_2) {
2019-06-06 14:21:15 +02:00
auto x = NDArrayFactory::create<double>('c', {2, 2, 3, 3});
auto bias = NDArrayFactory::create<double>('c', {2}, {1, 2});
auto exp = NDArrayFactory::create<double>('c', {2, 2, 3, 3}, {1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2});
2019-06-06 14:21:15 +02:00
sd::ops::biasadd op;
auto result = op.evaluate({&x, &bias}, {}, {}, {true});
2019-06-06 14:21:15 +02:00
ASSERT_EQ(ND4J_STATUS_OK, result.status());
2019-06-06 14:21:15 +02:00
auto z = result.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(exp.isSameShape(z));
ASSERT_TRUE(exp.equalsTo(z));
2019-06-06 14:21:15 +02:00
}
TEST_F(DeclarableOpsTests4, biasadd_3) {
auto x = NDArrayFactory::create<double>('c', {2, 3});
auto row = NDArrayFactory::create<double>('c', {3}, {1, 2, 3});
auto exp = NDArrayFactory::create<double>('c', {2, 3}, {1, 2, 3, 1, 2, 3});
sd::ops::biasadd op;
auto result = op.evaluate({&x, &row}, {}, {}, {true});
ASSERT_EQ(ND4J_STATUS_OK, result.status());
auto z = result.at(0);
ASSERT_TRUE(exp.isSameShape(z));
}
//////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests4, biasadd_bp_1) {
NDArray x('c', {2,2,2,3}, {1.,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24}, sd::DataType::FLOAT32);
NDArray gradO('c', {2,2,2,3}, sd::DataType::FLOAT32);
NDArray bias('c', {3}, {-1., -2, -3}, sd::DataType::FLOAT32);
NDArray expGradB('c', {3}, {9.2, 10. , 10.8}, sd::DataType::FLOAT32);
gradO.linspace(0.1, 0.1);
sd::ops::biasadd_bp op;
auto result = op.evaluate({&x, &bias, &gradO}, {}, {}, {false}); // NHWC
ASSERT_EQ(ND4J_STATUS_OK, result.status());
auto gradI = result.at(0);
auto gradB = result.at(1);
ASSERT_TRUE(gradI->isSameShape(gradO));
ASSERT_TRUE(gradI->equalsTo(gradO));
ASSERT_TRUE(gradB->isSameShape(expGradB));
ASSERT_TRUE(gradB->equalsTo(expGradB));
}
//////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests4, biasadd_bp_2) {
NDArray x('c', {2,3,2,2}, {1.,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24}, sd::DataType::FLOAT32);
NDArray gradO('c', {2,3,2,2}, sd::DataType::FLOAT32);
NDArray bias('c', {3}, {-1., -2, -3}, sd::DataType::FLOAT32);
NDArray expGradB('c', {3}, {6.8, 10., 13.2}, sd::DataType::FLOAT32);
gradO.linspace(0.1, 0.1);
sd::ops::biasadd_bp op;
auto result = op.evaluate({&x, &bias, &gradO}, {}, {}, {true}); // NCHW
ASSERT_EQ(ND4J_STATUS_OK, result.status());
auto gradI = result.at(0);
auto gradB = result.at(1);
ASSERT_TRUE(gradI->isSameShape(gradO));
ASSERT_TRUE(gradI->equalsTo(gradO));
ASSERT_TRUE(gradB->isSameShape(expGradB));
ASSERT_TRUE(gradB->equalsTo(expGradB));
}
TEST_F(DeclarableOpsTests4, biasadd_4) {
if (!Environment::getInstance()->isExperimentalBuild())
return;
auto x = NDArrayFactory::create<double>('c', {2, 3});
auto y = NDArrayFactory::create<float>('c', {3}, {1.f, 2.f, 3.f});
auto z = NDArrayFactory::create<float>('c', {2, 3});
auto exp = NDArrayFactory::create<float>('c', {2, 3}, {1.f, 2.f, 3.f, 1.f, 2.f, 3.f});
sd::ops::biasadd op;
auto status = op.execute({&x, &y}, {&z}, {}, {}, {true});
ASSERT_EQ(Status::OK(), status);
ASSERT_EQ(exp, z);
}
2019-06-06 14:21:15 +02:00
TEST_F(DeclarableOpsTests4, Test_Fill_1) {
auto x = NDArrayFactory::create<int>('c', {1, 3}, {3, 2, 4});
auto v = NDArrayFactory::create<double>(2.);
auto exp = NDArrayFactory::create<double>('c', {3, 2, 4});
exp.assign(2.0f);
sd::ops::fill op;
auto result = op.evaluate({&x, &v});
2019-06-06 14:21:15 +02:00
ASSERT_EQ(ND4J_STATUS_OK, result.status());
2019-06-06 14:21:15 +02:00
auto z = result.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(exp.isSameShape(z));
ASSERT_TRUE(exp.equalsTo(z));
2019-06-06 14:21:15 +02:00
}
[WIP] More of CUDA (#95) * initial commit Signed-off-by: raver119 <raver119@gmail.com> * Implementation of hashcode cuda helper. Working edition. * Fixed parallel test input arangements. * Fixed tests for hashcode op. * Fixed shape calculation for image:crop_and_resize op and test. * NativeOps tests. Initial test suite. * Added tests for indexReduce methods. * Added test on execBroadcast with NDArray as dimensions. * Added test on execBroadcastBool with NDArray as dimensions. * Added tests on execPairwiseTransform and execPairwiseTransofrmBool. * Added tests for execReduce with scalar results. * Added reduce tests for non-empty dims array. * Added tests for reduce3. * Added tests for execScalar. * Added tests for execSummaryStats. * - provide cpu/cuda code for batch_to_space - testing it Signed-off-by: Yurii <yurii@skymind.io> * - remove old test for batch_to_space (had wrong format and numbers were not checked) Signed-off-by: Yurii <yurii@skymind.io> * Fixed complilation errors with test. * Added test for execTransformFloat. * Added test for execTransformSame. * Added test for execTransformBool. * Added test for execTransformStrict. * Added tests for execScalar/execScalarBool with TADs. * Added test for flatten. * - provide cpu/cuda code for space_to_Batch operaion Signed-off-by: Yurii <yurii@skymind.io> * Added test for concat. * comment unnecessary stuff in s_t_b Signed-off-by: Yurii <yurii@skymind.io> * Added test for specialConcat. * Added tests for memcpy/set routines. * Fixed pullRow cuda test. * Added pullRow test. * Added average test. * - correct typo in NDArray::applyPairwiseTransform(nd4j::pairwise::BoolOps op...) Signed-off-by: Yurii <yurii@skymind.io> * - debugging and fixing cuda tests in JavaInteropTests file Signed-off-by: Yurii <yurii@skymind.io> * - correct some tests Signed-off-by: Yurii <yurii@skymind.io> * Added test for shuffle. * Fixed ops declarations. * Restored omp and added shuffle test. * Added convertTypes test. * Added tests for execRandom. Eliminated usage of RandomBuffer with NativeOps. * Added sort tests. * Added tests for execCustomOp. * - further debuging and fixing tests terminated with crash Signed-off-by: Yurii <yurii@skymind.io> * Added tests for calculateOutputShapes. * Addded Benchmarks test. * Commented benchmark tests. * change assertion Signed-off-by: raver119 <raver119@gmail.com> * Added tests for apply_sgd op. Added cpu helper for that op. * Implement cuda helper for aplly_sgd op. Fixed tests for NativeOps. * Added test for assign broadcastable. * Added tests for assign_bp op. * Added tests for axpy op. * - assign/execScalar/execTransformAny signature change - minor test fix Signed-off-by: raver119 <raver119@gmail.com> * Fixed axpy op. * meh Signed-off-by: raver119 <raver119@gmail.com> * - fix tests for nativeOps::concat Signed-off-by: Yurii <yurii@skymind.io> * sequential transform/scalar Signed-off-by: raver119 <raver119@gmail.com> * allow nested parallelism Signed-off-by: raver119 <raver119@gmail.com> * assign_bp leak fix Signed-off-by: raver119 <raver119@gmail.com> * block setRNG fix Signed-off-by: raver119 <raver119@gmail.com> * enable parallelism by default Signed-off-by: raver119 <raver119@gmail.com> * enable nested parallelism by default Signed-off-by: raver119 <raver119@gmail.com> * Added cuda implementation for row_count helper. * Added implementation for tnse gains op helper. * - take into account possible situations when input arrays are empty in reduce_ cuda stuff Signed-off-by: Yurii <yurii@skymind.io> * Implemented tsne/edge_forces op cuda-based helper. Parallelized cpu-based helper for edge_forces. * Added kernel for tsne/symmetrized op heleper. * Implementation of tsne/symmetrized op cuda helper. Working edition. * Eliminated waste printfs. * Added test for broadcastgradientargs op. * host-only fallback for empty reduce float Signed-off-by: raver119 <raver119@gmail.com> * - some tests fixes Signed-off-by: Yurii <yurii@skymind.io> * - correct the rest of reduce_ stuff Signed-off-by: Yurii <yurii@skymind.io> * - further correction of reduce_ stuff Signed-off-by: Yurii <yurii@skymind.io> * Added test for Cbow op. Also added cuda implementation for cbow helpers. * - improve code of stack operation for scalar case Signed-off-by: Yurii <yurii@skymind.io> * - provide cuda kernel for gatherND operation Signed-off-by: Yurii <yurii@skymind.io> * Implementation of cbow helpers with cuda kernels. * minor tests tweaks Signed-off-by: raver119 <raver119@gmail.com> * minor tests tweaks Signed-off-by: raver119 <raver119@gmail.com> * - further correction of cuda stuff Signed-off-by: Yurii <yurii@skymind.io> * Implementatation of cbow op helper with cuda kernels. Working edition. * Skip random testing for cudablas case. * lstmBlockCell context fix Signed-off-by: raver119 <raver119@gmail.com> * Added tests for ELU and ELU_BP ops. * Added tests for eq_scalar, gt_scalar, gte_scalar and lte_scalar ops. * Added tests for neq_scalar. * Added test for noop. * - further work on clipbynorm_bp Signed-off-by: Yurii <yurii@skymind.io> * - get rid of concat op call, use instead direct concat helper call Signed-off-by: Yurii <yurii@skymind.io> * lstmBlockCell context fix Signed-off-by: raver119 <raver119@gmail.com> * Added tests for lrelu and lrelu_bp. * Added tests for selu and selu_bp. * Fixed lrelu derivative helpers. * - some corrections in lstm Signed-off-by: Yurii <yurii@skymind.io> * operator * result shape fix Signed-off-by: raver119 <raver119@gmail.com> * - correct typo in lstmCell Signed-off-by: Yurii <yurii@skymind.io> * few tests fixed Signed-off-by: raver119 <raver119@gmail.com> * CUDA inverse broadcast bool fix Signed-off-by: raver119 <raver119@gmail.com> * disable MMAP test for CUDA Signed-off-by: raver119 <raver119@gmail.com> * BooleanOp syncToDevice Signed-off-by: raver119 <raver119@gmail.com> * meh Signed-off-by: raver119 <raver119@gmail.com> * additional data types for im2col/col2im Signed-off-by: raver119 <raver119@gmail.com> * Added test for firas_sparse op. * one more RandomBuffer test excluded Signed-off-by: raver119 <raver119@gmail.com> * Added tests for flatten op. * Added test for Floor op. * bunch of tests fixed Signed-off-by: raver119 <raver119@gmail.com> * mmulDot tests fixed Signed-off-by: raver119 <raver119@gmail.com> * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * Implemented floordiv_bp op and tests. * Fixed scalar case with cuda implementation for bds. * - work on cuda kernel for clip_by_norm backprop op is completed Signed-off-by: Yurii <yurii@skymind.io> * Eliminate cbow crach. * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * Eliminated abortion with batched nlp test. * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * Fixed shared flag initializing. * disabled bunch of cpu workspaces tests Signed-off-by: raver119 <raver119@gmail.com> * scalar operators fix: missing registerSpecialUse call Signed-off-by: raver119 <raver119@gmail.com> * Fixed logdet for cuda and tests. * - correct clipBynorm_bp Signed-off-by: Yurii <yurii@skymind.io> * Fixed crop_and_resize shape datatype. * - correct some mmul tests Signed-off-by: Yurii <yurii@skymind.io>
2019-08-02 19:01:03 +02:00
TEST_F(DeclarableOpsTests4, Test_FirasSparce_1) {
auto x = NDArrayFactory::create<double>('c', {1, 81});
auto exp = NDArrayFactory::create<double>('c', {1, 2}, {0, 1});
x.p(51, 1);
x.p(52, 0);
x.p(60, 1);
x.p(61, 0);
sd::ops::firas_sparse op;
auto result = op.evaluate({&x}, {0, 1});
[WIP] More of CUDA (#95) * initial commit Signed-off-by: raver119 <raver119@gmail.com> * Implementation of hashcode cuda helper. Working edition. * Fixed parallel test input arangements. * Fixed tests for hashcode op. * Fixed shape calculation for image:crop_and_resize op and test. * NativeOps tests. Initial test suite. * Added tests for indexReduce methods. * Added test on execBroadcast with NDArray as dimensions. * Added test on execBroadcastBool with NDArray as dimensions. * Added tests on execPairwiseTransform and execPairwiseTransofrmBool. * Added tests for execReduce with scalar results. * Added reduce tests for non-empty dims array. * Added tests for reduce3. * Added tests for execScalar. * Added tests for execSummaryStats. * - provide cpu/cuda code for batch_to_space - testing it Signed-off-by: Yurii <yurii@skymind.io> * - remove old test for batch_to_space (had wrong format and numbers were not checked) Signed-off-by: Yurii <yurii@skymind.io> * Fixed complilation errors with test. * Added test for execTransformFloat. * Added test for execTransformSame. * Added test for execTransformBool. * Added test for execTransformStrict. * Added tests for execScalar/execScalarBool with TADs. * Added test for flatten. * - provide cpu/cuda code for space_to_Batch operaion Signed-off-by: Yurii <yurii@skymind.io> * Added test for concat. * comment unnecessary stuff in s_t_b Signed-off-by: Yurii <yurii@skymind.io> * Added test for specialConcat. * Added tests for memcpy/set routines. * Fixed pullRow cuda test. * Added pullRow test. * Added average test. * - correct typo in NDArray::applyPairwiseTransform(nd4j::pairwise::BoolOps op...) Signed-off-by: Yurii <yurii@skymind.io> * - debugging and fixing cuda tests in JavaInteropTests file Signed-off-by: Yurii <yurii@skymind.io> * - correct some tests Signed-off-by: Yurii <yurii@skymind.io> * Added test for shuffle. * Fixed ops declarations. * Restored omp and added shuffle test. * Added convertTypes test. * Added tests for execRandom. Eliminated usage of RandomBuffer with NativeOps. * Added sort tests. * Added tests for execCustomOp. * - further debuging and fixing tests terminated with crash Signed-off-by: Yurii <yurii@skymind.io> * Added tests for calculateOutputShapes. * Addded Benchmarks test. * Commented benchmark tests. * change assertion Signed-off-by: raver119 <raver119@gmail.com> * Added tests for apply_sgd op. Added cpu helper for that op. * Implement cuda helper for aplly_sgd op. Fixed tests for NativeOps. * Added test for assign broadcastable. * Added tests for assign_bp op. * Added tests for axpy op. * - assign/execScalar/execTransformAny signature change - minor test fix Signed-off-by: raver119 <raver119@gmail.com> * Fixed axpy op. * meh Signed-off-by: raver119 <raver119@gmail.com> * - fix tests for nativeOps::concat Signed-off-by: Yurii <yurii@skymind.io> * sequential transform/scalar Signed-off-by: raver119 <raver119@gmail.com> * allow nested parallelism Signed-off-by: raver119 <raver119@gmail.com> * assign_bp leak fix Signed-off-by: raver119 <raver119@gmail.com> * block setRNG fix Signed-off-by: raver119 <raver119@gmail.com> * enable parallelism by default Signed-off-by: raver119 <raver119@gmail.com> * enable nested parallelism by default Signed-off-by: raver119 <raver119@gmail.com> * Added cuda implementation for row_count helper. * Added implementation for tnse gains op helper. * - take into account possible situations when input arrays are empty in reduce_ cuda stuff Signed-off-by: Yurii <yurii@skymind.io> * Implemented tsne/edge_forces op cuda-based helper. Parallelized cpu-based helper for edge_forces. * Added kernel for tsne/symmetrized op heleper. * Implementation of tsne/symmetrized op cuda helper. Working edition. * Eliminated waste printfs. * Added test for broadcastgradientargs op. * host-only fallback for empty reduce float Signed-off-by: raver119 <raver119@gmail.com> * - some tests fixes Signed-off-by: Yurii <yurii@skymind.io> * - correct the rest of reduce_ stuff Signed-off-by: Yurii <yurii@skymind.io> * - further correction of reduce_ stuff Signed-off-by: Yurii <yurii@skymind.io> * Added test for Cbow op. Also added cuda implementation for cbow helpers. * - improve code of stack operation for scalar case Signed-off-by: Yurii <yurii@skymind.io> * - provide cuda kernel for gatherND operation Signed-off-by: Yurii <yurii@skymind.io> * Implementation of cbow helpers with cuda kernels. * minor tests tweaks Signed-off-by: raver119 <raver119@gmail.com> * minor tests tweaks Signed-off-by: raver119 <raver119@gmail.com> * - further correction of cuda stuff Signed-off-by: Yurii <yurii@skymind.io> * Implementatation of cbow op helper with cuda kernels. Working edition. * Skip random testing for cudablas case. * lstmBlockCell context fix Signed-off-by: raver119 <raver119@gmail.com> * Added tests for ELU and ELU_BP ops. * Added tests for eq_scalar, gt_scalar, gte_scalar and lte_scalar ops. * Added tests for neq_scalar. * Added test for noop. * - further work on clipbynorm_bp Signed-off-by: Yurii <yurii@skymind.io> * - get rid of concat op call, use instead direct concat helper call Signed-off-by: Yurii <yurii@skymind.io> * lstmBlockCell context fix Signed-off-by: raver119 <raver119@gmail.com> * Added tests for lrelu and lrelu_bp. * Added tests for selu and selu_bp. * Fixed lrelu derivative helpers. * - some corrections in lstm Signed-off-by: Yurii <yurii@skymind.io> * operator * result shape fix Signed-off-by: raver119 <raver119@gmail.com> * - correct typo in lstmCell Signed-off-by: Yurii <yurii@skymind.io> * few tests fixed Signed-off-by: raver119 <raver119@gmail.com> * CUDA inverse broadcast bool fix Signed-off-by: raver119 <raver119@gmail.com> * disable MMAP test for CUDA Signed-off-by: raver119 <raver119@gmail.com> * BooleanOp syncToDevice Signed-off-by: raver119 <raver119@gmail.com> * meh Signed-off-by: raver119 <raver119@gmail.com> * additional data types for im2col/col2im Signed-off-by: raver119 <raver119@gmail.com> * Added test for firas_sparse op. * one more RandomBuffer test excluded Signed-off-by: raver119 <raver119@gmail.com> * Added tests for flatten op. * Added test for Floor op. * bunch of tests fixed Signed-off-by: raver119 <raver119@gmail.com> * mmulDot tests fixed Signed-off-by: raver119 <raver119@gmail.com> * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * Implemented floordiv_bp op and tests. * Fixed scalar case with cuda implementation for bds. * - work on cuda kernel for clip_by_norm backprop op is completed Signed-off-by: Yurii <yurii@skymind.io> * Eliminate cbow crach. * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * Eliminated abortion with batched nlp test. * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * Fixed shared flag initializing. * disabled bunch of cpu workspaces tests Signed-off-by: raver119 <raver119@gmail.com> * scalar operators fix: missing registerSpecialUse call Signed-off-by: raver119 <raver119@gmail.com> * Fixed logdet for cuda and tests. * - correct clipBynorm_bp Signed-off-by: Yurii <yurii@skymind.io> * Fixed crop_and_resize shape datatype. * - correct some mmul tests Signed-off-by: Yurii <yurii@skymind.io>
2019-08-02 19:01:03 +02:00
ASSERT_EQ(ND4J_STATUS_OK, result.status());
[WIP] More of CUDA (#95) * initial commit Signed-off-by: raver119 <raver119@gmail.com> * Implementation of hashcode cuda helper. Working edition. * Fixed parallel test input arangements. * Fixed tests for hashcode op. * Fixed shape calculation for image:crop_and_resize op and test. * NativeOps tests. Initial test suite. * Added tests for indexReduce methods. * Added test on execBroadcast with NDArray as dimensions. * Added test on execBroadcastBool with NDArray as dimensions. * Added tests on execPairwiseTransform and execPairwiseTransofrmBool. * Added tests for execReduce with scalar results. * Added reduce tests for non-empty dims array. * Added tests for reduce3. * Added tests for execScalar. * Added tests for execSummaryStats. * - provide cpu/cuda code for batch_to_space - testing it Signed-off-by: Yurii <yurii@skymind.io> * - remove old test for batch_to_space (had wrong format and numbers were not checked) Signed-off-by: Yurii <yurii@skymind.io> * Fixed complilation errors with test. * Added test for execTransformFloat. * Added test for execTransformSame. * Added test for execTransformBool. * Added test for execTransformStrict. * Added tests for execScalar/execScalarBool with TADs. * Added test for flatten. * - provide cpu/cuda code for space_to_Batch operaion Signed-off-by: Yurii <yurii@skymind.io> * Added test for concat. * comment unnecessary stuff in s_t_b Signed-off-by: Yurii <yurii@skymind.io> * Added test for specialConcat. * Added tests for memcpy/set routines. * Fixed pullRow cuda test. * Added pullRow test. * Added average test. * - correct typo in NDArray::applyPairwiseTransform(nd4j::pairwise::BoolOps op...) Signed-off-by: Yurii <yurii@skymind.io> * - debugging and fixing cuda tests in JavaInteropTests file Signed-off-by: Yurii <yurii@skymind.io> * - correct some tests Signed-off-by: Yurii <yurii@skymind.io> * Added test for shuffle. * Fixed ops declarations. * Restored omp and added shuffle test. * Added convertTypes test. * Added tests for execRandom. Eliminated usage of RandomBuffer with NativeOps. * Added sort tests. * Added tests for execCustomOp. * - further debuging and fixing tests terminated with crash Signed-off-by: Yurii <yurii@skymind.io> * Added tests for calculateOutputShapes. * Addded Benchmarks test. * Commented benchmark tests. * change assertion Signed-off-by: raver119 <raver119@gmail.com> * Added tests for apply_sgd op. Added cpu helper for that op. * Implement cuda helper for aplly_sgd op. Fixed tests for NativeOps. * Added test for assign broadcastable. * Added tests for assign_bp op. * Added tests for axpy op. * - assign/execScalar/execTransformAny signature change - minor test fix Signed-off-by: raver119 <raver119@gmail.com> * Fixed axpy op. * meh Signed-off-by: raver119 <raver119@gmail.com> * - fix tests for nativeOps::concat Signed-off-by: Yurii <yurii@skymind.io> * sequential transform/scalar Signed-off-by: raver119 <raver119@gmail.com> * allow nested parallelism Signed-off-by: raver119 <raver119@gmail.com> * assign_bp leak fix Signed-off-by: raver119 <raver119@gmail.com> * block setRNG fix Signed-off-by: raver119 <raver119@gmail.com> * enable parallelism by default Signed-off-by: raver119 <raver119@gmail.com> * enable nested parallelism by default Signed-off-by: raver119 <raver119@gmail.com> * Added cuda implementation for row_count helper. * Added implementation for tnse gains op helper. * - take into account possible situations when input arrays are empty in reduce_ cuda stuff Signed-off-by: Yurii <yurii@skymind.io> * Implemented tsne/edge_forces op cuda-based helper. Parallelized cpu-based helper for edge_forces. * Added kernel for tsne/symmetrized op heleper. * Implementation of tsne/symmetrized op cuda helper. Working edition. * Eliminated waste printfs. * Added test for broadcastgradientargs op. * host-only fallback for empty reduce float Signed-off-by: raver119 <raver119@gmail.com> * - some tests fixes Signed-off-by: Yurii <yurii@skymind.io> * - correct the rest of reduce_ stuff Signed-off-by: Yurii <yurii@skymind.io> * - further correction of reduce_ stuff Signed-off-by: Yurii <yurii@skymind.io> * Added test for Cbow op. Also added cuda implementation for cbow helpers. * - improve code of stack operation for scalar case Signed-off-by: Yurii <yurii@skymind.io> * - provide cuda kernel for gatherND operation Signed-off-by: Yurii <yurii@skymind.io> * Implementation of cbow helpers with cuda kernels. * minor tests tweaks Signed-off-by: raver119 <raver119@gmail.com> * minor tests tweaks Signed-off-by: raver119 <raver119@gmail.com> * - further correction of cuda stuff Signed-off-by: Yurii <yurii@skymind.io> * Implementatation of cbow op helper with cuda kernels. Working edition. * Skip random testing for cudablas case. * lstmBlockCell context fix Signed-off-by: raver119 <raver119@gmail.com> * Added tests for ELU and ELU_BP ops. * Added tests for eq_scalar, gt_scalar, gte_scalar and lte_scalar ops. * Added tests for neq_scalar. * Added test for noop. * - further work on clipbynorm_bp Signed-off-by: Yurii <yurii@skymind.io> * - get rid of concat op call, use instead direct concat helper call Signed-off-by: Yurii <yurii@skymind.io> * lstmBlockCell context fix Signed-off-by: raver119 <raver119@gmail.com> * Added tests for lrelu and lrelu_bp. * Added tests for selu and selu_bp. * Fixed lrelu derivative helpers. * - some corrections in lstm Signed-off-by: Yurii <yurii@skymind.io> * operator * result shape fix Signed-off-by: raver119 <raver119@gmail.com> * - correct typo in lstmCell Signed-off-by: Yurii <yurii@skymind.io> * few tests fixed Signed-off-by: raver119 <raver119@gmail.com> * CUDA inverse broadcast bool fix Signed-off-by: raver119 <raver119@gmail.com> * disable MMAP test for CUDA Signed-off-by: raver119 <raver119@gmail.com> * BooleanOp syncToDevice Signed-off-by: raver119 <raver119@gmail.com> * meh Signed-off-by: raver119 <raver119@gmail.com> * additional data types for im2col/col2im Signed-off-by: raver119 <raver119@gmail.com> * Added test for firas_sparse op. * one more RandomBuffer test excluded Signed-off-by: raver119 <raver119@gmail.com> * Added tests for flatten op. * Added test for Floor op. * bunch of tests fixed Signed-off-by: raver119 <raver119@gmail.com> * mmulDot tests fixed Signed-off-by: raver119 <raver119@gmail.com> * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * Implemented floordiv_bp op and tests. * Fixed scalar case with cuda implementation for bds. * - work on cuda kernel for clip_by_norm backprop op is completed Signed-off-by: Yurii <yurii@skymind.io> * Eliminate cbow crach. * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * Eliminated abortion with batched nlp test. * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * Fixed shared flag initializing. * disabled bunch of cpu workspaces tests Signed-off-by: raver119 <raver119@gmail.com> * scalar operators fix: missing registerSpecialUse call Signed-off-by: raver119 <raver119@gmail.com> * Fixed logdet for cuda and tests. * - correct clipBynorm_bp Signed-off-by: Yurii <yurii@skymind.io> * Fixed crop_and_resize shape datatype. * - correct some mmul tests Signed-off-by: Yurii <yurii@skymind.io>
2019-08-02 19:01:03 +02:00
auto z = result.at(0);
[WIP] More of CUDA (#95) * initial commit Signed-off-by: raver119 <raver119@gmail.com> * Implementation of hashcode cuda helper. Working edition. * Fixed parallel test input arangements. * Fixed tests for hashcode op. * Fixed shape calculation for image:crop_and_resize op and test. * NativeOps tests. Initial test suite. * Added tests for indexReduce methods. * Added test on execBroadcast with NDArray as dimensions. * Added test on execBroadcastBool with NDArray as dimensions. * Added tests on execPairwiseTransform and execPairwiseTransofrmBool. * Added tests for execReduce with scalar results. * Added reduce tests for non-empty dims array. * Added tests for reduce3. * Added tests for execScalar. * Added tests for execSummaryStats. * - provide cpu/cuda code for batch_to_space - testing it Signed-off-by: Yurii <yurii@skymind.io> * - remove old test for batch_to_space (had wrong format and numbers were not checked) Signed-off-by: Yurii <yurii@skymind.io> * Fixed complilation errors with test. * Added test for execTransformFloat. * Added test for execTransformSame. * Added test for execTransformBool. * Added test for execTransformStrict. * Added tests for execScalar/execScalarBool with TADs. * Added test for flatten. * - provide cpu/cuda code for space_to_Batch operaion Signed-off-by: Yurii <yurii@skymind.io> * Added test for concat. * comment unnecessary stuff in s_t_b Signed-off-by: Yurii <yurii@skymind.io> * Added test for specialConcat. * Added tests for memcpy/set routines. * Fixed pullRow cuda test. * Added pullRow test. * Added average test. * - correct typo in NDArray::applyPairwiseTransform(nd4j::pairwise::BoolOps op...) Signed-off-by: Yurii <yurii@skymind.io> * - debugging and fixing cuda tests in JavaInteropTests file Signed-off-by: Yurii <yurii@skymind.io> * - correct some tests Signed-off-by: Yurii <yurii@skymind.io> * Added test for shuffle. * Fixed ops declarations. * Restored omp and added shuffle test. * Added convertTypes test. * Added tests for execRandom. Eliminated usage of RandomBuffer with NativeOps. * Added sort tests. * Added tests for execCustomOp. * - further debuging and fixing tests terminated with crash Signed-off-by: Yurii <yurii@skymind.io> * Added tests for calculateOutputShapes. * Addded Benchmarks test. * Commented benchmark tests. * change assertion Signed-off-by: raver119 <raver119@gmail.com> * Added tests for apply_sgd op. Added cpu helper for that op. * Implement cuda helper for aplly_sgd op. Fixed tests for NativeOps. * Added test for assign broadcastable. * Added tests for assign_bp op. * Added tests for axpy op. * - assign/execScalar/execTransformAny signature change - minor test fix Signed-off-by: raver119 <raver119@gmail.com> * Fixed axpy op. * meh Signed-off-by: raver119 <raver119@gmail.com> * - fix tests for nativeOps::concat Signed-off-by: Yurii <yurii@skymind.io> * sequential transform/scalar Signed-off-by: raver119 <raver119@gmail.com> * allow nested parallelism Signed-off-by: raver119 <raver119@gmail.com> * assign_bp leak fix Signed-off-by: raver119 <raver119@gmail.com> * block setRNG fix Signed-off-by: raver119 <raver119@gmail.com> * enable parallelism by default Signed-off-by: raver119 <raver119@gmail.com> * enable nested parallelism by default Signed-off-by: raver119 <raver119@gmail.com> * Added cuda implementation for row_count helper. * Added implementation for tnse gains op helper. * - take into account possible situations when input arrays are empty in reduce_ cuda stuff Signed-off-by: Yurii <yurii@skymind.io> * Implemented tsne/edge_forces op cuda-based helper. Parallelized cpu-based helper for edge_forces. * Added kernel for tsne/symmetrized op heleper. * Implementation of tsne/symmetrized op cuda helper. Working edition. * Eliminated waste printfs. * Added test for broadcastgradientargs op. * host-only fallback for empty reduce float Signed-off-by: raver119 <raver119@gmail.com> * - some tests fixes Signed-off-by: Yurii <yurii@skymind.io> * - correct the rest of reduce_ stuff Signed-off-by: Yurii <yurii@skymind.io> * - further correction of reduce_ stuff Signed-off-by: Yurii <yurii@skymind.io> * Added test for Cbow op. Also added cuda implementation for cbow helpers. * - improve code of stack operation for scalar case Signed-off-by: Yurii <yurii@skymind.io> * - provide cuda kernel for gatherND operation Signed-off-by: Yurii <yurii@skymind.io> * Implementation of cbow helpers with cuda kernels. * minor tests tweaks Signed-off-by: raver119 <raver119@gmail.com> * minor tests tweaks Signed-off-by: raver119 <raver119@gmail.com> * - further correction of cuda stuff Signed-off-by: Yurii <yurii@skymind.io> * Implementatation of cbow op helper with cuda kernels. Working edition. * Skip random testing for cudablas case. * lstmBlockCell context fix Signed-off-by: raver119 <raver119@gmail.com> * Added tests for ELU and ELU_BP ops. * Added tests for eq_scalar, gt_scalar, gte_scalar and lte_scalar ops. * Added tests for neq_scalar. * Added test for noop. * - further work on clipbynorm_bp Signed-off-by: Yurii <yurii@skymind.io> * - get rid of concat op call, use instead direct concat helper call Signed-off-by: Yurii <yurii@skymind.io> * lstmBlockCell context fix Signed-off-by: raver119 <raver119@gmail.com> * Added tests for lrelu and lrelu_bp. * Added tests for selu and selu_bp. * Fixed lrelu derivative helpers. * - some corrections in lstm Signed-off-by: Yurii <yurii@skymind.io> * operator * result shape fix Signed-off-by: raver119 <raver119@gmail.com> * - correct typo in lstmCell Signed-off-by: Yurii <yurii@skymind.io> * few tests fixed Signed-off-by: raver119 <raver119@gmail.com> * CUDA inverse broadcast bool fix Signed-off-by: raver119 <raver119@gmail.com> * disable MMAP test for CUDA Signed-off-by: raver119 <raver119@gmail.com> * BooleanOp syncToDevice Signed-off-by: raver119 <raver119@gmail.com> * meh Signed-off-by: raver119 <raver119@gmail.com> * additional data types for im2col/col2im Signed-off-by: raver119 <raver119@gmail.com> * Added test for firas_sparse op. * one more RandomBuffer test excluded Signed-off-by: raver119 <raver119@gmail.com> * Added tests for flatten op. * Added test for Floor op. * bunch of tests fixed Signed-off-by: raver119 <raver119@gmail.com> * mmulDot tests fixed Signed-off-by: raver119 <raver119@gmail.com> * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * Implemented floordiv_bp op and tests. * Fixed scalar case with cuda implementation for bds. * - work on cuda kernel for clip_by_norm backprop op is completed Signed-off-by: Yurii <yurii@skymind.io> * Eliminate cbow crach. * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * Eliminated abortion with batched nlp test. * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * Fixed shared flag initializing. * disabled bunch of cpu workspaces tests Signed-off-by: raver119 <raver119@gmail.com> * scalar operators fix: missing registerSpecialUse call Signed-off-by: raver119 <raver119@gmail.com> * Fixed logdet for cuda and tests. * - correct clipBynorm_bp Signed-off-by: Yurii <yurii@skymind.io> * Fixed crop_and_resize shape datatype. * - correct some mmul tests Signed-off-by: Yurii <yurii@skymind.io>
2019-08-02 19:01:03 +02:00
// z->printIndexedBuffer("FIRAS");
// z->printShapeInfo("OUTSHAPE");
// ASSERT_TRUE(exp.isSameShape(z));
ASSERT_TRUE(exp.equalsTo(z));
[WIP] More of CUDA (#95) * initial commit Signed-off-by: raver119 <raver119@gmail.com> * Implementation of hashcode cuda helper. Working edition. * Fixed parallel test input arangements. * Fixed tests for hashcode op. * Fixed shape calculation for image:crop_and_resize op and test. * NativeOps tests. Initial test suite. * Added tests for indexReduce methods. * Added test on execBroadcast with NDArray as dimensions. * Added test on execBroadcastBool with NDArray as dimensions. * Added tests on execPairwiseTransform and execPairwiseTransofrmBool. * Added tests for execReduce with scalar results. * Added reduce tests for non-empty dims array. * Added tests for reduce3. * Added tests for execScalar. * Added tests for execSummaryStats. * - provide cpu/cuda code for batch_to_space - testing it Signed-off-by: Yurii <yurii@skymind.io> * - remove old test for batch_to_space (had wrong format and numbers were not checked) Signed-off-by: Yurii <yurii@skymind.io> * Fixed complilation errors with test. * Added test for execTransformFloat. * Added test for execTransformSame. * Added test for execTransformBool. * Added test for execTransformStrict. * Added tests for execScalar/execScalarBool with TADs. * Added test for flatten. * - provide cpu/cuda code for space_to_Batch operaion Signed-off-by: Yurii <yurii@skymind.io> * Added test for concat. * comment unnecessary stuff in s_t_b Signed-off-by: Yurii <yurii@skymind.io> * Added test for specialConcat. * Added tests for memcpy/set routines. * Fixed pullRow cuda test. * Added pullRow test. * Added average test. * - correct typo in NDArray::applyPairwiseTransform(nd4j::pairwise::BoolOps op...) Signed-off-by: Yurii <yurii@skymind.io> * - debugging and fixing cuda tests in JavaInteropTests file Signed-off-by: Yurii <yurii@skymind.io> * - correct some tests Signed-off-by: Yurii <yurii@skymind.io> * Added test for shuffle. * Fixed ops declarations. * Restored omp and added shuffle test. * Added convertTypes test. * Added tests for execRandom. Eliminated usage of RandomBuffer with NativeOps. * Added sort tests. * Added tests for execCustomOp. * - further debuging and fixing tests terminated with crash Signed-off-by: Yurii <yurii@skymind.io> * Added tests for calculateOutputShapes. * Addded Benchmarks test. * Commented benchmark tests. * change assertion Signed-off-by: raver119 <raver119@gmail.com> * Added tests for apply_sgd op. Added cpu helper for that op. * Implement cuda helper for aplly_sgd op. Fixed tests for NativeOps. * Added test for assign broadcastable. * Added tests for assign_bp op. * Added tests for axpy op. * - assign/execScalar/execTransformAny signature change - minor test fix Signed-off-by: raver119 <raver119@gmail.com> * Fixed axpy op. * meh Signed-off-by: raver119 <raver119@gmail.com> * - fix tests for nativeOps::concat Signed-off-by: Yurii <yurii@skymind.io> * sequential transform/scalar Signed-off-by: raver119 <raver119@gmail.com> * allow nested parallelism Signed-off-by: raver119 <raver119@gmail.com> * assign_bp leak fix Signed-off-by: raver119 <raver119@gmail.com> * block setRNG fix Signed-off-by: raver119 <raver119@gmail.com> * enable parallelism by default Signed-off-by: raver119 <raver119@gmail.com> * enable nested parallelism by default Signed-off-by: raver119 <raver119@gmail.com> * Added cuda implementation for row_count helper. * Added implementation for tnse gains op helper. * - take into account possible situations when input arrays are empty in reduce_ cuda stuff Signed-off-by: Yurii <yurii@skymind.io> * Implemented tsne/edge_forces op cuda-based helper. Parallelized cpu-based helper for edge_forces. * Added kernel for tsne/symmetrized op heleper. * Implementation of tsne/symmetrized op cuda helper. Working edition. * Eliminated waste printfs. * Added test for broadcastgradientargs op. * host-only fallback for empty reduce float Signed-off-by: raver119 <raver119@gmail.com> * - some tests fixes Signed-off-by: Yurii <yurii@skymind.io> * - correct the rest of reduce_ stuff Signed-off-by: Yurii <yurii@skymind.io> * - further correction of reduce_ stuff Signed-off-by: Yurii <yurii@skymind.io> * Added test for Cbow op. Also added cuda implementation for cbow helpers. * - improve code of stack operation for scalar case Signed-off-by: Yurii <yurii@skymind.io> * - provide cuda kernel for gatherND operation Signed-off-by: Yurii <yurii@skymind.io> * Implementation of cbow helpers with cuda kernels. * minor tests tweaks Signed-off-by: raver119 <raver119@gmail.com> * minor tests tweaks Signed-off-by: raver119 <raver119@gmail.com> * - further correction of cuda stuff Signed-off-by: Yurii <yurii@skymind.io> * Implementatation of cbow op helper with cuda kernels. Working edition. * Skip random testing for cudablas case. * lstmBlockCell context fix Signed-off-by: raver119 <raver119@gmail.com> * Added tests for ELU and ELU_BP ops. * Added tests for eq_scalar, gt_scalar, gte_scalar and lte_scalar ops. * Added tests for neq_scalar. * Added test for noop. * - further work on clipbynorm_bp Signed-off-by: Yurii <yurii@skymind.io> * - get rid of concat op call, use instead direct concat helper call Signed-off-by: Yurii <yurii@skymind.io> * lstmBlockCell context fix Signed-off-by: raver119 <raver119@gmail.com> * Added tests for lrelu and lrelu_bp. * Added tests for selu and selu_bp. * Fixed lrelu derivative helpers. * - some corrections in lstm Signed-off-by: Yurii <yurii@skymind.io> * operator * result shape fix Signed-off-by: raver119 <raver119@gmail.com> * - correct typo in lstmCell Signed-off-by: Yurii <yurii@skymind.io> * few tests fixed Signed-off-by: raver119 <raver119@gmail.com> * CUDA inverse broadcast bool fix Signed-off-by: raver119 <raver119@gmail.com> * disable MMAP test for CUDA Signed-off-by: raver119 <raver119@gmail.com> * BooleanOp syncToDevice Signed-off-by: raver119 <raver119@gmail.com> * meh Signed-off-by: raver119 <raver119@gmail.com> * additional data types for im2col/col2im Signed-off-by: raver119 <raver119@gmail.com> * Added test for firas_sparse op. * one more RandomBuffer test excluded Signed-off-by: raver119 <raver119@gmail.com> * Added tests for flatten op. * Added test for Floor op. * bunch of tests fixed Signed-off-by: raver119 <raver119@gmail.com> * mmulDot tests fixed Signed-off-by: raver119 <raver119@gmail.com> * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * Implemented floordiv_bp op and tests. * Fixed scalar case with cuda implementation for bds. * - work on cuda kernel for clip_by_norm backprop op is completed Signed-off-by: Yurii <yurii@skymind.io> * Eliminate cbow crach. * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * Eliminated abortion with batched nlp test. * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * Fixed shared flag initializing. * disabled bunch of cpu workspaces tests Signed-off-by: raver119 <raver119@gmail.com> * scalar operators fix: missing registerSpecialUse call Signed-off-by: raver119 <raver119@gmail.com> * Fixed logdet for cuda and tests. * - correct clipBynorm_bp Signed-off-by: Yurii <yurii@skymind.io> * Fixed crop_and_resize shape datatype. * - correct some mmul tests Signed-off-by: Yurii <yurii@skymind.io>
2019-08-02 19:01:03 +02:00
}
TEST_F(DeclarableOpsTests4, Test_FlattenTests_1) {
auto x = NDArrayFactory::create<double>('c', {3, 3, 3, 3});
auto exp = NDArrayFactory::create<double>('c', {81});
x.linspace(1);
exp.linspace(1);
sd::ops::flatten op;
auto result = op.evaluate({&x}, {}, {'c'});
[WIP] More of CUDA (#95) * initial commit Signed-off-by: raver119 <raver119@gmail.com> * Implementation of hashcode cuda helper. Working edition. * Fixed parallel test input arangements. * Fixed tests for hashcode op. * Fixed shape calculation for image:crop_and_resize op and test. * NativeOps tests. Initial test suite. * Added tests for indexReduce methods. * Added test on execBroadcast with NDArray as dimensions. * Added test on execBroadcastBool with NDArray as dimensions. * Added tests on execPairwiseTransform and execPairwiseTransofrmBool. * Added tests for execReduce with scalar results. * Added reduce tests for non-empty dims array. * Added tests for reduce3. * Added tests for execScalar. * Added tests for execSummaryStats. * - provide cpu/cuda code for batch_to_space - testing it Signed-off-by: Yurii <yurii@skymind.io> * - remove old test for batch_to_space (had wrong format and numbers were not checked) Signed-off-by: Yurii <yurii@skymind.io> * Fixed complilation errors with test. * Added test for execTransformFloat. * Added test for execTransformSame. * Added test for execTransformBool. * Added test for execTransformStrict. * Added tests for execScalar/execScalarBool with TADs. * Added test for flatten. * - provide cpu/cuda code for space_to_Batch operaion Signed-off-by: Yurii <yurii@skymind.io> * Added test for concat. * comment unnecessary stuff in s_t_b Signed-off-by: Yurii <yurii@skymind.io> * Added test for specialConcat. * Added tests for memcpy/set routines. * Fixed pullRow cuda test. * Added pullRow test. * Added average test. * - correct typo in NDArray::applyPairwiseTransform(nd4j::pairwise::BoolOps op...) Signed-off-by: Yurii <yurii@skymind.io> * - debugging and fixing cuda tests in JavaInteropTests file Signed-off-by: Yurii <yurii@skymind.io> * - correct some tests Signed-off-by: Yurii <yurii@skymind.io> * Added test for shuffle. * Fixed ops declarations. * Restored omp and added shuffle test. * Added convertTypes test. * Added tests for execRandom. Eliminated usage of RandomBuffer with NativeOps. * Added sort tests. * Added tests for execCustomOp. * - further debuging and fixing tests terminated with crash Signed-off-by: Yurii <yurii@skymind.io> * Added tests for calculateOutputShapes. * Addded Benchmarks test. * Commented benchmark tests. * change assertion Signed-off-by: raver119 <raver119@gmail.com> * Added tests for apply_sgd op. Added cpu helper for that op. * Implement cuda helper for aplly_sgd op. Fixed tests for NativeOps. * Added test for assign broadcastable. * Added tests for assign_bp op. * Added tests for axpy op. * - assign/execScalar/execTransformAny signature change - minor test fix Signed-off-by: raver119 <raver119@gmail.com> * Fixed axpy op. * meh Signed-off-by: raver119 <raver119@gmail.com> * - fix tests for nativeOps::concat Signed-off-by: Yurii <yurii@skymind.io> * sequential transform/scalar Signed-off-by: raver119 <raver119@gmail.com> * allow nested parallelism Signed-off-by: raver119 <raver119@gmail.com> * assign_bp leak fix Signed-off-by: raver119 <raver119@gmail.com> * block setRNG fix Signed-off-by: raver119 <raver119@gmail.com> * enable parallelism by default Signed-off-by: raver119 <raver119@gmail.com> * enable nested parallelism by default Signed-off-by: raver119 <raver119@gmail.com> * Added cuda implementation for row_count helper. * Added implementation for tnse gains op helper. * - take into account possible situations when input arrays are empty in reduce_ cuda stuff Signed-off-by: Yurii <yurii@skymind.io> * Implemented tsne/edge_forces op cuda-based helper. Parallelized cpu-based helper for edge_forces. * Added kernel for tsne/symmetrized op heleper. * Implementation of tsne/symmetrized op cuda helper. Working edition. * Eliminated waste printfs. * Added test for broadcastgradientargs op. * host-only fallback for empty reduce float Signed-off-by: raver119 <raver119@gmail.com> * - some tests fixes Signed-off-by: Yurii <yurii@skymind.io> * - correct the rest of reduce_ stuff Signed-off-by: Yurii <yurii@skymind.io> * - further correction of reduce_ stuff Signed-off-by: Yurii <yurii@skymind.io> * Added test for Cbow op. Also added cuda implementation for cbow helpers. * - improve code of stack operation for scalar case Signed-off-by: Yurii <yurii@skymind.io> * - provide cuda kernel for gatherND operation Signed-off-by: Yurii <yurii@skymind.io> * Implementation of cbow helpers with cuda kernels. * minor tests tweaks Signed-off-by: raver119 <raver119@gmail.com> * minor tests tweaks Signed-off-by: raver119 <raver119@gmail.com> * - further correction of cuda stuff Signed-off-by: Yurii <yurii@skymind.io> * Implementatation of cbow op helper with cuda kernels. Working edition. * Skip random testing for cudablas case. * lstmBlockCell context fix Signed-off-by: raver119 <raver119@gmail.com> * Added tests for ELU and ELU_BP ops. * Added tests for eq_scalar, gt_scalar, gte_scalar and lte_scalar ops. * Added tests for neq_scalar. * Added test for noop. * - further work on clipbynorm_bp Signed-off-by: Yurii <yurii@skymind.io> * - get rid of concat op call, use instead direct concat helper call Signed-off-by: Yurii <yurii@skymind.io> * lstmBlockCell context fix Signed-off-by: raver119 <raver119@gmail.com> * Added tests for lrelu and lrelu_bp. * Added tests for selu and selu_bp. * Fixed lrelu derivative helpers. * - some corrections in lstm Signed-off-by: Yurii <yurii@skymind.io> * operator * result shape fix Signed-off-by: raver119 <raver119@gmail.com> * - correct typo in lstmCell Signed-off-by: Yurii <yurii@skymind.io> * few tests fixed Signed-off-by: raver119 <raver119@gmail.com> * CUDA inverse broadcast bool fix Signed-off-by: raver119 <raver119@gmail.com> * disable MMAP test for CUDA Signed-off-by: raver119 <raver119@gmail.com> * BooleanOp syncToDevice Signed-off-by: raver119 <raver119@gmail.com> * meh Signed-off-by: raver119 <raver119@gmail.com> * additional data types for im2col/col2im Signed-off-by: raver119 <raver119@gmail.com> * Added test for firas_sparse op. * one more RandomBuffer test excluded Signed-off-by: raver119 <raver119@gmail.com> * Added tests for flatten op. * Added test for Floor op. * bunch of tests fixed Signed-off-by: raver119 <raver119@gmail.com> * mmulDot tests fixed Signed-off-by: raver119 <raver119@gmail.com> * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * Implemented floordiv_bp op and tests. * Fixed scalar case with cuda implementation for bds. * - work on cuda kernel for clip_by_norm backprop op is completed Signed-off-by: Yurii <yurii@skymind.io> * Eliminate cbow crach. * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * Eliminated abortion with batched nlp test. * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * Fixed shared flag initializing. * disabled bunch of cpu workspaces tests Signed-off-by: raver119 <raver119@gmail.com> * scalar operators fix: missing registerSpecialUse call Signed-off-by: raver119 <raver119@gmail.com> * Fixed logdet for cuda and tests. * - correct clipBynorm_bp Signed-off-by: Yurii <yurii@skymind.io> * Fixed crop_and_resize shape datatype. * - correct some mmul tests Signed-off-by: Yurii <yurii@skymind.io>
2019-08-02 19:01:03 +02:00
ASSERT_EQ(ND4J_STATUS_OK, result.status());
[WIP] More of CUDA (#95) * initial commit Signed-off-by: raver119 <raver119@gmail.com> * Implementation of hashcode cuda helper. Working edition. * Fixed parallel test input arangements. * Fixed tests for hashcode op. * Fixed shape calculation for image:crop_and_resize op and test. * NativeOps tests. Initial test suite. * Added tests for indexReduce methods. * Added test on execBroadcast with NDArray as dimensions. * Added test on execBroadcastBool with NDArray as dimensions. * Added tests on execPairwiseTransform and execPairwiseTransofrmBool. * Added tests for execReduce with scalar results. * Added reduce tests for non-empty dims array. * Added tests for reduce3. * Added tests for execScalar. * Added tests for execSummaryStats. * - provide cpu/cuda code for batch_to_space - testing it Signed-off-by: Yurii <yurii@skymind.io> * - remove old test for batch_to_space (had wrong format and numbers were not checked) Signed-off-by: Yurii <yurii@skymind.io> * Fixed complilation errors with test. * Added test for execTransformFloat. * Added test for execTransformSame. * Added test for execTransformBool. * Added test for execTransformStrict. * Added tests for execScalar/execScalarBool with TADs. * Added test for flatten. * - provide cpu/cuda code for space_to_Batch operaion Signed-off-by: Yurii <yurii@skymind.io> * Added test for concat. * comment unnecessary stuff in s_t_b Signed-off-by: Yurii <yurii@skymind.io> * Added test for specialConcat. * Added tests for memcpy/set routines. * Fixed pullRow cuda test. * Added pullRow test. * Added average test. * - correct typo in NDArray::applyPairwiseTransform(nd4j::pairwise::BoolOps op...) Signed-off-by: Yurii <yurii@skymind.io> * - debugging and fixing cuda tests in JavaInteropTests file Signed-off-by: Yurii <yurii@skymind.io> * - correct some tests Signed-off-by: Yurii <yurii@skymind.io> * Added test for shuffle. * Fixed ops declarations. * Restored omp and added shuffle test. * Added convertTypes test. * Added tests for execRandom. Eliminated usage of RandomBuffer with NativeOps. * Added sort tests. * Added tests for execCustomOp. * - further debuging and fixing tests terminated with crash Signed-off-by: Yurii <yurii@skymind.io> * Added tests for calculateOutputShapes. * Addded Benchmarks test. * Commented benchmark tests. * change assertion Signed-off-by: raver119 <raver119@gmail.com> * Added tests for apply_sgd op. Added cpu helper for that op. * Implement cuda helper for aplly_sgd op. Fixed tests for NativeOps. * Added test for assign broadcastable. * Added tests for assign_bp op. * Added tests for axpy op. * - assign/execScalar/execTransformAny signature change - minor test fix Signed-off-by: raver119 <raver119@gmail.com> * Fixed axpy op. * meh Signed-off-by: raver119 <raver119@gmail.com> * - fix tests for nativeOps::concat Signed-off-by: Yurii <yurii@skymind.io> * sequential transform/scalar Signed-off-by: raver119 <raver119@gmail.com> * allow nested parallelism Signed-off-by: raver119 <raver119@gmail.com> * assign_bp leak fix Signed-off-by: raver119 <raver119@gmail.com> * block setRNG fix Signed-off-by: raver119 <raver119@gmail.com> * enable parallelism by default Signed-off-by: raver119 <raver119@gmail.com> * enable nested parallelism by default Signed-off-by: raver119 <raver119@gmail.com> * Added cuda implementation for row_count helper. * Added implementation for tnse gains op helper. * - take into account possible situations when input arrays are empty in reduce_ cuda stuff Signed-off-by: Yurii <yurii@skymind.io> * Implemented tsne/edge_forces op cuda-based helper. Parallelized cpu-based helper for edge_forces. * Added kernel for tsne/symmetrized op heleper. * Implementation of tsne/symmetrized op cuda helper. Working edition. * Eliminated waste printfs. * Added test for broadcastgradientargs op. * host-only fallback for empty reduce float Signed-off-by: raver119 <raver119@gmail.com> * - some tests fixes Signed-off-by: Yurii <yurii@skymind.io> * - correct the rest of reduce_ stuff Signed-off-by: Yurii <yurii@skymind.io> * - further correction of reduce_ stuff Signed-off-by: Yurii <yurii@skymind.io> * Added test for Cbow op. Also added cuda implementation for cbow helpers. * - improve code of stack operation for scalar case Signed-off-by: Yurii <yurii@skymind.io> * - provide cuda kernel for gatherND operation Signed-off-by: Yurii <yurii@skymind.io> * Implementation of cbow helpers with cuda kernels. * minor tests tweaks Signed-off-by: raver119 <raver119@gmail.com> * minor tests tweaks Signed-off-by: raver119 <raver119@gmail.com> * - further correction of cuda stuff Signed-off-by: Yurii <yurii@skymind.io> * Implementatation of cbow op helper with cuda kernels. Working edition. * Skip random testing for cudablas case. * lstmBlockCell context fix Signed-off-by: raver119 <raver119@gmail.com> * Added tests for ELU and ELU_BP ops. * Added tests for eq_scalar, gt_scalar, gte_scalar and lte_scalar ops. * Added tests for neq_scalar. * Added test for noop. * - further work on clipbynorm_bp Signed-off-by: Yurii <yurii@skymind.io> * - get rid of concat op call, use instead direct concat helper call Signed-off-by: Yurii <yurii@skymind.io> * lstmBlockCell context fix Signed-off-by: raver119 <raver119@gmail.com> * Added tests for lrelu and lrelu_bp. * Added tests for selu and selu_bp. * Fixed lrelu derivative helpers. * - some corrections in lstm Signed-off-by: Yurii <yurii@skymind.io> * operator * result shape fix Signed-off-by: raver119 <raver119@gmail.com> * - correct typo in lstmCell Signed-off-by: Yurii <yurii@skymind.io> * few tests fixed Signed-off-by: raver119 <raver119@gmail.com> * CUDA inverse broadcast bool fix Signed-off-by: raver119 <raver119@gmail.com> * disable MMAP test for CUDA Signed-off-by: raver119 <raver119@gmail.com> * BooleanOp syncToDevice Signed-off-by: raver119 <raver119@gmail.com> * meh Signed-off-by: raver119 <raver119@gmail.com> * additional data types for im2col/col2im Signed-off-by: raver119 <raver119@gmail.com> * Added test for firas_sparse op. * one more RandomBuffer test excluded Signed-off-by: raver119 <raver119@gmail.com> * Added tests for flatten op. * Added test for Floor op. * bunch of tests fixed Signed-off-by: raver119 <raver119@gmail.com> * mmulDot tests fixed Signed-off-by: raver119 <raver119@gmail.com> * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * Implemented floordiv_bp op and tests. * Fixed scalar case with cuda implementation for bds. * - work on cuda kernel for clip_by_norm backprop op is completed Signed-off-by: Yurii <yurii@skymind.io> * Eliminate cbow crach. * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * Eliminated abortion with batched nlp test. * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * Fixed shared flag initializing. * disabled bunch of cpu workspaces tests Signed-off-by: raver119 <raver119@gmail.com> * scalar operators fix: missing registerSpecialUse call Signed-off-by: raver119 <raver119@gmail.com> * Fixed logdet for cuda and tests. * - correct clipBynorm_bp Signed-off-by: Yurii <yurii@skymind.io> * Fixed crop_and_resize shape datatype. * - correct some mmul tests Signed-off-by: Yurii <yurii@skymind.io>
2019-08-02 19:01:03 +02:00
auto z = result.at(0);
[WIP] More of CUDA (#95) * initial commit Signed-off-by: raver119 <raver119@gmail.com> * Implementation of hashcode cuda helper. Working edition. * Fixed parallel test input arangements. * Fixed tests for hashcode op. * Fixed shape calculation for image:crop_and_resize op and test. * NativeOps tests. Initial test suite. * Added tests for indexReduce methods. * Added test on execBroadcast with NDArray as dimensions. * Added test on execBroadcastBool with NDArray as dimensions. * Added tests on execPairwiseTransform and execPairwiseTransofrmBool. * Added tests for execReduce with scalar results. * Added reduce tests for non-empty dims array. * Added tests for reduce3. * Added tests for execScalar. * Added tests for execSummaryStats. * - provide cpu/cuda code for batch_to_space - testing it Signed-off-by: Yurii <yurii@skymind.io> * - remove old test for batch_to_space (had wrong format and numbers were not checked) Signed-off-by: Yurii <yurii@skymind.io> * Fixed complilation errors with test. * Added test for execTransformFloat. * Added test for execTransformSame. * Added test for execTransformBool. * Added test for execTransformStrict. * Added tests for execScalar/execScalarBool with TADs. * Added test for flatten. * - provide cpu/cuda code for space_to_Batch operaion Signed-off-by: Yurii <yurii@skymind.io> * Added test for concat. * comment unnecessary stuff in s_t_b Signed-off-by: Yurii <yurii@skymind.io> * Added test for specialConcat. * Added tests for memcpy/set routines. * Fixed pullRow cuda test. * Added pullRow test. * Added average test. * - correct typo in NDArray::applyPairwiseTransform(nd4j::pairwise::BoolOps op...) Signed-off-by: Yurii <yurii@skymind.io> * - debugging and fixing cuda tests in JavaInteropTests file Signed-off-by: Yurii <yurii@skymind.io> * - correct some tests Signed-off-by: Yurii <yurii@skymind.io> * Added test for shuffle. * Fixed ops declarations. * Restored omp and added shuffle test. * Added convertTypes test. * Added tests for execRandom. Eliminated usage of RandomBuffer with NativeOps. * Added sort tests. * Added tests for execCustomOp. * - further debuging and fixing tests terminated with crash Signed-off-by: Yurii <yurii@skymind.io> * Added tests for calculateOutputShapes. * Addded Benchmarks test. * Commented benchmark tests. * change assertion Signed-off-by: raver119 <raver119@gmail.com> * Added tests for apply_sgd op. Added cpu helper for that op. * Implement cuda helper for aplly_sgd op. Fixed tests for NativeOps. * Added test for assign broadcastable. * Added tests for assign_bp op. * Added tests for axpy op. * - assign/execScalar/execTransformAny signature change - minor test fix Signed-off-by: raver119 <raver119@gmail.com> * Fixed axpy op. * meh Signed-off-by: raver119 <raver119@gmail.com> * - fix tests for nativeOps::concat Signed-off-by: Yurii <yurii@skymind.io> * sequential transform/scalar Signed-off-by: raver119 <raver119@gmail.com> * allow nested parallelism Signed-off-by: raver119 <raver119@gmail.com> * assign_bp leak fix Signed-off-by: raver119 <raver119@gmail.com> * block setRNG fix Signed-off-by: raver119 <raver119@gmail.com> * enable parallelism by default Signed-off-by: raver119 <raver119@gmail.com> * enable nested parallelism by default Signed-off-by: raver119 <raver119@gmail.com> * Added cuda implementation for row_count helper. * Added implementation for tnse gains op helper. * - take into account possible situations when input arrays are empty in reduce_ cuda stuff Signed-off-by: Yurii <yurii@skymind.io> * Implemented tsne/edge_forces op cuda-based helper. Parallelized cpu-based helper for edge_forces. * Added kernel for tsne/symmetrized op heleper. * Implementation of tsne/symmetrized op cuda helper. Working edition. * Eliminated waste printfs. * Added test for broadcastgradientargs op. * host-only fallback for empty reduce float Signed-off-by: raver119 <raver119@gmail.com> * - some tests fixes Signed-off-by: Yurii <yurii@skymind.io> * - correct the rest of reduce_ stuff Signed-off-by: Yurii <yurii@skymind.io> * - further correction of reduce_ stuff Signed-off-by: Yurii <yurii@skymind.io> * Added test for Cbow op. Also added cuda implementation for cbow helpers. * - improve code of stack operation for scalar case Signed-off-by: Yurii <yurii@skymind.io> * - provide cuda kernel for gatherND operation Signed-off-by: Yurii <yurii@skymind.io> * Implementation of cbow helpers with cuda kernels. * minor tests tweaks Signed-off-by: raver119 <raver119@gmail.com> * minor tests tweaks Signed-off-by: raver119 <raver119@gmail.com> * - further correction of cuda stuff Signed-off-by: Yurii <yurii@skymind.io> * Implementatation of cbow op helper with cuda kernels. Working edition. * Skip random testing for cudablas case. * lstmBlockCell context fix Signed-off-by: raver119 <raver119@gmail.com> * Added tests for ELU and ELU_BP ops. * Added tests for eq_scalar, gt_scalar, gte_scalar and lte_scalar ops. * Added tests for neq_scalar. * Added test for noop. * - further work on clipbynorm_bp Signed-off-by: Yurii <yurii@skymind.io> * - get rid of concat op call, use instead direct concat helper call Signed-off-by: Yurii <yurii@skymind.io> * lstmBlockCell context fix Signed-off-by: raver119 <raver119@gmail.com> * Added tests for lrelu and lrelu_bp. * Added tests for selu and selu_bp. * Fixed lrelu derivative helpers. * - some corrections in lstm Signed-off-by: Yurii <yurii@skymind.io> * operator * result shape fix Signed-off-by: raver119 <raver119@gmail.com> * - correct typo in lstmCell Signed-off-by: Yurii <yurii@skymind.io> * few tests fixed Signed-off-by: raver119 <raver119@gmail.com> * CUDA inverse broadcast bool fix Signed-off-by: raver119 <raver119@gmail.com> * disable MMAP test for CUDA Signed-off-by: raver119 <raver119@gmail.com> * BooleanOp syncToDevice Signed-off-by: raver119 <raver119@gmail.com> * meh Signed-off-by: raver119 <raver119@gmail.com> * additional data types for im2col/col2im Signed-off-by: raver119 <raver119@gmail.com> * Added test for firas_sparse op. * one more RandomBuffer test excluded Signed-off-by: raver119 <raver119@gmail.com> * Added tests for flatten op. * Added test for Floor op. * bunch of tests fixed Signed-off-by: raver119 <raver119@gmail.com> * mmulDot tests fixed Signed-off-by: raver119 <raver119@gmail.com> * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * Implemented floordiv_bp op and tests. * Fixed scalar case with cuda implementation for bds. * - work on cuda kernel for clip_by_norm backprop op is completed Signed-off-by: Yurii <yurii@skymind.io> * Eliminate cbow crach. * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * Eliminated abortion with batched nlp test. * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * Fixed shared flag initializing. * disabled bunch of cpu workspaces tests Signed-off-by: raver119 <raver119@gmail.com> * scalar operators fix: missing registerSpecialUse call Signed-off-by: raver119 <raver119@gmail.com> * Fixed logdet for cuda and tests. * - correct clipBynorm_bp Signed-off-by: Yurii <yurii@skymind.io> * Fixed crop_and_resize shape datatype. * - correct some mmul tests Signed-off-by: Yurii <yurii@skymind.io>
2019-08-02 19:01:03 +02:00
// z->printIndexedBuffer("Flatten1");
// z->printShapeInfo("Flatten1 shape");
ASSERT_TRUE(exp.equalsTo(z));
[WIP] More of CUDA (#95) * initial commit Signed-off-by: raver119 <raver119@gmail.com> * Implementation of hashcode cuda helper. Working edition. * Fixed parallel test input arangements. * Fixed tests for hashcode op. * Fixed shape calculation for image:crop_and_resize op and test. * NativeOps tests. Initial test suite. * Added tests for indexReduce methods. * Added test on execBroadcast with NDArray as dimensions. * Added test on execBroadcastBool with NDArray as dimensions. * Added tests on execPairwiseTransform and execPairwiseTransofrmBool. * Added tests for execReduce with scalar results. * Added reduce tests for non-empty dims array. * Added tests for reduce3. * Added tests for execScalar. * Added tests for execSummaryStats. * - provide cpu/cuda code for batch_to_space - testing it Signed-off-by: Yurii <yurii@skymind.io> * - remove old test for batch_to_space (had wrong format and numbers were not checked) Signed-off-by: Yurii <yurii@skymind.io> * Fixed complilation errors with test. * Added test for execTransformFloat. * Added test for execTransformSame. * Added test for execTransformBool. * Added test for execTransformStrict. * Added tests for execScalar/execScalarBool with TADs. * Added test for flatten. * - provide cpu/cuda code for space_to_Batch operaion Signed-off-by: Yurii <yurii@skymind.io> * Added test for concat. * comment unnecessary stuff in s_t_b Signed-off-by: Yurii <yurii@skymind.io> * Added test for specialConcat. * Added tests for memcpy/set routines. * Fixed pullRow cuda test. * Added pullRow test. * Added average test. * - correct typo in NDArray::applyPairwiseTransform(nd4j::pairwise::BoolOps op...) Signed-off-by: Yurii <yurii@skymind.io> * - debugging and fixing cuda tests in JavaInteropTests file Signed-off-by: Yurii <yurii@skymind.io> * - correct some tests Signed-off-by: Yurii <yurii@skymind.io> * Added test for shuffle. * Fixed ops declarations. * Restored omp and added shuffle test. * Added convertTypes test. * Added tests for execRandom. Eliminated usage of RandomBuffer with NativeOps. * Added sort tests. * Added tests for execCustomOp. * - further debuging and fixing tests terminated with crash Signed-off-by: Yurii <yurii@skymind.io> * Added tests for calculateOutputShapes. * Addded Benchmarks test. * Commented benchmark tests. * change assertion Signed-off-by: raver119 <raver119@gmail.com> * Added tests for apply_sgd op. Added cpu helper for that op. * Implement cuda helper for aplly_sgd op. Fixed tests for NativeOps. * Added test for assign broadcastable. * Added tests for assign_bp op. * Added tests for axpy op. * - assign/execScalar/execTransformAny signature change - minor test fix Signed-off-by: raver119 <raver119@gmail.com> * Fixed axpy op. * meh Signed-off-by: raver119 <raver119@gmail.com> * - fix tests for nativeOps::concat Signed-off-by: Yurii <yurii@skymind.io> * sequential transform/scalar Signed-off-by: raver119 <raver119@gmail.com> * allow nested parallelism Signed-off-by: raver119 <raver119@gmail.com> * assign_bp leak fix Signed-off-by: raver119 <raver119@gmail.com> * block setRNG fix Signed-off-by: raver119 <raver119@gmail.com> * enable parallelism by default Signed-off-by: raver119 <raver119@gmail.com> * enable nested parallelism by default Signed-off-by: raver119 <raver119@gmail.com> * Added cuda implementation for row_count helper. * Added implementation for tnse gains op helper. * - take into account possible situations when input arrays are empty in reduce_ cuda stuff Signed-off-by: Yurii <yurii@skymind.io> * Implemented tsne/edge_forces op cuda-based helper. Parallelized cpu-based helper for edge_forces. * Added kernel for tsne/symmetrized op heleper. * Implementation of tsne/symmetrized op cuda helper. Working edition. * Eliminated waste printfs. * Added test for broadcastgradientargs op. * host-only fallback for empty reduce float Signed-off-by: raver119 <raver119@gmail.com> * - some tests fixes Signed-off-by: Yurii <yurii@skymind.io> * - correct the rest of reduce_ stuff Signed-off-by: Yurii <yurii@skymind.io> * - further correction of reduce_ stuff Signed-off-by: Yurii <yurii@skymind.io> * Added test for Cbow op. Also added cuda implementation for cbow helpers. * - improve code of stack operation for scalar case Signed-off-by: Yurii <yurii@skymind.io> * - provide cuda kernel for gatherND operation Signed-off-by: Yurii <yurii@skymind.io> * Implementation of cbow helpers with cuda kernels. * minor tests tweaks Signed-off-by: raver119 <raver119@gmail.com> * minor tests tweaks Signed-off-by: raver119 <raver119@gmail.com> * - further correction of cuda stuff Signed-off-by: Yurii <yurii@skymind.io> * Implementatation of cbow op helper with cuda kernels. Working edition. * Skip random testing for cudablas case. * lstmBlockCell context fix Signed-off-by: raver119 <raver119@gmail.com> * Added tests for ELU and ELU_BP ops. * Added tests for eq_scalar, gt_scalar, gte_scalar and lte_scalar ops. * Added tests for neq_scalar. * Added test for noop. * - further work on clipbynorm_bp Signed-off-by: Yurii <yurii@skymind.io> * - get rid of concat op call, use instead direct concat helper call Signed-off-by: Yurii <yurii@skymind.io> * lstmBlockCell context fix Signed-off-by: raver119 <raver119@gmail.com> * Added tests for lrelu and lrelu_bp. * Added tests for selu and selu_bp. * Fixed lrelu derivative helpers. * - some corrections in lstm Signed-off-by: Yurii <yurii@skymind.io> * operator * result shape fix Signed-off-by: raver119 <raver119@gmail.com> * - correct typo in lstmCell Signed-off-by: Yurii <yurii@skymind.io> * few tests fixed Signed-off-by: raver119 <raver119@gmail.com> * CUDA inverse broadcast bool fix Signed-off-by: raver119 <raver119@gmail.com> * disable MMAP test for CUDA Signed-off-by: raver119 <raver119@gmail.com> * BooleanOp syncToDevice Signed-off-by: raver119 <raver119@gmail.com> * meh Signed-off-by: raver119 <raver119@gmail.com> * additional data types for im2col/col2im Signed-off-by: raver119 <raver119@gmail.com> * Added test for firas_sparse op. * one more RandomBuffer test excluded Signed-off-by: raver119 <raver119@gmail.com> * Added tests for flatten op. * Added test for Floor op. * bunch of tests fixed Signed-off-by: raver119 <raver119@gmail.com> * mmulDot tests fixed Signed-off-by: raver119 <raver119@gmail.com> * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * Implemented floordiv_bp op and tests. * Fixed scalar case with cuda implementation for bds. * - work on cuda kernel for clip_by_norm backprop op is completed Signed-off-by: Yurii <yurii@skymind.io> * Eliminate cbow crach. * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * Eliminated abortion with batched nlp test. * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * Fixed shared flag initializing. * disabled bunch of cpu workspaces tests Signed-off-by: raver119 <raver119@gmail.com> * scalar operators fix: missing registerSpecialUse call Signed-off-by: raver119 <raver119@gmail.com> * Fixed logdet for cuda and tests. * - correct clipBynorm_bp Signed-off-by: Yurii <yurii@skymind.io> * Fixed crop_and_resize shape datatype. * - correct some mmul tests Signed-off-by: Yurii <yurii@skymind.io>
2019-08-02 19:01:03 +02:00
}
TEST_F(DeclarableOpsTests4, Test_FlattenTests_2) {
auto x = NDArrayFactory::create<double>('c', {3, 3, 3, 3});
auto y = NDArrayFactory::create<double>('c', {3, 3});
auto exp = NDArrayFactory::create<double>('c', {90});
x.linspace(1);
y.linspace(82);
exp.linspace(1);
sd::ops::flatten op;
auto result = op.evaluate({&x, &y}, {}, {'c'});
ASSERT_EQ(ND4J_STATUS_OK, result.status());
[WIP] More of CUDA (#95) * initial commit Signed-off-by: raver119 <raver119@gmail.com> * Implementation of hashcode cuda helper. Working edition. * Fixed parallel test input arangements. * Fixed tests for hashcode op. * Fixed shape calculation for image:crop_and_resize op and test. * NativeOps tests. Initial test suite. * Added tests for indexReduce methods. * Added test on execBroadcast with NDArray as dimensions. * Added test on execBroadcastBool with NDArray as dimensions. * Added tests on execPairwiseTransform and execPairwiseTransofrmBool. * Added tests for execReduce with scalar results. * Added reduce tests for non-empty dims array. * Added tests for reduce3. * Added tests for execScalar. * Added tests for execSummaryStats. * - provide cpu/cuda code for batch_to_space - testing it Signed-off-by: Yurii <yurii@skymind.io> * - remove old test for batch_to_space (had wrong format and numbers were not checked) Signed-off-by: Yurii <yurii@skymind.io> * Fixed complilation errors with test. * Added test for execTransformFloat. * Added test for execTransformSame. * Added test for execTransformBool. * Added test for execTransformStrict. * Added tests for execScalar/execScalarBool with TADs. * Added test for flatten. * - provide cpu/cuda code for space_to_Batch operaion Signed-off-by: Yurii <yurii@skymind.io> * Added test for concat. * comment unnecessary stuff in s_t_b Signed-off-by: Yurii <yurii@skymind.io> * Added test for specialConcat. * Added tests for memcpy/set routines. * Fixed pullRow cuda test. * Added pullRow test. * Added average test. * - correct typo in NDArray::applyPairwiseTransform(nd4j::pairwise::BoolOps op...) Signed-off-by: Yurii <yurii@skymind.io> * - debugging and fixing cuda tests in JavaInteropTests file Signed-off-by: Yurii <yurii@skymind.io> * - correct some tests Signed-off-by: Yurii <yurii@skymind.io> * Added test for shuffle. * Fixed ops declarations. * Restored omp and added shuffle test. * Added convertTypes test. * Added tests for execRandom. Eliminated usage of RandomBuffer with NativeOps. * Added sort tests. * Added tests for execCustomOp. * - further debuging and fixing tests terminated with crash Signed-off-by: Yurii <yurii@skymind.io> * Added tests for calculateOutputShapes. * Addded Benchmarks test. * Commented benchmark tests. * change assertion Signed-off-by: raver119 <raver119@gmail.com> * Added tests for apply_sgd op. Added cpu helper for that op. * Implement cuda helper for aplly_sgd op. Fixed tests for NativeOps. * Added test for assign broadcastable. * Added tests for assign_bp op. * Added tests for axpy op. * - assign/execScalar/execTransformAny signature change - minor test fix Signed-off-by: raver119 <raver119@gmail.com> * Fixed axpy op. * meh Signed-off-by: raver119 <raver119@gmail.com> * - fix tests for nativeOps::concat Signed-off-by: Yurii <yurii@skymind.io> * sequential transform/scalar Signed-off-by: raver119 <raver119@gmail.com> * allow nested parallelism Signed-off-by: raver119 <raver119@gmail.com> * assign_bp leak fix Signed-off-by: raver119 <raver119@gmail.com> * block setRNG fix Signed-off-by: raver119 <raver119@gmail.com> * enable parallelism by default Signed-off-by: raver119 <raver119@gmail.com> * enable nested parallelism by default Signed-off-by: raver119 <raver119@gmail.com> * Added cuda implementation for row_count helper. * Added implementation for tnse gains op helper. * - take into account possible situations when input arrays are empty in reduce_ cuda stuff Signed-off-by: Yurii <yurii@skymind.io> * Implemented tsne/edge_forces op cuda-based helper. Parallelized cpu-based helper for edge_forces. * Added kernel for tsne/symmetrized op heleper. * Implementation of tsne/symmetrized op cuda helper. Working edition. * Eliminated waste printfs. * Added test for broadcastgradientargs op. * host-only fallback for empty reduce float Signed-off-by: raver119 <raver119@gmail.com> * - some tests fixes Signed-off-by: Yurii <yurii@skymind.io> * - correct the rest of reduce_ stuff Signed-off-by: Yurii <yurii@skymind.io> * - further correction of reduce_ stuff Signed-off-by: Yurii <yurii@skymind.io> * Added test for Cbow op. Also added cuda implementation for cbow helpers. * - improve code of stack operation for scalar case Signed-off-by: Yurii <yurii@skymind.io> * - provide cuda kernel for gatherND operation Signed-off-by: Yurii <yurii@skymind.io> * Implementation of cbow helpers with cuda kernels. * minor tests tweaks Signed-off-by: raver119 <raver119@gmail.com> * minor tests tweaks Signed-off-by: raver119 <raver119@gmail.com> * - further correction of cuda stuff Signed-off-by: Yurii <yurii@skymind.io> * Implementatation of cbow op helper with cuda kernels. Working edition. * Skip random testing for cudablas case. * lstmBlockCell context fix Signed-off-by: raver119 <raver119@gmail.com> * Added tests for ELU and ELU_BP ops. * Added tests for eq_scalar, gt_scalar, gte_scalar and lte_scalar ops. * Added tests for neq_scalar. * Added test for noop. * - further work on clipbynorm_bp Signed-off-by: Yurii <yurii@skymind.io> * - get rid of concat op call, use instead direct concat helper call Signed-off-by: Yurii <yurii@skymind.io> * lstmBlockCell context fix Signed-off-by: raver119 <raver119@gmail.com> * Added tests for lrelu and lrelu_bp. * Added tests for selu and selu_bp. * Fixed lrelu derivative helpers. * - some corrections in lstm Signed-off-by: Yurii <yurii@skymind.io> * operator * result shape fix Signed-off-by: raver119 <raver119@gmail.com> * - correct typo in lstmCell Signed-off-by: Yurii <yurii@skymind.io> * few tests fixed Signed-off-by: raver119 <raver119@gmail.com> * CUDA inverse broadcast bool fix Signed-off-by: raver119 <raver119@gmail.com> * disable MMAP test for CUDA Signed-off-by: raver119 <raver119@gmail.com> * BooleanOp syncToDevice Signed-off-by: raver119 <raver119@gmail.com> * meh Signed-off-by: raver119 <raver119@gmail.com> * additional data types for im2col/col2im Signed-off-by: raver119 <raver119@gmail.com> * Added test for firas_sparse op. * one more RandomBuffer test excluded Signed-off-by: raver119 <raver119@gmail.com> * Added tests for flatten op. * Added test for Floor op. * bunch of tests fixed Signed-off-by: raver119 <raver119@gmail.com> * mmulDot tests fixed Signed-off-by: raver119 <raver119@gmail.com> * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * Implemented floordiv_bp op and tests. * Fixed scalar case with cuda implementation for bds. * - work on cuda kernel for clip_by_norm backprop op is completed Signed-off-by: Yurii <yurii@skymind.io> * Eliminate cbow crach. * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * Eliminated abortion with batched nlp test. * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * Fixed shared flag initializing. * disabled bunch of cpu workspaces tests Signed-off-by: raver119 <raver119@gmail.com> * scalar operators fix: missing registerSpecialUse call Signed-off-by: raver119 <raver119@gmail.com> * Fixed logdet for cuda and tests. * - correct clipBynorm_bp Signed-off-by: Yurii <yurii@skymind.io> * Fixed crop_and_resize shape datatype. * - correct some mmul tests Signed-off-by: Yurii <yurii@skymind.io>
2019-08-02 19:01:03 +02:00
auto z = result.at(0);
[WIP] More of CUDA (#95) * initial commit Signed-off-by: raver119 <raver119@gmail.com> * Implementation of hashcode cuda helper. Working edition. * Fixed parallel test input arangements. * Fixed tests for hashcode op. * Fixed shape calculation for image:crop_and_resize op and test. * NativeOps tests. Initial test suite. * Added tests for indexReduce methods. * Added test on execBroadcast with NDArray as dimensions. * Added test on execBroadcastBool with NDArray as dimensions. * Added tests on execPairwiseTransform and execPairwiseTransofrmBool. * Added tests for execReduce with scalar results. * Added reduce tests for non-empty dims array. * Added tests for reduce3. * Added tests for execScalar. * Added tests for execSummaryStats. * - provide cpu/cuda code for batch_to_space - testing it Signed-off-by: Yurii <yurii@skymind.io> * - remove old test for batch_to_space (had wrong format and numbers were not checked) Signed-off-by: Yurii <yurii@skymind.io> * Fixed complilation errors with test. * Added test for execTransformFloat. * Added test for execTransformSame. * Added test for execTransformBool. * Added test for execTransformStrict. * Added tests for execScalar/execScalarBool with TADs. * Added test for flatten. * - provide cpu/cuda code for space_to_Batch operaion Signed-off-by: Yurii <yurii@skymind.io> * Added test for concat. * comment unnecessary stuff in s_t_b Signed-off-by: Yurii <yurii@skymind.io> * Added test for specialConcat. * Added tests for memcpy/set routines. * Fixed pullRow cuda test. * Added pullRow test. * Added average test. * - correct typo in NDArray::applyPairwiseTransform(nd4j::pairwise::BoolOps op...) Signed-off-by: Yurii <yurii@skymind.io> * - debugging and fixing cuda tests in JavaInteropTests file Signed-off-by: Yurii <yurii@skymind.io> * - correct some tests Signed-off-by: Yurii <yurii@skymind.io> * Added test for shuffle. * Fixed ops declarations. * Restored omp and added shuffle test. * Added convertTypes test. * Added tests for execRandom. Eliminated usage of RandomBuffer with NativeOps. * Added sort tests. * Added tests for execCustomOp. * - further debuging and fixing tests terminated with crash Signed-off-by: Yurii <yurii@skymind.io> * Added tests for calculateOutputShapes. * Addded Benchmarks test. * Commented benchmark tests. * change assertion Signed-off-by: raver119 <raver119@gmail.com> * Added tests for apply_sgd op. Added cpu helper for that op. * Implement cuda helper for aplly_sgd op. Fixed tests for NativeOps. * Added test for assign broadcastable. * Added tests for assign_bp op. * Added tests for axpy op. * - assign/execScalar/execTransformAny signature change - minor test fix Signed-off-by: raver119 <raver119@gmail.com> * Fixed axpy op. * meh Signed-off-by: raver119 <raver119@gmail.com> * - fix tests for nativeOps::concat Signed-off-by: Yurii <yurii@skymind.io> * sequential transform/scalar Signed-off-by: raver119 <raver119@gmail.com> * allow nested parallelism Signed-off-by: raver119 <raver119@gmail.com> * assign_bp leak fix Signed-off-by: raver119 <raver119@gmail.com> * block setRNG fix Signed-off-by: raver119 <raver119@gmail.com> * enable parallelism by default Signed-off-by: raver119 <raver119@gmail.com> * enable nested parallelism by default Signed-off-by: raver119 <raver119@gmail.com> * Added cuda implementation for row_count helper. * Added implementation for tnse gains op helper. * - take into account possible situations when input arrays are empty in reduce_ cuda stuff Signed-off-by: Yurii <yurii@skymind.io> * Implemented tsne/edge_forces op cuda-based helper. Parallelized cpu-based helper for edge_forces. * Added kernel for tsne/symmetrized op heleper. * Implementation of tsne/symmetrized op cuda helper. Working edition. * Eliminated waste printfs. * Added test for broadcastgradientargs op. * host-only fallback for empty reduce float Signed-off-by: raver119 <raver119@gmail.com> * - some tests fixes Signed-off-by: Yurii <yurii@skymind.io> * - correct the rest of reduce_ stuff Signed-off-by: Yurii <yurii@skymind.io> * - further correction of reduce_ stuff Signed-off-by: Yurii <yurii@skymind.io> * Added test for Cbow op. Also added cuda implementation for cbow helpers. * - improve code of stack operation for scalar case Signed-off-by: Yurii <yurii@skymind.io> * - provide cuda kernel for gatherND operation Signed-off-by: Yurii <yurii@skymind.io> * Implementation of cbow helpers with cuda kernels. * minor tests tweaks Signed-off-by: raver119 <raver119@gmail.com> * minor tests tweaks Signed-off-by: raver119 <raver119@gmail.com> * - further correction of cuda stuff Signed-off-by: Yurii <yurii@skymind.io> * Implementatation of cbow op helper with cuda kernels. Working edition. * Skip random testing for cudablas case. * lstmBlockCell context fix Signed-off-by: raver119 <raver119@gmail.com> * Added tests for ELU and ELU_BP ops. * Added tests for eq_scalar, gt_scalar, gte_scalar and lte_scalar ops. * Added tests for neq_scalar. * Added test for noop. * - further work on clipbynorm_bp Signed-off-by: Yurii <yurii@skymind.io> * - get rid of concat op call, use instead direct concat helper call Signed-off-by: Yurii <yurii@skymind.io> * lstmBlockCell context fix Signed-off-by: raver119 <raver119@gmail.com> * Added tests for lrelu and lrelu_bp. * Added tests for selu and selu_bp. * Fixed lrelu derivative helpers. * - some corrections in lstm Signed-off-by: Yurii <yurii@skymind.io> * operator * result shape fix Signed-off-by: raver119 <raver119@gmail.com> * - correct typo in lstmCell Signed-off-by: Yurii <yurii@skymind.io> * few tests fixed Signed-off-by: raver119 <raver119@gmail.com> * CUDA inverse broadcast bool fix Signed-off-by: raver119 <raver119@gmail.com> * disable MMAP test for CUDA Signed-off-by: raver119 <raver119@gmail.com> * BooleanOp syncToDevice Signed-off-by: raver119 <raver119@gmail.com> * meh Signed-off-by: raver119 <raver119@gmail.com> * additional data types for im2col/col2im Signed-off-by: raver119 <raver119@gmail.com> * Added test for firas_sparse op. * one more RandomBuffer test excluded Signed-off-by: raver119 <raver119@gmail.com> * Added tests for flatten op. * Added test for Floor op. * bunch of tests fixed Signed-off-by: raver119 <raver119@gmail.com> * mmulDot tests fixed Signed-off-by: raver119 <raver119@gmail.com> * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * Implemented floordiv_bp op and tests. * Fixed scalar case with cuda implementation for bds. * - work on cuda kernel for clip_by_norm backprop op is completed Signed-off-by: Yurii <yurii@skymind.io> * Eliminate cbow crach. * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * Eliminated abortion with batched nlp test. * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * Fixed shared flag initializing. * disabled bunch of cpu workspaces tests Signed-off-by: raver119 <raver119@gmail.com> * scalar operators fix: missing registerSpecialUse call Signed-off-by: raver119 <raver119@gmail.com> * Fixed logdet for cuda and tests. * - correct clipBynorm_bp Signed-off-by: Yurii <yurii@skymind.io> * Fixed crop_and_resize shape datatype. * - correct some mmul tests Signed-off-by: Yurii <yurii@skymind.io>
2019-08-02 19:01:03 +02:00
// z->printIndexedBuffer("Flatten2");
// z->printShapeInfo("Flatten2 shape");
ASSERT_TRUE(exp.equalsTo(z));
[WIP] More of CUDA (#95) * initial commit Signed-off-by: raver119 <raver119@gmail.com> * Implementation of hashcode cuda helper. Working edition. * Fixed parallel test input arangements. * Fixed tests for hashcode op. * Fixed shape calculation for image:crop_and_resize op and test. * NativeOps tests. Initial test suite. * Added tests for indexReduce methods. * Added test on execBroadcast with NDArray as dimensions. * Added test on execBroadcastBool with NDArray as dimensions. * Added tests on execPairwiseTransform and execPairwiseTransofrmBool. * Added tests for execReduce with scalar results. * Added reduce tests for non-empty dims array. * Added tests for reduce3. * Added tests for execScalar. * Added tests for execSummaryStats. * - provide cpu/cuda code for batch_to_space - testing it Signed-off-by: Yurii <yurii@skymind.io> * - remove old test for batch_to_space (had wrong format and numbers were not checked) Signed-off-by: Yurii <yurii@skymind.io> * Fixed complilation errors with test. * Added test for execTransformFloat. * Added test for execTransformSame. * Added test for execTransformBool. * Added test for execTransformStrict. * Added tests for execScalar/execScalarBool with TADs. * Added test for flatten. * - provide cpu/cuda code for space_to_Batch operaion Signed-off-by: Yurii <yurii@skymind.io> * Added test for concat. * comment unnecessary stuff in s_t_b Signed-off-by: Yurii <yurii@skymind.io> * Added test for specialConcat. * Added tests for memcpy/set routines. * Fixed pullRow cuda test. * Added pullRow test. * Added average test. * - correct typo in NDArray::applyPairwiseTransform(nd4j::pairwise::BoolOps op...) Signed-off-by: Yurii <yurii@skymind.io> * - debugging and fixing cuda tests in JavaInteropTests file Signed-off-by: Yurii <yurii@skymind.io> * - correct some tests Signed-off-by: Yurii <yurii@skymind.io> * Added test for shuffle. * Fixed ops declarations. * Restored omp and added shuffle test. * Added convertTypes test. * Added tests for execRandom. Eliminated usage of RandomBuffer with NativeOps. * Added sort tests. * Added tests for execCustomOp. * - further debuging and fixing tests terminated with crash Signed-off-by: Yurii <yurii@skymind.io> * Added tests for calculateOutputShapes. * Addded Benchmarks test. * Commented benchmark tests. * change assertion Signed-off-by: raver119 <raver119@gmail.com> * Added tests for apply_sgd op. Added cpu helper for that op. * Implement cuda helper for aplly_sgd op. Fixed tests for NativeOps. * Added test for assign broadcastable. * Added tests for assign_bp op. * Added tests for axpy op. * - assign/execScalar/execTransformAny signature change - minor test fix Signed-off-by: raver119 <raver119@gmail.com> * Fixed axpy op. * meh Signed-off-by: raver119 <raver119@gmail.com> * - fix tests for nativeOps::concat Signed-off-by: Yurii <yurii@skymind.io> * sequential transform/scalar Signed-off-by: raver119 <raver119@gmail.com> * allow nested parallelism Signed-off-by: raver119 <raver119@gmail.com> * assign_bp leak fix Signed-off-by: raver119 <raver119@gmail.com> * block setRNG fix Signed-off-by: raver119 <raver119@gmail.com> * enable parallelism by default Signed-off-by: raver119 <raver119@gmail.com> * enable nested parallelism by default Signed-off-by: raver119 <raver119@gmail.com> * Added cuda implementation for row_count helper. * Added implementation for tnse gains op helper. * - take into account possible situations when input arrays are empty in reduce_ cuda stuff Signed-off-by: Yurii <yurii@skymind.io> * Implemented tsne/edge_forces op cuda-based helper. Parallelized cpu-based helper for edge_forces. * Added kernel for tsne/symmetrized op heleper. * Implementation of tsne/symmetrized op cuda helper. Working edition. * Eliminated waste printfs. * Added test for broadcastgradientargs op. * host-only fallback for empty reduce float Signed-off-by: raver119 <raver119@gmail.com> * - some tests fixes Signed-off-by: Yurii <yurii@skymind.io> * - correct the rest of reduce_ stuff Signed-off-by: Yurii <yurii@skymind.io> * - further correction of reduce_ stuff Signed-off-by: Yurii <yurii@skymind.io> * Added test for Cbow op. Also added cuda implementation for cbow helpers. * - improve code of stack operation for scalar case Signed-off-by: Yurii <yurii@skymind.io> * - provide cuda kernel for gatherND operation Signed-off-by: Yurii <yurii@skymind.io> * Implementation of cbow helpers with cuda kernels. * minor tests tweaks Signed-off-by: raver119 <raver119@gmail.com> * minor tests tweaks Signed-off-by: raver119 <raver119@gmail.com> * - further correction of cuda stuff Signed-off-by: Yurii <yurii@skymind.io> * Implementatation of cbow op helper with cuda kernels. Working edition. * Skip random testing for cudablas case. * lstmBlockCell context fix Signed-off-by: raver119 <raver119@gmail.com> * Added tests for ELU and ELU_BP ops. * Added tests for eq_scalar, gt_scalar, gte_scalar and lte_scalar ops. * Added tests for neq_scalar. * Added test for noop. * - further work on clipbynorm_bp Signed-off-by: Yurii <yurii@skymind.io> * - get rid of concat op call, use instead direct concat helper call Signed-off-by: Yurii <yurii@skymind.io> * lstmBlockCell context fix Signed-off-by: raver119 <raver119@gmail.com> * Added tests for lrelu and lrelu_bp. * Added tests for selu and selu_bp. * Fixed lrelu derivative helpers. * - some corrections in lstm Signed-off-by: Yurii <yurii@skymind.io> * operator * result shape fix Signed-off-by: raver119 <raver119@gmail.com> * - correct typo in lstmCell Signed-off-by: Yurii <yurii@skymind.io> * few tests fixed Signed-off-by: raver119 <raver119@gmail.com> * CUDA inverse broadcast bool fix Signed-off-by: raver119 <raver119@gmail.com> * disable MMAP test for CUDA Signed-off-by: raver119 <raver119@gmail.com> * BooleanOp syncToDevice Signed-off-by: raver119 <raver119@gmail.com> * meh Signed-off-by: raver119 <raver119@gmail.com> * additional data types for im2col/col2im Signed-off-by: raver119 <raver119@gmail.com> * Added test for firas_sparse op. * one more RandomBuffer test excluded Signed-off-by: raver119 <raver119@gmail.com> * Added tests for flatten op. * Added test for Floor op. * bunch of tests fixed Signed-off-by: raver119 <raver119@gmail.com> * mmulDot tests fixed Signed-off-by: raver119 <raver119@gmail.com> * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * Implemented floordiv_bp op and tests. * Fixed scalar case with cuda implementation for bds. * - work on cuda kernel for clip_by_norm backprop op is completed Signed-off-by: Yurii <yurii@skymind.io> * Eliminate cbow crach. * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * Eliminated abortion with batched nlp test. * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * Fixed shared flag initializing. * disabled bunch of cpu workspaces tests Signed-off-by: raver119 <raver119@gmail.com> * scalar operators fix: missing registerSpecialUse call Signed-off-by: raver119 <raver119@gmail.com> * Fixed logdet for cuda and tests. * - correct clipBynorm_bp Signed-off-by: Yurii <yurii@skymind.io> * Fixed crop_and_resize shape datatype. * - correct some mmul tests Signed-off-by: Yurii <yurii@skymind.io>
2019-08-02 19:01:03 +02:00
}
TEST_F(DeclarableOpsTests4, Test_FlattenTests_3) {
NDArray x('c', {2,2}, {1, 2, 3, 4}, sd::DataType::INT32);
NDArray y('f', {2,2}, sd::DataType::INT32);
NDArray exp('c', {8}, {1, 2, 3, 4, 1, 2, 3, 4}, sd::DataType::INT32);
y.assign(x);
sd::ops::flatten op;
auto result = op.evaluate({&x, &y}, {}, {'c'});
ASSERT_EQ(ND4J_STATUS_OK, result.status());
auto z = result.at(0);
ASSERT_TRUE(exp.equalsTo(z));
}
TEST_F(DeclarableOpsTests4, Test_FlattenTests_4) {
NDArray x('c', {2,2}, {1, 2, 3, 4}, sd::DataType::INT32);
NDArray y('f', {2,2}, sd::DataType::INT32);
NDArray exp('c', {8}, {1, 3, 2, 4, 1, 3, 2, 4}, sd::DataType::INT32);
y.assign(x);
sd::ops::flatten op;
auto result = op.evaluate({&x, &y}, {}, {'f'});
ASSERT_EQ(ND4J_STATUS_OK, result.status());
auto z = result.at(0);
ASSERT_TRUE(exp.equalsTo(z));
}
[WIP] More of CUDA (#95) * initial commit Signed-off-by: raver119 <raver119@gmail.com> * Implementation of hashcode cuda helper. Working edition. * Fixed parallel test input arangements. * Fixed tests for hashcode op. * Fixed shape calculation for image:crop_and_resize op and test. * NativeOps tests. Initial test suite. * Added tests for indexReduce methods. * Added test on execBroadcast with NDArray as dimensions. * Added test on execBroadcastBool with NDArray as dimensions. * Added tests on execPairwiseTransform and execPairwiseTransofrmBool. * Added tests for execReduce with scalar results. * Added reduce tests for non-empty dims array. * Added tests for reduce3. * Added tests for execScalar. * Added tests for execSummaryStats. * - provide cpu/cuda code for batch_to_space - testing it Signed-off-by: Yurii <yurii@skymind.io> * - remove old test for batch_to_space (had wrong format and numbers were not checked) Signed-off-by: Yurii <yurii@skymind.io> * Fixed complilation errors with test. * Added test for execTransformFloat. * Added test for execTransformSame. * Added test for execTransformBool. * Added test for execTransformStrict. * Added tests for execScalar/execScalarBool with TADs. * Added test for flatten. * - provide cpu/cuda code for space_to_Batch operaion Signed-off-by: Yurii <yurii@skymind.io> * Added test for concat. * comment unnecessary stuff in s_t_b Signed-off-by: Yurii <yurii@skymind.io> * Added test for specialConcat. * Added tests for memcpy/set routines. * Fixed pullRow cuda test. * Added pullRow test. * Added average test. * - correct typo in NDArray::applyPairwiseTransform(nd4j::pairwise::BoolOps op...) Signed-off-by: Yurii <yurii@skymind.io> * - debugging and fixing cuda tests in JavaInteropTests file Signed-off-by: Yurii <yurii@skymind.io> * - correct some tests Signed-off-by: Yurii <yurii@skymind.io> * Added test for shuffle. * Fixed ops declarations. * Restored omp and added shuffle test. * Added convertTypes test. * Added tests for execRandom. Eliminated usage of RandomBuffer with NativeOps. * Added sort tests. * Added tests for execCustomOp. * - further debuging and fixing tests terminated with crash Signed-off-by: Yurii <yurii@skymind.io> * Added tests for calculateOutputShapes. * Addded Benchmarks test. * Commented benchmark tests. * change assertion Signed-off-by: raver119 <raver119@gmail.com> * Added tests for apply_sgd op. Added cpu helper for that op. * Implement cuda helper for aplly_sgd op. Fixed tests for NativeOps. * Added test for assign broadcastable. * Added tests for assign_bp op. * Added tests for axpy op. * - assign/execScalar/execTransformAny signature change - minor test fix Signed-off-by: raver119 <raver119@gmail.com> * Fixed axpy op. * meh Signed-off-by: raver119 <raver119@gmail.com> * - fix tests for nativeOps::concat Signed-off-by: Yurii <yurii@skymind.io> * sequential transform/scalar Signed-off-by: raver119 <raver119@gmail.com> * allow nested parallelism Signed-off-by: raver119 <raver119@gmail.com> * assign_bp leak fix Signed-off-by: raver119 <raver119@gmail.com> * block setRNG fix Signed-off-by: raver119 <raver119@gmail.com> * enable parallelism by default Signed-off-by: raver119 <raver119@gmail.com> * enable nested parallelism by default Signed-off-by: raver119 <raver119@gmail.com> * Added cuda implementation for row_count helper. * Added implementation for tnse gains op helper. * - take into account possible situations when input arrays are empty in reduce_ cuda stuff Signed-off-by: Yurii <yurii@skymind.io> * Implemented tsne/edge_forces op cuda-based helper. Parallelized cpu-based helper for edge_forces. * Added kernel for tsne/symmetrized op heleper. * Implementation of tsne/symmetrized op cuda helper. Working edition. * Eliminated waste printfs. * Added test for broadcastgradientargs op. * host-only fallback for empty reduce float Signed-off-by: raver119 <raver119@gmail.com> * - some tests fixes Signed-off-by: Yurii <yurii@skymind.io> * - correct the rest of reduce_ stuff Signed-off-by: Yurii <yurii@skymind.io> * - further correction of reduce_ stuff Signed-off-by: Yurii <yurii@skymind.io> * Added test for Cbow op. Also added cuda implementation for cbow helpers. * - improve code of stack operation for scalar case Signed-off-by: Yurii <yurii@skymind.io> * - provide cuda kernel for gatherND operation Signed-off-by: Yurii <yurii@skymind.io> * Implementation of cbow helpers with cuda kernels. * minor tests tweaks Signed-off-by: raver119 <raver119@gmail.com> * minor tests tweaks Signed-off-by: raver119 <raver119@gmail.com> * - further correction of cuda stuff Signed-off-by: Yurii <yurii@skymind.io> * Implementatation of cbow op helper with cuda kernels. Working edition. * Skip random testing for cudablas case. * lstmBlockCell context fix Signed-off-by: raver119 <raver119@gmail.com> * Added tests for ELU and ELU_BP ops. * Added tests for eq_scalar, gt_scalar, gte_scalar and lte_scalar ops. * Added tests for neq_scalar. * Added test for noop. * - further work on clipbynorm_bp Signed-off-by: Yurii <yurii@skymind.io> * - get rid of concat op call, use instead direct concat helper call Signed-off-by: Yurii <yurii@skymind.io> * lstmBlockCell context fix Signed-off-by: raver119 <raver119@gmail.com> * Added tests for lrelu and lrelu_bp. * Added tests for selu and selu_bp. * Fixed lrelu derivative helpers. * - some corrections in lstm Signed-off-by: Yurii <yurii@skymind.io> * operator * result shape fix Signed-off-by: raver119 <raver119@gmail.com> * - correct typo in lstmCell Signed-off-by: Yurii <yurii@skymind.io> * few tests fixed Signed-off-by: raver119 <raver119@gmail.com> * CUDA inverse broadcast bool fix Signed-off-by: raver119 <raver119@gmail.com> * disable MMAP test for CUDA Signed-off-by: raver119 <raver119@gmail.com> * BooleanOp syncToDevice Signed-off-by: raver119 <raver119@gmail.com> * meh Signed-off-by: raver119 <raver119@gmail.com> * additional data types for im2col/col2im Signed-off-by: raver119 <raver119@gmail.com> * Added test for firas_sparse op. * one more RandomBuffer test excluded Signed-off-by: raver119 <raver119@gmail.com> * Added tests for flatten op. * Added test for Floor op. * bunch of tests fixed Signed-off-by: raver119 <raver119@gmail.com> * mmulDot tests fixed Signed-off-by: raver119 <raver119@gmail.com> * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * Implemented floordiv_bp op and tests. * Fixed scalar case with cuda implementation for bds. * - work on cuda kernel for clip_by_norm backprop op is completed Signed-off-by: Yurii <yurii@skymind.io> * Eliminate cbow crach. * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * Eliminated abortion with batched nlp test. * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * Fixed shared flag initializing. * disabled bunch of cpu workspaces tests Signed-off-by: raver119 <raver119@gmail.com> * scalar operators fix: missing registerSpecialUse call Signed-off-by: raver119 <raver119@gmail.com> * Fixed logdet for cuda and tests. * - correct clipBynorm_bp Signed-off-by: Yurii <yurii@skymind.io> * Fixed crop_and_resize shape datatype. * - correct some mmul tests Signed-off-by: Yurii <yurii@skymind.io>
2019-08-02 19:01:03 +02:00
TEST_F(DeclarableOpsTests4, Test_FloorTests_1) {
auto x = NDArrayFactory::create<double>('c', {3, 3}, {1.5, 2.3, 3.4, 4.3, 5.9, 6.1, 7.2, 8.9, 9.7});
auto exp = NDArrayFactory::create<double>('c', {3,3});
exp.linspace(1);
sd::ops::Floor op;
auto result = op.evaluate({&x});
[WIP] More of CUDA (#95) * initial commit Signed-off-by: raver119 <raver119@gmail.com> * Implementation of hashcode cuda helper. Working edition. * Fixed parallel test input arangements. * Fixed tests for hashcode op. * Fixed shape calculation for image:crop_and_resize op and test. * NativeOps tests. Initial test suite. * Added tests for indexReduce methods. * Added test on execBroadcast with NDArray as dimensions. * Added test on execBroadcastBool with NDArray as dimensions. * Added tests on execPairwiseTransform and execPairwiseTransofrmBool. * Added tests for execReduce with scalar results. * Added reduce tests for non-empty dims array. * Added tests for reduce3. * Added tests for execScalar. * Added tests for execSummaryStats. * - provide cpu/cuda code for batch_to_space - testing it Signed-off-by: Yurii <yurii@skymind.io> * - remove old test for batch_to_space (had wrong format and numbers were not checked) Signed-off-by: Yurii <yurii@skymind.io> * Fixed complilation errors with test. * Added test for execTransformFloat. * Added test for execTransformSame. * Added test for execTransformBool. * Added test for execTransformStrict. * Added tests for execScalar/execScalarBool with TADs. * Added test for flatten. * - provide cpu/cuda code for space_to_Batch operaion Signed-off-by: Yurii <yurii@skymind.io> * Added test for concat. * comment unnecessary stuff in s_t_b Signed-off-by: Yurii <yurii@skymind.io> * Added test for specialConcat. * Added tests for memcpy/set routines. * Fixed pullRow cuda test. * Added pullRow test. * Added average test. * - correct typo in NDArray::applyPairwiseTransform(nd4j::pairwise::BoolOps op...) Signed-off-by: Yurii <yurii@skymind.io> * - debugging and fixing cuda tests in JavaInteropTests file Signed-off-by: Yurii <yurii@skymind.io> * - correct some tests Signed-off-by: Yurii <yurii@skymind.io> * Added test for shuffle. * Fixed ops declarations. * Restored omp and added shuffle test. * Added convertTypes test. * Added tests for execRandom. Eliminated usage of RandomBuffer with NativeOps. * Added sort tests. * Added tests for execCustomOp. * - further debuging and fixing tests terminated with crash Signed-off-by: Yurii <yurii@skymind.io> * Added tests for calculateOutputShapes. * Addded Benchmarks test. * Commented benchmark tests. * change assertion Signed-off-by: raver119 <raver119@gmail.com> * Added tests for apply_sgd op. Added cpu helper for that op. * Implement cuda helper for aplly_sgd op. Fixed tests for NativeOps. * Added test for assign broadcastable. * Added tests for assign_bp op. * Added tests for axpy op. * - assign/execScalar/execTransformAny signature change - minor test fix Signed-off-by: raver119 <raver119@gmail.com> * Fixed axpy op. * meh Signed-off-by: raver119 <raver119@gmail.com> * - fix tests for nativeOps::concat Signed-off-by: Yurii <yurii@skymind.io> * sequential transform/scalar Signed-off-by: raver119 <raver119@gmail.com> * allow nested parallelism Signed-off-by: raver119 <raver119@gmail.com> * assign_bp leak fix Signed-off-by: raver119 <raver119@gmail.com> * block setRNG fix Signed-off-by: raver119 <raver119@gmail.com> * enable parallelism by default Signed-off-by: raver119 <raver119@gmail.com> * enable nested parallelism by default Signed-off-by: raver119 <raver119@gmail.com> * Added cuda implementation for row_count helper. * Added implementation for tnse gains op helper. * - take into account possible situations when input arrays are empty in reduce_ cuda stuff Signed-off-by: Yurii <yurii@skymind.io> * Implemented tsne/edge_forces op cuda-based helper. Parallelized cpu-based helper for edge_forces. * Added kernel for tsne/symmetrized op heleper. * Implementation of tsne/symmetrized op cuda helper. Working edition. * Eliminated waste printfs. * Added test for broadcastgradientargs op. * host-only fallback for empty reduce float Signed-off-by: raver119 <raver119@gmail.com> * - some tests fixes Signed-off-by: Yurii <yurii@skymind.io> * - correct the rest of reduce_ stuff Signed-off-by: Yurii <yurii@skymind.io> * - further correction of reduce_ stuff Signed-off-by: Yurii <yurii@skymind.io> * Added test for Cbow op. Also added cuda implementation for cbow helpers. * - improve code of stack operation for scalar case Signed-off-by: Yurii <yurii@skymind.io> * - provide cuda kernel for gatherND operation Signed-off-by: Yurii <yurii@skymind.io> * Implementation of cbow helpers with cuda kernels. * minor tests tweaks Signed-off-by: raver119 <raver119@gmail.com> * minor tests tweaks Signed-off-by: raver119 <raver119@gmail.com> * - further correction of cuda stuff Signed-off-by: Yurii <yurii@skymind.io> * Implementatation of cbow op helper with cuda kernels. Working edition. * Skip random testing for cudablas case. * lstmBlockCell context fix Signed-off-by: raver119 <raver119@gmail.com> * Added tests for ELU and ELU_BP ops. * Added tests for eq_scalar, gt_scalar, gte_scalar and lte_scalar ops. * Added tests for neq_scalar. * Added test for noop. * - further work on clipbynorm_bp Signed-off-by: Yurii <yurii@skymind.io> * - get rid of concat op call, use instead direct concat helper call Signed-off-by: Yurii <yurii@skymind.io> * lstmBlockCell context fix Signed-off-by: raver119 <raver119@gmail.com> * Added tests for lrelu and lrelu_bp. * Added tests for selu and selu_bp. * Fixed lrelu derivative helpers. * - some corrections in lstm Signed-off-by: Yurii <yurii@skymind.io> * operator * result shape fix Signed-off-by: raver119 <raver119@gmail.com> * - correct typo in lstmCell Signed-off-by: Yurii <yurii@skymind.io> * few tests fixed Signed-off-by: raver119 <raver119@gmail.com> * CUDA inverse broadcast bool fix Signed-off-by: raver119 <raver119@gmail.com> * disable MMAP test for CUDA Signed-off-by: raver119 <raver119@gmail.com> * BooleanOp syncToDevice Signed-off-by: raver119 <raver119@gmail.com> * meh Signed-off-by: raver119 <raver119@gmail.com> * additional data types for im2col/col2im Signed-off-by: raver119 <raver119@gmail.com> * Added test for firas_sparse op. * one more RandomBuffer test excluded Signed-off-by: raver119 <raver119@gmail.com> * Added tests for flatten op. * Added test for Floor op. * bunch of tests fixed Signed-off-by: raver119 <raver119@gmail.com> * mmulDot tests fixed Signed-off-by: raver119 <raver119@gmail.com> * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * Implemented floordiv_bp op and tests. * Fixed scalar case with cuda implementation for bds. * - work on cuda kernel for clip_by_norm backprop op is completed Signed-off-by: Yurii <yurii@skymind.io> * Eliminate cbow crach. * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * Eliminated abortion with batched nlp test. * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * Fixed shared flag initializing. * disabled bunch of cpu workspaces tests Signed-off-by: raver119 <raver119@gmail.com> * scalar operators fix: missing registerSpecialUse call Signed-off-by: raver119 <raver119@gmail.com> * Fixed logdet for cuda and tests. * - correct clipBynorm_bp Signed-off-by: Yurii <yurii@skymind.io> * Fixed crop_and_resize shape datatype. * - correct some mmul tests Signed-off-by: Yurii <yurii@skymind.io>
2019-08-02 19:01:03 +02:00
ASSERT_EQ(ND4J_STATUS_OK, result.status());
[WIP] More of CUDA (#95) * initial commit Signed-off-by: raver119 <raver119@gmail.com> * Implementation of hashcode cuda helper. Working edition. * Fixed parallel test input arangements. * Fixed tests for hashcode op. * Fixed shape calculation for image:crop_and_resize op and test. * NativeOps tests. Initial test suite. * Added tests for indexReduce methods. * Added test on execBroadcast with NDArray as dimensions. * Added test on execBroadcastBool with NDArray as dimensions. * Added tests on execPairwiseTransform and execPairwiseTransofrmBool. * Added tests for execReduce with scalar results. * Added reduce tests for non-empty dims array. * Added tests for reduce3. * Added tests for execScalar. * Added tests for execSummaryStats. * - provide cpu/cuda code for batch_to_space - testing it Signed-off-by: Yurii <yurii@skymind.io> * - remove old test for batch_to_space (had wrong format and numbers were not checked) Signed-off-by: Yurii <yurii@skymind.io> * Fixed complilation errors with test. * Added test for execTransformFloat. * Added test for execTransformSame. * Added test for execTransformBool. * Added test for execTransformStrict. * Added tests for execScalar/execScalarBool with TADs. * Added test for flatten. * - provide cpu/cuda code for space_to_Batch operaion Signed-off-by: Yurii <yurii@skymind.io> * Added test for concat. * comment unnecessary stuff in s_t_b Signed-off-by: Yurii <yurii@skymind.io> * Added test for specialConcat. * Added tests for memcpy/set routines. * Fixed pullRow cuda test. * Added pullRow test. * Added average test. * - correct typo in NDArray::applyPairwiseTransform(nd4j::pairwise::BoolOps op...) Signed-off-by: Yurii <yurii@skymind.io> * - debugging and fixing cuda tests in JavaInteropTests file Signed-off-by: Yurii <yurii@skymind.io> * - correct some tests Signed-off-by: Yurii <yurii@skymind.io> * Added test for shuffle. * Fixed ops declarations. * Restored omp and added shuffle test. * Added convertTypes test. * Added tests for execRandom. Eliminated usage of RandomBuffer with NativeOps. * Added sort tests. * Added tests for execCustomOp. * - further debuging and fixing tests terminated with crash Signed-off-by: Yurii <yurii@skymind.io> * Added tests for calculateOutputShapes. * Addded Benchmarks test. * Commented benchmark tests. * change assertion Signed-off-by: raver119 <raver119@gmail.com> * Added tests for apply_sgd op. Added cpu helper for that op. * Implement cuda helper for aplly_sgd op. Fixed tests for NativeOps. * Added test for assign broadcastable. * Added tests for assign_bp op. * Added tests for axpy op. * - assign/execScalar/execTransformAny signature change - minor test fix Signed-off-by: raver119 <raver119@gmail.com> * Fixed axpy op. * meh Signed-off-by: raver119 <raver119@gmail.com> * - fix tests for nativeOps::concat Signed-off-by: Yurii <yurii@skymind.io> * sequential transform/scalar Signed-off-by: raver119 <raver119@gmail.com> * allow nested parallelism Signed-off-by: raver119 <raver119@gmail.com> * assign_bp leak fix Signed-off-by: raver119 <raver119@gmail.com> * block setRNG fix Signed-off-by: raver119 <raver119@gmail.com> * enable parallelism by default Signed-off-by: raver119 <raver119@gmail.com> * enable nested parallelism by default Signed-off-by: raver119 <raver119@gmail.com> * Added cuda implementation for row_count helper. * Added implementation for tnse gains op helper. * - take into account possible situations when input arrays are empty in reduce_ cuda stuff Signed-off-by: Yurii <yurii@skymind.io> * Implemented tsne/edge_forces op cuda-based helper. Parallelized cpu-based helper for edge_forces. * Added kernel for tsne/symmetrized op heleper. * Implementation of tsne/symmetrized op cuda helper. Working edition. * Eliminated waste printfs. * Added test for broadcastgradientargs op. * host-only fallback for empty reduce float Signed-off-by: raver119 <raver119@gmail.com> * - some tests fixes Signed-off-by: Yurii <yurii@skymind.io> * - correct the rest of reduce_ stuff Signed-off-by: Yurii <yurii@skymind.io> * - further correction of reduce_ stuff Signed-off-by: Yurii <yurii@skymind.io> * Added test for Cbow op. Also added cuda implementation for cbow helpers. * - improve code of stack operation for scalar case Signed-off-by: Yurii <yurii@skymind.io> * - provide cuda kernel for gatherND operation Signed-off-by: Yurii <yurii@skymind.io> * Implementation of cbow helpers with cuda kernels. * minor tests tweaks Signed-off-by: raver119 <raver119@gmail.com> * minor tests tweaks Signed-off-by: raver119 <raver119@gmail.com> * - further correction of cuda stuff Signed-off-by: Yurii <yurii@skymind.io> * Implementatation of cbow op helper with cuda kernels. Working edition. * Skip random testing for cudablas case. * lstmBlockCell context fix Signed-off-by: raver119 <raver119@gmail.com> * Added tests for ELU and ELU_BP ops. * Added tests for eq_scalar, gt_scalar, gte_scalar and lte_scalar ops. * Added tests for neq_scalar. * Added test for noop. * - further work on clipbynorm_bp Signed-off-by: Yurii <yurii@skymind.io> * - get rid of concat op call, use instead direct concat helper call Signed-off-by: Yurii <yurii@skymind.io> * lstmBlockCell context fix Signed-off-by: raver119 <raver119@gmail.com> * Added tests for lrelu and lrelu_bp. * Added tests for selu and selu_bp. * Fixed lrelu derivative helpers. * - some corrections in lstm Signed-off-by: Yurii <yurii@skymind.io> * operator * result shape fix Signed-off-by: raver119 <raver119@gmail.com> * - correct typo in lstmCell Signed-off-by: Yurii <yurii@skymind.io> * few tests fixed Signed-off-by: raver119 <raver119@gmail.com> * CUDA inverse broadcast bool fix Signed-off-by: raver119 <raver119@gmail.com> * disable MMAP test for CUDA Signed-off-by: raver119 <raver119@gmail.com> * BooleanOp syncToDevice Signed-off-by: raver119 <raver119@gmail.com> * meh Signed-off-by: raver119 <raver119@gmail.com> * additional data types for im2col/col2im Signed-off-by: raver119 <raver119@gmail.com> * Added test for firas_sparse op. * one more RandomBuffer test excluded Signed-off-by: raver119 <raver119@gmail.com> * Added tests for flatten op. * Added test for Floor op. * bunch of tests fixed Signed-off-by: raver119 <raver119@gmail.com> * mmulDot tests fixed Signed-off-by: raver119 <raver119@gmail.com> * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * Implemented floordiv_bp op and tests. * Fixed scalar case with cuda implementation for bds. * - work on cuda kernel for clip_by_norm backprop op is completed Signed-off-by: Yurii <yurii@skymind.io> * Eliminate cbow crach. * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * Eliminated abortion with batched nlp test. * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * Fixed shared flag initializing. * disabled bunch of cpu workspaces tests Signed-off-by: raver119 <raver119@gmail.com> * scalar operators fix: missing registerSpecialUse call Signed-off-by: raver119 <raver119@gmail.com> * Fixed logdet for cuda and tests. * - correct clipBynorm_bp Signed-off-by: Yurii <yurii@skymind.io> * Fixed crop_and_resize shape datatype. * - correct some mmul tests Signed-off-by: Yurii <yurii@skymind.io>
2019-08-02 19:01:03 +02:00
auto z = result.at(0);
[WIP] More of CUDA (#95) * initial commit Signed-off-by: raver119 <raver119@gmail.com> * Implementation of hashcode cuda helper. Working edition. * Fixed parallel test input arangements. * Fixed tests for hashcode op. * Fixed shape calculation for image:crop_and_resize op and test. * NativeOps tests. Initial test suite. * Added tests for indexReduce methods. * Added test on execBroadcast with NDArray as dimensions. * Added test on execBroadcastBool with NDArray as dimensions. * Added tests on execPairwiseTransform and execPairwiseTransofrmBool. * Added tests for execReduce with scalar results. * Added reduce tests for non-empty dims array. * Added tests for reduce3. * Added tests for execScalar. * Added tests for execSummaryStats. * - provide cpu/cuda code for batch_to_space - testing it Signed-off-by: Yurii <yurii@skymind.io> * - remove old test for batch_to_space (had wrong format and numbers were not checked) Signed-off-by: Yurii <yurii@skymind.io> * Fixed complilation errors with test. * Added test for execTransformFloat. * Added test for execTransformSame. * Added test for execTransformBool. * Added test for execTransformStrict. * Added tests for execScalar/execScalarBool with TADs. * Added test for flatten. * - provide cpu/cuda code for space_to_Batch operaion Signed-off-by: Yurii <yurii@skymind.io> * Added test for concat. * comment unnecessary stuff in s_t_b Signed-off-by: Yurii <yurii@skymind.io> * Added test for specialConcat. * Added tests for memcpy/set routines. * Fixed pullRow cuda test. * Added pullRow test. * Added average test. * - correct typo in NDArray::applyPairwiseTransform(nd4j::pairwise::BoolOps op...) Signed-off-by: Yurii <yurii@skymind.io> * - debugging and fixing cuda tests in JavaInteropTests file Signed-off-by: Yurii <yurii@skymind.io> * - correct some tests Signed-off-by: Yurii <yurii@skymind.io> * Added test for shuffle. * Fixed ops declarations. * Restored omp and added shuffle test. * Added convertTypes test. * Added tests for execRandom. Eliminated usage of RandomBuffer with NativeOps. * Added sort tests. * Added tests for execCustomOp. * - further debuging and fixing tests terminated with crash Signed-off-by: Yurii <yurii@skymind.io> * Added tests for calculateOutputShapes. * Addded Benchmarks test. * Commented benchmark tests. * change assertion Signed-off-by: raver119 <raver119@gmail.com> * Added tests for apply_sgd op. Added cpu helper for that op. * Implement cuda helper for aplly_sgd op. Fixed tests for NativeOps. * Added test for assign broadcastable. * Added tests for assign_bp op. * Added tests for axpy op. * - assign/execScalar/execTransformAny signature change - minor test fix Signed-off-by: raver119 <raver119@gmail.com> * Fixed axpy op. * meh Signed-off-by: raver119 <raver119@gmail.com> * - fix tests for nativeOps::concat Signed-off-by: Yurii <yurii@skymind.io> * sequential transform/scalar Signed-off-by: raver119 <raver119@gmail.com> * allow nested parallelism Signed-off-by: raver119 <raver119@gmail.com> * assign_bp leak fix Signed-off-by: raver119 <raver119@gmail.com> * block setRNG fix Signed-off-by: raver119 <raver119@gmail.com> * enable parallelism by default Signed-off-by: raver119 <raver119@gmail.com> * enable nested parallelism by default Signed-off-by: raver119 <raver119@gmail.com> * Added cuda implementation for row_count helper. * Added implementation for tnse gains op helper. * - take into account possible situations when input arrays are empty in reduce_ cuda stuff Signed-off-by: Yurii <yurii@skymind.io> * Implemented tsne/edge_forces op cuda-based helper. Parallelized cpu-based helper for edge_forces. * Added kernel for tsne/symmetrized op heleper. * Implementation of tsne/symmetrized op cuda helper. Working edition. * Eliminated waste printfs. * Added test for broadcastgradientargs op. * host-only fallback for empty reduce float Signed-off-by: raver119 <raver119@gmail.com> * - some tests fixes Signed-off-by: Yurii <yurii@skymind.io> * - correct the rest of reduce_ stuff Signed-off-by: Yurii <yurii@skymind.io> * - further correction of reduce_ stuff Signed-off-by: Yurii <yurii@skymind.io> * Added test for Cbow op. Also added cuda implementation for cbow helpers. * - improve code of stack operation for scalar case Signed-off-by: Yurii <yurii@skymind.io> * - provide cuda kernel for gatherND operation Signed-off-by: Yurii <yurii@skymind.io> * Implementation of cbow helpers with cuda kernels. * minor tests tweaks Signed-off-by: raver119 <raver119@gmail.com> * minor tests tweaks Signed-off-by: raver119 <raver119@gmail.com> * - further correction of cuda stuff Signed-off-by: Yurii <yurii@skymind.io> * Implementatation of cbow op helper with cuda kernels. Working edition. * Skip random testing for cudablas case. * lstmBlockCell context fix Signed-off-by: raver119 <raver119@gmail.com> * Added tests for ELU and ELU_BP ops. * Added tests for eq_scalar, gt_scalar, gte_scalar and lte_scalar ops. * Added tests for neq_scalar. * Added test for noop. * - further work on clipbynorm_bp Signed-off-by: Yurii <yurii@skymind.io> * - get rid of concat op call, use instead direct concat helper call Signed-off-by: Yurii <yurii@skymind.io> * lstmBlockCell context fix Signed-off-by: raver119 <raver119@gmail.com> * Added tests for lrelu and lrelu_bp. * Added tests for selu and selu_bp. * Fixed lrelu derivative helpers. * - some corrections in lstm Signed-off-by: Yurii <yurii@skymind.io> * operator * result shape fix Signed-off-by: raver119 <raver119@gmail.com> * - correct typo in lstmCell Signed-off-by: Yurii <yurii@skymind.io> * few tests fixed Signed-off-by: raver119 <raver119@gmail.com> * CUDA inverse broadcast bool fix Signed-off-by: raver119 <raver119@gmail.com> * disable MMAP test for CUDA Signed-off-by: raver119 <raver119@gmail.com> * BooleanOp syncToDevice Signed-off-by: raver119 <raver119@gmail.com> * meh Signed-off-by: raver119 <raver119@gmail.com> * additional data types for im2col/col2im Signed-off-by: raver119 <raver119@gmail.com> * Added test for firas_sparse op. * one more RandomBuffer test excluded Signed-off-by: raver119 <raver119@gmail.com> * Added tests for flatten op. * Added test for Floor op. * bunch of tests fixed Signed-off-by: raver119 <raver119@gmail.com> * mmulDot tests fixed Signed-off-by: raver119 <raver119@gmail.com> * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * Implemented floordiv_bp op and tests. * Fixed scalar case with cuda implementation for bds. * - work on cuda kernel for clip_by_norm backprop op is completed Signed-off-by: Yurii <yurii@skymind.io> * Eliminate cbow crach. * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * Eliminated abortion with batched nlp test. * more tests fixed Signed-off-by: raver119 <raver119@gmail.com> * Fixed shared flag initializing. * disabled bunch of cpu workspaces tests Signed-off-by: raver119 <raver119@gmail.com> * scalar operators fix: missing registerSpecialUse call Signed-off-by: raver119 <raver119@gmail.com> * Fixed logdet for cuda and tests. * - correct clipBynorm_bp Signed-off-by: Yurii <yurii@skymind.io> * Fixed crop_and_resize shape datatype. * - correct some mmul tests Signed-off-by: Yurii <yurii@skymind.io>
2019-08-02 19:01:03 +02:00
// z->printIndexedBuffer("Flatten1");
// z->printShapeInfo("Flatten1 shape");
ASSERT_TRUE(exp.equalsTo(z));
2019-06-06 14:21:15 +02:00
}
TEST_F(DeclarableOpsTests4, Test_Split_1) {
auto x = NDArrayFactory::create<double>('c', {5, 30});
auto sizes = NDArrayFactory::create<int>('c', {1, 3}, {4, 15, 11});
std::vector<Nd4jLong> list0({0,0, 0,4});
std::vector<Nd4jLong> list1({0,0, 4,19});
std::vector<Nd4jLong> list2({0,0, 19,30});
auto sub0 = x(list0, true);
auto sub1 = x(list1, true);
auto sub2 = x(list2, true);
sub0.assign(0.0);
sub1.assign(1.0);
sub2.assign(2.0);
sd::ops::split_v op;
auto result = op.evaluate({&x, &sizes}, {}, {1});
2019-06-06 14:21:15 +02:00
ASSERT_EQ(ND4J_STATUS_OK, result.status());
2019-06-06 14:21:15 +02:00
ASSERT_EQ(3, result.size());
2019-06-06 14:21:15 +02:00
auto z0 = result.at(0);
auto z1 = result.at(1);
auto z2 = result.at(2);
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(sub0.isSameShape(z0));
ASSERT_TRUE(sub1.isSameShape(z1));
ASSERT_TRUE(sub2.isSameShape(z2));
ASSERT_TRUE(sub0.equalsTo(z0));
ASSERT_TRUE(sub1.equalsTo(z1));
ASSERT_TRUE(sub2.equalsTo(z2));
2019-06-06 14:21:15 +02:00
}
// special test for TF mode, when axis goes first
TEST_F(DeclarableOpsTests4, Test_Split_2) {
auto x = NDArrayFactory::create<double>('c', {5, 12});
auto axis = NDArrayFactory::create<double>('c', {1, 1}, {1.f});
std::vector<Nd4jLong> list0 = {0,0, 0,3};
std::vector<Nd4jLong> list1 = {0,0, 3,6};
std::vector<Nd4jLong> list2 = {0,0, 6,9};
std::vector<Nd4jLong> list3 = {0,0, 9,12};
auto sub0 = x(list0, true);
auto sub1 = x(list1, true);
auto sub2 = x(list2, true);
auto sub3 = x(list3, true);
sub0.assign(0.0f);
sub1.assign(1.0f);
sub2.assign(2.0f);
sub3.assign(3.0f);
sd::ops::split op;
auto result = op.evaluate({&axis, &x}, {}, {4});
ASSERT_EQ(ND4J_STATUS_OK, result.status());
2019-06-06 14:21:15 +02:00
auto z0 = result.at(0);
auto z1 = result.at(1);
auto z2 = result.at(2);
auto z3 = result.at(3);
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(sub0.isSameShape(z0));
ASSERT_TRUE(sub1.isSameShape(z1));
ASSERT_TRUE(sub2.isSameShape(z2));
ASSERT_TRUE(sub3.isSameShape(z3));
ASSERT_TRUE(sub0.equalsTo(z0));
ASSERT_TRUE(sub1.equalsTo(z1));
ASSERT_TRUE(sub2.equalsTo(z2));
ASSERT_TRUE(sub3.equalsTo(z3));
2019-06-06 14:21:15 +02:00
}
// special test for TF mode, when axis goes first
TEST_F(DeclarableOpsTests4, Test_Split_3) {
auto x = NDArrayFactory::create<double>('c', {6, 12});
auto axis = NDArrayFactory::create<double>('c', {1, 1}, {0.f});
std::vector<Nd4jLong> list0 = {0,2, 0,0};
std::vector<Nd4jLong> list1 = {2,4, 0,0};
std::vector<Nd4jLong> list2 = {4,6, 0,0};
auto sub0 = x(list0, true);
auto sub1 = x(list1, true);
auto sub2 = x(list2, true);
sub0.assign(0.0f);
sub1.assign(1.0f);
sub2.assign(2.0f);
sd::ops::split op;
auto result = op.evaluate({&axis, &x}, {}, {3});
ASSERT_EQ(ND4J_STATUS_OK, result.status());
2019-06-06 14:21:15 +02:00
auto z0 = result.at(0);
auto z1 = result.at(1);
auto z2 = result.at(2);
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(sub0.isSameShape(z0));
ASSERT_TRUE(sub1.isSameShape(z1));
ASSERT_TRUE(sub2.isSameShape(z2));
ASSERT_TRUE(sub0.equalsTo(z0));
ASSERT_TRUE(sub1.equalsTo(z1));
ASSERT_TRUE(sub2.equalsTo(z2));
}
///////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests4, split_test4) {
auto input = NDArrayFactory::create<double>('c', {10},{1.f,2.f,3.f,4.f,5.f,6.f,7.f,8.f,9.f,10.f});
auto axis = NDArrayFactory::create<double>(-1);
auto exp1 = NDArrayFactory::create<double>('c', {5}, {1.f,2.f,3.f,4.f,5.f});
auto exp2 = NDArrayFactory::create<double>('c', {5}, {6.f,7.f,8.f,9.f,10.f});
sd::ops::split op;
auto results = op.evaluate({&input, &axis}, {}, {2}, {});
ASSERT_EQ(ND4J_STATUS_OK, results.status());
auto out1 = results.at(0);
auto out2 = results.at(1);
ASSERT_TRUE(exp1.isSameShape(out1));
ASSERT_TRUE(exp2.isSameShape(out2));
ASSERT_TRUE(exp1.equalsTo(out1));
ASSERT_TRUE(exp2.equalsTo(out2));
}
///////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests4, split_test5) {
auto input = NDArrayFactory::create<double>('c', {3,8},{1.f,2.f,3.f,4.f,5.f,6.f,7.f,8.f,9.f,10.f,11.f,12.f,13.f,14.f,15.f,16.f,17.f,18.f,19.f,20.f,21.f,22.f,23.f,24.f});
auto exp1 = NDArrayFactory::create<double>('c', {3,4}, {1.f,2.f,3.f,4.f, 9.f,10.f,11.f,12.f, 17.f,18.f,19.f,20.f});
auto exp2 = NDArrayFactory::create<double>('c', {3,4}, {5.f,6.f,7.f,8.f, 13.f,14.f,15.f,16.f, 21.f,22.f,23.f,24.f});
sd::ops::split op;
auto results = op.evaluate({&input}, {}, {2,-1},{});
ASSERT_EQ(ND4J_STATUS_OK, results.status());
auto out1 = results.at(0);
auto out2 = results.at(1);
ASSERT_TRUE(exp1.isSameShape(out1));
ASSERT_TRUE(exp2.isSameShape(out2));
ASSERT_TRUE(exp1.equalsTo(out1));
ASSERT_TRUE(exp2.equalsTo(out2));
}
///////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests4, split_test6) {
NDArray input('c', {0,4}, sd::DataType::FLOAT32);
std::vector<Nd4jLong> expShape = {0,1};
const int numSplits = 4;
const int axis = 1;
sd::ops::split op;
auto results = op.evaluate({&input}, {}, {numSplits, axis}, {});
ASSERT_EQ(ND4J_STATUS_OK, results.status());
for (int i = 0; i < numSplits; ++i)
ASSERT_TRUE(results.at(i)->isSameShape(expShape));
}
///////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests4, split_test7) {
NDArray input('c', {0,4}, sd::DataType::FLOAT32);
std::vector<Nd4jLong> expShape = {0,4};
const int numSplits = 4;
const int axis = 0;
2019-06-06 14:21:15 +02:00
sd::ops::split op;
auto results = op.evaluate({&input}, {}, {numSplits, axis}, {});
ASSERT_EQ(ND4J_STATUS_OK, results.status());
for (int i = 0; i < numSplits; ++i)
ASSERT_TRUE(results.at(i)->isSameShape(expShape));
2019-06-06 14:21:15 +02:00
}
2019-06-06 14:21:15 +02:00
TEST_F(DeclarableOpsTests4, Test_Squeeze_args_1) {
auto x = NDArrayFactory::create<double>('c', {2, 1, 1, 1, 2}, {1, 2, 3, 4});
auto exp = NDArrayFactory::create<double>('c', {2, 1, 2}, {1, 2, 3, 4});
sd::ops::squeeze op;
auto result = op.evaluate({&x}, {}, {1, 3});
ASSERT_EQ(ND4J_STATUS_OK, result.status());
2019-06-06 14:21:15 +02:00
auto z = result.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(exp.isSameShape(z));
ASSERT_TRUE(exp.equalsTo(z));
2019-06-06 14:21:15 +02:00
}
TEST_F(DeclarableOpsTests4, Test_Squeeze_args_2) {
auto x = NDArrayFactory::create<double>('c', {2, 1, 1, 1, 2}, {1, 2, 3, 4});
auto y = NDArrayFactory::create<double>('c', {2}, {1.f, 3.f});
auto exp = NDArrayFactory::create<double>('c', {2, 1, 2}, {1, 2, 3, 4});
sd::ops::squeeze op;
auto result = op.evaluate({&x, &y}, {}, {});
ASSERT_EQ(ND4J_STATUS_OK, result.status());
2019-06-06 14:21:15 +02:00
auto z = result.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(exp.isSameShape(z));
ASSERT_TRUE(exp.equalsTo(z));
2019-06-06 14:21:15 +02:00
}
TEST_F(DeclarableOpsTests4, Test_Squeeze_args_3) {
auto x = NDArrayFactory::create<double>('c', {2, 1, 1, 1, 2}, {1, 2, 3, 4});
auto exp = NDArrayFactory::create<double>('c', {2, 1, 2}, {1, 2, 3, 4});
sd::ops::squeeze op;
auto result = op.evaluate({&x}, {}, {-2, -3});
ASSERT_EQ(ND4J_STATUS_OK, result.status());
2019-06-06 14:21:15 +02:00
auto z = result.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(exp.isSameShape(z));
ASSERT_TRUE(exp.equalsTo(z));
2019-06-06 14:21:15 +02:00
}
TEST_F(DeclarableOpsTests4, Test_SpaceToDepth_1) {
auto x = NDArrayFactory::create<double>('c', {1, 2, 2, 3}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
auto exp = NDArrayFactory::create<double>('c', {1, 1, 1, 12}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
sd::ops::space_to_depth op;
auto result = op.evaluate({&x}, {}, {2, 1});
ASSERT_EQ(ND4J_STATUS_OK, result.status());
2019-06-06 14:21:15 +02:00
auto z = result.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(exp.isSameShape(z));
ASSERT_TRUE(exp.equalsTo(z));
2019-06-06 14:21:15 +02:00
}
TEST_F(DeclarableOpsTests4, Test_SpaceToDepth_2) {
auto x = NDArrayFactory::create<double>('c', {1, 3, 2, 2}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
auto exp = NDArrayFactory::create<double>('c', {1, 12, 1, 1}, {1, 5, 9, 2, 6, 10, 3, 7, 11, 4, 8, 12});
sd::ops::space_to_depth op;
auto result = op.evaluate({&x}, {}, {2, 0});
ASSERT_EQ(ND4J_STATUS_OK, result.status());
2019-06-06 14:21:15 +02:00
auto z = result.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(exp.isSameShape(z));
ASSERT_TRUE(exp.equalsTo(z));
2019-06-06 14:21:15 +02:00
}
TEST_F(DeclarableOpsTests4, Test_DepthToSpace_1) {
auto x = NDArrayFactory::create<double>('c', {1, 1, 1, 12}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
auto exp = NDArrayFactory::create<double>('c', {1, 2, 2, 3}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
sd::ops::depth_to_space op;
auto result = op.evaluate({&x}, {}, {2, 1});
ASSERT_EQ(ND4J_STATUS_OK, result.status());
2019-06-06 14:21:15 +02:00
auto z = result.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(exp.isSameShape(z));
ASSERT_TRUE(exp.equalsTo(z));
2019-06-06 14:21:15 +02:00
}
TEST_F(DeclarableOpsTests4, Test_DepthToSpace_2) {
auto x = NDArrayFactory::create<double>('c', {1, 12, 1, 1}, {1, 5, 9, 2, 6, 10, 3, 7, 11, 4, 8, 12});
auto exp = NDArrayFactory::create<double>('c', {1, 3, 2, 2}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
sd::ops::depth_to_space op;
auto result = op.evaluate({&x}, {}, {2, 0});
ASSERT_EQ(ND4J_STATUS_OK, result.status());
2019-06-06 14:21:15 +02:00
auto z = result.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(exp.isSameShape(z));
ASSERT_TRUE(exp.equalsTo(z));
2019-06-06 14:21:15 +02:00
}
TEST_F(DeclarableOpsTests4, Test_DepthToSpace_3) {
auto x = NDArrayFactory::create<double>('c', {4, 4, 16, 16});
auto exp = NDArrayFactory::create<double>('c', {4, 16, 64, 1});
sd::ops::depth_to_space op;
auto result = op.evaluate({&x}, {}, {4, 1});
ASSERT_EQ(Status::OK(), result.status());
2019-06-06 14:21:15 +02:00
auto z = result.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(exp.isSameShape(z));
2019-06-06 14:21:15 +02:00
}
TEST_F(DeclarableOpsTests4, Test_Cross_1) {
auto a = NDArrayFactory::create<double>('c', {3}, {1, 2, 3});
auto b = NDArrayFactory::create<double>('c', {3}, {6, 7, 8});
auto exp = NDArrayFactory::create<double>('c', {3}, {-5, 10, -5});
sd::ops::cross op;
auto result = op.evaluate({&a, &b});
ASSERT_EQ(ND4J_STATUS_OK, result.status());
2019-06-06 14:21:15 +02:00
auto z = result.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(exp.isSameShape(z));
ASSERT_TRUE(exp.equalsTo(z));
2019-06-06 14:21:15 +02:00
}
TEST_F(DeclarableOpsTests4, Test_Cross_2) {
auto a = NDArrayFactory::create<double>('c', {2, 3}, {1, 2, 3, 1, 2, 3});
auto b = NDArrayFactory::create<double>('c', {2, 3}, {6, 7, 8, 6, 7, 8});
auto exp = NDArrayFactory::create<double>('c', {2, 3}, {-5, 10, -5, -5, 10, -5});
sd::ops::cross op;
auto result = op.evaluate({&a, &b});
ASSERT_EQ(ND4J_STATUS_OK, result.status());
2019-06-06 14:21:15 +02:00
auto z = result.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(exp.isSameShape(z));
ASSERT_TRUE(exp.equalsTo(z));
2019-06-06 14:21:15 +02:00
}
TEST_F(DeclarableOpsTests4, Test_Cross_3) {
auto a = NDArrayFactory::create<double>('c', {3, 3}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
auto b = NDArrayFactory::create<double>('c', {3, 3}, {2, 3, 4, 7, 6, 5, 6, 3, 2});
auto exp = NDArrayFactory::create<double>('c', {3, 3}, { -1, 2, -1, -11, 22, -11, -11, 40, -27});
sd::ops::cross op;
auto result = op.evaluate({&a, &b});
ASSERT_EQ(ND4J_STATUS_OK, result.status());
2019-06-06 14:21:15 +02:00
auto z = result.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(exp.isSameShape(z));
ASSERT_TRUE(exp.equalsTo(z));
2019-06-06 14:21:15 +02:00
}
TEST_F(DeclarableOpsTests4, Test_Add_119) {
auto a = NDArrayFactory::create<double>('c', {1, 4}, {1, 2, 3, 4});
auto b = NDArrayFactory::create<double>('c', {4}, {1, 2, 3, 4});
auto exp = NDArrayFactory::create<double>('c', {1, 4}, {2, 4, 6, 8});
sd::ops::add op;
auto result = op.evaluate({&a, &b});
2019-06-06 14:21:15 +02:00
ASSERT_EQ(ND4J_STATUS_OK, result.status());
2019-06-06 14:21:15 +02:00
auto z = result.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_EQ(2, z->rankOf());
ASSERT_TRUE(exp.isSameShape(z));
ASSERT_TRUE(exp.equalsTo(z));
2019-06-06 14:21:15 +02:00
}
TEST_F(DeclarableOpsTests4, Test_TileToShape_1) {
auto x = NDArrayFactory::create<double>('c', {2, 1, 3});
auto exp = NDArrayFactory::create<double>('c', {2, 4, 3}, {1.f, 2.f, 3.f,1.f, 2.f, 3.f,1.f, 2.f, 3.f,1.f, 2.f, 3.f,
4.f, 5.f, 6.f,4.f, 5.f, 6.f,4.f, 5.f, 6.f,4.f, 5.f, 6.f});
x.linspace(1.f);
sd::ops::tile_to_shape op;
auto result = op.evaluate({&x},{}, {2, 4, 3});
2019-06-06 14:21:15 +02:00
ASSERT_EQ(Status::OK(), result.status());
2019-06-06 14:21:15 +02:00
auto z = result.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(exp.isSameShape(z));
ASSERT_TRUE(exp.equalsTo(z));
2019-06-06 14:21:15 +02:00
}
TEST_F(DeclarableOpsTests4, Test_StridedSlice_Alex_1) {
auto x = NDArrayFactory::create<double>('c', {3, 4, 5});
x.linspace(1);
auto exp = NDArrayFactory::create<double>('c', {1,3,4,5});
exp.linspace(1);
sd::ops::strided_slice op;
auto result = op.evaluate({&x}, {}, {0,0,0,1,0, -999,0,0,0, -999,3,4,5, -999,1,1,1});
2019-06-06 14:21:15 +02:00
ASSERT_EQ(Status::OK(), result.status());
2019-06-06 14:21:15 +02:00
auto z = result.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(exp.isSameShape(z));
ASSERT_TRUE(exp.equalsTo(z));
2019-06-06 14:21:15 +02:00
}
TEST_F(DeclarableOpsTests4, Test_StridedSlice_Alex_2) {
auto x = NDArrayFactory::create<double>('c', {3, 4, 5});
auto begin = NDArrayFactory::create<double>('c', {4}, {-999,0,0,0});
auto end = NDArrayFactory::create<double>('c', {4}, {-999,3,4,5});
auto stride = NDArrayFactory::create<double>('c', {4}, {-999,1,1,1});
x.linspace(1);
auto exp = NDArrayFactory::create<double>('c', {1,3,4,5});
exp.linspace(1);
sd::ops::strided_slice op;
auto result = op.evaluate({&x, &begin, &end, &stride}, {}, {0,0,0,1,0});
2019-06-06 14:21:15 +02:00
ASSERT_EQ(Status::OK(), result.status());
2019-06-06 14:21:15 +02:00
ASSERT_EQ(Status::OK(), result.status());
2019-06-06 14:21:15 +02:00
auto z = result.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(exp.isSameShape(z));
ASSERT_TRUE(exp.equalsTo(z));
2019-06-06 14:21:15 +02:00
}
TEST_F(DeclarableOpsTests4, Test_StridedSlice_Alex_3) {
int axis = 0;
2019-06-06 14:21:15 +02:00
auto x = NDArrayFactory::create<double>('c', {1}, {10});
auto begin = NDArrayFactory::create<int>('c', {1}, {axis});
auto end = NDArrayFactory::create<int>('c', {1}, {axis});
Dev branch merge: dev_20190606 (#7904) * correct logsoftmax looss (#2) * Small SameDiff listener fix (#4) * Various fixes (#6) * #7839 Fix for asXMatrix and tests * #7866 EmbeddingSequenceLayer dtype fix + test * #7856 SameDiff save/load stream methods * #7859 RegressionEvaluation rank 4 fix + tests + axis configuration * EvaluationBinary 3d/4d * More evaluation 3d/4d tests * #7847 Evaluation empty checks * Small test ifx * #7848 Fix median edge case * Improve DL4J samediff layer tests * [WIP] FastText wrapper implemented (#8) * FastText implemented * Some fixes * Fix shapes for wordsNearest * Validation of input vectors * Fixes * Fixed test * Thread tagged * Some tweaks * setContextClassLoader for DeallocatorServiceThread * Numpy format tests (#1) * Various fixes (#11) * #7852 SameDiff gather fix * #7892 SameDiff placeholder to constant conversion * #7890 validate input rank for MLN/CG init methods * Fix broken permute shape calculation * Permute and gather fixes * Tests * #7850 LogSumExp fix + test * Handful of test fixes * Empty arrays with non-scalar shapes (#10) * minor rearrangements for lambdas * empty tensors with non-scalar shapes * numpy empty tensors with non-scalar shapes * few more empty tweaks * Small fixes * conv3d signature update * micro fix in batchnorm mkldnn * Import fixes * Fix * MKL-DNN update * Small fill fix * fill with empty input + test * Fixes * Small error improvement * Fix * one special test * couple of fixes for lstm * Rewrite TFGraphMapper.getNDArrayFromTensor to be maintainable and less error prone * Fixes * FP16 * Unsigned * BFloat16 * Fill op - empty tweaks * - couple of fixes for empty arrays construction - stack updated * strided slice fix * one transform test * provide method for reducing shapeInfo in case of input array is empty * Fixed reduceAlongDimensions to use empty input properly. * couple of broadcast tests * couple of tests broadcast tests + tweak to make them pass * add check of non-empty to methods producing sub-arrays * Fixed reshapeC with zeros in shape. * complete empty check in reduce_... legacy ops * Concat and cumsum/prod * Tweak to empty shape inference on import * add empty check to the rest of reduce legacy ops * one more test * correct typo in evalReduceShapeInfoEmpty * Added tests for reduce_* ops to tests with zero shapes. * few more tests for empty reductions * Fixed strided_slice op with empty case and tests. * one more empty reduction test * Fixed strided_slice test. * add empty check to NDArray::reshapei * infOrMax * empty min/max with infinity tests * made unstack working correctly with empty arrays * few IndexReduce tests + tweaks for empty shapes * add test for empty concat * few tests fixed * Validation fix for reductions on empty shapes * Reverse fix * Reduction shape calc fixes * SameDiff.generateOutputVariable: don't use shape function to determine number of outputs * Range fix * - NDArray constructor updated for scalars/empty arrays - few tests fixed * More fixes * Empty creator fixes * concat fix * concat fix * TF import tests: allow 'both all NaN' and 'both all inf' to pass * Slice, zero fraction, and reshape fixes * transpose, gather * Zero fraction * scalar cast fix * Empty reduction axis support * few more tests fixed * Fixed input checks conforming with TF for concat op and tests. * few tests fixed * matmul scalar shape fix * Fixed checkout for data type and scalarity with concat to allow non-empty scalars with vector concats. * broadcast bool fix * few more tests * few more tests * correct evalReduceShapeInfoEmpty * argmax/argmin + tests * one more empty edge case + one more test * argmax/argmin/realdiv_bp tweaks * empty reshape test + fix * Helper fixes * Small fixes * Gather test fix * Gather test fix * Small fixes * reduce scalar zero values * scalar mean workaround * Remove debug code * along dim mean workaround * one more test * - equalsTo() tweak for empty arrays - one more test * broadcast tweaks
2019-06-15 13:34:34 +02:00
auto stride = NDArrayFactory::create<int>('c', {1}, {1});
2019-06-06 14:21:15 +02:00
//x.linspace(1);
//auto exp = NDArrayFactory::create<double>('c', {1,3,4,5});
//exp.linspace(1);
sd::ops::strided_slice op;
auto result = op.evaluate({&x, &begin, &end, &stride}, {}, {1,0,0,0,0});
2019-06-06 14:21:15 +02:00
ASSERT_EQ(Status::OK(), result.status());
2019-06-06 14:21:15 +02:00
auto z = result.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(z->isEmpty());
2019-06-06 14:21:15 +02:00
}
TEST_F(DeclarableOpsTests4, Test_StridedSlice_Alex_4) {
auto x = NDArrayFactory::create<double>('c', {1,3}, {1, 2, 3});
auto begin = NDArrayFactory::create<double>('c', {2}, {0, 0});
auto end = NDArrayFactory::create<double>('c', {2}, {0,1});
auto stride = NDArrayFactory::create<double>('c', {2}, {1,1});
// x.linspace(1);
auto exp = NDArrayFactory::create<double>('c', {1}, {1});
//exp.linspace(1);
sd::ops::strided_slice op;
auto result = op.evaluate({&x, &begin, &end, &stride}, {}, {1,0,1,0,2});
2019-06-06 14:21:15 +02:00
ASSERT_EQ(Status::OK(), result.status());
2019-06-06 14:21:15 +02:00
auto z = result.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(z->lengthOf() == 1);
ASSERT_TRUE(exp.equalsTo(z));
2019-06-06 14:21:15 +02:00
}
//////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests4, parallel_stack_test1) {
auto x1 = NDArrayFactory::create<double>('c', {2,2,2});
auto x2 = NDArrayFactory::create<double>('c', {2,2,2});
auto x3 = NDArrayFactory::create<double>('c', {2,2,2});
x1.linspace(1);
x2.linspace(9);
x3.linspace(17);
auto expected = NDArrayFactory::create<double>('c', {3,2,2,2});
expected.linspace(1);
sd::ops::parallel_stack op;
auto results = op.evaluate({&x1, &x2, &x3});
auto output = results.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_EQ(Status::OK(), results.status());
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(expected.isSameShape(output));
ASSERT_TRUE(expected.equalsTo(output));
2019-06-06 14:21:15 +02:00
}
//////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests4, parallel_stack_test2) {
auto x1 = NDArrayFactory::create<double>('c', {1,2}, {1,2});
auto x2 = NDArrayFactory::create<double>('c', {1,2}, {3,4});
auto x3 = NDArrayFactory::create<double>('c', {1,2}, {5,6});
auto expected = NDArrayFactory::create<double>('c', {3,1,2}, {1,2,3,4,5,6});
sd::ops::parallel_stack op;
auto results = op.evaluate({&x1, &x2, &x3});
auto output = results.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_EQ(Status::OK(), results.status());
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(expected.isSameShape(output));
ASSERT_TRUE(expected.equalsTo(output));
2019-06-06 14:21:15 +02:00
}
//////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests4, parallel_stack_test3) {
auto x1 = NDArrayFactory::create<double>('c', {2,1}, {1,2});
auto x2 = NDArrayFactory::create<double>('c', {2,1}, {3,4});
auto x3 = NDArrayFactory::create<double>('c', {2,1}, {5,6});
auto expected = NDArrayFactory::create<double>('c', {3,2,1}, {1,2,3,4,5,6});
sd::ops::parallel_stack op;
auto results = op.evaluate({&x1, &x2, &x3});
auto output = results.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_EQ(Status::OK(), results.status());
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(expected.isSameShape(output));
ASSERT_TRUE(expected.equalsTo(output));
2019-06-06 14:21:15 +02:00
}
\
//////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests4, parallel_stack_test4) {
auto x1 = NDArrayFactory::create<double>('c', {2}, {1,2});
auto x2 = NDArrayFactory::create<double>('c', {2}, {3,4});
auto x3 = NDArrayFactory::create<double>('c', {2}, {5,6});
auto expected = NDArrayFactory::create<double>('c', {3,2}, {1,2,3,4,5,6});
sd::ops::parallel_stack op;
auto results = op.evaluate({&x1, &x2, &x3});
auto output = results.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_EQ(Status::OK(), results.status());
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(expected.isSameShape(output));
ASSERT_TRUE(expected.equalsTo(output));
2019-06-06 14:21:15 +02:00
}
//////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests4, parallel_stack_test5) {
auto x1 = NDArrayFactory::create<double>('c', {1}, {1});
auto x2 = NDArrayFactory::create<double>('c', {1}, {3});
auto x3 = NDArrayFactory::create<double>('c', {1}, {5});
auto expected = NDArrayFactory::create<double>('c', {3,1}, {1,3,5});
sd::ops::parallel_stack op;
auto results = op.evaluate({&x1, &x2, &x3});
auto output = results.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_EQ(Status::OK(), results.status());
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(expected.isSameShape(output));
ASSERT_TRUE(expected.equalsTo(output));
2019-06-06 14:21:15 +02:00
}
//////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests4, parallel_stack_test6) {
auto x1 = NDArrayFactory::create<double>(1.);
auto x2 = NDArrayFactory::create<double>(3.);
auto x3 = NDArrayFactory::create<double>(5.);
auto expected = NDArrayFactory::create<double>('c', {3}, {1,3,5});
sd::ops::parallel_stack op;
auto results = op.evaluate({&x1, &x2, &x3});
auto output = results.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_EQ(Status::OK(), results.status());
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(expected.isSameShape(output));
ASSERT_TRUE(expected.equalsTo(output));
2019-06-06 14:21:15 +02:00
}
//////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests4, parallel_stack_test7) {
auto x1 = NDArrayFactory::create<double>(1.);
auto expected = NDArrayFactory::create<double>('c', {1}, {1.});
sd::ops::parallel_stack op;
auto results = op.evaluate({&x1});
auto output = results.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_EQ(Status::OK(), results.status());
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(expected.isSameShape(output));
ASSERT_TRUE(expected.equalsTo(output));
2019-06-06 14:21:15 +02:00
}
//////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests4, meshgrid_test1) {
auto in0 = NDArrayFactory::create<double>('c', {2}, {1, 2});
auto in1 = NDArrayFactory::create<double>('c', {3}, {10, 20, 30});
auto in2 = NDArrayFactory::create<double>('c', {4}, {100, 200, 300, 400});
auto exp0 = NDArrayFactory::create<double>('c', {2,3,4}, {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2});
auto exp1 = NDArrayFactory::create<double>('c', {2,3,4}, {10, 10, 10, 10, 20, 20, 20, 20, 30, 30, 30, 30, 10, 10, 10, 10, 20, 20, 20, 20, 30, 30, 30, 30});
auto exp2 = NDArrayFactory::create<double>('c', {2,3,4}, {100, 200, 300, 400, 100, 200, 300, 400, 100, 200, 300, 400, 100, 200, 300, 400, 100, 200, 300, 400, 100, 200, 300, 400});
sd::ops::meshgrid op;
auto results = op.evaluate({&in0, &in1, &in2}, {}, {0});
auto out0 = results.at(0);
auto out1 = results.at(1);
auto out2 = results.at(2);
2019-06-06 14:21:15 +02:00
// out0->printIndexedBuffer();
ASSERT_EQ(Status::OK(), results.status());
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(exp0.isSameShape(out0));
ASSERT_TRUE(exp0.equalsTo(out0));
ASSERT_TRUE(exp1.isSameShape(out1));
ASSERT_TRUE(exp1.equalsTo(out1));
ASSERT_TRUE(exp2.isSameShape(out2));
ASSERT_TRUE(exp2.equalsTo(out2));
2019-06-06 14:21:15 +02:00
}
//////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests4, meshgrid_test2) {
auto in0 = NDArrayFactory::create<double>('c', {2}, {1, 2});
auto in1 = NDArrayFactory::create<double>('c', {3}, {10, 20, 30});
auto in2 = NDArrayFactory::create<double>('c', {4}, {100, 200, 300, 400});
auto exp0 = NDArrayFactory::create<double>('c', {3,2,4}, {1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2});
auto exp1 = NDArrayFactory::create<double>('c', {3,2,4}, {10, 10, 10, 10, 10, 10, 10, 10, 20, 20, 20, 20, 20, 20, 20, 20, 30, 30, 30, 30, 30, 30, 30, 30});
auto exp2 = NDArrayFactory::create<double>('c', {3,2,4}, {100, 200, 300, 400, 100, 200, 300, 400, 100, 200, 300, 400, 100, 200, 300, 400, 100, 200, 300, 400, 100, 200, 300, 400});
sd::ops::meshgrid op;
auto results = op.evaluate({&in0, &in1, &in2});
auto out0 = results.at(0);
auto out1 = results.at(1);
auto out2 = results.at(2);
2019-06-06 14:21:15 +02:00
ASSERT_EQ(Status::OK(), results.status());
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(exp0.isSameShape(out0));
ASSERT_TRUE(exp0.equalsTo(out0));
ASSERT_TRUE(exp1.isSameShape(out1));
ASSERT_TRUE(exp1.equalsTo(out1));
ASSERT_TRUE(exp2.isSameShape(out2));
ASSERT_TRUE(exp2.equalsTo(out2));
2019-06-06 14:21:15 +02:00
}
//////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests4, meshgrid_test3) {
auto in0 = NDArrayFactory::create<double>('c', {2}, {1, 2});
auto in1 = NDArrayFactory::create<double>('c', {1,3}, {10, 20, 30});
auto in2 = NDArrayFactory::create<double>('c', {2,2}, {100, 200, 300, 400});
auto exp0 = NDArrayFactory::create<double>('c', {3,2,4}, {1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2});
auto exp1 = NDArrayFactory::create<double>('c', {3,2,4}, {10, 10, 10, 10, 10, 10, 10, 10, 20, 20, 20, 20, 20, 20, 20, 20, 30, 30, 30, 30, 30, 30, 30, 30});
auto exp2 = NDArrayFactory::create<double>('c', {3,2,4}, {100, 200, 300, 400, 100, 200, 300, 400, 100, 200, 300, 400, 100, 200, 300, 400, 100, 200, 300, 400, 100, 200, 300, 400});
sd::ops::meshgrid op;
auto results = op.evaluate({&in0, &in1, &in2});
auto out0 = results.at(0);
auto out1 = results.at(1);
auto out2 = results.at(2);
2019-06-06 14:21:15 +02:00
ASSERT_EQ(Status::OK(), results.status());
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(exp0.isSameShape(out0));
ASSERT_TRUE(exp0.equalsTo(out0));
ASSERT_TRUE(exp1.isSameShape(out1));
ASSERT_TRUE(exp1.equalsTo(out1));
ASSERT_TRUE(exp2.isSameShape(out2));
ASSERT_TRUE(exp2.equalsTo(out2));
2019-06-06 14:21:15 +02:00
}
//////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests4, meshgrid_test4) {
auto in0 = NDArrayFactory::create<double>('c', {1,2}, {1, 2});
auto in1 = NDArrayFactory::create<double>('c', {3,1}, {10, 20, 30});
auto in2 = NDArrayFactory::create<double>('c', {1,4,1}, {100, 200, 300, 400});
auto exp0 = NDArrayFactory::create<double>('c', {2,3,4}, {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2});
auto exp1 = NDArrayFactory::create<double>('c', {2,3,4}, {10, 10, 10, 10, 20, 20, 20, 20, 30, 30, 30, 30, 10, 10, 10, 10, 20, 20, 20, 20, 30, 30, 30, 30});
auto exp2 = NDArrayFactory::create<double>('c', {2,3,4}, {100, 200, 300, 400, 100, 200, 300, 400, 100, 200, 300, 400, 100, 200, 300, 400, 100, 200, 300, 400, 100, 200, 300, 400});
sd::ops::meshgrid op;
auto results = op.evaluate({&in0, &in1, &in2}, {}, {0});
auto out0 = results.at(0);
auto out1 = results.at(1);
auto out2 = results.at(2);
2019-06-06 14:21:15 +02:00
ASSERT_EQ(Status::OK(), results.status());
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(exp0.isSameShape(out0));
ASSERT_TRUE(exp0.equalsTo(out0));
ASSERT_TRUE(exp1.isSameShape(out1));
ASSERT_TRUE(exp1.equalsTo(out1));
ASSERT_TRUE(exp2.isSameShape(out2));
ASSERT_TRUE(exp2.equalsTo(out2));
2019-06-06 14:21:15 +02:00
}
//////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests4, meshgrid_test5) {
auto in0 = NDArrayFactory::create<double>(1);
auto in1 = NDArrayFactory::create<double>(2);
auto in2 = NDArrayFactory::create<double>(3);
auto exp0 = NDArrayFactory::create<double>('c', {1,1,1}, {1});
auto exp1 = NDArrayFactory::create<double>('c', {1,1,1}, {2});
auto exp2 = NDArrayFactory::create<double>('c', {1,1,1}, {3});
sd::ops::meshgrid op;
auto results = op.evaluate({&in0, &in1, &in2}, {}, {0});
auto out0 = results.at(0);
auto out1 = results.at(1);
auto out2 = results.at(2);
2019-06-06 14:21:15 +02:00
ASSERT_EQ(Status::OK(), results.status());
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(exp0.isSameShape(out0));
ASSERT_TRUE(exp0.equalsTo(out0));
ASSERT_TRUE(exp1.isSameShape(out1));
ASSERT_TRUE(exp1.equalsTo(out1));
ASSERT_TRUE(exp2.isSameShape(out2));
ASSERT_TRUE(exp2.equalsTo(out2));
2019-06-06 14:21:15 +02:00
}
//////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests4, meshgrid_test6) {
auto in0 = NDArrayFactory::create<double>('c', {2,2},{1,2,3,4});
auto in1 = NDArrayFactory::create<double>(5);
auto in2 = NDArrayFactory::create<double>(6);
auto exp0 = NDArrayFactory::create<double>('c', {4,1,1}, {1,2,3,4});
auto exp1 = NDArrayFactory::create<double>('c', {4,1,1}, {5,5,5,5});
auto exp2 = NDArrayFactory::create<double>('c', {4,1,1}, {6,6,6,6});
sd::ops::meshgrid op;
auto results = op.evaluate({&in0, &in1, &in2}, {}, {0});
auto out0 = results.at(0);
auto out1 = results.at(1);
auto out2 = results.at(2);
2019-06-06 14:21:15 +02:00
ASSERT_EQ(Status::OK(), results.status());
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(exp0.isSameShape(out0));
ASSERT_TRUE(exp0.equalsTo(out0));
ASSERT_TRUE(exp1.isSameShape(out1));
ASSERT_TRUE(exp1.equalsTo(out1));
ASSERT_TRUE(exp2.isSameShape(out2));
ASSERT_TRUE(exp2.equalsTo(out2));
2019-06-06 14:21:15 +02:00
}
//////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests4, meshgrid_test7) {
auto in0 = NDArrayFactory::create<double>('c', {2,2},{1,2,3,4});
auto in1 = NDArrayFactory::create<double>(5);
auto in2 = NDArrayFactory::create<double>(6);
auto exp0 = NDArrayFactory::create<double>('c', {1,4,1}, {1,2,3,4});
auto exp1 = NDArrayFactory::create<double>('c', {1,4,1}, {5,5,5,5});
auto exp2 = NDArrayFactory::create<double>('c', {1,4,1}, {6,6,6,6});
sd::ops::meshgrid op;
auto results = op.evaluate({&in0, &in1, &in2}, {}, {1});
auto out0 = results.at(0);
auto out1 = results.at(1);
auto out2 = results.at(2);
2019-06-06 14:21:15 +02:00
ASSERT_EQ(Status::OK(), results.status());
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(exp0.isSameShape(out0));
ASSERT_TRUE(exp0.equalsTo(out0));
ASSERT_TRUE(exp1.isSameShape(out1));
ASSERT_TRUE(exp1.equalsTo(out1));
ASSERT_TRUE(exp2.isSameShape(out2));
ASSERT_TRUE(exp2.equalsTo(out2));
2019-06-06 14:21:15 +02:00
}
//////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests4, meshgrid_test8) {
auto in0 = NDArrayFactory::create<double>(5);
auto exp0 = NDArrayFactory::create<double>('c', {1}, {5});
sd::ops::meshgrid op;
auto results = op.evaluate({&in0}, {}, {0});
auto out0 = results.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_EQ(Status::OK(), results.status());
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(exp0.isSameShape(out0));
ASSERT_TRUE(exp0.equalsTo(out0));
2019-06-06 14:21:15 +02:00
}
//////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests4, meshgrid_test9) {
auto in0 = NDArrayFactory::create<double>(5);
auto exp0 = NDArrayFactory::create<double>('c', {1}, {5});
sd::ops::meshgrid op;
auto results = op.evaluate({&in0}, {}, {1});
auto out0 = results.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_EQ(Status::OK(), results.status());
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(exp0.isSameShape(out0));
ASSERT_TRUE(exp0.equalsTo(out0));
2019-06-06 14:21:15 +02:00
}
////////////////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests4, WeightedCrossEntropyWithLogits_1) {
auto input = NDArrayFactory::create<double>('c', {2, 3}, {11.f, 13.f, 4.f, 15.f, 6.f, 3.f});
auto targets = NDArrayFactory::create<double>('c', {2, 3}, {15.5f, 15.7f, 5.f , 15.f, 5.f, 6.f});
auto weight = NDArrayFactory::create<double>(0.7f);
auto expected = NDArrayFactory::create<double>('c', {2, 3}, {-159.50006, -191.1, -16.009075, -210., -24.001238, -15.03887});
//Targets {15.5f, 15.7f, 5.f , 15.f, 5.f, 6.f};
//----------
//Inputs {11.f, 13.f, 4.f, 15.f, 6.f, 3.f};
//----------
//Weights [0.7]
//Result {-159.50006, -191.1, -16.009075, -210., -24.001238, -15.03887}
sd::ops::weighted_cross_entropy_with_logits op;
auto results = op.evaluate({&targets, &input, &weight});
auto output = results.at(0);
2019-06-06 14:21:15 +02:00
// output->printIndexedBuffer();
ASSERT_EQ(Status::OK(), results.status());
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(expected.isSameShape(output));
ASSERT_TRUE(expected.equalsTo(output));
2019-06-06 14:21:15 +02:00
}
////////////////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests4, WeightedCrossEntropyWithLogits_2) {
auto input = NDArrayFactory::create<double>('c', {2, 3}, {11.f, 13.f, 4.f, 15.f, 6.f, 3.f});
auto targets = NDArrayFactory::create<double>('c', {2, 3}, {15.5f, 15.7f, 5.f, 15.f, 5.f, 6.f});
auto weights = NDArrayFactory::create<double>({0.5f, 0.7f, 1.0f}) ;
auto expected = NDArrayFactory::create<double>('c', {2, 3}, {-159.5001f, -191.1f, -15.98185f, -210.f, -24.001238f, -14.951412f});
sd::ops::weighted_cross_entropy_with_logits op;
auto results = op.evaluate({&targets, &input, &weights});
auto output = results.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_EQ(Status::OK(), results.status());
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(expected.isSameShape(output));
ASSERT_TRUE(expected.equalsTo(output));
2019-06-06 14:21:15 +02:00
}
///////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests4, lstm_test1) {
const int time = 5;
const int batchSize = 3;
const int inSize = 3;
const int numProj = 3;
const int numUnits = 3;
auto x = NDArrayFactory::create<double>('c', {time, batchSize, inSize});
auto h0 = NDArrayFactory::create<double>('c', {batchSize, numProj});
auto c0 = NDArrayFactory::create<double>('c', {batchSize, numUnits});
auto Wx = NDArrayFactory::create<double>('c', {inSize, 4*numUnits});
auto Wh = NDArrayFactory::create<double>('c', {numProj, 4*numUnits});
auto Wc = NDArrayFactory::create<double>('c', {3*numUnits});
auto Wp = NDArrayFactory::create<double>('c', {numUnits, numProj});
auto b = NDArrayFactory::create<double>('c', {4*numUnits});
x.linspace(0.5, 0.5);
h0 = 1.;
c0 = 2.;
Wx = 0.003;
Wh = 0.006;
Wc = 0.;
Wp = 0.;
b = 0.5;
auto expH = NDArrayFactory::create<double>('c', {time, batchSize, numProj}, {0.57574,0.57574,0.57574,0.58006,0.58006,0.58006,0.58434,0.58434,0.58434,
0.55114,0.55114,0.55114,0.55732,0.55732,0.55732,0.56338,0.56338,0.56338,
0.53763,0.53763,0.53763,0.54534,0.54534,0.54534,0.55287,0.55287,0.55287,
0.53626,0.53626,0.53626,0.54487,0.54487,0.54487,0.55327,0.55327,0.55327,
0.54484,0.54484,0.54484,0.55379,0.55379,0.55379,0.5625 ,0.5625 ,0.5625});
auto expClast = NDArrayFactory::create<double>('c', {1, batchSize, numProj}, {1.1589154,1.1589154,1.1589154,1.1892855,1.1892855,1.1892855,1.219861 ,1.219861 ,1.219861});
sd::ops::lstm op;
auto results = op.evaluate({&x, &h0, &c0, &Wx, &Wh, &Wc, &Wp, &b}, {0., 0., 0.}, {0, 0});
2019-06-06 14:21:15 +02:00
ASSERT_EQ(ND4J_STATUS_OK, results.status());
2019-06-06 14:21:15 +02:00
auto *h = results.at(0);
auto *c = results.at(1);
2019-06-06 14:21:15 +02:00
auto cLast = (*c)({4,5,0,0,0,0},true);
ASSERT_TRUE(expH.isSameShape(h));
ASSERT_TRUE(expH.equalsTo(h));
ASSERT_TRUE(expClast.isSameShape(&cLast));
ASSERT_TRUE(expClast.equalsTo(&cLast));
2019-06-06 14:21:15 +02:00
}
///////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests4, relu6_test1) {
auto input = NDArrayFactory::create<double>('c', {2,4}, {-13.,10,-5,0,2,7,6,12});
auto expected = NDArrayFactory::create<double>('c', {2,4}, {0., 6., 0., 0.,2., 6., 6., 6.});
sd::ops::relu6 op;
auto results = op.evaluate({&input}, {0.}, {});
2019-06-06 14:21:15 +02:00
ASSERT_EQ(ND4J_STATUS_OK, results.status());
2019-06-06 14:21:15 +02:00
auto output = results.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(expected.isSameShape(output));
ASSERT_TRUE(expected.equalsTo(output));
2019-06-06 14:21:15 +02:00
}
///////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests4, relu6_bp_test1) {
auto input = NDArrayFactory::create<double>('c', {2,4}, {-13.,10, -5, 0, 2, 7, 6, 5});
auto gradO = NDArrayFactory::create<double>('c', {2,4}, {-1., -2., 0., 4., 5., 6., 7., 8.});
auto expected = NDArrayFactory::create<double>('c', {2,4}, {0., 0., 0., 0., 5., 0., 0., 8.});
sd::ops::relu6_bp op;
auto results = op.evaluate({&input, &gradO}, {0.});
2019-06-06 14:21:15 +02:00
ASSERT_EQ(ND4J_STATUS_OK, results.status());
2019-06-06 14:21:15 +02:00
auto output = results.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(expected.isSameShape(output));
ASSERT_TRUE(expected.equalsTo(output));
2019-06-06 14:21:15 +02:00
}
////////////////////////////////////////////////////////////////////////////////
TYPED_TEST(TypedDeclarableOpsTests4, LrnTest_1) {
auto x = NDArrayFactory::create<TypeParam>('c', {2, 2, 2, 2}, { 5.5f, 0.f, 0.3f, 5.5f,
8.6f, 0.f, 0.f, 0.4f,
1.5f, 1.f, 1.3f, 1.5f,
2.6f, 2.f, 3.f, 1.4f}
2019-06-06 14:21:15 +02:00
);
auto exp = NDArrayFactory::create<TypeParam>('c', {2, 2, 2, 2}, {
0.98386997f, 0.f, 0.05358852f, 0.9824562f,
0.99330735f, 0.f, 0.f, 0.37139067f,
0.72760683f, 0.4850712f, 0.5848977f, 0.67488194f,
0.7581754f, 0.58321184f, 0.86747235f, 0.4048204f}
2019-06-06 14:21:15 +02:00
);
sd::ops::lrn op;
auto results = op.evaluate({&x}, {1.0, 1.0, 0.5}, {5});
auto out = results.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_EQ(Status::OK(), results.status());
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(exp.isSameShape(out));
// out->printIndexedBuffer("LRN out");
// exp.printIndexedBuffer("LRN exp");
ASSERT_TRUE(exp.equalsTo(out));
2019-06-06 14:21:15 +02:00
}
////////////////////////////////////////////////////////////////////////////////
TYPED_TEST(TypedDeclarableOpsTests4, LrnTest_2) {
auto x = NDArrayFactory::create<TypeParam>('c', {2, 2, 2, 2}, { 5.5f, 0.f, 0.3f, 5.5f,
8.6f, 0.f, 0.f, 0.4f,
1.5f, 1.f, 1.3f, 1.5f,
2.6f, 2.f, 3.f, 1.4f});
2019-06-06 14:21:15 +02:00
auto exp = NDArrayFactory::create<TypeParam>('c', {2, 2, 2, 2}, {
0.98386997f, 0.f, 0.05358852f, 0.9824562f,
0.99330735f, 0.f, 0.f, 0.37139067f,
0.72760683f, 0.4850712f, 0.5848977f, 0.67488194f,
0.7581754f, 0.58321184f, 0.86747235f, 0.4048204f});
2019-06-06 14:21:15 +02:00
sd::ops::lrn op;
auto results = op.evaluate({&x}, {1.0, 1.0, 0.5}, {2});
auto out = results.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_EQ(Status::OK(), results.status());
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(exp.isSameShape(out));
// out->printIndexedBuffer("LRN out");
// exp.printIndexedBuffer("LRN exp");
ASSERT_TRUE(exp.equalsTo(out));
2019-06-06 14:21:15 +02:00
}
////////////////////////////////////////////////////////////////////////////////
TYPED_TEST(TypedDeclarableOpsTests4, LrnTest_3) {
auto x = NDArrayFactory::create<TypeParam>('c', {2, 2, 2, 4}, {
5.5f, 0.f, 0.3f, 5.5f,
1.5f, 0.f, 1.3f, 6.5f,
8.6f, 0.f, 0.f, 0.4f,
2.5f, 1.f, 0.3f, 4.5f,
1.5f, 1.f, 1.3f, 1.5f,
3.5f, 0.f, 1.3f, 2.5f,
2.6f, 2.f, 3.f, 1.4f,
4.5f, 1.f, 0.3f, 0.5f}
2019-06-06 14:21:15 +02:00
);
auto exp = NDArrayFactory::create<TypeParam>('c', {2, 2, 2, 4}, {
0.9824562f, 0.f, 0.03822664f, 0.9824562f,
0.67488194f, 0.f, 0.18924236f, 0.96960944f,
0.99330735f, 0.f, 0.f, 0.37139067f,
0.86567914f, 0.18702209f, 0.05610663f, 0.9520745f,
0.6154575f, 0.34942827f, 0.45425674f, 0.6154575f,
0.905509f, 0.f, 0.2824086f, 0.8361251f,
0.57063663f, 0.41959068f, 0.629386f, 0.3504383f,
0.9520745f, 0.21039814f, 0.06311944f, 0.3268602f }
2019-06-06 14:21:15 +02:00
);
sd::ops::lrn op;
auto results = op.evaluate({&x}, {1.0, 1.0, 0.5}, {2});
auto out = results.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_EQ(Status::OK(), results.status());
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(exp.isSameShape(out));
// out->printIndexedBuffer("LRN out");
// exp.printIndexedBuffer("LRN exp");
ASSERT_TRUE(exp.equalsTo(out));
2019-06-06 14:21:15 +02:00
}
////////////////////////////////////////////////////////////////////////////////
TYPED_TEST(TypedDeclarableOpsTests4, LrnTest_4) {
auto x = NDArrayFactory::create<TypeParam>('c', {2, 2, 2, 4}, {
5.5f, 0.f, 0.3f, 5.5f,
1.5f, 0.f, 1.3f, 6.5f,
8.6f, 0.f, 0.f, 0.4f,
2.5f, 1.f, 0.3f, 4.5f,
1.5f, 1.f, 1.3f, 1.5f,
3.5f, 0.f, 1.3f, 2.5f,
2.6f, 2.f, 3.f, 1.4f,
4.5f, 1.f, 0.3f, 0.5f}
2019-06-06 14:21:15 +02:00
);
auto exp = NDArrayFactory::create<TypeParam>('c', {2, 2, 2, 4}, {
0.70082176f, 0.f, 0.03822664f, 0.70082176f,
0.21835658f, 0.f, 0.18924236f, 0.9462118f,
0.9922489f, 0.f, 0.f, 0.04615111f,
0.46755522f, 0.18702209f, 0.05610663f, 0.8415994f,
0.5241424f, 0.34942827f, 0.45425674f, 0.5241424f,
0.76033086f, 0.f, 0.2824086f, 0.54309344f,
0.54546785f, 0.41959068f, 0.629386f, 0.29371348f,
0.94679165f, 0.21039814f, 0.06311944f, 0.10519907f}
2019-06-06 14:21:15 +02:00
);
sd::ops::lrn op;
auto results = op.evaluate({&x}, {1.0, 1.0, 0.5}, {5});
auto out = results.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_EQ(Status::OK(), results.status());
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(exp.isSameShape(out));
// out->printIndexedBuffer("LRN out");
// exp.printIndexedBuffer("LRN exp");
ASSERT_TRUE(exp.equalsTo(out));
2019-06-06 14:21:15 +02:00
}
////////////////////////////////////////////////////////////////////////////////
TYPED_TEST(TypedDeclarableOpsTests4, LrnTest_5) {
auto x = NDArrayFactory::create<TypeParam>('c', {2, 2, 2, 4}, {
5.5f, 0.f, 0.3f, 5.5f,
1.5f, 0.f, 1.3f, 6.5f,
8.6f, 0.f, 0.f, 0.4f,
2.5f, 1.f, 0.3f, 4.5f,
1.5f, 1.f, 1.3f, 1.5f,
3.5f, 0.f, 1.3f, 2.5f,
2.6f, 2.f, 3.f, 1.4f,
4.5f, 1.f, 0.3f, 0.5f}
2019-06-06 14:21:15 +02:00
);
auto eps = NDArrayFactory::create<TypeParam>('c', {2, 2, 2, 4}, {
0.70082176f, 0.f, 0.03822664f, 0.70082176f,
0.21835658f, 0.f, 0.18924236f, 0.9462118f,
2019-06-06 14:21:15 +02:00
0.9922489f, 0.f, 0.f, 0.04615111f,
0.46755522f, 0.18702209f, 0.05610663f, 0.8415994f,
2019-06-06 14:21:15 +02:00
0.5241424f, 0.34942827f, 0.45425674f, 0.5241424f,
0.76033086f, 0.f, 0.2824086f, 0.54309344f,
2019-06-06 14:21:15 +02:00
0.54546785f, 0.41959068f, 0.629386f, 0.29371348f,
0.94679165f, 0.21039814f, 0.06311944f, 0.10519907f}
2019-06-06 14:21:15 +02:00
);
auto exp = NDArrayFactory::create<TypeParam>('c', {2, 2, 2, 4});
sd::ops::lrn_bp op;
auto results = op.evaluate({&x, &eps}, {1.0, 1.0, 0.5}, {5}, {}, {}, false);
auto out = results.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_EQ(Status::OK(), results.status());
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(exp.isSameShape(out));
// out->printIndexedBuffer("LRN out");
// exp.printIndexedBuffer("LRN exp");
// ASSERT_TRUE(exp.equalsTo(out));
2019-06-06 14:21:15 +02:00
}
//////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests4, tri_test1) {
const int rows = 3;
const int cols = 5;
auto expected = NDArrayFactory::create<float>('c', {rows, cols}, {1.f, 0.f, 0.f, 0.f, 0.f, 1.f, 1.f, 0.f, 0.f, 0.f, 1.f, 1.f, 1.f, 0.f, 0.f});
2019-06-06 14:21:15 +02:00
sd::ops::tri op;
auto results = op.evaluate({}, {}, {rows, cols});
auto output = results.at(0);
2019-06-06 14:21:15 +02:00
// output->printIndexedBuffer();
ASSERT_EQ(Status::OK(), results.status());
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(expected.isSameShape(output));
ASSERT_TRUE(expected.equalsTo(output));
2019-06-06 14:21:15 +02:00
}
//////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests4, tri_test2) {
const int rows = 3;
const int cols = 5;
const int diag = 2;
auto expected = NDArrayFactory::create<float>('c', {rows, cols}, {1.f, 1.f, 1.f, 0.f, 0.f, 1.f, 1.f, 1.f, 1.f, 0.f, 1.f, 1.f, 1.f, 1.f, 1.f});
2019-06-06 14:21:15 +02:00
sd::ops::tri op;
auto results = op.evaluate({}, {}, {rows, cols, diag});
auto output = results.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_EQ(Status::OK(), results.status());
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(expected.isSameShape(output));
ASSERT_TRUE(expected.equalsTo(output));
2019-06-06 14:21:15 +02:00
}
//////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests4, tri_test3) {
const int rows = 3;
const int cols = 5;
const int diag = -1;
auto expected = NDArrayFactory::create<float>('c', {rows, cols}, {0.f, 0.f, 0.f, 0.f, 0.f, 1.f, 0.f, 0.f, 0.f, 0.f, 1.f, 1.f, 0.f, 0.f, 0.f});
2019-06-06 14:21:15 +02:00
sd::ops::tri op;
auto results = op.evaluate({}, {}, {rows, cols, diag});
auto output = results.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_EQ(Status::OK(), results.status());
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(expected.isSameShape(output));
ASSERT_TRUE(expected.equalsTo(output));
2019-06-06 14:21:15 +02:00
}
//////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests4, tri_test4) {
const int rows = 3;
const int cols = 5;
const int diag = -2;
auto expected = NDArrayFactory::create<float>('c', {rows, cols}, {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 1.f, 0.f, 0.f, 0.f, 0.f});
2019-06-06 14:21:15 +02:00
sd::ops::tri op;
auto results = op.evaluate({}, {}, {rows, cols, diag});
auto output = results.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_EQ(Status::OK(), results.status());
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(expected.isSameShape(output));
ASSERT_TRUE(expected.equalsTo(output));
2019-06-06 14:21:15 +02:00
}
//////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests4, tri_test5) {
const int rows = 5;
auto expected = NDArrayFactory::create<float>('c', {rows, rows}, {1.f, 0.f, 0.f, 0.f, 0.f, 1.f, 1.f, 0.f, 0.f, 0.f, 1.f, 1.f, 1.f, 0.f, 0.f, 1.f, 1.f, 1.f, 1.f, 0.f, 1.f, 1.f, 1.f, 1.f, 1.f});
2019-06-06 14:21:15 +02:00
sd::ops::tri op;
auto results = op.evaluate({}, {}, {rows});
auto output = results.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_EQ(Status::OK(), results.status());
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(expected.isSameShape(output));
ASSERT_TRUE(expected.equalsTo(output));
2019-06-06 14:21:15 +02:00
}
//////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests4, tri_test6) {
const int rows = 3;
const int cols = 5;
const int diag = -20;
auto expected = NDArrayFactory::create<float>('c', {rows, cols}, {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f});
2019-06-06 14:21:15 +02:00
sd::ops::tri op;
auto results = op.evaluate({}, {}, {rows, cols, diag});
auto output = results.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_EQ(Status::OK(), results.status());
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(expected.isSameShape(output));
ASSERT_TRUE(expected.equalsTo(output));
2019-06-06 14:21:15 +02:00
}
//////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests4, tri_test7) {
const int rows = 3;
const int cols = 5;
const int diag = 20;
auto expected = NDArrayFactory::create<float>('c', {rows, cols}, {1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f});
2019-06-06 14:21:15 +02:00
sd::ops::tri op;
auto results = op.evaluate({}, {}, {rows, cols, diag});
auto output = results.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_EQ(Status::OK(), results.status());
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(expected.isSameShape(output));
ASSERT_TRUE(expected.equalsTo(output));
2019-06-06 14:21:15 +02:00
}
//////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests4, triu_test1) {
auto input = NDArrayFactory::create<double>('c', {4, 3}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
auto expected = NDArrayFactory::create<double>('c', {4, 3}, {1, 2, 3, 0, 5, 6, 0, 0, 9, 0, 0, 0});
sd::ops::triu op;
auto results = op.evaluate({&input}, {}, {});
auto output = results.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_EQ(Status::OK(), results.status());
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(expected.isSameShape(output));
ASSERT_TRUE(expected.equalsTo(output));
2019-06-06 14:21:15 +02:00
}
//////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests4, triu_test2) {
auto input = NDArrayFactory::create<double>('c', {4, 3}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
auto expected = NDArrayFactory::create<double>('c', {4, 3}, {1, 2, 3,4, 5, 6,0, 8, 9,0, 0, 12});
sd::ops::triu op;
auto results = op.evaluate({&input}, {}, {-1});
auto output = results.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_EQ(Status::OK(), results.status());
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(expected.isSameShape(output));
ASSERT_TRUE(expected.equalsTo(output));
2019-06-06 14:21:15 +02:00
}
//////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests4, triu_test3) {
auto input = NDArrayFactory::create<double>('c', {2, 3, 2}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
auto expected = NDArrayFactory::create<double>('c', {2, 3, 2}, {1, 2,3, 4,0, 6,7, 8,9,10,0,12});
sd::ops::triu op;
auto results = op.evaluate({&input}, {}, {-1});
auto output = results.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_EQ(Status::OK(), results.status());
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(expected.isSameShape(output));
ASSERT_TRUE(expected.equalsTo(output));
2019-06-06 14:21:15 +02:00
}
//////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests4, triu_test4) {
auto input = NDArrayFactory::create<double>('c', {2, 3, 2}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
auto expected = NDArrayFactory::create<double>('c', {2, 3, 2}, {1, 2,0, 4,0, 0,7, 8,0, 10,0, 0});
sd::ops::triu op;
auto results = op.evaluate({&input}, {}, {});
auto output = results.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_EQ(Status::OK(), results.status());
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(expected.isSameShape(output));
ASSERT_TRUE(expected.equalsTo(output));
2019-06-06 14:21:15 +02:00
}
//////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests4, triu_test5) {
auto input = NDArrayFactory::create<double>('c', {2, 3, 2}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
auto expected = NDArrayFactory::create<double>('c', {2, 3, 2}, {0, 2,0, 0,0, 0,0, 8,0, 0,0, 0});
sd::ops::triu op;
auto results = op.evaluate({&input}, {}, {1});
auto output = results.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_EQ(Status::OK(), results.status());
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(expected.isSameShape(output));
ASSERT_TRUE(expected.equalsTo(output));
2019-06-06 14:21:15 +02:00
}
//////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests4, triu_test6) {
auto input = NDArrayFactory::create<double>('c', {2, 3, 2}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
auto expected = NDArrayFactory::create<double>('c', {2, 3, 2}, {0, 0,0, 0,0, 0,0, 0,0, 0,0, 0});
sd::ops::triu op;
auto results = op.evaluate({&input}, {}, {10});
auto output = results.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_EQ(Status::OK(), results.status());
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(expected.isSameShape(output));
ASSERT_TRUE(expected.equalsTo(output));
2019-06-06 14:21:15 +02:00
}
//////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests4, triu_test7) {
auto input = NDArrayFactory::create<double>('c', {2, 3, 2}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
auto expected = NDArrayFactory::create<double>('c', {2, 3, 2}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
sd::ops::triu op;
auto results = op.evaluate({&input}, {}, {-10});
auto output = results.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_EQ(Status::OK(), results.status());
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(expected.isSameShape(output));
ASSERT_TRUE(expected.equalsTo(output));
2019-06-06 14:21:15 +02:00
}
//////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests4, triu_test8) {
auto input = NDArrayFactory::create<double>('c', {6}, {1, 2, 3, 4, 5, 6});
auto expected = NDArrayFactory::create<double>('c', {6, 6}, {1, 2, 3, 4, 5, 6,0, 2, 3, 4, 5, 6,0, 0, 3, 4, 5, 6,0, 0, 0, 4, 5, 6,0, 0, 0, 0, 5, 6,0, 0, 0, 0, 0, 6});
sd::ops::triu op;
auto results = op.evaluate({&input}, {}, {});
auto output = results.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_EQ(Status::OK(), results.status());
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(expected.isSameShape(output));
ASSERT_TRUE(expected.equalsTo(output));
2019-06-06 14:21:15 +02:00
}
//////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests4, triu_test9) {
auto input = NDArrayFactory::create<double>('c', {6}, {1, 2, 3, 4, 5, 6});
auto expected = NDArrayFactory::create<double>('c', {6, 6}, {1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 0, 2, 3, 4, 5, 6, 0, 0, 3, 4, 5, 6});
sd::ops::triu op;
auto results = op.evaluate({&input}, {}, {-3});
auto output = results.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_EQ(Status::OK(), results.status());
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(expected.isSameShape(output));
ASSERT_TRUE(expected.equalsTo(output));
2019-06-06 14:21:15 +02:00
}
//////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests4, triu_test10) {
auto input = NDArrayFactory::create<double>('c', {6}, {1, 2, 3, 4, 5, 6});
auto expected = NDArrayFactory::create<double>('c', {6, 6}, {0, 0, 0, 4, 5, 6, 0, 0, 0, 0, 5, 6, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0});
sd::ops::triu op;
auto results = op.evaluate({&input}, {}, {3});
auto output = results.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_EQ(Status::OK(), results.status());
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(expected.isSameShape(output));
ASSERT_TRUE(expected.equalsTo(output));
2019-06-06 14:21:15 +02:00
}
//////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests4, triu_test11) {
auto input = NDArrayFactory::create<double>('c', {6}, {1, 2, 3, 4, 5, 6});
auto expected = NDArrayFactory::create<double>('c', {6, 6}, {1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6});
sd::ops::triu op;
auto results = op.evaluate({&input}, {}, {-58});
auto output = results.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_EQ(Status::OK(), results.status());
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(expected.isSameShape(output));
ASSERT_TRUE(expected.equalsTo(output));
2019-06-06 14:21:15 +02:00
}
//////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests4, triu_bp_test1) {
auto input = NDArrayFactory::create<double>('c', {2, 3, 2}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
auto gradO = NDArrayFactory::create<double>('c', {2, 3, 2});
gradO = 0.5;
[WIP] more CUDA stuff (#57) * initial commit Signed-off-by: raver119 <raver119@gmail.com> * Added gradcheck test for dynamic_partition_bp op. * - implementation of dilation op (cpu and cuda) Signed-off-by: Yurii <yurii@skymind.io> * Fixed broadcast_dynamic_shape 1D case and tests. * Fixed usage of default integer arguments. * Fixed dynamic_partition_bp op and tests. * Eliminated test with grad check for dynamic_partition_bp op. * start working on cuda svd - porting available corresponding api from cuSOLVER library Signed-off-by: Yurii <yurii@skymind.io> * provide prelu_bp Signed-off-by: Yurii <yurii@skymind.io> * - provide gruCell_bp (old version ??) Signed-off-by: Yurii <yurii@skymind.io> * - polishing cumsum_bp and cumprod_bp tests Signed-off-by: Yurii <yurii@skymind.io> * provide sparseSoftmaxCrossEntropyWithLogits and sparseSoftmaxCrossEntropyWithLogits_grad Signed-off-by: Yurii <yurii@skymind.io> * Fixed atomicMul with float input/output * implementation of cuda kernel for triu_bp operation Signed-off-by: Yurii <yurii@skymind.io> * Refactored lup helper to add parrallel computing. * cusolver libraries Signed-off-by: raver119 <raver119@gmail.com> * uncomment cuSolver APIs in svd.cu Signed-off-by: Yurii <yurii@skymind.io> * cusolver var Signed-off-by: raver119 <raver119@gmail.com> * - further work on cuSolver svd Signed-off-by: Yurii <yurii@skymind.io> * Implement usage of cuda solver to LUP decomposition. * - correct naames in lup functions Signed-off-by: Yurii <yurii@skymind.io> * correct svdQR cuda Signed-off-by: Yurii <yurii@skymind.io> * - provide transpositions of input matrices in case of c order in svdCudaQR Signed-off-by: Yurii <yurii@skymind.io> * Fixed implementation issues with LUP usign cuda solver. * Implementation of matrix_determinant helper with cuda kernels. Working revision. * Implemented log_matrix_determinant helper with cuda kernels. * - implementation of batched cuda svd Signed-off-by: Yurii <yurii@skymind.io> * Refactored cholesky helper and implementation of cuda solver cholesky batch. * - implementation of cuda kernel for tile bp Signed-off-by: Yurii <yurii@skymind.io> * Implementation of cholesky and logdet with cuda kernels. * - implementation of cuda kernel for sru_bidirectional Signed-off-by: Yurii <yurii@skymind.io> * Fixed cholesky helper. * Cholesky op helper implementation. Working double-based cublas implementation. * bad import excluded Signed-off-by: raver119 <raver119@gmail.com> * Finished with cuda implementation of cholesky helper and tests. * - implementation of cuda kernel for sru_bidirectional_backprop operation Signed-off-by: Yurii <yurii@skymind.io> * Implementation of matrix_inverse op helper with cuda kernels. The first revision. * - start working on gruCell_bp Signed-off-by: Yurii <yurii@skymind.io> * Implementation of matrix_inverse helper. * - further work on new gruCell_bp Signed-off-by: Yurii <yurii@skymind.io> * cuBLAS related fixes Signed-off-by: raver119 <raver119@gmail.com> * calculateOutputShapes() now passes device buffers as well Signed-off-by: raver119 <raver119@gmail.com> * special concat/average/accumulate init host pointers now Signed-off-by: raver119 <raver119@gmail.com> * few more tweaks Signed-off-by: raver119 <raver119@gmail.com> * additional CudaDataBufferFactory signatures certain for data types Signed-off-by: raver119 <raver119@gmail.com> * cuSolver host buffer Signed-off-by: raver119 <raver119@gmail.com> * buffer to buffer memcpy host ptr allocation Signed-off-by: raver119 <raver119@gmail.com>
2019-07-12 10:51:51 +02:00
auto expected = NDArrayFactory::create<double>('c', {2, 3, 2}, {0.,0.5,0.,0. ,0.,0. ,0.,0.5,0.,0. ,0.,0.});
2019-06-06 14:21:15 +02:00
sd::ops::triu_bp op;
auto results = op.evaluate({&input, &gradO}, {}, {1});
auto gradI = results.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_EQ(Status::OK(), results.status());
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(expected.isSameShape(gradI));
ASSERT_TRUE(expected.equalsTo(gradI));
2019-06-06 14:21:15 +02:00
}
//////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests4, triu_bp_test2) {
auto input = NDArrayFactory::create<double>('c', {2, 3, 2}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
auto gradO = NDArrayFactory::create<double>('c', {2, 3, 2});
gradO = 0.5;
auto expected = NDArrayFactory::create<double>('c', {2, 3, 2}, {0.5,0.5,0. ,0.5,0. ,0. ,0.5,0.5,0. ,0.5,0. ,0.});
sd::ops::triu_bp op;
auto results = op.evaluate({&input, &gradO}, {}, {});
auto gradI = results.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_EQ(Status::OK(), results.status());
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(expected.isSameShape(gradI));
ASSERT_TRUE(expected.equalsTo(gradI));
2019-06-06 14:21:15 +02:00
}
//////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests4, triu_bp_test3) {
auto input = NDArrayFactory::create<double>('c', {6}, {1, 2, 3, 4, 5, 6});
auto gradO = NDArrayFactory::create<double>('c', {6,6});
gradO = 0.5;
auto expected = NDArrayFactory::create<double>('c', {6,6}, {0.5, 0.5, 0.5, 0.5, 0.5, 0.5,0.5, 0.5, 0.5, 0.5, 0.5, 0.5,0.5, 0.5, 0.5, 0.5, 0.5, 0.5,0. , 0.5, 0.5, 0.5, 0.5, 0.5,0. , 0. , 0.5, 0.5, 0.5, 0.5,0. , 0. , 0. , 0.5, 0.5, 0.5});
sd::ops::triu_bp op;
auto results = op.evaluate({&input, &gradO}, {}, {-2});
auto gradI = results.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_EQ(Status::OK(), results.status());
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(expected.isSameShape(gradI));
ASSERT_TRUE(expected.equalsTo(gradI));
2019-06-06 14:21:15 +02:00
}
//////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests4, triu_bp_test4) {
auto input = NDArrayFactory::create<double>('c', {2,3}, {1, 2, 3, 4, 5, 6});
auto gradO = NDArrayFactory::create<double>('c', {2,3});
gradO = 0.5;
auto expected = NDArrayFactory::create<double>('c', {2,3}, {0., 0., 0., 0., 0., 0.});
sd::ops::triu_bp op;
auto results = op.evaluate({&input, &gradO}, {}, {10});
auto gradI = results.at(0);
2019-06-06 14:21:15 +02:00
ASSERT_EQ(Status::OK(), results.status());
2019-06-06 14:21:15 +02:00
ASSERT_TRUE(expected.isSameShape(gradI));
ASSERT_TRUE(expected.equalsTo(gradI));
2019-06-06 14:21:15 +02:00
}