[WIP] Minor fixes (#140)
* Tile java shape fn removed; Tile 0 validation added; scatter_upd test
  Signed-off-by: raver119 <raver119@gmail.com>
* additional tile validation
  Signed-off-by: raver119 <raver119@gmail.com>
* provide vector case in cuda scatter op
  Signed-off-by: Yurii <yurii@skymind.io>
* cpu ismax view fix
  Signed-off-by: raver119 <raver119@gmail.com>
* exp
  Signed-off-by: raver119 <raver119@gmail.com>
* cuda ismax fix
  Signed-off-by: raver119 <raver119@gmail.com>

Branch: master
parent a5867bb527
commit d9ab299759
@@ -231,7 +231,8 @@ void* NDArray::getSpecialBuffer() const {
 // change an array by repeating it the number of times given by reps.
 NDArray NDArray::tile(const std::vector<Nd4jLong>& reps) const {
     const int repsSize = reps.size();
-    int product = 1;
+    Nd4jLong product = 1;
     for(const auto& item : reps)
         product *= item;
     if(product == 0)
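Note: widening `product` from `int` to `Nd4jLong` above is more than cosmetic. With a 32-bit accumulator, a large product of repeat counts can wrap around, even to exactly zero, before the zero-check runs. A minimal standalone sketch of the failure mode, with hypothetical repeat counts (not taken from this patch):

    #include <cstdint>
    #include <vector>

    int main() {
        std::vector<int64_t> reps = {1 << 20, 1 << 20};  // hypothetical repeat counts
        int narrow = 1;      // the old accumulator type
        int64_t wide = 1;    // the new accumulator type (Nd4jLong is a 64-bit integer)
        for (const auto& item : reps) {
            narrow *= item;  // 2^40 truncated to 32 bits -> wraps to 0
            wide *= item;    // 2^40 fits comfortably in 64 bits
        }
        // narrow == 0 here, which would falsely trigger the "reps contain 0" error;
        // wide == 1099511627776 (2^40), the correct product.
        return 0;
    }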
@@ -286,6 +287,10 @@ NDArray NDArray::tile(const std::vector<Nd4jLong>& reps) const {
 // change an array by repeating it the number of times given by reps.
 void NDArray::tile(const std::vector<Nd4jLong>& reps, NDArray& target) const {

+    auto repProd = shape::prodLong(reps.data(), reps.size());
+    if (repProd < 1)
+        throw std::runtime_error("NDArray::tile: reps can't contain 0s");
+
     // evaluate true tile shapeInfo for comparison with target shapeInfo
     auto newShapeInfo = ShapeUtils::evalTileShapeInfo(*this, reps, getContext()->getWorkspace());
     if(!shape::equalsSoft(newShapeInfo, target.getShapeInfo())) {
@@ -312,7 +312,8 @@ NDArray NDArray::tile(const std::vector<Nd4jLong>& reps) const {
     Nd4jLong product = 1;
     for(const auto& item : reps)
         product *= item;
-    if(product == 0)
+    if(product < 1)
         throw std::runtime_error("NDArray::tile method: one of the elements in reps array is zero !");

     int rankOld = rankOf();
@@ -351,6 +352,10 @@ NDArray NDArray::tile(const std::vector<Nd4jLong>& reps) const {
 // change an array by repeating it the number of times given by reps.
 void NDArray::tile(const std::vector<Nd4jLong>& reps, NDArray& target) const {

+    auto repProd = shape::prodLong(reps.data(), reps.size());
+    if (repProd < 1)
+        throw std::runtime_error("NDArray::tile: reps can't contain 0s");
+
     // evaluate true tile shapeInfo for comparison with target shapeInfo
     auto newShapeInfo = ShapeUtils::evalTileShapeInfo(*this, reps, getContext()->getWorkspace());
     if(!shape::equalsSoft(newShapeInfo, target.getShapeInfo())) {
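Note: both tile() variants now share the same up-front guard. A standalone sketch of its logic, assuming `Nd4jLong` is a 64-bit integer (the real code uses `shape::prodLong`; `validateReps` here is a hypothetical helper for illustration):

    #include <cstdint>
    #include <functional>
    #include <numeric>
    #include <stdexcept>
    #include <vector>

    using Nd4jLong = int64_t;

    void validateReps(const std::vector<Nd4jLong>& reps) {
        // a single 0 anywhere in reps collapses the product to 0, i.e. an empty
        // tile, which the library rejects instead of silently producing a
        // zero-element array
        const auto repProd = std::accumulate(reps.begin(), reps.end(), Nd4jLong(1),
                                             std::multiplies<Nd4jLong>());
        if (repProd < 1)
            throw std::runtime_error("NDArray::tile: reps can't contain 0s");
    }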
@@ -48,18 +48,16 @@ namespace nd4j {

     for (int r = blockIdx.x; r < numTads; r += gridDim.x) {
         auto tadOffsetForBlock = tadOffsets[r];
-        int highestElement = (int) dX[r];
+        auto highestElement = dX[r];

         if (dimensionLength > 1 || tadEWS < 1) {

-            for (int e = threadIdx.x; e < tadLength; e += blockDim.x) {
+            for (Nd4jLong e = threadIdx.x; e < tadLength; e += blockDim.x) {

                 auto xOffset = tadOffsetForBlock + shape::getIndexOffset(e, tadOnlyShapeInfo, tadLength);
                 dZ[xOffset] = (e == highestElement ? (T) 1 : (T) 0);
             }
         } else {
-            for (int e = threadIdx.x; e < tadLength; e += blockDim.x) {
+            for (Nd4jLong e = threadIdx.x; e < tadLength; e += blockDim.x) {
                 // so, we just set dZ[e] for each TAD. Sure, e should be replaced with
                 auto idx = tadOffsetForBlock + (e * tadEWS);
                 dZ[idx] = (e == highestElement ? (T) 1 : (T) 0);
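Note: two details in the kernel hunk above are easy to miss. `highestElement` now keeps the element type of `dX` instead of being truncated through an `int` cast, and the loop counter `e` is widened to `Nd4jLong`, so the `e == highestElement` comparison stays exact for very long TADs. A standalone sketch of the truncation hazard (hypothetical index values):

    #include <cassert>
    #include <cstdint>

    int main() {
        int64_t highestElement = (1LL << 31) + 5;  // max-index beyond INT_MAX
        int truncated = (int) highestElement;      // old path: wraps to a negative value

        int64_t e = (1LL << 31) + 5;               // loop counter as a 64-bit value
        assert(e == highestElement);               // new comparison: exact
        assert(e != truncated);                    // old comparison would never match
        return 0;
    }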
@@ -50,6 +50,9 @@ CUSTOM_OP_IMPL(tile, 1, 1, false, 0, -2) {
     else {
         REQUIRE_TRUE(false, 0, "TILE op: this op requires repeats vector, either as IArgs or second array with length equal to rank of input array to be tiled !");
     }

+    auto repProd = shape::prodLong(reps.data(), reps.size());
+    REQUIRE_TRUE(repProd > 0, 0, "TILE op: reps can't contain 0s");
+
     input->tile(reps, *output);

@@ -81,7 +84,10 @@ DECLARE_SHAPE_FN(tile) {
     }
     else {
         REQUIRE_TRUE(false, 0, "TILE op: this op requires repeats vector, either as IArgs or second array with length equal to rank of input array to be tiled !");
     }
+
+    auto repProd = shape::prodLong(reps.data(), reps.size());
+    REQUIRE_TRUE(repProd > 0, 0, "TILE op: reps can't contain 0s");

     std::vector<Nd4jLong> shape(inRank);
     for (int e = 0; e < shape::rank(inShape); e++)
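Note: with the two hunks above, a zero repeat now fails fast in the op's validation and in its shape function, rather than surfacing later as an ill-defined output. A hypothetical invocation, following the test style used later in this diff (shapes and values are illustrative, not from the patch):

    auto input = NDArrayFactory::create<float>('c', {2, 2}, {1.f, 2.f, 3.f, 4.f});

    nd4j::ops::tile op;
    auto result = op.execute({&input}, {}, {2, 0});  // IArgs reps = {2, 0}
    // result->status() is now a validation failure:
    // "TILE op: reps can't contain 0s"
    delete result;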
@@ -125,9 +125,12 @@ static void ismax_(const NDArray* input, NDArray* output, const std::vector<int>
     //to the back.
     //permuted version of the input shape info for setting up the tad problem
     auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), const_cast<int*>(dimensions.data()), dimensionsLength);
+    auto tadPackZ = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), const_cast<int*>(dimensions.data()), dimensionsLength);
+

     auto tadShapeShapeInfo = tadPack.primaryShapeInfo();
     auto tadOffsets = tadPack.primaryOffsets();
+    auto zOfsets = tadPackZ.platformOffsets();

     int tadLength = shape::length(tadShapeShapeInfo);
     int tads = tadPack.numberOfTads();
@@ -137,7 +140,7 @@ static void ismax_(const NDArray* input, NDArray* output, const std::vector<int>
     num_threads = nd4j::math::nd4j_min<int>(num_threads, omp_get_max_threads());

     auto tadEWS = shape::elementWiseStride(tadShapeShapeInfo);
-    auto zEWS = tadEWS;
+    auto zEWS = shape::elementWiseStride(tadPackZ.primaryShapeInfo());

     int span = (tads / num_threads) + 8;

@@ -151,7 +154,7 @@ static void ismax_(const NDArray* input, NDArray* output, const std::vector<int>
     for (int r = start; r < end; r++) {
         if (tadEWS > 0 && zEWS > 0 && dimensionsLength == 1) {
             auto rX = const_cast<NDArray*>(input)->bufferAsT<X>() + tadOffsets[r];
-            auto rZ = output->bufferAsT<Z>() + tadOffsets[r];
+            auto rZ = output->bufferAsT<Z>() + zOfsets[r];

             auto maxValue = rX[0];
             int maxIdx = 0;
@@ -168,7 +171,7 @@ static void ismax_(const NDArray* input, NDArray* output, const std::vector<int>
                 rZ[i] = maxIdx == i ? (Z) 1 : (Z) 0;
             }
         }
-        else {
+        else if (tadEWS > 1 && zEWS > 1) {
             for (int i = 0; i < tadLength; i++) {
                 if (rX[i * tadEWS] > maxValue) {
                     maxIdx = i;
@@ -180,6 +183,20 @@ static void ismax_(const NDArray* input, NDArray* output, const std::vector<int>
             for (int i = 0; i < tadLength; i++) {
                 rZ[i * zEWS] = maxIdx == i ? (Z) 1 : (Z) 0;
             }
+        } else {
+            for (int i = 0; i < tadLength; i++) {
+                auto xOffset = shape::getIndexOffset(i, tadShapeShapeInfo, tadLength);
+                if (rX[xOffset] > maxValue) {
+                    maxIdx = i;
+                    maxValue = rX[xOffset];
+                }
+            }
+
+            PRAGMA_OMP_SIMD
+            for (int i = 0; i < tadLength; i++) {
+                auto zOffset = shape::getIndexOffset(i, tadPackZ.primaryShapeInfo(), tadLength);
+                rZ[zOffset] = maxIdx == i ? (Z) 1 : (Z) 0;
+            }
         }
     }
     else {
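Note: the thread running through the CPU ismax hunks is that the output array may be a view with strides different from the input's, so output-side offsets (`zOfsets`) and the output element-wise stride (`zEWS`) must come from a TAD pack built on the output's shape info, with a rank-generic `getIndexOffset` fallback when either stride is unusable. A standalone sketch of why the same logical element can live at different linear offsets (hypothetical strides, loosely modeled on the strided view in the test below):

    #include <cassert>
    #include <cstdint>

    // linear offset of logical element (i, j) under a given stride pair
    int64_t offsetOf(int64_t i, int64_t j, const int64_t strides[2]) {
        return i * strides[0] + j * strides[1];
    }

    int main() {
        const int64_t dense[2] = {20, 1};  // contiguous 3x20 array
        const int64_t view[2]  = {40, 2};  // every-2nd-column view into a 3x40 buffer
        assert(offsetOf(1, 3, dense) == 23);
        assert(offsetOf(1, 3, view)  == 46);  // same logical element, different offset
        return 0;
    }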
@@ -62,7 +62,7 @@ static void ismax_(nd4j::LaunchContext * context, const NDArray* input, NDArray*
     int dimensionLength = dimensions.size();
     std::vector<int> copy(dimensions);

-    auto packZ = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), copy.data(), copy.size());
+    auto packZ = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), copy.data(), copy.size());

     auto indexMaxArr = input->applyIndexReduce(indexreduce::IndexMax, dimensions);

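Note: the one-line CUDA hunk above is the same fix as on the CPU side: `packZ` describes the output's TADs, so it has to be built from `output->getShapeInfo()` rather than the input's; otherwise writes land at input-layout offsets whenever the output is a view.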
@@ -183,7 +183,7 @@ __global__ static void scatterLockCuda(const int opCode,

     __shared__ bool vectorCase;
     if(threadIdx.x == 0)
-        vectorCase = yTadLen == xLen && shape::rank(xShapeInfo) == 1;
+        vectorCase = yTadLen == xLen && shape::rank(xShapeInfo) <= 1;
     __syncthreads();

     for (int e = 0; e < xLen; e++) {
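Note: relaxing `== 1` to `<= 1` lets rank-0 (scalar-shaped) operands take the vector fast path as well, which appears to be what the commit note "provide vector case in cuda scatter op" refers to and what the scalar-index test below exercises. A minimal standalone sketch of the predicate change (names mirror the kernel; values are hypothetical):

    #include <cassert>

    bool vectorCaseOld(long yTadLen, long xLen, int xRank) {
        return yTadLen == xLen && xRank == 1;
    }

    bool vectorCaseNew(long yTadLen, long xLen, int xRank) {
        return yTadLen == xLen && xRank <= 1;
    }

    int main() {
        // a rank-0 operand with matching lengths was previously excluded
        assert(!vectorCaseOld(1, 1, 0));
        assert( vectorCaseNew(1, 1, 0));
        return 0;
    }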
@@ -52,3 +52,20 @@ TEST_F(DeclarableOpsTests16, test_repeat_119) {

     delete result;
 }
+
+TEST_F(DeclarableOpsTests16, test_scatter_update_119) {
+    auto x = NDArrayFactory::create<float>('c', {3}, {1, 1, 1});
+    auto y = NDArrayFactory::create<int>(0);
+    auto w = NDArrayFactory::create<float>(3.0f);
+    auto e = NDArrayFactory::create<float>('c', {3}, {3.f, 1.f, 1.f});
+
+    nd4j::ops::scatter_upd op;
+    auto result = op.execute({&x, &y, &w}, {}, {});
+    ASSERT_EQ(Status::OK(), result->status());
+
+    auto z = result->at(0);
+
+    ASSERT_EQ(e, *z);
+
+    delete result;
+}
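Note: the test above scatters the scalar update 3.0f into x = [1, 1, 1] at scalar index 0 and expects [3, 1, 1]; with both the index and update arrays at rank 0, it appears to drive exactly the `vectorCase` branch relaxed in the scatter kernel above.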
@@ -1161,6 +1161,34 @@ TEST_F(JavaInteropTests, test_bfloat16_rng) {
     ASSERT_TRUE(z.sumNumber().e<float>(0) > 0);
 }
+
+TEST_F(JavaInteropTests, test_ismax_view) {
+    auto original = NDArrayFactory::create<double>('c', {2, 3, 40});
+    auto v = original.subarray({NDIndex::all(), NDIndex::all(), NDIndex::interval(0, 40, 2)});
+    v->assign(1.0);
+
+    auto e = v->ulike();
+    auto t = e.tensorAlongDimension(0, {0, 1});
+    t->assign(1.0);
+
+    auto z = v->ulike();
+
+
+    Nd4jLong iArgs[] = {2L, 0L};
+    Context ctx(1);
+    ctx.setInputArray(0, v->buffer(), v->shapeInfo(), v->specialBuffer(), v->specialShapeInfo());
+    ctx.setOutputArray(0, z.buffer(), z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo());
+    ctx.setIArguments(iArgs, 1);
+
+    nd4j::ops::ismax op;
+    op.execute(&ctx);
+
+    z.printIndexedBuffer("z");
+    ASSERT_EQ(e, z);
+
+    delete v;
+    delete t;
+}

 /*
 TEST_F(JavaInteropTests, Test_Results_Conversion_1) {
     auto pl = nd4j::graph::readFlatBuffers("./resources/gru_dynamic_mnist.fb");
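Note: the new test covers the CPU ismax view fix. `v` is a strided view (every second element along the last axis of a 2x3x40 array, all set to 1.0), so ismax along dimension 2 should flag index 0 of each length-20 TAD, which is the expectation `e` builds via `tensorAlongDimension`. Before the fix, output offsets came from the input's TAD pack, which breaks for such views. Also note that `iArgs` declares two values but only the first (the dimension, 2) is passed via `setIArguments(iArgs, 1)`.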
@@ -103,38 +103,6 @@ public class Tile extends DynamicCustomOp {
         return ret;
     }

-    @Override
-    public List<LongShapeDescriptor> calculateOutputShape() {
-        if(inputArguments.size() == 0)
-            return Collections.emptyList();
-
-        /**
-         * This op is special case: we can't infer its shape before both inputs are available.
-         * So if reps argument is full of 0.0s - we skip shape inference
-         *
-         * And during actual op invocation both inputs should be available due to topo sort
-         */
-        if (is_static_reps)
-            return Nd4j.getExecutioner().calculateOutputShape(this);
-
-        if (inputArguments().length < 2)
-            return Collections.emptyList();
-
-        val array = inputArguments()[1];
-
-        // FIXME: int cast
-        val reps = new long[(int) array.length()];
-
-        for (int e = 0; e < reps.length; e++)
-            reps[e] = (int) array.getDouble(e);
-
-        if (ArrayUtil.prodLong(reps) == 0)
-            return Collections.emptyList();
-        else
-            return Nd4j.getExecutioner().calculateOutputShape(this);
-    }
-
     @Override
     public String opName() {
         return "tile";
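Note: removing the Java-side `calculateOutputShape` override ("Tile java shape fn removed" in the commit message) fits with the native changes above: the special case that skipped shape inference when reps were all zeros is superseded by the C++ validation that rejects zero reps outright, so Java can delegate shape calculation entirely to the native executioner.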