/*******************************************************************************
 * Copyright (c) 2019 Konduit K.K.
 *
 * This program and the accompanying materials are made available under the
 * terms of the Apache License, Version 2.0 which is available at
 * https://www.apache.org/licenses/LICENSE-2.0.
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 *
 * SPDX-License-Identifier: Apache-2.0
 ******************************************************************************/

//
// @author sgazeos@gmail.com
//

#include <ops/declarable/helpers/random.h>
//#include <vector>
#include <memory>
//#include <graph/Context.h>
#include <ShapeUtils.h>
#include <helpers/RandomLauncher.h>
#include <execution/Threads.h>
#include <helpers/ConstantTadHelper.h>

namespace nd4j {
namespace ops {
namespace helpers {

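    // fillRandomGamma_: fills 'output' with gamma-distributed values parameterized by 'alpha'
    // (and optionally 'beta'). The two parameter arrays are broadcast to a common shape; for each
    // group of 'step' output elements a single uniform draw u is taken, and every element is
    // computed as math::nd4j_igamma (incomplete gamma function) of the broadcast alpha evaluated
    // at beta * u (or at u when beta is absent).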
    template <typename T>
    void fillRandomGamma_(LaunchContext* context, graph::RandomGenerator& rng, NDArray* alpha, NDArray* beta, NDArray* output) {

        Nd4jLong* broadcasted = nullptr;
        if (beta != nullptr)
            ShapeUtils::evalBroadcastShapeInfo(*alpha, *beta, true, broadcasted, context->getWorkspace());
        else
            broadcasted = alpha->shapeInfo();
        auto step = shape::length(broadcasted);
        auto shift = output->lengthOf() / step;

        auto copyAlpha = alpha;
        auto copyBeta = beta;
        if (beta != nullptr) {
            NDArray alphaBroadcasted(broadcasted, alpha->dataType(), false, context);
            NDArray betaBroadcasted(broadcasted, beta->dataType(), false, context);

            copyAlpha = new NDArray(alphaBroadcasted.applyTrueBroadcast(BroadcastOpsTuple::Assign(), *alpha));
            copyBeta = new NDArray(betaBroadcasted.applyTrueBroadcast(BroadcastOpsTuple::Assign(), *beta));
        }

//        bool directAlpha = alpha->ews() == 1 && alpha->ordering() == 'c';
        bool directOutput = output->ews() == 1 && output->ordering() == 'c';
        T* outputBuf = output->dataBuffer()->primaryAsT<T>();

        PRAGMA_OMP_PARALLEL_FOR
        for (Nd4jLong k = 0; k < shift; k++) {
            auto pos = k * step;
            auto u = rng.relativeT<T>(k, 0., 1.);
            for (Nd4jLong e = 0; e < step; e++)
                if (directOutput) {
                    outputBuf[pos + e] = math::nd4j_igamma<T, T, T>(copyAlpha->t<T>(e),
                            beta != nullptr ? copyBeta->t<T>(e) * u : u);
                }
                else {
                    output->t<T>(pos + e) = math::nd4j_igamma<T, T, T>(copyAlpha->t<T>(e),
                            beta != nullptr ? copyBeta->t<T>(e) * u : u);
                }
        }

        if (beta != nullptr) {
            delete copyAlpha;
            delete copyBeta;
            //delete broadcasted;
        }
    }
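
    // Non-templated entry points dispatch on the data type via BUILD_SINGLE_SELECTOR /
    // BUILD_DOUBLE_SELECTOR and forward to the templated implementations; BUILD_SINGLE_TEMPLATE
    // emits explicit instantiations for the listed type set.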
    void fillRandomGamma(LaunchContext* context, graph::RandomGenerator& rng, NDArray* alpha, NDArray* beta, NDArray* output) {
        BUILD_SINGLE_SELECTOR(output->dataType(), fillRandomGamma_, (context, rng, alpha, beta, output), FLOAT_NATIVE);
    }
    BUILD_SINGLE_TEMPLATE(template void fillRandomGamma_, (LaunchContext* context,
            graph::RandomGenerator& rng, NDArray* alpha, NDArray* beta, NDArray* output), FLOAT_NATIVE);

    /*
     * algorithm: Poisson generator based upon the inversion by sequential search
     * (Devroye, Non-Uniform Random Variate Generation, p. 505):
     *
     *   init:
     *       Let x ← 0, p ← e^(−λ), s ← p.
     *       Generate uniform random number u in [0,1].
     *   while u > s do:
     *       x ← x + 1.
     *       p ← p * λ / x.
     *       s ← s + p.
     *   return x.
     */
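    // fillRandomPoisson_: applies the sequential-search inversion above element-wise. The output
    // is treated as 'shift' rows of 'step' elements (one row per lambda block); a single uniform
    // draw u is taken per row, and the while-loop below inverts the Poisson CDF for each lambda.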
    template <typename T>
    void fillRandomPoisson_(LaunchContext* context, graph::RandomGenerator& rng, NDArray* lambda, NDArray* output) {
        auto shift = output->lengthOf() / lambda->lengthOf();
        auto step = lambda->lengthOf();
        T* lambdaBuf = lambda->dataBuffer()->primaryAsT<T>();
        T* outputBuf = output->dataBuffer()->primaryAsT<T>();
        bool directLa = lambda->ews() == 1 && lambda->ordering() == 'c';
        bool directOut = output->ews() == 1 && output->ordering() == 'c';

        PRAGMA_OMP_PARALLEL_FOR
        for (Nd4jLong k = 0; k < shift; k++) {
            auto pos = k * step;
            auto u = rng.relativeT<T>(k, 0., 1.);
            for (Nd4jLong e = 0; e < step; e++) {
                auto p = math::nd4j_exp<T, T>(-lambda->t<T>(e));
                auto s = p;
                auto x = T(0.f);
                while (u > s) {
                    x += 1.f;
                    p *= directLa ? lambdaBuf[e] / x : lambda->t<T>(e) / x;
                    s += p;
                }
                if (directOut)
                    outputBuf[pos + e] = x;
                else
                    output->t<T>(pos + e) = x;
            }
        }
    }

    void fillRandomPoisson(LaunchContext* context, graph::RandomGenerator& rng, NDArray* lambda, NDArray* output) {
        BUILD_SINGLE_SELECTOR(output->dataType(), fillRandomPoisson_, (context, rng, lambda, output), FLOAT_NATIVE);
    }
    BUILD_SINGLE_TEMPLATE(template void fillRandomPoisson_, (LaunchContext* context,
            graph::RandomGenerator& rng, NDArray* lambda, NDArray* output), FLOAT_TYPES);
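
    // fillRandomUniform_: fills 'output' with uniform random values in [minVal, maxVal]. The bounds
    // are read from the optional scalar arrays 'min' and 'max' (defaulting to 0 and the maximum of
    // the data type). Real-valued outputs are delegated to RandomLauncher::fillUniform; integer
    // outputs are filled element-wise through rng.relativeT.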
    template <typename T>
    void fillRandomUniform_(LaunchContext* context, graph::RandomGenerator& rng, NDArray* min, NDArray* max, NDArray* output) {
        T minVal = T(0);
        T maxVal = DataTypeUtils::max<T>();
        if (min)
            minVal = min->t<T>(0);
        if (max)
            maxVal = max->t<T>(0);

        if (output->isR())
            RandomLauncher::fillUniform(context, rng, output, minVal, maxVal);
        else {
            PRAGMA_OMP_PARALLEL_FOR
            for (Nd4jLong i = 0; i < output->lengthOf(); i++) {
                output->t<T>(i) = rng.relativeT<T>(i, minVal, maxVal);
            }
        }
    }

    void fillRandomUniform(LaunchContext* context, graph::RandomGenerator& rng, NDArray* min, NDArray* max, NDArray* output) {
        BUILD_SINGLE_SELECTOR(output->dataType(), fillRandomUniform_, (context, rng, min, max, output), NUMERIC_TYPES);
    }
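
    // Minimal usage sketch (illustrative comment only; assumes NDArrayFactory, LaunchContext::defaultContext()
    // and the RandomGenerator(rootSeed, nodeSeed) constructor available elsewhere in the code base):
    //
    //     nd4j::graph::RandomGenerator rng(119, 42);                // arbitrary root/node seeds
    //     auto min = NDArrayFactory::create<float>(0.f);            // scalar lower bound
    //     auto max = NDArrayFactory::create<float>(1.f);            // scalar upper bound
    //     auto output = NDArrayFactory::create<float>('c', {2, 3});
    //     fillRandomUniform(LaunchContext::defaultContext(), rng, &min, &max, &output);
    //     // 'output' now holds uniform values in [0, 1]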

    // used https://en.wikipedia.org/wiki/Categorical_distribution
    // methods: gumbel trick + softmax + argmax
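    // fillRandomMultiNomial_: draws 'numOfSamples' class indices per batch from the categorical
    // distribution defined by the unnormalized log-probabilities in 'input', using the Gumbel-max
    // trick: argmax_i (logit_i - log(-log(u_i))) with u_i uniform in (0, 1), which is equivalent to
    // sampling from softmax(logits) without normalizing. 'dimC' selects the batch dimension; the
    // other dimension ('dimA') holds classes in the input and samples in the output. rewindH at the
    // end advances the generator so subsequent ops do not reuse the same random stream.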
    template <typename Tx, typename Tz>
    void fillRandomMultiNomial_(LaunchContext* context, graph::RandomGenerator& rng, NDArray& input, NDArray& output, const Nd4jLong numOfSamples, const int dimC) {

        const Tx* x = input.bufferAsT<Tx>();
        Tz* z = output.bufferAsT<Tz>();

        Tx minVal = DataTypeUtils::min<Tx>();
        Tx maxVal = 1.0;

        auto dimA = (0 == dimC) ? 1 : 0;
        const Nd4jLong batchValue = output.sizeAt(dimC);
        const Nd4jLong numOfClassX = input.sizeAt(dimA);

        const Nd4jLong zDimAstride = output.stridesOf()[dimA];
        const Nd4jLong xDimAstride = input.stridesOf()[dimA];
        const Nd4jLong zDimCstride = output.stridesOf()[dimC];
        const Nd4jLong xDimCstride = input.stridesOf()[dimC];

        auto func = PRAGMA_THREADS_FOR_2D {
            for (auto nBatchIndex = start_x; nBatchIndex < stop_x; nBatchIndex += inc_x) {
                for (auto nSampleIndexInBatch = start_y; nSampleIndexInBatch < stop_y; nSampleIndexInBatch += inc_y) {

                    const Tx* xTad = x + (nBatchIndex * xDimCstride);
                    Tz* zTad = z + (nBatchIndex * zDimCstride);
                    Tz& arg = zTad[nSampleIndexInBatch * zDimAstride];
                    Tx Max = -minVal;

                    auto nSamplesPerBatch = nBatchIndex * numOfClassX * numOfSamples;
                    auto nClassesPerSample = nSampleIndexInBatch * numOfClassX;
                    for (Nd4jLong nClass = 0; nClass < numOfClassX; nClass += 1) {
                        auto nIndex = nSamplesPerBatch + nClassesPerSample + nClass;
                        auto uniformLog = nd4j::math::nd4j_log<Tx, Tx>(-nd4j::math::nd4j_log<Tx, Tx>(rng.relativeT<Tx>(nIndex, minVal, maxVal)));
                        Tx tValue = (xTad[nClass * xDimAstride] - uniformLog);
                        if (tValue > Max) {
                            Max = tValue;
                            arg = nClass;
                        }
                    }
                }
            }
        };

        samediff::Threads::parallel_for(func, 0, batchValue, 1, 0, numOfSamples, 1);
        rng.rewindH(output.lengthOf() * numOfClassX);

        return;
    }

    void fillRandomMultiNomial(LaunchContext* context, graph::RandomGenerator& rng, NDArray& input, NDArray& output, const Nd4jLong numOfSamples, const int dimC) {
        BUILD_DOUBLE_SELECTOR(input.dataType(), output.dataType(), fillRandomMultiNomial_, (context, rng, input, output, numOfSamples, dimC), FLOAT_TYPES, INDEXING_TYPES);
    }

}
}
}