// cavis/libnd4j/include/ops/declarable/headers/parity_ops.h


/*
* ******************************************************************************
* *
* *
* * This program and the accompanying materials are made available under the
* * terms of the Apache License, Version 2.0 which is available at
* * https://www.apache.org/licenses/LICENSE-2.0.
* *
* * See the NOTICE file distributed with this work for additional
* * information regarding copyright ownership.
* * Unless required by applicable law or agreed to in writing, software
* * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* * License for the specific language governing permissions and limitations
* * under the License.
* *
* * SPDX-License-Identifier: Apache-2.0
* *****************************************************************************
*/
//
// @author raver119@gmail.com
//
#ifndef LIBND4J_HEADERS_PARITY_H
#define LIBND4J_HEADERS_PARITY_H
#include <ops/declarable/headers/common.h>
namespace sd {
namespace ops {
/**
* This operation returns index of max element in a given NDArray (optionally: along given dimension(s))
* Expected input:
* 0: N-dimensional array
* 1: optional axis vector
*
* Int args:
* 0: optional axis
*/
#if NOT_EXCLUDED(OP_argmax)
DECLARE_CUSTOM_OP(argmax, 1, 1, false, 0, -2);
#endif
/**
* This operation returns index of min element in a given NDArray (optionally: along given dimension(s))
* Expected input:
* 0: N-dimensional array
* 1: optional axis vector
*
* Int args:
* 0: optional axis
*/
#if NOT_EXCLUDED(OP_argmin)
DECLARE_CUSTOM_OP(argmin, 1, 1, false, 0, -2);
#endif
/**
* This operation returns index of absolute max element in a given NDArray (optionally: along given dimension(s))
* Expected input:
* 0: N-dimensional array
* 1: optional axis vector
*
* Int args:
* 0: optional axis
*/
#if NOT_EXCLUDED(OP_argamax)
DECLARE_CUSTOM_OP(argamax, 1, 1, false, 0, -2);
#endif
/**
* This operation returns index of absolute min element in a given NDArray (optionally: along given dimension(s))
* Expected input:
* 0: N-dimensional array
* 1: optional axis vector
*
* Int args:
* 0: optional axis
*/
#if NOT_EXCLUDED(OP_argamin)
DECLARE_CUSTOM_OP(argamin, 1, 1, false, 0, -2);
#endif
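/**
 * Usage sketch for the four arg* reductions above (illustrative only; the exact
 * evaluate()/ResultSet API and argument order may differ between library versions):
 *
 *   sd::NDArray x('c', {3, 4}, sd::DataType::FLOAT32);
 *   x.linspace(1.f);                         // rows: [1..4], [5..8], [9..12]
 *   sd::ops::argmax op;
 *   auto res = op.evaluate({&x}, {}, {1});   // int arg 0: axis = 1
 *   auto* idx = res.at(0);                   // shape [3], every entry is 3 (last column)
 */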
/**
* This operation provides various normalization modes:
* 0: frobenius
* 1: euclidean (norm2)
* 2: norm1
* 3: norm2
* 4: inf-norm
* 5: p-norm
*
* Expected arguments:
* input: N-dimensional array
*
*
* Int args:
* 0...: axis
*
* T args:
* 0: norm mode
* 1: p for p-norm
*/
#if NOT_EXCLUDED(OP_norm)
DECLARE_REDUCTION_OP(norm, 1, 1, false, 1, -2);
#endif
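/**
 * Usage sketch for norm (illustrative only; the exact evaluate() signature may vary):
 *
 *   sd::NDArray x('c', {2, 3}, sd::DataType::FLOAT32);
 *   x.linspace(1.f);
 *   sd::ops::norm op;
 *   // T arg 0 picks the mode listed above (1 = euclidean/norm2), int args pick the axis
 *   auto res = op.evaluate({&x}, {1.0}, {1});
 *   auto* out = res.at(0);                    // shape [2]: sqrt(1+4+9), sqrt(16+25+36)
 */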
/**
* Inserts elements provided by diagonal array into the main diagonal of innermost matrices of input array
*
* Input arrays:
* 0: input array, considered as batch of matrices
 * 1: diagonal array containing the elements to be inserted into the input array;
 *    the following rank condition must be satisfied: diagonal_rank = input_rank - 1,
 *    the shapes of the diagonal and input arrays must be equal except for the last dimension of the input array,
 *    for example if input_shape = [A,B,C,D] then diagonal_shape = [A,B,C],
 *    and the last dimension of the diagonal array must equal the smaller of the two innermost input dimensions,
 *    that is: diagonal_shape[-1] = min(input_shape[-1], input_shape[-2])
*
* Output array:
* 0: has the same shape as input, corresponding diagonal elements are substituted
*/
#if NOT_EXCLUDED(OP_matrix_set_diag)
DECLARE_CONFIGURABLE_OP(matrix_set_diag, 2, 1, false, 0, 0);
#endif
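/**
 * Shape sketch for matrix_set_diag (illustrative): for an input of shape [2,3,4] the diagonal
 * must have shape [2,3], and output[i,j,j] = diagonal[i,j] for j in [0,2], while all other
 * elements of the output are copied from the input.
 */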
/**
* Inserts elements provided by diagonal array into the main diagonal of innermost matrices of output array,
 * the remaining output elements are set to zeros
*
* Input array:
 * diagonal: array containing the elements to be inserted into the output array;
 *     the following rank condition holds: diagonal_rank = output_rank - 1
*
* Output array:
 * 0: considered as a batch of matrices; if for example the diagonal array has shape [A,B,C] then the output array has shape [A,B,C,C]
*/
DECLARE_CUSTOM_OP(matrix_diag, 1, 1, false, 0, 0);
/**
 * This op calculates the regularized incomplete beta integral Ix(a, b).
 * The implementation is based on two algorithms, depending on the input values of a and b:
 * - when a and b are both > maxValue (3000.), the Gauss-Legendre quadrature method is applied
 * - when a and b are both <= maxValue (3000.), the modified Lentz's algorithm for continued fractions is applied
*
* Input arrays:
* a: defines power t^{a-1}, must be > 0, type float.
* b: defines power (1-t)^{b-1}, must be > 0, type float.
* x: defines upper limit of integration, must be within (0 <= x <= 1) range, type float.
*
* Output array:
* 0: values of regularized incomplete beta integral that corresponds to variable upper limit x, type float
*
 * All three input arrays and the output array must have the same shape
*/
#if NOT_EXCLUDED(OP_betainc)
DECLARE_CONFIGURABLE_OP(betainc, 3, 1, false, 0, 0);
#endif
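/**
 * For reference, the standard definition of the quantity computed above:
 *
 *   I_x(a, b) = B(x; a, b) / B(a, b), where
 *   B(x; a, b) = integral from 0 to x of t^(a-1) * (1-t)^(b-1) dt   (incomplete beta function)
 *   B(a, b)    = B(1; a, b)                                         (complete beta function)
 */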
/**
 * This operation is provided mostly for compatibility purposes.
 * PLEASE NOTE: consider using Add instead
* Expected arguments:
* 0: N-dimensional input
* 1: bias vector
*/
#if NOT_EXCLUDED(OP_biasadd)
DECLARE_CUSTOM_OP(biasadd, 2, 1, true, 0, 0);
DECLARE_CUSTOM_OP(biasadd_bp, 3, 2, false, 0, 0);
#endif
/**
 * Returns a diagonal tensor with the given diagonal values: given a diagonal, this operation returns a tensor with that diagonal and everything else padded with zeros.
*/
#if NOT_EXCLUDED(OP_diag)
DECLARE_CUSTOM_OP(diag, 1, 1, false, 0, 0);
#endif
/**
 * Returns the diagonal values of a given tensor: this operation is the inverse of diag, extracting the diagonal and discarding everything else.
*/
#if NOT_EXCLUDED(OP_diag_part)
DECLARE_CUSTOM_OP(diag_part, 1, 1, false, 0, 0);
#endif
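/**
 * Worked example (illustrative): diag([1, 2, 3]) produces
 *
 *   [[1, 0, 0],
 *    [0, 2, 0],
 *    [0, 0, 3]]
 *
 * and diag_part applied to that matrix recovers [1, 2, 3].
 */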
/**
 * Returns a diagonal vector for each submatrix within a given tensor.
 * It is the inverse op to matrix_set_diag.
 * Treating the input tensor as a batch of 2D matrices, it flattens their diagonals into 1D vectors of diagonal values.
*
* Input : batched tensor with rank >=2
* Output: tensor with rank lesser by 1 from input
*/
#if NOT_EXCLUDED(OP_matrix_diag_part)
DECLARE_CUSTOM_OP(matrix_diag_part, 1, 1, false, 0, 0);
#endif
/**
 * QR decomposition: A = QR, where Q has orthonormal columns (QT * Q = I) and R is upper triangular.
 * For A (M x N), Q is M x N and R is N x N.
 *
 * Input:
 * 0 - float (or complex float) tensor with shape {.,..,...,M,N} - batch of float matrices
 *
 * Output:
 * 0 - float tensor with shape {.,..,...,M,N} - batch of matrices {Qs} with orthonormal columns
 * 1 - float tensor with shape {.,..,...,N,N} - batch of upper triangular matrices {Rs}
*/
#if NOT_EXCLUDED(OP_qr)
DECLARE_CUSTOM_OP(qr, 1, 2, false, 0, 0);
#endif
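/**
 * Usage sketch for qr (illustrative only; the exact evaluate() signature may vary):
 *
 *   sd::NDArray a('c', {4, 3}, sd::DataType::FLOAT32);   // M = 4, N = 3
 *   a.linspace(1.f);
 *   sd::ops::qr op;
 *   auto res = op.evaluate({&a});
 *   auto* q = res.at(0);   // shape [4, 3], orthonormal columns
 *   auto* r = res.at(1);   // shape [3, 3], upper triangular; q times r reconstructs a (up to rounding)
 */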
/**
 * This operation takes 2 arrays: original values and values to be excluded. It returns 2 arrays: the values left after exclusion, and the indices of those survivors in the original array.
* Expected arguments:
* 0: vector with original values
* 1: vector with values to exclude
*/
#if NOT_EXCLUDED(OP_listdiff)
DECLARE_CUSTOM_OP(listdiff, 2, 2, false, 0, 0);
#endif
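/**
 * Worked example for listdiff (illustrative): with
 *   values  = [1, 2, 3, 4, 5]
 *   exclude = [2, 4]
 * output 0 is [1, 3, 5] (surviving values) and output 1 is [0, 2, 4]
 * (their positions in the original vector).
 */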
/**
* This operation applies Add operation to specific inputs wrt indices
* Expected arguments:
* input: array to be updated
* indices: array containing indexes for first dimension of input
 * updates: array containing the elements to be added to the input at the given indices
*/
#if NOT_EXCLUDED(OP_scatter_add)
DECLARE_OP(scatter_add, 3, 1, true);
#endif
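/**
 * Worked example for scatter_add (illustrative), showing how indices address the first
 * dimension of the input; the same convention applies to the scatter_* ops below:
 *   input   = [[1, 1], [2, 2], [3, 3]]
 *   indices = [0, 2]
 *   updates = [[10, 10], [20, 20]]
 * after the op, row 0 becomes [11, 11], row 2 becomes [23, 23] and row 1 is untouched.
 */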
/**
* This operation applies Subtract operation to specific inputs wrt indices
* Expected arguments:
* input: array to be updated
* indices: array containing indexes for first dimension of input
 * updates: array containing the elements to be subtracted from the input at the given indices
*/
#if NOT_EXCLUDED(OP_scatter_sub)
DECLARE_OP(scatter_sub, 3, 1, true);
#endif
/**
* This operation applies Multiply operation to specific inputs wrt indices
* Expected arguments:
* input: array to be updated
* indices: array containing indexes for first dimension of input
 * updates: array containing the elements the input is multiplied by at the given indices
*/
#if NOT_EXCLUDED(OP_scatter_mul)
DECLARE_OP(scatter_mul, 3, 1, true);
#endif
/**
* This operation applies Divide operation to specific inputs wrt indices
* Expected arguments:
* input: array to be updated
* indices: array containing indexes for first dimension of input
 * updates: array containing the elements the input is divided by at the given indices
*/
#if NOT_EXCLUDED(OP_scatter_div)
DECLARE_OP(scatter_div, 3, 1, true);
#endif
/**
* This operation applies Assign operation to specific inputs wrt indices
* Expected arguments:
* input: array to be updated
* indices: array containing indexes for first dimension of input
 * updates: array containing the elements to be assigned to the input at the given indices
*/
#if NOT_EXCLUDED(OP_scatter_upd)
DECLARE_OP(scatter_upd, 3, 1, true);
#endif
/**
* This operation applies Max operation to specific inputs through given indices
* Expected arguments:
* input: array to be updated
* indices: array containing indexes for first dimension of input
 * updates: array containing the elements to take the element-wise maximum with the input at the given indices
*/
#if NOT_EXCLUDED(OP_scatter_max)
DECLARE_OP(scatter_max, 3, 1, true);
#endif
/**
* This operation applies Min operation to specific inputs through given indices
* Expected arguments:
* input: array to be updated
* indices: array containing indexes for first dimension of input
 * updates: array containing the elements to take the element-wise minimum with the input at the given indices
*/
#if NOT_EXCLUDED(OP_scatter_min)
DECLARE_OP(scatter_min, 3, 1, true);
#endif
/**
 * This operation scatters "updates" elements into a new output array according to the given "indices"
* Expected arguments:
 * indices: array containing the element/slice indices of the output array to put "updates" elements into; the remaining output elements will be zeros
* updates: array containing elements to be inserted into output array
* shape: contains shape of output array
*/
#if NOT_EXCLUDED(OP_scatter_nd)
DECLARE_CUSTOM_OP(scatter_nd, 3, 1, false, 0, 0);
#endif
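/**
 * Worked example for scatter_nd (illustrative): with
 *   indices = [[1], [3]]   (shape [2, 1])
 *   updates = [10, 20]     (shape [2])
 *   shape   = [5]
 * the output is [0, 10, 0, 20, 0]; positions not referenced by indices stay zero.
 */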
/**
 * This operation scatters "updates" elements into the input array along the given "indices"
* Expected arguments:
* input: array to be updated
 * indices: array containing the element/slice indices of the input array to put "updates" elements into
* updates: array containing elements to be inserted into input array
*/
#if NOT_EXCLUDED(OP_scatter_nd_update)
DECLARE_OP(scatter_nd_update, 3, 1, true);
#endif
/**
* This operation adds "updates" elements to input array along given "indices"
* Expected arguments:
* input: array to be updated
 * indices: array containing the element/slice indices of the input array to add "updates" elements to
 * updates: array containing the elements to be added to the input at the given indices
*/
#if NOT_EXCLUDED(OP_scatter_add)
DECLARE_OP(scatter_nd_add, 3, 1, true);
#endif
/**
 * This operation subtracts "updates" elements from the input array along the given "indices"
* Expected arguments:
* input: array to be updated
 * indices: array containing the element/slice indices of the input array to subtract "updates" elements from
 * updates: array containing the elements to be subtracted from the input at the given indices
*/
#if NOT_EXCLUDED(OP_scatter_sub)
DECLARE_OP(scatter_nd_sub, 3, 1, true);
#endif
/**
 * This operation takes the input's shape and returns a new NDArray filled with the specified value
* Expected arguments:
* input: N-dimensional array
*
* T args:
* 0: scalar value, used to fill NDArray
*/
#if NOT_EXCLUDED(OP_fill_as)
DECLARE_CONFIGURABLE_OP(fill_as, 1, 1, true, 1, 0);
#endif
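/**
 * Usage sketch for fill_as (illustrative only; the exact evaluate() signature and
 * result handling may vary):
 *
 *   sd::NDArray x('c', {2, 3}, sd::DataType::FLOAT32);
 *   sd::ops::fill_as op;
 *   auto res = op.evaluate({&x}, {42.0}, {});   // T arg 0: fill value
 *   auto* out = res.at(0);                      // shape [2, 3], every element equals 42
 */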
/**
* This operation applies element-wise rint (round to integral value) operation
*/
#if NOT_EXCLUDED(OP_rint)
DECLARE_OP(rint, 1, 1, true);
#endif
/**
* This operation returns unique elements from input array as vector, and their original indices in input array
* Expected input:
* input: N-dimensional array
*/
#if NOT_EXCLUDED(OP_unique)
DECLARE_CUSTOM_OP(unique, 1, 2, false, 0, 0);
#endif
/**
* This operation returns 3 1D arrays for a given 1D array: unique values, their indices, and their counts
* input:
* 0 - 1D array
*
* output:
* 0 - 1D array with unique values
* 1 - 1D array with ids for values in array above
* 2 - 1D array with counts for values in array above
*/
#if NOT_EXCLUDED(OP_unique_with_counts)
DECLARE_CUSTOM_OP(unique_with_counts, 1, 3, false, 0, 0);
#endif
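/*
 * Usage sketch, under the same assumptions as the fill_as example above
 * (test-style evaluate() API):
 *
 *   auto x = NDArrayFactory::create<float>('c', {5}, {1.f, 2.f, 1.f, 2.f, 3.f});
 *   sd::ops::unique_with_counts op;
 *   auto res = op.evaluate({&x}, {}, {});
 *   // res.at(0) -> [1, 2, 3]         unique values
 *   // res.at(1) -> [0, 1, 0, 1, 2]   index into the unique values for every input element
 *   // res.at(2) -> [2, 2, 1]         occurrence counts
 */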
/**
* This operation splits input NDArray into multiple TADs along given dimensions
* Expected arguments:
* input: N-dimensional array
*
* Int args:
* 0..: TAD axis
*/
#if NOT_EXCLUDED(OP_tear)
DECLARE_CUSTOM_OP(tear, 1, -1, false, 0, -1);
#endif
/**
* This op does the same as tear, just uses a different input format:
* the axis to unstack along is passed as a single integer argument.
*/
#if NOT_EXCLUDED(OP_unstack)
DECLARE_CUSTOM_OP(unstack, 1, -1, false, 0, 1);
#endif
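/*
 * Usage sketch, same assumptions as the earlier examples; the single integer
 * argument is the axis:
 *
 *   auto x = NDArrayFactory::create<float>('c', {3, 4});
 *   sd::ops::unstack op;
 *   auto res = op.evaluate({&x}, {}, {0});
 *   // res holds 3 outputs, each of shape {4} (one per slice along axis 0)
 */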
/**
* This operation extracts an (optionally strided) slice from a tensor.
*/
#if NOT_EXCLUDED(OP_strided_slice)
DECLARE_CUSTOM_OP(strided_slice, 1, 1, false, 0, 5); // TODO: new op type needed. that returns VIEW
DECLARE_CUSTOM_OP(strided_slice_bp, 2, 1, false, 0, 5);
#endif
/**
* This operation extracts a slice from a tensor.
*
*/
#if NOT_EXCLUDED(OP_slice)
DECLARE_CUSTOM_OP(slice, 1, 1, false, 0, -2);
DECLARE_CUSTOM_OP(slice_bp, 2, 1, false, 0, -2);
#endif
/**
* This operation generates sequences: basically from...to, with step used as increment.
* Expected arguments:
* start: optional scalar with starting value
* stop: optional scalar with end value
* step: optional scalar with step value
*
* Int args: (optional)
* 0: optional scalar with starting value
* 1: optional scalar with end value
* 2: optional scalar with step value
*
* T args: (optional)
* 0: optional scalar with starting value
* 1: optional scalar with end value
* 2: optional scalar with step value
*/
#if NOT_EXCLUDED(OP_range)
DECLARE_CUSTOM_OP(range, -2, 1, false, -2, -2);
#endif
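/*
 * Usage sketch, same assumptions as the earlier examples, passing
 * start/stop/step as T args (stop is exclusive, as in the usual range semantics):
 *
 *   sd::ops::range op;
 *   auto res = op.evaluate({}, {1.f, 10.f, 2.f}, {});
 *   // res.at(0) -> [1, 3, 5, 7, 9]
 */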
/**
* This operation returns one-hot encoded n-dimensional array
* Expected arguments:
* input: N-dimensional array
*
* T args:
* 0: 'on' value
* 1: 'off' value
*
* Int args:
* 0: depth
* 1: axis
*/
#if NOT_EXCLUDED(OP_onehot)
DECLARE_CUSTOM_OP(onehot, 1, 1, false, -2, -2);
#endif
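/*
 * Usage sketch, same assumptions as the earlier examples, following the
 * argument order documented here (T args: on, off; Int args: depth, axis):
 *
 *   auto idx = NDArrayFactory::create<int>('c', {3}, {0, 2, 1});
 *   sd::ops::onehot op;
 *   auto res = op.evaluate({&idx}, {1.f, 0.f}, {3, -1});
 *   // res.at(0) -> [[1, 0, 0],
 *   //               [0, 0, 1],
 *   //               [0, 1, 0]]
 */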
/**
* This operation calculates the confusion matrix for a
* pair of prediction and label 1-D arrays.
* Expected arguments:
* Input arrays:
* 0 - predictions: 1-D array
* 1 - labels: 1-D array
* 2 - weights : optional
* Int args:
* 0 - num_classes: optional
*
*/
#if NOT_EXCLUDED(OP_confusion_matrix)
DECLARE_CUSTOM_OP(confusion_matrix, 2, 1, false, 0, -2);
#endif
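/*
 * Usage sketch, same assumptions as the earlier examples, with num_classes
 * passed as the single integer argument:
 *
 *   auto pred   = NDArrayFactory::create<int>('c', {3}, {0, 1, 1});
 *   auto labels = NDArrayFactory::create<int>('c', {3}, {0, 1, 0});
 *   sd::ops::confusion_matrix op;
 *   auto res = op.evaluate({&pred, &labels}, {}, {2});
 *   // res.at(0) -> a 2x2 matrix of counts, one cell incremented per (prediction, label) pair
 */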
/**
* This operation stacks a list of rank-R tensors into one rank-(R+1) tensor.
* Expected arguments:
* 0...: N-Dimensional arrays to stack
*
*/
#if NOT_EXCLUDED(OP_stack)
DECLARE_CUSTOM_OP(stack, -1, 1, false, 0, 0);
#endif
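/*
 * Usage sketch, same assumptions as the earlier examples (axis 0 by default):
 *
 *   auto a = NDArrayFactory::create<float>('c', {2}, {1.f, 2.f});
 *   auto b = NDArrayFactory::create<float>('c', {2}, {3.f, 4.f});
 *   sd::ops::stack op;
 *   auto res = op.evaluate({&a, &b}, {}, {});
 *   // res.at(0) -> a 2x2 array [[1, 2], [3, 4]]
 */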
/**
* This operation returns length of input array
* Expected arguments:
* input: N-dimensional array
*
* TODO: make this operation reduction, to allow TAD -> size
*/
#if NOT_EXCLUDED(OP_size)
DECLARE_CUSTOM_OP(size, 1, 1, false, 0, 0); // add DeclarableScalarOp?
#endif
/**
* This operation returns rank of input array as scalar value.
*/
#if NOT_EXCLUDED(OP_rank)
DECLARE_CUSTOM_OP(rank, 1, 1, false, 0, 0); // ^
#endif
#if NOT_EXCLUDED(OP_broadcastgradientargs)
DECLARE_OP(broadcastgradientargs, 2, 2, true);
#endif
/**
* This operation takes input's shape, and returns new NDArray filled with zeros
* Expected arguments:
* input: N-dimensional array
*
*/
#if NOT_EXCLUDED(OP_zeros_as)
DECLARE_CUSTOM_OP(zeros_as, 1, 1, false, 0, 0);
#endif
/**
* This operation takes input's shape, and returns new NDArray filled with ones
* Expected arguments:
* input: N-dimensional array
*
*/
#if NOT_EXCLUDED(OP_ones_as)
DECLARE_CUSTOM_OP(ones_as, 1, 1, false, 0, 0);
#endif
/**
* This operation applies element-wise pow(x, 2) to the given input
* Expected arguments:
* input: N-Dimensional array
*/
#if NOT_EXCLUDED(OP_square)
DECLARE_OP(square, 1, 1, true);
#endif
/**
* This op calculates Hurwitz zeta function zeta(x, q) = sum_{n=0}^{inf} (q + n)^{-x}
* Implementation is based on Euler-Maclaurin summation formula
*
* Input arrays:
* x: defines the power {-x}, must be > 1, type float.
* q: defines the summand in denominator, must be > 0, type float.
*
* Output array:
* 0: corresponding values of Hurwitz zeta function
*
* The two input arrays and the output array must have the same shape
*/
#if NOT_EXCLUDED(OP_zeta)
DECLARE_CONFIGURABLE_OP(zeta, 2, 1, false, 0, 0);
#endif
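/*
 * Numeric sanity check / usage sketch, same assumptions as the earlier
 * examples. With q = 1 the Hurwitz zeta reduces to the Riemann zeta, so
 * zeta(2, 1) = pi^2 / 6:
 *
 *   auto x = NDArrayFactory::create<float>('c', {1}, {2.f});
 *   auto q = NDArrayFactory::create<float>('c', {1}, {1.f});
 *   sd::ops::zeta op;
 *   auto res = op.evaluate({&x, &q}, {}, {});
 *   // res.at(0) -> [~1.6449]
 */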
/**
* This op calculates polygamma function psi^(n)(x). Implementation is based on serial representation written in
* terms of the Hurwitz zeta function: polygamma = (-1)^{n+1} * n! * zeta(n+1, x).
*
* Input arrays:
* 0: n - defines derivative order (n+1), type integer (however currently implemented as float cast to integer)
* 1: x - abscissa points where to evaluate the polygamma function, type float
*
* Output array:
* 0: values of polygamma function at corresponding x, type float
*
* The two input arrays and the output array have the same shape
*/
#if NOT_EXCLUDED(OP_polygamma)
DECLARE_CONFIGURABLE_OP(polygamma, 2, 1, false, 0, 0);
#endif
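/*
 * Numeric sanity check / usage sketch, same assumptions as the earlier
 * examples. For n = 1, x = 1 the series above gives
 * (-1)^{2} * 1! * zeta(2, 1) = pi^2 / 6:
 *
 *   auto n = NDArrayFactory::create<float>('c', {1}, {1.f});
 *   auto x = NDArrayFactory::create<float>('c', {1}, {1.f});
 *   sd::ops::polygamma op;
 *   auto res = op.evaluate({&n, &x}, {}, {});
 *   // res.at(0) -> [~1.6449]
 */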
/**
* This op calculates lgamma function lgamma(x) = log(Gamma(x))
*
* Input arrays:
* 0: x - input matrix
*
* Output array:
* 0: log of Gamma(x)
*
*/
#if NOT_EXCLUDED(OP_lgamma)
DECLARE_OP(lgamma, 1, 1, true);
#endif
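/*
 * Worked value (illustration, not from this header): Gamma(4) = 3! = 6,
 * so lgamma(4) = log(6) ~ 1.7918.
 */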
/**
* This op calculates digamma function psi(x) = derivative of log(Gamma(x))
*
* Input arrays:
* 0: x - abscissa points where to evaluate the digamma function, type float
*
* Output array:
* 0: values of digamma function at corresponding x, type float
*
*/
#if NOT_EXCLUDED(OP_digamma)
DECLARE_CONFIGURABLE_OP(digamma, 1, 1, false, 0, 0);
#endif
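/*
 * Worked value (illustration, not from this header): digamma(1) equals the
 * negative Euler-Mascheroni constant, ~ -0.57722.
 */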
/**
* This operation takes shape as first argument, and returns new NDArray filled with specific scalar value.
* Input arrays:
* 0 - shape vector
* 1 - optional scalar NDArray
*
* T arguments:
* 0 - optional scalar value
*
*/
#if NOT_EXCLUDED(OP_fill)
DECLARE_CUSTOM_OP(fill, 1, 1, false, -2, 0);
#endif
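/*
 * Usage sketch, same assumptions as the earlier examples; input 0 is the
 * shape vector and the scalar value comes from the T argument:
 *
 *   auto shape = NDArrayFactory::create<int>('c', {2}, {2, 3});
 *   sd::ops::fill op;
 *   auto res = op.evaluate({&shape}, {7.f}, {});
 *   // res.at(0) -> a 2x3 array with every element equal to 7
 */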
/**
* This operation splits given NDArray into chunks of specific size, along given dimension
* Input arrays:
* 0 - input array
* 1 - array of sizes
* 2 - optional axis
*
* Integer arguments:
* 0 - optional axis
*
*/
#if NOT_EXCLUDED(OP_split_v)
DECLARE_CUSTOM_OP(split_v, 2, -1, false, 0, -2);
#endif
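/*
 * Usage sketch, same assumptions as the earlier examples; sizes come from the
 * second input, the axis from the integer argument:
 *
 *   auto x     = NDArrayFactory::create<float>('c', {5}, {1.f, 2.f, 3.f, 4.f, 5.f});
 *   auto sizes = NDArrayFactory::create<int>('c', {2}, {2, 3});
 *   sd::ops::split_v op;
 *   auto res = op.evaluate({&x, &sizes}, {}, {0});
 *   // res.at(0) -> [1, 2], res.at(1) -> [3, 4, 5]
 */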
/**
* This operation splits given NDArray into a given number of equal-sized chunks along given dimension
* 0 - input array
* 1 - optional axis
*
* Integer arguments:
* 0 - number of splits
* 1 - optional axis
*/
#if NOT_EXCLUDED(OP_split)
DECLARE_CUSTOM_OP(split, 1, -1, false, 0, 1);
#endif
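/*
 * Usage sketch, same assumptions as the earlier examples, following the
 * integer-argument order documented here (number of splits, then axis):
 *
 *   auto x = NDArrayFactory::create<float>('c', {4, 6});
 *   sd::ops::split op;
 *   auto res = op.evaluate({&x}, {}, {3, 1});
 *   // three outputs, each of shape {4, 2}
 */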
/**
* This operation adjusts image hue by delta
* Input arrays:
* 0 - input array with rank >= 3, must have at least one dimension equal to 3, that is, the dimension containing channels.
* 1 - optional argument, input scalar-array containing delta
*
* T arguments:
* 0 - optional argument, delta value
*
* Int arguments:
* 0 - optional argument, corresponds to dimension with 3 channels
*/
#if NOT_EXCLUDED(OP_adjust_hue)
DECLARE_CONFIGURABLE_OP(adjust_hue, 1, 1, true, 0, 0);
#endif
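/*
 * Usage sketch, same assumptions as the earlier examples; delta comes from the
 * T argument, the channel dimension from the optional integer argument:
 *
 *   auto image = NDArrayFactory::create<float>('c', {8, 8, 3});
 *   sd::ops::adjust_hue op;
 *   auto res = op.evaluate({&image}, {0.25}, {2});
 *   // res.at(0) -> same shape as the input, with hue shifted by 0.25
 */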
/**
* This operation adjusts image saturation by delta
* Input arrays:
* 0 - input array with rank >= 3, must have at least one dimension equal to 3, that is, the dimension containing channels.
* 1 - optional argument, input scalar-array containing saturation factor
*
* T arguments:
* 0 - optional argument, saturation factor
*
* Int arguments:
* 0 - optional argument, index of the dimension that holds the 3 channels
*/
#if NOT_EXCLUDED(OP_adjust_saturation)
DECLARE_CONFIGURABLE_OP(adjust_saturation, 1, 1, true, 0, 0);
#endif
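/*
* Usage sketch for adjust_saturation (illustrative only; assumes the DeclarableOp::evaluate()
* helper used throughout the libnd4j test suite, and an NHWC image NDArray named "image"):
*
*   sd::ops::adjust_saturation op;
*   auto result = op.evaluate({&image}, {0.5}, {});   // T arg 0: saturation factor
*
* A factor of 1.0 leaves the image unchanged, a factor of 0.0 yields a fully desaturated image.
*/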
/**
* This operation adjusts image contrast by a given factor ( z = (x - mean) * factor + mean )
* Input arrays:
* 0 - input array with rank >= 3, its last dimension must be equal to 3, i.e. the dimension containing channels.
* 1 - optional argument, input scalar-array containing the contrast factor
*
* T arguments:
* 0 - optional argument, contrast factor
*
*/
#if NOT_EXCLUDED(OP_adjust_contrast)
DECLARE_CONFIGURABLE_OP(adjust_contrast, 1, 1, true, 0, 0);
DECLARE_CONFIGURABLE_OP(adjust_contrast_v2, 1, 1, true, 0, 0);
#endif
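/*
* Worked example for adjust_contrast (illustrative): with factor 2 applied to a channel holding
* {1, 2, 3}, the channel mean is 2, so z = (x - mean) * factor + mean gives {0, 2, 4};
* a factor of 1 leaves the input unchanged.
*/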
/**
* This operation rearranges data from depth into blocks of spatial data. This is the reverse transformation
* of the space_to_depth op. This op's output is a copy of the input tensor where values from the depth dimension
* are moved in spatial blocks to the height and width dimensions. Int attr 0 indicates the input
* block size and how the data is moved.
* Input:
* 0 - 4D tensor of given type
* Output:
* 0 - 4D tensor of given type and proper shape
*
* Int arguments:
* 0 - block size
* 1 - output data format: 0 ("NHWC"): shape{ batch, height, width, channels }
* 1 ("NCHW"): shape{ batch, channels, height, width }
* 2 ("NCHW_VECT_C"): int8 shape{ batch, channels / 4, height, width, 4 }
* optional (default 0)
*/
#if NOT_EXCLUDED(OP_depth_to_space)
DECLARE_CUSTOM_OP(depth_to_space, 1, 1, false, 0, -1);
#endif
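/*
* Worked example for depth_to_space (illustrative, NHWC / format 0): a {1, 1, 1, 4} input holding
* {1, 2, 3, 4} with block size 2 becomes a {1, 2, 2, 1} output whose height x width plane is
* [[1, 2], [3, 4]] - depth shrinks by block_size^2 while height and width grow by block_size.
*/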
/**
* This operation rearranges blocks of spatial data into depth. This op's output is a copy of the input tensor
* where values from the height and width dimensions are moved to the depth dimension. Int attr 0 indicates
* the input block size.
*
* Input:
* - 4D tensor of given type
* Output:
* - 4D tensor
*
* Int arguments:
* 0 - block size
* 1 - output data format: 0 ("NHWC"): shape{ batch, height, width, channels }
* 1 ("NCHW"): shape{ batch, channels, height, width }
* 2 ("NCHW_VECT_C"): int8 shape{ batch, channels / 4, height, width, 4 }
* optional (default 0)
*
*/
#if NOT_EXCLUDED(OP_space_to_depth)
DECLARE_CUSTOM_OP(space_to_depth, 1, 1, false, 0, -1);
#endif
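/*
* Worked example for space_to_depth (illustrative, NHWC / format 0): a {1, 2, 2, 1} input whose
* height x width plane is [[1, 2], [3, 4]] with block size 2 becomes a {1, 1, 1, 4} output holding
* {1, 2, 3, 4} - the exact inverse of the depth_to_space example above.
*/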
/**
* This op calculates the cross-product of its input arguments
* Input arguments
* 0 - vector or tensor A
* 1 - vector or tensor B
*/
#if NOT_EXCLUDED(OP_cross)
DECLARE_OP(cross, 2, 1, false);
#endif
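/*
* Worked example for cross (illustrative): cross({1, 0, 0}, {0, 1, 0}) = {0, 0, 1}; for higher-rank
* inputs the cross-product is taken along the last dimension, which is expected to have length 3.
*/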
/**
* Zero-pads and then rearranges (permutes) blocks of spatial data into batch. More specifically, this op
* outputs a copy of the input tensor where values from the height and width dimensions are moved to the
* batch dimension. After the zero-padding, both height and width of the input must be divisible by the block
* size.
*
* Inputs:
* 0 - input tensor
* 1 - 2D paddings tensor (shape {M, 2})
*
* Output:
* - result tensor
*
* Int args:
* 0 - block size (M)
*
*/
#if NOT_EXCLUDED(OP_space_to_batch)
DECLARE_CUSTOM_OP(space_to_batch, 2, 1, false, 0, 1);
#endif
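/*
* Worked example for space_to_batch (illustrative, zero paddings): a {1, 4, 4, 1} input with block
* size 2 becomes a {4, 2, 2, 1} output - the batch dimension grows by block_size^2 while height and
* width shrink by block_size, each output batch entry holding one offset within the 2x2 blocks.
*/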
/*
* This operation divides "spatial" dimensions [1, ..., M] of the input into a grid of blocks of shape
* block_shape, and interleaves these blocks with the "batch" dimension (0) such that in the output,
* the spatial dimensions [1, ..., M] correspond to the position within the grid, and the batch dimension
* combines both the position within a spatial block and the original batch position. Prior to division into
* blocks, the spatial dimensions of the input are optionally zero padded according to paddings.
*
* Inputs:
* 0 - input (N-D tensor)
* 1 - block_shape - int 1D tensor with M length
* 2 - paddings - int 2D tensor with shape {M, 2}
*
* Output:
* - N-D tensor with the same type as input 0.
*
* */
#if NOT_EXCLUDED(OP_space_to_batch_nd)
DECLARE_CUSTOM_OP(space_to_batch_nd, 3, 1, false, 0, 0);
#endif
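/*
* Shape sketch for space_to_batch_nd (illustrative, zero paddings): with block_shape = {2, 2} a
* {1, 4, 4, 1} input produces a {4, 2, 2, 1} output - the batch dimension is multiplied by
* prod(block_shape) and each spatial dimension i is divided by block_shape[i] after padding.
*/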
/**
* This operation is the reverse transformation of space_to_batch: entries of the batch dimension
* are rearranged back into blocks of spatial data, which are then cropped according to the crops input.
*/
#if NOT_EXCLUDED(OP_batch_to_space)
DECLARE_CUSTOM_OP(batch_to_space, 2, 1, false, 0, 1);
#endif
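/*
* Worked example for batch_to_space (illustrative, zero crops): a {4, 2, 2, 1} input with block
* size 2 becomes a {1, 4, 4, 1} output, reversing the space_to_batch example above.
*/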
#if NOT_EXCLUDED(OP_batch_to_space_nd)
DECLARE_CUSTOM_OP(batch_to_space_nd, 3, 1, false, 0, 0);
#endif
/**
* top_k operation returns the k largest values of the given NDArray, taken along its last dimension,
* together with their indices; with the default boolean argument (true) the returned
* values are sorted in descending order.
* The first parameter is the input NDArray.
* The second is k (default 1) - optional
* The third is a boolean value (default true): 0 - as is, 1 - sorted by value - optional
*/
#if NOT_EXCLUDED(OP_top_k)
DECLARE_CUSTOM_OP(top_k, 1, 2, false, 0, -1);
#endif
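/*
* Worked example for top_k (illustrative): for input {3, 1, 4, 1, 5} and k = 2 the op returns two
* arrays - values {5, 4} and indices {4, 2} - since sorting by value is enabled by default.
*/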
/**
* in_top_k operation returns a vector of boolean values indicating, for each row of the given
* 2D NDArray of predictions, whether the target index is among that row's k top values.
* The first parameter is an NDArray of predicted values (2D array).
* The second is an NDArray vector of target indices to be searched for among the top k values.
* The third is k
*/
#if NOT_EXCLUDED(OP_in_top_k)
DECLARE_CUSTOM_OP(in_top_k, 2, 1, true, 1, 1);
#endif
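/*
* Worked example for in_top_k (illustrative): for predictions {{0.1, 0.7, 0.2}, {0.3, 0.2, 0.5}},
* targets {1, 0} and k = 1 the result is {true, false} - class 1 is the top prediction of row 0,
* while class 0 is not the top prediction of row 1.
*/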
/**
* moments operation calculates the mean and the variance of the given NDArray,
* reducing the result according to the given axis array.
* When all axes are reduced, the result is the mean and variance over all elements of the array.
* Otherwise two NDArrays are returned, holding the means and variances along the remaining dimensions.
* Axes can be passed either as a second NDArray or as an int vector.
*
* The optional flag "keep_dims" can be set as a T param.
*/
#if NOT_EXCLUDED(OP_moments)
DECLARE_CUSTOM_OP(moments, 1, 2, false, 0, -2);
#endif
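/*
* Worked example for moments (illustrative, assuming the population-variance convention of
* tf.nn.moments): for input {1, 2, 3, 4} reduced over all axes the op returns mean = 2.5 and
* variance = mean((x - mean)^2) = 1.25.
*/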
/**
* embedding_lookup - searches for sub-arrays (rows) in the given matrix and returns them
* according to the given index array.
*/
#if NOT_EXCLUDED(OP_embedding_lookup)
DECLARE_CUSTOM_OP(embedding_lookup, 2, 1, false, 0, 1);
#endif
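/*
* Worked example for embedding_lookup (illustrative): for a params matrix with rows r0..r3 and an
* index array {3, 1}, the output stacks the sub-arrays r3 and r1 in the order given by the indices.
*/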
/**
* dynamic_partition - partitions an input tensor into num_partitions output tensors
* according to the given index array.
*
* the first param - NDArray to be partitioned.
* the second param - index array
* the third param (integer param) - number of partitions.
*
* returns num_partitions NDArrays as output
*/
#if NOT_EXCLUDED(OP_dynamic_partition)
DECLARE_CUSTOM_OP(dynamic_partition, 2, 1, false, 0, 1);
#endif
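/*
* Worked example for dynamic_partition (illustrative): data {1, 2, 3, 4, 5} with index array
* {0, 1, 0, 1, 0} and num_partitions = 2 produces output 0 = {1, 3, 5} and output 1 = {2, 4}.
*/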
#if NOT_EXCLUDED(OP_dynamic_partition_bp)
DECLARE_CUSTOM_OP(dynamic_partition_bp, 3, 2, false, 0, 1);
#endif
/**
* dynamic_stitch - merges the partition tensors given as the second and following params into a
* single tensor, according to the given index arrays.
*
* the first param - index array
* the second and following params - tensors to be merged
*
* returns a single merged NDArray as output
*
* the operation is the inverse of dynamic_partition
*/
#if NOT_EXCLUDED(OP_dynamic_stitch)
DECLARE_CUSTOM_OP(dynamic_stitch, 2, 1, false, 0, 0);
#endif
/**
* zero_fraction op.
* computes the fraction of zero elements in the given array
*
* input param - an array (tensor)
* output value - a real number with given type (e.g. float or double)
*/
#if NOT_EXCLUDED(OP_zero_fraction)
DECLARE_CUSTOM_OP(zero_fraction, 1, 1, false, 0, 0);
#endif
/**
* xw_plus_b op.
* multiplies the first two matrices and adds the third vector to each row of the result
*
* input params:
* - 2D matrix NxM
* - 2D matrix MxN
* - 1D vector with N elements
* output value - 2D matrix NxN: the product of the two matrices with the vector added to each row
* Int args:
* 0 - optional weights format switch: if the int arg == 1 - mkldnn format, otherwise - mmul format
*/
#if NOT_EXCLUDED(OP_xw_plus_b)
DECLARE_CUSTOM_OP(xw_plus_b, 3, 1, false, 0, 0);
DECLARE_CUSTOM_OP(xw_plus_b_bp, 4, 3, false, 0, 0);
#endif
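// Usage sketch for xw_plus_b (illustrative only, not part of this header): it assumes the
// DeclarableOp::evaluate() helper and NDArrayFactory::create() as used in the libnd4j test
// suite; the concrete values are made up for illustration.
//
//   sd::ops::xw_plus_b op;
//   auto x = NDArrayFactory::create<float>('c', {2, 3}, {1.f, 2.f, 3.f, 4.f, 5.f, 6.f});   // N x M
//   auto w = NDArrayFactory::create<float>('c', {3, 2}, {1.f, 0.f, 0.f, 1.f, 1.f, 1.f});   // M x N
//   auto b = NDArrayFactory::create<float>('c', {2}, {0.5f, -0.5f});                       // N
//   auto result = op.evaluate({&x, &w, &b}, {}, {});
//   // result.at(0) is x*w with b added to each row: {{4.5, 4.5}, {10.5, 10.5}}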
/**
* stop_gradient op. The description is brief due to the operation's simplicity:
* input and output are identical after the operation.
* Input - NDArray, output - NDArray with the same shape.
*/
#if NOT_EXCLUDED(OP_stop_gradient)
DECLARE_OP(stop_gradient, 1, 1, true);
#endif
#if NOT_EXCLUDED(OP_parallel_stack)
DECLARE_CUSTOM_OP(parallel_stack, -1, 1, false, 0, 0);
#endif
/**
* normalize_moments operation normalizes already calculated mean and variance
* according to shift and count.
* input params:
* - count of data
* - tensor with mean
* - tensor with variance (the same shape as before)
*
* - optional floating point param shift.
*
* returns a normalized pair of mean and variance with the same shapes as the input
*/
#if NOT_EXCLUDED(OP_normalize_moments)
DECLARE_CUSTOM_OP(normalize_moments, 3, 2, false, 1, 0);
#endif
/**
* sufficient_statistics operation returns the calculated mean and variance together with the data count.
* this operation is the inverse of moments,
* according to shift and count.
* input params:
* - input tensor
* - axes vector
*
*
* - optional floating point param shift.
* - optional int (as bool) keep_dimension
*
* returns four tensors:
* - scalar tensor (data count)
* - sum of elements of the input (across the given axes)
* - sum of squares of the input (across the given axes)
* - shift (if it was given as the input floating point param)
*/
#if NOT_EXCLUDED(OP_sufficient_statistics)
DECLARE_CUSTOM_OP(sufficient_statistics, 2, 3, false, 0, 0);
#endif
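// Usage sketch for sufficient_statistics (illustrative only, not part of this header), assuming
// the evaluate() test helper; the axes vector is passed as the second input and the expected
// values below are worked out by hand for a reduction over axis 0.
//
//   sd::ops::sufficient_statistics op;
//   auto x    = NDArrayFactory::create<float>('c', {2, 2}, {1.f, 2.f, 3.f, 4.f});
//   auto axes = NDArrayFactory::create<int>('c', {1}, {0});
//   auto result = op.evaluate({&x, &axes}, {}, {});
//   // outputs: data count (2 rows reduced per position), sum over axis 0 = {4, 6},
//   // sum of squares over axis 0 = {10, 20}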
/**
* This op calculates weighted logarithmic loss of input
* Input arguments
* 0 - target
* 1 - input
* 2 - weights (scalar or a vector with the same length as the last dimension)
*
* return value - a tensor with the same shape as target or input
*/
#if NOT_EXCLUDED(OP_weighted_cross_entropy_with_logits)
DECLARE_OP(weighted_cross_entropy_with_logits, 3, 1, true);
#endif
/**
* This op calculates dropout of input
* Input arguments
* 0 - input tensor
* 1 - noise_shape - (vector with shape to reduce) - optional
*
* int parameter - seed for random numbers
* T parameter - probability (should be between 0 and 1)
* return value - a tensor with the same shape as target or input
*/
#if NOT_EXCLUDED(OP_dropout)
DECLARE_CONFIGURABLE_OP(dropout, 1, 1, true, 1, 1);
#endif
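// Usage sketch for dropout (illustrative only, not part of this header), assuming the evaluate()
// test helper; the seed (int arg) and probability (T arg) values are arbitrary.
//
//   sd::ops::dropout op;
//   auto x = NDArrayFactory::create<float>('c', {2, 2}, {1.f, 2.f, 3.f, 4.f});
//   auto result = op.evaluate({&x}, {0.5}, {119});   // probability 0.5, seed 119
//   // result.at(0) has the same shape as x, with a random subset of elements dropped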
#if NOT_EXCLUDED(OP_dropout_bp)
DECLARE_CONFIGURABLE_OP(dropout_bp, 2, 1, false, 1, 1);
#endif
/* Calculates alpha weighted dropout
T params:
0 - drop probability
1 - alpha value
2 - alpha' value
3 - beta value
*/
#if NOT_EXCLUDED(OP_alpha_dropout_bp)
DECLARE_CONFIGURABLE_OP(alpha_dropout_bp, 2, 1, false, 4, 1);
#endif
/**
* bincount operation returns a vector with the count of each element value.
*
* input params:
* - input tensor - only integer values are accepted
* - weights - tensor of the same shape with integer weights per element (optional)
* default weight - 1,1,1..,1 for all values in the tensor
*
* optional ints:
* - min_length - zero or greater
* - max_length - between min_length and max(input) + 1
*
* returns:
* - vector tensor with length min(max_length, max(input) + 1) holding the count
* of values at each indexed place
*
*/
#if NOT_EXCLUDED(OP_bincount)
DECLARE_CUSTOM_OP(bincount, 1, 1, false, 0, 0);
#endif
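// Usage sketch for bincount (illustrative only, not part of this header), assuming the evaluate()
// test helper; the min_length/max_length int args and the input values are arbitrary.
//
//   sd::ops::bincount op;
//   auto values = NDArrayFactory::create<int>('c', {6}, {1, 2, 0, 1, 2, 2});
//   auto result = op.evaluate({&values}, {}, {0, 3});   // min_length = 0, max_length = 3
//   // result.at(0) -> {1, 2, 3}: one 0, two 1s, three 2s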
/**
* broadcast_dynamic_shape op.
*
* input params:
* 0 - the first shape (vector with shape)
* 1 - the second shape (vector with shape)
*
* return value:
* vector with broadcasted shape
*/
#if NOT_EXCLUDED(OP_broadcast_dynamic_shape)
DECLARE_CUSTOM_OP(broadcast_dynamic_shape, 2, 1, false, 0, 0);
#endif
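// Usage sketch for broadcast_dynamic_shape (illustrative only, not part of this header), assuming
// the evaluate() test helper; the shape vectors are arbitrary.
//
//   sd::ops::broadcast_dynamic_shape op;
//   auto s1 = NDArrayFactory::create<int>('c', {3}, {2, 1, 3});
//   auto s2 = NDArrayFactory::create<int>('c', {2}, {4, 3});
//   auto result = op.evaluate({&s1, &s2}, {}, {});
//   // result.at(0) -> {2, 4, 3}, the broadcast of shapes {2, 1, 3} and {4, 3}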
/**
* matrix_determinant op.
*
* input params:
* 0 - the tensor with dimension (x * y * z * ::: * M * M)
*
* return value:
* tensor with dimension (x * y * z * ::: *) with determinant for all
* M x M matrices
*/
#if NOT_EXCLUDED(OP_matrix_determinant)
DECLARE_CUSTOM_OP(matrix_determinant, 1, 1, false, 0, 0);
#endif
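// Usage sketch for matrix_determinant (illustrative only, not part of this header), assuming the
// evaluate() test helper; a batch of two 2x2 matrices is used so the result can be checked by hand.
//
//   sd::ops::matrix_determinant op;
//   auto x = NDArrayFactory::create<float>('c', {2, 2, 2},
//                                          {1.f, 2.f, 3.f, 4.f,    // det = 1*4 - 2*3 = -2
//                                           1.f, 0.f, 0.f, 1.f});  // det = 1
//   auto result = op.evaluate({&x}, {}, {});
//   // result.at(0) -> shape {2}, values {-2, 1}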
/**
* log_matrix_determinant op.
*
* input params:
* 0 - the tensor with dimension (x * y * z * ::: * M * M)
*
* return value:
* tensor with dimension (x * y * z * ::: *) with log determinant for all
* M x M matrices
*/
#if NOT_EXCLUDED(OP_log_matrix_determinant)
DECLARE_CUSTOM_OP(log_matrix_determinant, 1, 1, false, 0, 0);
#endif
/**
* logdet op. Logarithm of the determinant of Hermitian positive-definite matrices.
*
* input params:
* 0 - the tensor with dimension (x * y * z * ::: * M * M)
*
* return value:
* tensor with dimension (x * y * z * ::: *) with log determinant for all
* M x M matrices
*/
#if NOT_EXCLUDED(OP_logdet)
DECLARE_CUSTOM_OP(logdet, 1, 1, false, 0, 0);
#endif
/**
* matrix_solve_ls op (lstsq) - solves one or more linear least-squares problems.
*
* input params:
* 0 - the tensor with dimension (x * y * z * ::: * M * N) - left parts of equations
* 1 - the tensor with dimension (x * y * z * ::: * M * K) - right parts of equations
*
* float args:
* 0 - l2_regularizer (default 0. and only for 0 implemented)
*
* boolean args:
* 0 - fast - default is true (optional) - use Cholesky decomposition instead of QR decomposition of matrices.
*
* return value:
* tensor with dimension (x * y * z * ::: * N * K) with solutions
*
*/
#if NOT_EXCLUDED(OP_lstsq)
DECLARE_CUSTOM_OP(lstsq, 2, 1, false, 0, 0);
#endif
/* solve_ls - analog of lstsq op with another solution approach
*
* input params:
* 0 - the tensor with dimension (x * y * z * ::: * M * N) - left parts of equations
* 1 - the tensor with dimension (x * y * z * ::: * M * K) - right parts of equations
*
* float args:
* 0 - l2_regularizer (default 0. and only for 0 implemented)
*
* boolean args:
* 0 - fast - default is true (optional) - use Cholesky decomposition instead of QR decomposition of matrices.
*
* return value:
* tensor with dimension (x * y * z * ::: * N * K) with solutions
*
* Note: if fast is false, the l2_regularizer arg is ignored and the lstsq method based on QR decomposition is used
* */
#if NOT_EXCLUDED(OP_solve_ls)
DECLARE_CUSTOM_OP(solve_ls, 2, 1, false, 0, 0);
#endif
/**
* matrix_inverse op. - computes the inverse of all 2D square matrices found in the input tensor
*
* input params:
* 0 - the tensor with dimension (x * y * z * ::: * M * M)
*
* return value:
* tensor with dimension (x * y * z * ::: * M * M) with inverse M x M matrices in it
*/
#if NOT_EXCLUDED(OP_matrix_inverse)
DECLARE_OP(matrix_inverse, 1, 1, true);
#endif
/**
* triangular_solve op. - reverse Gaussian (back substitution) method for solving systems of linear equations.
*
* input params:
* 0 - the tensor with dimension (x * y * z * ::: * M * M) - left parts of equations
* 1 - the tensor with dimension (x * y * z * ::: * M * K) - right parts of equations
*
* boolean args:
* 0 - lower - default is true (optional) - left part is lower triangular matrix
* 1 - adjoint - default is false (optional) - indicates whether the input matrix or its adjoint (Hermitian transpose) should be used
*
* return value:
* tensor with dimension (x * y * z * ::: * M * K) with solutions
*
*/
#if NOT_EXCLUDED(OP_triangular_solve)
DECLARE_CUSTOM_OP(triangular_solve, 2, 1, false, 0, 0);
#endif
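// Usage sketch for triangular_solve (illustrative only, not part of this header), assuming the
// evaluate() test helper and the defaults lower = true, adjoint = false.
//
//   sd::ops::triangular_solve op;
//   auto a = NDArrayFactory::create<float>('c', {2, 2}, {2.f, 0.f, 3.f, 1.f});   // lower triangular
//   auto b = NDArrayFactory::create<float>('c', {2, 1}, {4.f, 11.f});
//   auto result = op.evaluate({&a, &b}, {}, {});
//   // result.at(0) -> {2, 5}: forward substitution gives 2*2 = 4 and 3*2 + 1*5 = 11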
/**
* solve op. - solve systems of linear equations - general method.
*
* input params:
* 0 - the tensor with dimension (x * y * z * ::: * M * M) - left parts of equations
* 1 - the tensor with dimension (x * y * z * ::: * M * K) - right parts of equations
*
* boolean args:
* 0 - adjoint - default is false (optional) - indicates whether the input matrix or its adjoint (Hermitian transpose) should be used
*
* return value:
* tensor with dimension (x * y * z * ::: * M * K) with solutions
*
*/
#if NOT_EXCLUDED(OP_solve)
DECLARE_CUSTOM_OP(solve, 2, 1, true, 0, 0);
#endif
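// Usage sketch for solve (illustrative only, not part of this header), assuming the evaluate()
// test helper; the 2x2 system is chosen so the solution can be verified by hand.
//
//   sd::ops::solve op;
//   auto a = NDArrayFactory::create<float>('c', {2, 2}, {1.f, 2.f, 3.f, 4.f});
//   auto b = NDArrayFactory::create<float>('c', {2, 1}, {5.f, 11.f});
//   auto result = op.evaluate({&a, &b}, {}, {});
//   // result.at(0) -> {1, 2}: 1*1 + 2*2 = 5 and 3*1 + 4*2 = 11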
/**
* lu op. - makes the LUP decomposition of a given batch of 2D square matrices
*
* input params:
* 0 - float tensor with dimension (x * y * z * ::: * M * M)
*
* return value:
* 0 - float tensor with dimension (x * y * z * ::: * M * M) with LU M x M matrices in it
* 1 - int (32 or 64) batched vector of permutations with length M - shape (x * y * z * ::: * M)
*
* int argument:
* 0 - data type of output permutation vector (int32 or int64), optional, default INT32
*/
#if NOT_EXCLUDED(OP_lu)
DECLARE_CUSTOM_OP(lu, 1, 2, false, 0, 0);
#endif
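// Usage sketch for lu (illustrative only, not part of this header), assuming the evaluate()
// test helper; the input values are arbitrary.
//
//   sd::ops::lu op;
//   auto x = NDArrayFactory::create<float>('c', {2, 2}, {4.f, 3.f, 6.f, 3.f});
//   auto result = op.evaluate({&x}, {}, {});
//   auto lu = result.at(0);   // combined L (below diagonal) and U (on and above diagonal)
//   auto p  = result.at(1);   // permutation vector, int32 unless an int arg requests int64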
/**
* sequence_mask op. - makes a mask for the given tensor, filled by (j < x[i_1, i_2,...,i_n]) -> z[i_1, i_2,...,i_n,j]
*
* input params:
* 0 - the ND-tensor filled by integer-like values
*
* optional int param - maxlength (maxlength >= max(x)). By default maxlength = max(x).
* return value:
* (N+1)D tensor filled by 0 and 1 according to the mask
*/
#if NOT_EXCLUDED(OP_sequence_mask)
DECLARE_CUSTOM_OP(sequence_mask, 1, 1, false, 0, 0);
#endif
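// Usage sketch for sequence_mask (illustrative only, not part of this header), assuming the
// evaluate() test helper; maxlength is passed as the optional int arg.
//
//   sd::ops::sequence_mask op;
//   auto lengths = NDArrayFactory::create<int>('c', {3}, {1, 3, 2});
//   auto result = op.evaluate({&lengths}, {}, {4});   // maxlength = 4
//   // result.at(0) -> shape {3, 4}: {{1,0,0,0}, {1,1,1,0}, {1,1,0,0}}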
/**
* segment_max op. - make a tensor filled by max values according to index tensor given.
*
* input params:
* 0 - the tensor with data;
* 1 - the tensor with indices.
*
* return value:
* tensor with max values according to indices sets.
*/
#if NOT_EXCLUDED(OP_segment_max)
DECLARE_CUSTOM_OP(segment_max, 2, 1, false, 0, 0);
#endif
#if NOT_EXCLUDED(OP_segment_max_bp)
DECLARE_CUSTOM_OP(segment_max_bp, 3, 2, false, 0, 0);
#endif
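// Usage sketch for segment_max (illustrative only, not part of this header), assuming the
// evaluate() test helper; for the segment_* ops the indices are expected in non-decreasing order.
//
//   sd::ops::segment_max op;
//   auto data    = NDArrayFactory::create<float>('c', {6}, {1.f, 5.f, 3.f, 2.f, 7.f, 4.f});
//   auto indices = NDArrayFactory::create<int>('c', {6}, {0, 0, 1, 1, 1, 2});
//   auto result = op.evaluate({&data, &indices}, {}, {});
//   // result.at(0) -> {5, 7, 4}: the max of each of the three segments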
/**
* segment_min op. - make a tensor filled by min values according to index tensor given.
*
* input params:
* 0 - the tensor with data;
* 1 - the tensor with indices.
*
* return value:
* tensor with min values according to indices sets.
*/
#if NOT_EXCLUDED(OP_segment_min)
DECLARE_CUSTOM_OP(segment_min, 2, 1, false, 0, 0);
#endif
#if NOT_EXCLUDED(OP_segment_min_bp)
DECLARE_CUSTOM_OP(segment_min_bp, 3, 2, false, 0, 0);
#endif
/**
* segment_sum op. - make a tensor filled by sum of values according to index tensor given.
*
* input params:
* 0 - the tensor with data;
* 1 - the tensor with indices.
*
* return value:
* tensor with sum of values according to indices sets.
*/
#if NOT_EXCLUDED(OP_segment_sum)
DECLARE_CUSTOM_OP(segment_sum, 2, 1, false, 0, 0);
#endif
#if NOT_EXCLUDED(OP_segment_sum_bp)
DECLARE_CUSTOM_OP(segment_sum_bp, 3, 2, false, 0, 0);
#endif
/**
* segment_prod op. - make a tensor filled by product of values according to index tensor given.
*
* input params:
* 0 - the tensor with data;
* 1 - the tensor with indices.
*
* return value:
* tensor with product of values according to indices sets.
*/
#if NOT_EXCLUDED(OP_segment_prod)
DECLARE_CUSTOM_OP(segment_prod, 2, 1, false, 0, 0);
#endif
#if NOT_EXCLUDED(OP_segment_prod_bp)
DECLARE_CUSTOM_OP(segment_prod_bp, 3, 2, false, 0, 0);
#endif
/**
* segment_mean op. - make a tensor filled by average of values according to index tensor given.
*
* input params:
* 0 - the tensor with data;
* 1 - the tensor with indices.
*
* return value:
* tensor with average of values according to indices sets.
*/
#if NOT_EXCLUDED(OP_segment_mean)
DECLARE_CUSTOM_OP(segment_mean, 2, 1, false, 0, 0);
#endif
#if NOT_EXCLUDED(OP_segment_mean_bp)
DECLARE_CUSTOM_OP(segment_mean_bp, 3, 2, false, 0, 0);
#endif
/**
* unsorted_segment_max op. - make a tensor filled by max values according to index tensor given.
*
* input params:
* 0 - the tensor with data;
* 1 - the tensor with indices.
*
* return value:
* tensor with max values according to indices sets.
*/
#if NOT_EXCLUDED(OP_unsorted_segment_max)
DECLARE_CUSTOM_OP(unsorted_segment_max, 2, 1, false, 0, 0);
#endif
#if NOT_EXCLUDED(OP_unsorted_segment_max_bp)
DECLARE_CUSTOM_OP(unsorted_segment_max_bp, 3, 2, false, 0, 1);
#endif
/**
* unsorted_segment_min op. - make a tensor filled by min values according to index tensor given.
*
* input params:
* 0 - the tensor with data;
* 1 - the tensor with indices.
*
* integer param:
* 0 - num of segments
*
* return value:
* tensor with min values according to indices sets.
*/
#if NOT_EXCLUDED(OP_unsorted_segment_min)
DECLARE_CUSTOM_OP(unsorted_segment_min, 2, 1, false, 0, 0);
#endif
#if NOT_EXCLUDED(OP_unsorted_segment_min_bp)
DECLARE_CUSTOM_OP(unsorted_segment_min_bp, 3, 2, false, 0, 1);
#endif
/**
* unsorted_segment_sum op. - make a tensor filled by sum of values according to index tensor given.
*
* input params:
* 0 - the tensor with data;
* 1 - the tensor with indices.
*
* integer param:
* 0 - num of segments
*
* return value:
* tensor with sum of values according to indices sets.
*/
#if NOT_EXCLUDED(OP_unsorted_segment_sum)
DECLARE_CUSTOM_OP(unsorted_segment_sum, 2, 1, false, 0, 0);
#endif
#if NOT_EXCLUDED(OP_unsorted_segment_sum_bp)
DECLARE_CUSTOM_OP(unsorted_segment_sum_bp, 3, 2, false, 0, 1);
#endif
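// Usage sketch for unsorted_segment_sum (illustrative only, not part of this header), assuming the
// evaluate() test helper; the number of segments is passed as the int arg and the indices need not
// be sorted.
//
//   sd::ops::unsorted_segment_sum op;
//   auto data    = NDArrayFactory::create<float>('c', {5}, {1.f, 2.f, 3.f, 4.f, 5.f});
//   auto indices = NDArrayFactory::create<int>('c', {5}, {2, 0, 2, 1, 0});
//   auto result = op.evaluate({&data, &indices}, {}, {3});   // 3 segments
//   // result.at(0) -> {7, 4, 4}: segment 0 = 2 + 5, segment 1 = 4, segment 2 = 1 + 3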
/**
* unsorted_segment_prod op. - make a tensor filled by product of values according to index tensor given.
*
* input params:
* 0 - the tensor with data;
* 1 - the tensor with indices.
*
* integer param:
* 0 - num of segments
*
* return value:
* tensor with product of values according to indices sets.
*/
#if NOT_EXCLUDED(OP_unsorted_segment_prod)
DECLARE_CUSTOM_OP(unsorted_segment_prod, 2, 1, false, 0, 0);
#endif
#if NOT_EXCLUDED(OP_unsorted_segment_prod_bp)
DECLARE_CUSTOM_OP(unsorted_segment_prod_bp, 3, 2, false, 0, 1);
#endif
/**
* unsorted_segment_mean op. - make a tensor filled by average of values according to index tensor given.
*
* input params:
* 0 - the tensor with data;
* 1 - the tensor with indices.
*
* integer param:
* 0 - num of segments
*
* return value:
* tensor with average of values according to indices sets.
*/
#if NOT_EXCLUDED(OP_unsorted_segment_mean)
DECLARE_CUSTOM_OP(unsorted_segment_mean, 2, 1, false, 0, 0);
#endif
#if NOT_EXCLUDED(OP_unsorted_segment_mean_bp)
DECLARE_CUSTOM_OP(unsorted_segment_mean_bp, 3, 2, false, 0, 1);
#endif
/**
* unsorted_segment_sqrt_n op. - computes the sum along segments of a tensor divided by the sqrt(N).
*
* input params:
* 0 - the tensor with data;
* 1 - the tensor with indices.
*
* integer param:
* 0 - num of segments
*
* return value:
* tensor with the sum of values divided by sqrt(N) according to indices sets.
*/
#if NOT_EXCLUDED(OP_unsorted_segment_sqrt_n)
DECLARE_CUSTOM_OP(unsorted_segment_sqrt_n, 2, 1, false, 0, 0);
#endif
#if NOT_EXCLUDED(OP_unsorted_segment_sqrt_n_bp)
DECLARE_CUSTOM_OP(unsorted_segment_sqrt_n_bp, 3, 2, false, 0, 1);
#endif
/**
* extract_image_patches op - Extract patches from images and put them in the "depth" output dimension.
*
* input params:
* 0 - images tensor (4D)
*
* int params:
* 0 - ksize_rows
* 1 - ksize_cols
* 2 - strides_rows
* 3 - strides_cols
* 4 - rates_rows
* 5 - rates_cols
* 6 - padding_type - 0 - equiv 'VALID', 1 - 'SAME'
*/
#if NOT_EXCLUDED(OP_extract_image_patches)
DECLARE_CUSTOM_OP(extract_image_patches, 1, 1, false, 0, 7);
#endif
/**
* draw_bounding_boxes op - modifies the input images by drawing the given boxes with the given colours.
*
* input params:
* 0 - images tensor (4D) with shape {batch, width, height, channels}, where channels is 1 (BW image),
* 3 (RGB) or 4 (RGBA)
* 1 - boxes tensor (3D) with shape {batch, number_of_boxes, 4} where last dimension encoded as
* (y_min, x_min, y_max, x_max), all values in between 0. and 1.
* 2 - colours tensor (2D) with shape {number_of_boxes, channels} -- bordering color set (palette)
*
* output:
* 0 - 4D tensor with same shape as images (input 0)
*/
#if NOT_EXCLUDED(OP_draw_bounding_boxes)
DECLARE_OP(draw_bounding_boxes, 3, 1, true);
#endif
/**
* roll - op ported from numpy (https://docs.scipy.org/doc/numpy-1.14.0/reference/generated/numpy.roll.html)
*
2019-06-06 14:21:15 +02:00
* input params:
* 0 - NDArray
[WIP] More of CUDA operations (#69) * initial commit Signed-off-by: raver119 <raver119@gmail.com> * - gruCell_bp further Signed-off-by: Yurii <yurii@skymind.io> * - further work on gruCell_bp Signed-off-by: Yurii <yurii@skymind.io> * Inverse matrix cublas implementation. Partial working revision. * Separation of segment ops helpers. Max separation. * Separated segment_min ops. * Separation of segment_mean/sum/prod/sqrtN ops heleprs. * Fixed diagonal processing with LUP decomposition. * Modified inversion approach using current state of LU decomposition. * Implementation of matrix_inverse op with cuda kernels. Working revision. * Implemented sequence_mask cuda helper. Eliminated waste printf with matrix_inverse implementation. Added proper tests. * - further work on gruCell_bp (ff/cuda) Signed-off-by: Yurii <yurii@skymind.io> * comment one test for gruCell_bp Signed-off-by: Yurii <yurii@skymind.io> * - provide cuda static_rnn Signed-off-by: Yurii <yurii@skymind.io> * Refactored random_shuffle op to use new random generator. * Refactored random_shuffle op helper. * Fixed debug tests with random ops tests. * Implement random_shuffle op cuda kernel helper and tests. * - provide cuda scatter_update Signed-off-by: Yurii <yurii@skymind.io> * Implementation of random_shuffle for linear case with cuda kernels and tests. * Implemented random_shuffle with cuda kernels. Final revision. * - finally gruCell_bp is completed Signed-off-by: Yurii <yurii@skymind.io> * Dropout op cuda helper implementation. * Implemented dropout_bp cuda helper. * Implemented alpha_dropout_bp with cuda kernel helpers. * Refactored helper. * Implementation of suppresion helper with cuda kernels. * - provide cpu code fot hsvToRgb, rgbToHsv, adjustHue Signed-off-by: Yurii <yurii@skymind.io> * Using sort by value method. * Implementation of image.non_max_suppression op cuda-based helper. * - correcting and testing adjust_hue, adjust_saturation cpu/cuda code Signed-off-by: Yurii <yurii@skymind.io> * Added cuda device prefixes to declarations. * Implementation of hashcode op with cuda helper. Initital revision. * rnn cu impl removed Signed-off-by: raver119 <raver119@gmail.com>
2019-07-20 07:58:44 +02:00
*
2019-06-06 14:21:15 +02:00
* int params:
* 0 - shift
* 1 - axe 1
* 2 - axe 2
* ...
[WIP] More of CUDA operations (#69) * initial commit Signed-off-by: raver119 <raver119@gmail.com> * - gruCell_bp further Signed-off-by: Yurii <yurii@skymind.io> * - further work on gruCell_bp Signed-off-by: Yurii <yurii@skymind.io> * Inverse matrix cublas implementation. Partial working revision. * Separation of segment ops helpers. Max separation. * Separated segment_min ops. * Separation of segment_mean/sum/prod/sqrtN ops heleprs. * Fixed diagonal processing with LUP decomposition. * Modified inversion approach using current state of LU decomposition. * Implementation of matrix_inverse op with cuda kernels. Working revision. * Implemented sequence_mask cuda helper. Eliminated waste printf with matrix_inverse implementation. Added proper tests. * - further work on gruCell_bp (ff/cuda) Signed-off-by: Yurii <yurii@skymind.io> * comment one test for gruCell_bp Signed-off-by: Yurii <yurii@skymind.io> * - provide cuda static_rnn Signed-off-by: Yurii <yurii@skymind.io> * Refactored random_shuffle op to use new random generator. * Refactored random_shuffle op helper. * Fixed debug tests with random ops tests. * Implement random_shuffle op cuda kernel helper and tests. * - provide cuda scatter_update Signed-off-by: Yurii <yurii@skymind.io> * Implementation of random_shuffle for linear case with cuda kernels and tests. * Implemented random_shuffle with cuda kernels. Final revision. * - finally gruCell_bp is completed Signed-off-by: Yurii <yurii@skymind.io> * Dropout op cuda helper implementation. * Implemented dropout_bp cuda helper. * Implemented alpha_dropout_bp with cuda kernel helpers. * Refactored helper. * Implementation of suppresion helper with cuda kernels. * - provide cpu code fot hsvToRgb, rgbToHsv, adjustHue Signed-off-by: Yurii <yurii@skymind.io> * Using sort by value method. * Implementation of image.non_max_suppression op cuda-based helper. * - correcting and testing adjust_hue, adjust_saturation cpu/cuda code Signed-off-by: Yurii <yurii@skymind.io> * Added cuda device prefixes to declarations. * Implementation of hashcode op with cuda helper. Initital revision. * rnn cu impl removed Signed-off-by: raver119 <raver119@gmail.com>
2019-07-20 07:58:44 +02:00
* N - axe N
2019-06-06 14:21:15 +02:00
*
* All axes are optional and should be between 0 and input->rankOf(). Of course, all axes can be repeated.
*
* output:
* 0 - NDArray with the same shape as input.
*/
#if NOT_EXCLUDED(OP_roll)
DECLARE_CONFIGURABLE_OP(roll, 1, 1, true, 0, 1);
#endif
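        // Usage sketch (illustrative only; evaluate()/NDArrayFactory as used in the test suite are assumed):
        //   sd::ops::roll op;
        //   auto x = NDArrayFactory::create<float>('c', {2, 4}, {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f});
        //   auto result = op.evaluate({&x}, {}, {1, 1});   // int args: shift = 1 along axis 1
        //   // expected: {{4, 1, 2, 3}, {8, 5, 6, 7}}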
/**
 * lin_space - op ported from TF (https://www.tensorflow.org/api_docs/python/tf/lin_space)
 *
 * optional input params:
 *    0 - startVal - NDArray scalar (floating point)
 *    1 - finishVal - NDArray scalar (floating point)
 *    2 - numOfElements - NDArray scalar (integer)
 * Optional:
 * T args
 *    0 - startVal
 *    1 - finishVal
 *    2 - numOfElements
* output:
* 0 - 1D NDArray with the same type as input and length as given with numOfElements param.
*/
#if NOT_EXCLUDED(OP_lin_space)
DECLARE_CUSTOM_OP(lin_space, 0, 1, false, 0, 0);
#endif
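        // Usage sketch (illustrative only; the scalar inputs via NDArrayFactory and the evaluate() helper are assumptions):
        //   sd::ops::lin_space op;
        //   auto start  = NDArrayFactory::create<float>(0.f);
        //   auto finish = NDArrayFactory::create<float>(1.f);
        //   auto num    = NDArrayFactory::create<int>(5);
        //   auto result = op.evaluate({&start, &finish, &num}, {}, {});
        //   // expected output: [0.0, 0.25, 0.5, 0.75, 1.0]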
/**
 * reduce_sum - tf.reduce_sum operation
 *
 * input params:
 *    0 - NDArray
 *
 * T_ARG param (optional):
 * 0 - keep_dims != 0.
 *
 * int params (optional):
 * 0 - axis 1
 * 1 - axis 2
 * ...
 * N-1 - axis N
 *
 * All axes are optional and should be between 0 and input->rankOf() - 1
 *
 * output:
 * 0 - NDArray with reduced shape according to the axes (a scalar in the default case).
*/
#if NOT_EXCLUDED(OP_reduce_sum)
DECLARE_CUSTOM_OP(reduce_sum, 1, 1, false, 0, 0);
#endif
#if NOT_EXCLUDED(OP_reduce_sum_bp)
DECLARE_CUSTOM_OP(reduce_sum_bp, 2, 1, false, 0, 0);
#endif
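        // Usage sketch (illustrative only; evaluate()/NDArrayFactory as in the test suite are assumed).
        // The same T_ARG/int-arg pattern applies to the other reduce_* ops declared below
        // (reduce_prod, reduce_min, reduce_max, the norms and reduce_mean).
        //   sd::ops::reduce_sum op;
        //   auto x = NDArrayFactory::create<float>('c', {2, 3}, {1.f, 2.f, 3.f, 4.f, 5.f, 6.f});
        //   auto result = op.evaluate({&x}, {1.0}, {1});   // keep_dims = true, sum along axis 1
        //   // expected: {{6}, {15}} with shape {2, 1}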
/**
 * reduce_prod - tf.reduce_prod operation
 *
 * input params:
 *    0 - NDArray
 *
 * T_ARG param (optional):
 * 0 - keep_dims != 0.
 *
 * int params (optional):
 * 0 - axis 1
 * 1 - axis 2
 * ...
 * N-1 - axis N
 *
 * All axes are optional and should be between 0 and input->rankOf() - 1
 *
 * output:
 * 0 - NDArray with reduced shape according to the axes (a scalar in the default case).
*/
#if NOT_EXCLUDED(OP_reduce_prod)
DECLARE_CUSTOM_OP(reduce_prod, 1, 1, false, 0, 0);
#endif
#if NOT_EXCLUDED(OP_reduce_prod_bp)
DECLARE_CUSTOM_OP(reduce_prod_bp, 2, 1, false, 0, 0);
#endif
/**
* This op calculates min of elements along given dimensions
*
* input array:
 *    x: tensor to calculate mins for
*
* float arguments:
* keepDims: if non zero, then keep reduced dimensions with length = 1, default value is zero
*
* int arguments:
* list of integers - dimensions to calculate min along, default corresponds to empty list in which case calculation is performed for all dimensions and scalar is returned
*
* output array:
* reduced tensor with calculated mins
*/
#if NOT_EXCLUDED(OP_reduce_min)
DECLARE_CUSTOM_OP(reduce_min, 1, 1, false, 0, 0);
#endif
#if NOT_EXCLUDED(OP_reduce_min_bp)
DECLARE_CUSTOM_OP(reduce_min_bp, 2, 1, false, 0, 0);
#endif
/**
* This op calculates max of elements along given dimensions
*
* input array:
 *    x: tensor to calculate maxes for
*
* float arguments:
* keepDims: if non zero, then keep reduced dimensions with length = 1, default value is zero
*
* int arguments:
* list of integers - dimensions to calculate max along, default corresponds to empty list in which case calculation is performed for all dimensions and scalar is returned
*
* output array:
* reduced tensor with calculated maxes
*/
#if NOT_EXCLUDED(OP_reduce_max)
DECLARE_CUSTOM_OP(reduce_max, 1, 1, false, 0, 0);
#endif
#if NOT_EXCLUDED(OP_reduce_max_bp)
DECLARE_CUSTOM_OP(reduce_max_bp, 2, 1, false, 0, 0);
#endif
/**
* This op calculates norm1 of elements along given dimensions
*
* input array:
 *    x: tensor to calculate norm1 for
*
* float arguments:
* keepDims: if non zero, then keep reduced dimensions with length = 1, default value is zero
*
* int arguments:
* list of integers - dimensions to calculate norm1 along, default corresponds to empty list in which case calculation is performed for all dimensions and scalar is returned
*
* output array:
* reduced tensor with calculated norm1
*/
#if NOT_EXCLUDED(OP_reduce_norm1)
DECLARE_CUSTOM_OP(reduce_norm1, 1, 1, false, 0, 0);
#endif
#if NOT_EXCLUDED(OP_reduce_norm1_bp)
DECLARE_CUSTOM_OP(reduce_norm1_bp, 2, 1, false, 0, 0);
#endif
/**
* This op calculates norm2 of elements along given dimensions
*
* input array:
 *    x: tensor to calculate norm2 for
*
* float arguments:
* keepDims: if non zero, then keep reduced dimensions with length = 1, default value is zero
*
* int arguments:
* list of integers - dimensions to calculate norm2 along, default corresponds to empty list in which case calculation is performed for all dimensions and scalar is returned
*
* output array:
* reduced tensor with calculated norm2
*/
#if NOT_EXCLUDED(OP_reduce_norm2)
DECLARE_CUSTOM_OP(reduce_norm2, 1, 1, false, 0, 0);
#endif
#if NOT_EXCLUDED(OP_reduce_norm2_bp)
DECLARE_CUSTOM_OP(reduce_norm2_bp, 2, 1, false, 0, 0);
#endif
/**
* This op calculates squared norm of elements along given dimensions
*
* input array:
 *    x: tensor to calculate squared norm for
*
* float arguments:
* keepDims: if non zero, then keep reduced dimensions with length = 1, default value is zero
*
* int arguments:
* list of integers - dimensions to calculate squared norm along, default corresponds to empty list in which case calculation is performed for all dimensions and scalar is returned
*
* output array:
* reduced tensor with calculated norm
*/
#if NOT_EXCLUDED(OP_reduce_sqnorm)
DECLARE_CUSTOM_OP(reduce_sqnorm, 1, 1, false, 0, 0);
#endif
#if NOT_EXCLUDED(OP_reduce_sqnorm_bp)
DECLARE_CUSTOM_OP(reduce_sqnorm_bp, 2, 1, false, 0, 0);
#endif
/**
* This op calculates norm max of elements along given dimensions
*
* input array:
 *    x: tensor to calculate norm max for
*
* float arguments:
* keepDims: if non zero, then keep reduced dimensions with length = 1, default value is zero
*
* int arguments:
* list of integers - dimensions to calculate norm max along, default corresponds to empty list in which case calculation is performed for all dimensions and scalar is returned
*
* output array:
* reduced tensor with calculated norm
*/
#if NOT_EXCLUDED(OP_reduce_norm_max)
DECLARE_CUSTOM_OP(reduce_norm_max, 1, 1, false, 0, 0);
#endif
#if NOT_EXCLUDED(OP_reduce_norm_max_bp)
DECLARE_CUSTOM_OP(reduce_norm_max_bp, 2, 1, false, 0, 0);
#endif
/**
* This op calculates mean of elements along given dimensions
*
* input array:
 *    x: tensor to calculate mean for
*
* float arguments:
* keepDims: if non zero, then keep reduced dimensions with length = 1, default value is zero
*
* int arguments:
* list of integers - dimensions to calculate mean along, default corresponds to empty list in which case calculation is performed for all dimensions and scalar is returned
*
* output array:
* reduced tensor with calculated means
*/
#if NOT_EXCLUDED(OP_reduce_mean)
DECLARE_CUSTOM_OP(reduce_mean, 1, 1, false, 0, 0);
#endif
#if NOT_EXCLUDED(OP_reduce_mean_bp)
DECLARE_CUSTOM_OP(reduce_mean_bp, 2, 1, false, 0, 0)
#endif
/**
* This op calculates sample variance of elements along given dimensions
*
* input array:
 *    x: tensor to calculate variance for
 *
 * float arguments:
 *   keepDims: if non zero, then keep reduced dimensions with length = 1, default value is zero
 *   biasCorrected - if non zero, then bias correction will be applied, default value is zero
 *
 * int arguments:
 *   list of integers - dimensions to calculate variance along, default corresponds to empty list in which case calculation is performed for all dimensions and scalar is returned
 *
 * output array:
 *   reduced tensor with calculated variance
*/
DECLARE_CUSTOM_OP(reduce_variance, 1, 1, false, 0, 0);
DECLARE_CUSTOM_OP(reduce_variance_bp, 2, 1, false, 0, 0)
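        // Usage sketch (illustrative only; evaluate()/NDArrayFactory are assumptions). The same argument
        // layout applies to reduce_stdev below.
        //   sd::ops::reduce_variance op;
        //   auto x = NDArrayFactory::create<float>('c', {2, 3}, {1.f, 2.f, 3.f, 4.f, 5.f, 6.f});
        //   // T args: keepDims = 0, biasCorrected = 1 (sample variance); int args: axis 1
        //   auto result = op.evaluate({&x}, {0.0, 1.0}, {1});
        //   // expected: [1, 1]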
/**
* This op calculates sample standard deviation of elements along given dimensions
*
* input array:
 *    x: tensor to calculate standard deviation for
 *
 * float arguments:
 *   keepDims: if non zero, then keep reduced dimensions with length = 1, default value is zero
 *   biasCorrected - if non zero, then bias correction will be applied, default value is zero
 *
 * int arguments:
 *   list of integers - dimensions to calculate standard deviation along, default corresponds to empty list in which case calculation is performed for all dimensions and scalar is returned
 *
 * output array:
 *   reduced tensor with calculated standard deviation
*/
DECLARE_CUSTOM_OP(reduce_stdev, 1, 1, false, 0, 0);
DECLARE_CUSTOM_OP(reduce_stdev_bp, 2, 1, false, 0, 0)
/**
* This op calculates backprop dot for two tensors along given dimensions
*
* input array:
 *    x: tensor to calculate dot for
 *    y: tensor to calculate dot for
 *    z: tensor with gradient output of the FF dot for x and y
 *
 * int arguments:
 *   list of integers - dimensions to calculate dot along,
 *   default corresponds to empty list in which case calculation
 *   is performed for all dimensions and scalar is returned.
 *
 * output array:
 *   the tensors with calculated backpropagated dots
*
*/
#if NOT_EXCLUDED(OP_reduce_dot_bp)
DECLARE_CUSTOM_OP(reduce_dot_bp, 3, 2, false, 0, 0);
#endif
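        // Usage sketch (illustrative only; evaluate()/NDArrayFactory are assumptions):
        //   sd::ops::reduce_dot_bp op;
        //   auto x    = NDArrayFactory::create<float>('c', {2, 3}, {1.f, 2.f, 3.f, 4.f, 5.f, 6.f});
        //   auto y    = NDArrayFactory::create<float>('c', {2, 3}, {6.f, 5.f, 4.f, 3.f, 2.f, 1.f});
        //   auto grad = NDArrayFactory::create<float>(1.f);        // gradient of the scalar FF dot
        //   auto result = op.evaluate({&x, &y, &grad}, {}, {});    // no axes -> full-reduction case
        //   // result.at(0) holds dL/dx (= grad * y), result.at(1) holds dL/dy (= grad * x)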
/**
* reduce_logsumexp - tf.reduce_logsumexe operation
[WIP] More of CUDA operations (#69) * initial commit Signed-off-by: raver119 <raver119@gmail.com> * - gruCell_bp further Signed-off-by: Yurii <yurii@skymind.io> * - further work on gruCell_bp Signed-off-by: Yurii <yurii@skymind.io> * Inverse matrix cublas implementation. Partial working revision. * Separation of segment ops helpers. Max separation. * Separated segment_min ops. * Separation of segment_mean/sum/prod/sqrtN ops heleprs. * Fixed diagonal processing with LUP decomposition. * Modified inversion approach using current state of LU decomposition. * Implementation of matrix_inverse op with cuda kernels. Working revision. * Implemented sequence_mask cuda helper. Eliminated waste printf with matrix_inverse implementation. Added proper tests. * - further work on gruCell_bp (ff/cuda) Signed-off-by: Yurii <yurii@skymind.io> * comment one test for gruCell_bp Signed-off-by: Yurii <yurii@skymind.io> * - provide cuda static_rnn Signed-off-by: Yurii <yurii@skymind.io> * Refactored random_shuffle op to use new random generator. * Refactored random_shuffle op helper. * Fixed debug tests with random ops tests. * Implement random_shuffle op cuda kernel helper and tests. * - provide cuda scatter_update Signed-off-by: Yurii <yurii@skymind.io> * Implementation of random_shuffle for linear case with cuda kernels and tests. * Implemented random_shuffle with cuda kernels. Final revision. * - finally gruCell_bp is completed Signed-off-by: Yurii <yurii@skymind.io> * Dropout op cuda helper implementation. * Implemented dropout_bp cuda helper. * Implemented alpha_dropout_bp with cuda kernel helpers. * Refactored helper. * Implementation of suppresion helper with cuda kernels. * - provide cpu code fot hsvToRgb, rgbToHsv, adjustHue Signed-off-by: Yurii <yurii@skymind.io> * Using sort by value method. * Implementation of image.non_max_suppression op cuda-based helper. * - correcting and testing adjust_hue, adjust_saturation cpu/cuda code Signed-off-by: Yurii <yurii@skymind.io> * Added cuda device prefixes to declarations. * Implementation of hashcode op with cuda helper. Initital revision. * rnn cu impl removed Signed-off-by: raver119 <raver119@gmail.com>
2019-07-20 07:58:44 +02:00
*
2019-06-06 14:21:15 +02:00
* input params:
* 0 - NDArray (input)
* 1 - 1D NDArray (axis) (optional) - integer array
*
* T_ARG param (optional):
* 0 - keep_dims != 0.
*
* int params (optional):
* 0 - axis 1
* 1 - axis 2
* ...
* N-1 - axis N
*
* CAUTION: all axes are optional; each must lie between 0 and input->rankOf() - 1,
* and axes may be passed either via the second input array or as integer args, but not both
*
* output:
* 0 - NDArray reduced along the given axes (a scalar in the default case).
*/
#if NOT_EXCLUDED(OP_reduce_logsumexp)
DECLARE_CUSTOM_OP(reduce_logsumexp, 1, 1, false, 0, 0);
#endif
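/*
* Usage sketch for reduce_logsumexp (illustrative only, not part of the declaration; assumes
* the DeclarableOp::evaluate() and NDArrayFactory helpers used in this codebase's tests,
* with made-up shapes and values):
*
*   sd::ops::reduce_logsumexp op;
*   auto x = NDArrayFactory::create<float>('c', {2, 3}, {1.f, 2.f, 3.f, 4.f, 5.f, 6.f});
*   auto result = op.evaluate({&x}, {}, {1});   // reduce along axis 1
*   auto z = result.at(0);                      // shape {2}: logsumexp of each row
*/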
/**
* Copy a tensor, setting everything outside a central band in each innermost matrix to zero
*
* input array:
* x: input tensor with shape {..., M, N}, i.e. a batch of MxN matrices
*
* int arguments:
* 0 - lower band (number of sub-diagonals to keep)
* 1 - upper band (number of super-diagonals to keep)
*
* output array:
* 0 - tensor with the same shape as x, keeping only the band between the lower and upper diagonals
*
*/
#if NOT_EXCLUDED(OP_matrix_band_part)
DECLARE_CONFIGURABLE_OP(matrix_band_part, 1, 1, true, 0, 2);
#endif
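/*
* Usage sketch for matrix_band_part (illustrative only; assumes the evaluate()/NDArrayFactory
* test helpers of this codebase, with hypothetical shapes and band widths):
*
*   sd::ops::matrix_band_part op;
*   auto x = NDArrayFactory::create<float>('c', {2, 4, 4});
*   x.assign(1.f);
*   auto result = op.evaluate({&x}, {}, {1, 1});   // keep 1 sub-diagonal and 1 super-diagonal
*   auto banded = result.at(0);                    // same shape as x, entries outside the band are zero
*/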
#if NOT_EXCLUDED(OP_Assert)
DECLARE_OP(Assert, 1, 1, false);
#endif
/**
* image.non_max_suppression ops.
* input:
* 0 - boxes - 2D tensor with shape (num_boxes, 4), float type
* 1 - scores - 1D tensor with shape (num_boxes), float type
* 2 - output_size - 0D tensor, int type (optional)
* float args:
* 0 - overlap_threshold - threshold value for overlap checks (optional, default 0.5)
* 1 - score_threshold - threshold for removing boxes based on score (optional, default -inf)
* int args:
* 0 - output_size - same purpose as input 2. Either this or input 2 should be provided.
*
* output:
* 0 - 1D integer vector of size M, where M <= output_size
*
* */
#if NOT_EXCLUDED(OP_image_non_max_suppression)
DECLARE_CUSTOM_OP(non_max_suppression, 2, 1, false, 0, 0);
#endif
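/*
* Usage sketch for image.non_max_suppression (illustrative only; assumes the
* evaluate()/NDArrayFactory test helpers, with made-up boxes and scores):
*
*   sd::ops::non_max_suppression op;
*   auto boxes  = NDArrayFactory::create<float>('c', {3, 4},
*                 {0.f, 0.f, 1.f, 1.f,   0.f, 0.1f, 1.f, 1.1f,   0.f, 10.f, 1.f, 11.f});
*   auto scores = NDArrayFactory::create<float>('c', {3}, {0.9f, 0.75f, 0.6f});
*   auto result = op.evaluate({&boxes, &scores}, {0.5, 0.0}, {2});   // overlap/score thresholds, at most 2 boxes
*   auto selected = result.at(0);   // 1D integer vector with the indices of the kept boxes
*/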
#if NOT_EXCLUDED(OP_image_non_max_suppression_v3)
DECLARE_CUSTOM_OP(non_max_suppression_v3, 2, 1, false, 0, 0);
#endif
/*
* image.non_max_suppression_overlaps op.
* input:
* 0 - boxes - 2D tensor with shape (num_boxes, 4), float type
* 1 - scores - 1D tensor with shape (num_boxes), float type
* 2 - output_size - 0D tensor, int type (optional)
* float args:
* 0 - overlap_threshold - threshold value for overlap checks (optional, default 0.5)
* 1 - score_threshold - threshold for removing boxes based on score (optional, default -inf)
* int args:
* 0 - output_size - same purpose as input 2. Either this or input 2 should be provided.
*
* output:
* 0 - 1D integer tensor with shape [M], representing the selected indices from the overlaps tensor, where M <= output_size
* */
#if NOT_EXCLUDED(OP_image_non_max_suppression_overlaps)
DECLARE_CUSTOM_OP(non_max_suppression_overlaps, 2, 1, false, 0, 0);
#endif
/*
* cholesky op - decompose a positive-definite square symmetric matrix (or a batch of matrices when rank > 2).
* input:
* 0 - matrices - tensor with shape (..., N, N), float type
*
* output - lower triangular matrix (or a batch of matrices when rank > 2) with the same shape as the input.
* */
#if NOT_EXCLUDED(OP_cholesky)
DECLARE_OP(cholesky, 1, 1, true);
#endif
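/*
* Usage sketch for cholesky (illustrative only; assumes the evaluate()/NDArrayFactory test
* helpers and a hand-picked positive-definite matrix):
*
*   sd::ops::cholesky op;
*   auto x = NDArrayFactory::create<double>('c', {2, 2}, {4., 2., 2., 3.});
*   auto result = op.evaluate({&x});
*   auto L = result.at(0);   // lower triangular factor, so that L * L^T == x
*/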
/*
* nth_element - returns the n-th element (in sorted order) along the last dimension of the input tensor
* input array:
* 0 - input array
* 1 - scalar tensor holding n; n must be less than the size of the last dimension
*
* output:
* 0 - NDArray with the same shape as input
*/
#if NOT_EXCLUDED(OP_nth_element)
DECLARE_CUSTOM_OP(nth_element, 2, 1, false, 0, 0);
#endif
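/*
* Usage sketch for nth_element (illustrative only; assumes the evaluate()/NDArrayFactory test
* helpers, with a made-up input and n = 1):
*
*   sd::ops::nth_element op;
*   auto x = NDArrayFactory::create<float>('c', {2, 3}, {4.f, 1.f, 7.f,   9.f, 2.f, 5.f});
*   auto n = NDArrayFactory::create<int>(1);   // scalar n, must be < size of the last dimension (3)
*   auto result = op.evaluate({&x, &n});
*   auto z = result.at(0);                     // n-th element of each row in sorted order
*/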
/**
* This op checks the input array for Inf/NaN values and throws an exception if at least one is found
*/
#if NOT_EXCLUDED(OP_check_numerics)
DECLARE_CUSTOM_OP(check_numerics, 2, 1, true, 0, 0);
#endif
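/*
* Usage sketch for check_numerics (illustrative only; assumes the evaluate() helper and
* NDArrayFactory::string() for the message input, as used in this codebase's tests):
*
*   sd::ops::check_numerics op;
*   auto x   = NDArrayFactory::create<float>('c', {2, 2}, {1.f, 2.f, 3.f, 4.f});
*   auto msg = NDArrayFactory::string("found Inf/NaN in x");
*   auto result = op.evaluate({&x, &msg});   // passes x through unchanged, throws on Inf/NaN
*/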
/**
* fake_quant_with_min_max_vars - tf.quantization.fake_quant_with_min_max_vars
*
* input params:
* 0 - NDArray (input)
* 1 - 0D Tensor - min value
* 2 - 0D Tensor - max value
*
* int params (optional):
* 0 - num_bits (allowed interval [2, 16], default 8)
* 1 - narrow_range (default False)
*
* output:
* 0 - NDArray with the same shape as input
*/
#if NOT_EXCLUDED(OP_fake_quant_with_min_max_vars)
DECLARE_CONFIGURABLE_OP(fake_quant_with_min_max_vars, 3, 1, true, 0, -2);
#endif
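/*
* Usage sketch for fake_quant_with_min_max_vars (illustrative only; assumes the
* evaluate()/NDArrayFactory test helpers, with hypothetical min/max and default num_bits):
*
*   sd::ops::fake_quant_with_min_max_vars op;
*   auto x   = NDArrayFactory::create<float>('c', {2, 3}, {-0.1f, 0.f, 0.3f, 0.6f, 0.9f, 1.2f});
*   auto min = NDArrayFactory::create<float>(0.f);
*   auto max = NDArrayFactory::create<float>(1.f);
*   auto result = op.evaluate({&x, &min, &max});   // optional int args: {num_bits, narrow_range}
*   auto z = result.at(0);                         // same shape as x, values snapped to the [min, max] quantization grid
*/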
/**
* fake_quant_with_min_max_vars_per_channel - tf.quantization.fake_quant_with_min_max_vars_per_channel
*
* input params:
* 0 - NDArray (input) - at least 2D.
* 1 - 1D Tensor - min values (length equals the last dimension of the input)
* 2 - 1D Tensor - max values (same length as min)
*
* int params (optional):
* 0 - num_bits (allowed interval [2, 16], default 8)
* 1 - narrow_range (default False)
*
* output:
* 0 - NDArray with the same shape as input
*/
#if NOT_EXCLUDED(OP_fake_quant_with_min_max_vars_per_channel)
DECLARE_CONFIGURABLE_OP(fake_quant_with_min_max_vars_per_channel, 3, 1, true, 0, -2);
#endif
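/*
* Usage sketch for the per-channel variant (illustrative only; same assumed helpers as above,
* with per-channel min/max along the last dimension):
*
*   sd::ops::fake_quant_with_min_max_vars_per_channel op;
*   auto x   = NDArrayFactory::create<float>('c', {2, 3});
*   x.linspace(0.1f);
*   auto min = NDArrayFactory::create<float>('c', {3}, {0.f, 0.f, 0.f});
*   auto max = NDArrayFactory::create<float>('c', {3}, {1.f, 2.f, 3.f});
*   auto result = op.evaluate({&x, &min, &max});
*/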
/**
* compare_and_bitpack - Compare values of input to threshold and pack resulting bits into a uint8
*
* input params:
* 0 - NDArray (input). Note: the last dimension should be divisible by 8
* 1 - 0D Tensor - threshold to compare against. Note: when input is bool type, the threshold is ignored
*
*
* output:
* 0 - NDArray with shape {input.dim0, ..., input.dimLast / 8} and type uint8
*/
#if NOT_EXCLUDED(OP_compare_and_bitpack)
DECLARE_CUSTOM_OP(compare_and_bitpack, 2, 1, false, 0, 0);
#endif
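/*
* Usage sketch for compare_and_bitpack (illustrative only; assumes the evaluate()/NDArrayFactory
* test helpers; the last dimension must be divisible by 8, as noted above):
*
*   sd::ops::compare_and_bitpack op;
*   auto x = NDArrayFactory::create<float>('c', {2, 8});
*   x.linspace(1.f);                                   // 1, 2, ..., 16
*   auto threshold = NDArrayFactory::create<float>(8.f);
*   auto result = op.evaluate({&x, &threshold});
*   auto packed = result.at(0);   // uint8 array of shape {2, 1}: one bit per comparison x > threshold
*/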
}
}
#endif