/*******************************************************************************
 * Copyright (c) 2015-2018 Skymind, Inc.
 *
 * This program and the accompanying materials are made available under the
 * terms of the Apache License, Version 2.0 which is available at
 * https://www.apache.org/licenses/LICENSE-2.0.
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 *
 * SPDX-License-Identifier: Apache-2.0
 ******************************************************************************/

//
//  @author raver119@gmail.com
//
#ifndef LIBND4J_HEADERS_BLAS_H
#define LIBND4J_HEADERS_BLAS_H

#include <ops/declarable/headers/common.h>

namespace sd {
    namespace ops {

        /**
         * This op is a general matmul implementation. Depending on the dimensionality of the inputs, the result is computed differently:
         * matrix x matrix = BLAS gemm
         * vector x matrix = BLAS gemm
         * vector x vector = BLAS dot
         * vector x scalar = element-wise mul
         * scalar x vector = element-wise mul
         *
         * Optional T arguments:
         * 0: alpha (where applicable)
         * 1: beta (where applicable)
         *
         * Optional Integer arguments:
         * 0: transA (where applicable)
         * 1: transB (where applicable)
         */
        #if NOT_EXCLUDED(OP_matmul)
        DECLARE_CUSTOM_OP(matmul, 2, 1, false, 0, -2);
        DECLARE_CUSTOM_OP(matmul_bp, 3, 2, false, 0, -2);
        #endif

        /**
         * tensorMmul/tensorDot operation
         * takes 2 ndarrays and 2 sets of axes
         *
         * Integer arguments map:
         * IArgs[0] - number of contraction axes for the first array
         * IArgs[1]... - axes values for the first array
         * the next element - number of contraction axes for the second array,
         * followed by the axes values for the second array
         */
        #if NOT_EXCLUDED(OP_tensormmul)
        DECLARE_CUSTOM_OP(tensormmul, 2, 1, false, 0, -1);
        DECLARE_CUSTOM_OP(tensormmul_bp, 3, 2, false, 0, -1);
        #endif

        /**
         * This op is a simple implementation of the BLAS AXPY method.
         * Math is: y += a * x;
         */
        #if NOT_EXCLUDED(OP_axpy)
        DECLARE_CONFIGURABLE_OP(axpy, 2, 1, false, -2, 0);
        #endif

        /**
         * This operation implements batched matrix multiplication.
         * Expected arguments:
         * alpha: vector of T
         * beta: vector of T
         * ...: A, B matrices sequentially, i.e. AAAAABBBBB
         *
         * Integer arguments:
         * transA, transB, M, N, K, ldA, ldB, ldC - usual BLAS gemm arguments
         * batchCount - number of operations in this batch
         *
         * PLEASE NOTE: M, N, K, ldA, ldB, ldC should be equal for all matrices within the batch.
         */
        #if NOT_EXCLUDED(OP_batched_gemm)
        DECLARE_CUSTOM_OP(batched_gemm, -1, -1, false, 0, 9);
        #endif

        /**
         * Performs singular value decomposition (SVD) of one or more matrices, i.e. evaluates the SVD of each inner-most 2D matrix in the input array:
         * x[..., :, :] = u[..., :, :] * s[..., :] * transpose(v[..., :, :])
         *
         * Input array:
         * x[..., Rows, Cols], the necessary condition is: rank of x >= 2
         *
         * Output arrays:
         * s[..., diagSize] - array with singular values stored in decreasing order, diagSize is the smaller of Rows and Cols
         * u[..., Rows, Rows] if IArgs[1] is true, else u[..., Rows, diagSize] - array with left singular vectors
         * v[..., Cols, Cols] if IArgs[1] is true, else v[..., Cols, diagSize] - array with right singular vectors
         *
         * Integer arguments:
         * IArgs[0] - bool, whether to calculate u and v; s is calculated in any case
         * IArgs[1] - bool, whether to calculate full-sized u and v
         * IArgs[2] - switch threshold that determines which algorithm to use. More precisely:
         *            if diagSize < IArgs[2], the Jacobi algorithm is used, otherwise Divide-And-Conquer is applied.
         *            Recommended value is 16.
         */
        #if NOT_EXCLUDED(OP_svd)
        DECLARE_CUSTOM_OP(svd, 1, 1, false, 0, 3);
        #endif
    }
}

#endif