/*******************************************************************************
 * Copyright (c) 2015-2018 Skymind, Inc.
 *
 * This program and the accompanying materials are made available under the
 * terms of the Apache License, Version 2.0 which is available at
 * https://www.apache.org/licenses/LICENSE-2.0.
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 *
 * SPDX-License-Identifier: Apache-2.0
 ******************************************************************************/

//
// @author raver119@gmail.com
//

#include <op_boilerplate.h>
#if NOT_EXCLUDED(OP_batched_gemm)

#include <ops/declarable/headers/blas.h>
#include <ops/declarable/helpers/batched_gemm.h>

namespace nd4j {
namespace ops {
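
// batched_gemm performs a general matrix-matrix multiply over a batch of
// independent 2D arrays. It takes 9 integer arguments (transA, transB, M, N,
// K, ldA, ldB, ldC, batchSize), and its inputs are the alpha and beta
// coefficient arrays followed by batchSize A matrices and batchSize B
// matrices; it produces batchSize output matrices C of shape M x N.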

CUSTOM_OP_IMPL(batched_gemm, -1, -1, false, 0, 9) {
    int transA = INT_ARG(0);
    int transB = INT_ARG(1);
    int M = INT_ARG(2);
    int N = INT_ARG(3);
    int K = INT_ARG(4);
    int ldA = INT_ARG(5);
    int ldB = INT_ARG(6);
    int ldC = INT_ARG(7);
    int batchSize = INT_ARG(8);

    // map the 0/1 shorthand onto the CBLAS_TRANSPOSE enum values:
    // 111 == CblasNoTrans, 112 == CblasTrans
    if (transA == 0)
        transA = 111;

    if (transB == 0)
        transB = 111;

    if (transA == 1)
        transA = 112;

    if (transB == 1)
        transB = 112;

    // inputs: batchSize A matrices plus batchSize B matrices, and the two alpha/beta coefficient arrays
    int expectedWidth = batchSize * 2 + 2;
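
    // e.g. for batchSize == 2 the inputs are expected in the order
    // [alpha, beta, A0, A1, B0, B1], and the outputs are [C0, C1]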

    REQUIRE_TRUE((transA == 111 || transA == 112) && (transB == 111 || transB == 112), 0, "BatchedGemm: valid values for transA and transB are: 0/1 or 111/112, for NoTrans/Trans respectively");
    REQUIRE_TRUE(M > 0 && N > 0 && K > 0 && ldA > 0 && ldB > 0 && ldC > 0 && batchSize > 0, 0, "BatchedGemm: M, N, K, ldA, ldB, ldC and batchSize should all be positive");
    REQUIRE_TRUE(block.width() == expectedWidth, 0, "BatchedGemm: expected number of input arrays is %i, but got %i instead", expectedWidth, block.width());

    auto alpha = INPUT_VARIABLE(0);
    auto beta = INPUT_VARIABLE(1);

    std::vector<NDArray*> vA(batchSize);
    std::vector<NDArray*> vB(batchSize);
    std::vector<NDArray*> vC(batchSize);

    auto firstType = INPUT_VARIABLE(0)->dataType();
    for (int e = 0; e < batchSize; e++) {
        vA[e] = INPUT_VARIABLE(e + 2);
        vB[e] = INPUT_VARIABLE(e + 2 + batchSize);
        vC[e] = OUTPUT_VARIABLE(e);

        REQUIRE_TRUE(firstType == vC[e]->dataType(), 0, "BatchedGemm: all inputs and outputs must have same data type");

        REQUIRE_TRUE(vA[e]->rankOf() == 2, 0, "BatchedGemm: batch %i, rank of A should be equal to 2", e);
        REQUIRE_TRUE(vB[e]->rankOf() == 2, 0, "BatchedGemm: batch %i, rank of B should be equal to 2", e);
        REQUIRE_TRUE(vC[e]->rankOf() == 2, 0, "BatchedGemm: batch %i, rank of C should be equal to 2", e);

        REQUIRE_TRUE(M == vA[e]->sizeAt(0), 0, "BatchedGemm: batch %i, number of A.rows() should be equal to M", e);
        REQUIRE_TRUE(N == vB[e]->sizeAt(1), 0, "BatchedGemm: batch %i, number of B.columns() should be equal to N", e);
        REQUIRE_TRUE(K == vA[e]->sizeAt(1) && K == vB[e]->sizeAt(0), 0, "BatchedGemm: batch %i, number of A.columns() and B.rows() should be equal to K", e);
    }

    REQUIRE_TRUE(vA.size() == vB.size() && vA.size() == vC.size() && vA.size() == batchSize, 0, "BatchedGemm: numbers of A, B and C arrays should all match batchSize");

    // delegate to the BLAS helper; each output is computed as
    // C[e] = alpha * op(A[e]) * op(B[e]) + beta * C[e],
    // with the coefficients taken from the alpha and beta arrays
    nd4j::ops::helpers::bgemm(vA, vB, vC, alpha, beta, transA, transB, M, N, K, ldA, ldB, ldC);

    return Status::OK();
}
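
// A minimal invocation sketch, kept as a comment so this translation unit is
// unchanged. It assumes the generic DeclarableOp::execute overload taking
// {inputs}, {outputs}, {tArgs}, {iArgs} plus the NDArrayFactory helpers; exact
// signatures may differ between versions of this codebase:
//
//     auto alpha = NDArrayFactory::create<float>('c', {2}, {1.f, 1.f});
//     auto beta  = NDArrayFactory::create<float>('c', {2}, {0.f, 0.f});
//     auto a0 = NDArrayFactory::create<float>('f', {2, 2}, {1.f, 2.f, 3.f, 4.f});
//     auto b0 = NDArrayFactory::create<float>('f', {2, 2}, {1.f, 0.f, 0.f, 1.f});
//     auto a1 = a0, b1 = b0;
//     auto c0 = NDArrayFactory::create<float>('f', {2, 2});
//     auto c1 = c0;
//
//     nd4j::ops::batched_gemm op;
//     // iArgs: transA, transB, M, N, K, ldA, ldB, ldC, batchSize
//     auto status = op.execute({&alpha, &beta, &a0, &a1, &b0, &b1},
//                              {&c0, &c1}, {}, {0, 0, 2, 2, 2, 2, 2, 2, 2});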

DECLARE_SHAPE_FN(batched_gemm) {
    int transA = INT_ARG(0);
    int transB = INT_ARG(1);
    int M = INT_ARG(2);
    int N = INT_ARG(3);
    int K = INT_ARG(4);
    int ldA = INT_ARG(5);
    int ldB = INT_ARG(6);
    int ldC = INT_ARG(7);
    int batchSize = INT_ARG(8);

    auto firstType = ArrayOptions::dataType(inputShape->at(0));
    for (int e = 1; e < block.width(); e++) {
        REQUIRE_TRUE(firstType == ArrayOptions::dataType(inputShape->at(e)), 0, "BatchedGemm: all inputs must have same data type");
    }

    auto shapeList = SHAPELIST();

    // if any dimension argument is invalid, return a single dummy 1x1 shape
    if (!(M > 0 && N > 0 && K > 0 && ldA > 0 && ldB > 0 && ldC > 0 && batchSize > 0)) {
        Nd4jLong *newShape;
        ALLOCATE(newShape, block.getWorkspace(), shape::shapeInfoLength(2), Nd4jLong);

        // hand-built shapeInfo: rank 2, shape {1, 1}, strides {1, 1}, ews 1, 'c' order (99)
        newShape[0] = 2;
        newShape[1] = 1;
        newShape[2] = 1;
        newShape[3] = 1;
        newShape[4] = 1;
        newShape[5] = 0;
        newShape[6] = 1;
        newShape[7] = 99;

        shapeList->push_back(newShape);
        return shapeList;
    }

    // each of the batchSize outputs is an M x N matrix in 'f' (column-major) order
    std::vector<Nd4jLong> shape({M, N});

    for (int e = 0; e < batchSize; e++) {
        auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(block.dataType(), 'f', shape);
        shapeList->push_back(newShape);
    }

    return shapeList;
}
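
// For instance, with M = 3, N = 4 and batchSize = 2 the shape function returns
// two shapeInfo entries, each describing a 3x4 matrix in 'f' (column-major)
// order, matching the storage convention BLAS gemm routines operate on.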

DECLARE_TYPES(batched_gemm) {
    getOpDescriptor()
            ->setAllowedInputTypes({ALL_FLOATS})
            // ->setAllowedInputTypes(1, {DataType::FLOAT32, DataType::DOUBLE, DataType::HALF})
            ->setAllowedOutputTypes({ALL_FLOATS});
}

}  // namespace ops
}  // namespace nd4j

#endif