cavis/libnd4j/include/ops/declarable/generic/blas/batched_gemm.cpp

/*******************************************************************************
* Copyright (c) 2015-2018 Skymind, Inc.
*
* This program and the accompanying materials are made available under the
* terms of the Apache License, Version 2.0 which is available at
* https://www.apache.org/licenses/LICENSE-2.0.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
* SPDX-License-Identifier: Apache-2.0
******************************************************************************/
//
// @author raver119@gmail.com
//
#include <op_boilerplate.h>
#if NOT_EXCLUDED(OP_batched_gemm)
#include <ops/declarable/headers/blas.h>
#include <ops/declarable/helpers/batched_gemm.h>
namespace nd4j {
    namespace ops {
        CUSTOM_OP_IMPL(batched_gemm, -1, -1, false, 0, 9) {
            int transA = INT_ARG(0);
            int transB = INT_ARG(1);
            int M = INT_ARG(2);
            int N = INT_ARG(3);
            int K = INT_ARG(4);
            int ldA = INT_ARG(5);
            int ldB = INT_ARG(6);
            int ldC = INT_ARG(7);
            int batchSize = INT_ARG(8);

            // map 0/1 to the CBLAS transpose enums: 111 == CblasNoTrans, 112 == CblasTrans
            if (transA == 0)
                transA = 111;
            if (transB == 0)
                transB = 111;
            if (transA == 1)
                transA = 112;
            if (transB == 1)
                transB = 112;

            // expected inputs: alpha and beta, followed by batchSize A matrices and batchSize B matrices
            int expectedWidth = batchSize * 2 + 2;

            REQUIRE_TRUE((transA == 111 || transA == 112) && (transB == 111 || transB == 112), 0, "BatchedGemm: valid values for transA and transB are: 0/1 or 111/112, for NoTrans/Trans respectively");
            REQUIRE_TRUE(M > 0 && N > 0 && K > 0 && ldA > 0 && ldB > 0 && ldC > 0 && batchSize > 0, 0, "BatchedGemm: M, N, K, ldA, ldB, ldC and batchSize must all be positive");
            REQUIRE_TRUE(block.width() == expectedWidth, 0, "BatchedGemm: expected number of input arrays is %i, but got %i instead", expectedWidth, block.width());

            auto alpha = INPUT_VARIABLE(0);
            auto beta = INPUT_VARIABLE(1);

            std::vector<NDArray*> vA(batchSize);
            std::vector<NDArray*> vB(batchSize);
            std::vector<NDArray*> vC(batchSize);

            auto firstType = INPUT_VARIABLE(0)->dataType();
            for (int e = 0; e < batchSize; e++) {
                vA[e] = INPUT_VARIABLE(e + 2);
                vB[e] = INPUT_VARIABLE(e + 2 + batchSize);
                vC[e] = OUTPUT_VARIABLE(e);

                REQUIRE_TRUE(firstType == vC[e]->dataType(), 0, "BatchedGemm: all inputs and outputs must have same data type");
                REQUIRE_TRUE(vA[e]->rankOf() == 2, 0, "BatchedGemm: batch %i, rank of A should be equal to 2", e);
                REQUIRE_TRUE(vB[e]->rankOf() == 2, 0, "BatchedGemm: batch %i, rank of B should be equal to 2", e);
                REQUIRE_TRUE(vC[e]->rankOf() == 2, 0, "BatchedGemm: batch %i, rank of C should be equal to 2", e);
                REQUIRE_TRUE(M == vA[e]->sizeAt(0), 0, "BatchedGemm: batch %i, number of A.rows() should be equal to M", e);
                REQUIRE_TRUE(N == vB[e]->sizeAt(1), 0, "BatchedGemm: batch %i, number of B.columns() should be equal to N", e);
                REQUIRE_TRUE(K == vA[e]->sizeAt(1) && K == vB[e]->sizeAt(0), 0, "BatchedGemm: batch %i, number of A.columns() and B.rows() should be equal to K", e);
            }

            REQUIRE_TRUE(vA.size() == vB.size() && vA.size() == vC.size() && vA.size() == static_cast<size_t>(batchSize), 0, "BatchedGemm: mismatched numbers of A, B, C for unknown reason");

            nd4j::ops::helpers::bgemm(vA, vB, vC, alpha, beta, transA, transB, M, N, K, ldA, ldB, ldC);

            return Status::OK();
        }
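
        // Input/output layout sketch (illustrative only, derived from the checks above;
        // not part of the op itself): for batchSize == 2 the op expects
        // block.width() == 2 * 2 + 2 == 6 input arrays, ordered as
        //     IArgs:   {transA, transB, M, N, K, ldA, ldB, ldC, batchSize}
        //     inputs:  {alpha, beta, A0, A1, B0, B1}
        //     outputs: {C0, C1}
        // where each Ai is M x K, each Bi is K x N, and each Ci is produced as M x N.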
        DECLARE_SHAPE_FN(batched_gemm) {
            int transA = INT_ARG(0);
            int transB = INT_ARG(1);
            int M = INT_ARG(2);
            int N = INT_ARG(3);
            int K = INT_ARG(4);
            int ldA = INT_ARG(5);
            int ldB = INT_ARG(6);
            int ldC = INT_ARG(7);
            int batchSize = INT_ARG(8);

            auto firstType = ArrayOptions::dataType(inputShape->at(0));
            for (int e = 1; e < block.width(); e++) {
                REQUIRE_TRUE(firstType == ArrayOptions::dataType(inputShape->at(e)), 0, "BatchedGemm: all inputs must have same data type");
            }

            auto shapeList = SHAPELIST();

            if (!(M > 0 && N > 0 && K > 0 && ldA > 0 && ldB > 0 && ldC > 0 && batchSize > 0)) {
                // invalid dimensions: return a single dummy 1x1 shape instead of failing here
                Nd4jLong *newShape;
                ALLOCATE(newShape, block.getWorkspace(), shape::shapeInfoLength(2), Nd4jLong);

                newShape[0] = 2;    // rank
                newShape[1] = 1;    // shape: 1 x 1
                newShape[2] = 1;
                newShape[3] = 1;    // strides: 1, 1
                newShape[4] = 1;
                newShape[5] = 0;    // extra properties
                newShape[6] = 1;    // element-wise stride
                newShape[7] = 99;   // order: 99 == 'c'
                shapeList->push_back(newShape);
                return shapeList;
            }

            std::vector<Nd4jLong> shape({M, N});
            for (int e = 0; e < batchSize; e++) {
                auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(block.dataType(), 'f', shape);
                shapeList->push_back(newShape);
            }

            return shapeList;
        }
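
        // Note on the 'f' (column-major) order above: Fortran-style GEMM operates on
        // column-major data, with ldA/ldB/ldC interpreted as column-major leading
        // dimensions, so emitting column-major C shapes presumably keeps the outputs
        // directly consumable by the underlying BLAS routine.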
        DECLARE_TYPES(batched_gemm) {
            getOpDescriptor()
                    ->setAllowedInputTypes({ALL_FLOATS})
                    // ->setAllowedInputTypes(1, {DataType::FLOAT32, DataType::DOUBLE, DataType::HALF})
                    ->setAllowedOutputTypes({ALL_FLOATS});
        }
    }
}
#endif
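
// Minimal invocation sketch (a hedged example, assuming libnd4j's DeclarableOp
// execute({inputs}, {tArgs}, {iArgs}) API of this era; all variable names below
// are hypothetical):
//
//     nd4j::ops::batched_gemm op;
//     // alpha and beta hold one scalar per batch; A0, A1 are M x K; B0, B1 are K x N
//     auto results = op.execute({&alpha, &beta, &A0, &A1, &B0, &B1}, {},
//                               {0, 0, M, N, K, M, K, M, 2});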