444 lines
20 KiB
C++
444 lines
20 KiB
C++
/*******************************************************************************
 * Copyright (c) 2015-2018 Skymind, Inc.
 *
 * This program and the accompanying materials are made available under the
 * terms of the Apache License, Version 2.0 which is available at
 * https://www.apache.org/licenses/LICENSE-2.0.
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 *
 * SPDX-License-Identifier: Apache-2.0
 ******************************************************************************/
|
|
|
|
//
|
|
// Created by raver119 on 08.10.2017.
|
|
//
|
|
|
|
#include <op_boilerplate.h>
|
|
#if NOT_EXCLUDED(OP_fullconv3d)
|
|
|
|
#include <ops/declarable/CustomOperations.h>
|
|
#include <ops/declarable/helpers/convolutions.h>
|
|
|
|
namespace nd4j {
|
|
namespace ops {
|
|
//////////////////////////////////////////////////////////////////////////
|
|
CUSTOM_OP_IMPL(fullconv3d, 5, 1, false, 0, 13) {
|
|
// auto input = INPUT_VARIABLE(0);
|
|
// auto weights = INPUT_VARIABLE(1);
|
|
// auto bias = INPUT_VARIABLE(2);
|
|
// auto columns = INPUT_VARIABLE(3);
|
|
// auto ones = INPUT_VARIABLE(4);
|
|
|
|
// REQUIRE_TRUE(weights->rankOf() == 5, 0, "Weights should be 5D, got %i instead", weights->rankOf());
|
|
// REQUIRE_TRUE(input->rankOf() == 5, 0, "Input should be 5D, got %i instead", input->rankOf());
|
|
|
|
// // strides
|
|
// int dT = INT_ARG(0);
|
|
// int dW = INT_ARG(1);
|
|
// int dH = INT_ARG(2);
|
|
|
|
// // padding
|
|
// int pT = INT_ARG(3);
|
|
// int pW = INT_ARG(4);
|
|
// int pH = INT_ARG(5);
|
|
|
|
// // dilation
|
|
// int dilationT = INT_ARG(6);
|
|
// int dilationW = INT_ARG(7);
|
|
// int dilationH = INT_ARG(8);
|
|
|
|
// // output padding
|
|
// int aT = INT_ARG(9);
|
|
// int aW = INT_ARG(10);
|
|
// int aH = INT_ARG(11);
|
|
|
|
// // bias
|
|
// bool biasUsed = INT_ARG(12) != 0;
|
|
|
|
|
|
// REQUIRE_TRUE(dT > 0 && dW > 0 && dH > 0, 11,
|
|
// "stride should be greater than zero, but got dT: %d dH: %d dW: %d", dT, dH, dW);
|
|
// REQUIRE_TRUE(dilationT > 0 && dilationW > 0 && dilationH > 0, 15,
|
|
// "dilation should be greater than zero, but got dilationT: %d, dilationH: %d, dilationW: %d",
|
|
// dilationT, dilationH, dilationW);
|
|
// REQUIRE_TRUE((aT < dT || aT < dilationT)
|
|
// && (aW < dW || aW < dilationW)
|
|
// && (aH < dH || aH < dilationH), 15,
|
|
// "output padding must be smaller than either stride or dilation,"
|
|
// " but got aT: %d aH: %d aW: %d dT: %d dH: %d dW: %d "
|
|
// "dilationT: %d dilationH: %d dilationW: %d",
|
|
// aT, aH, aW, dT, dH, dW, dilationT, dilationH, dilationW);
|
|
|
|
// auto output = this->getZ(block);
|
|
|
|
// const int nInputPlane = weights->shapeOf()[0];
|
|
// const int nOutputPlane = weights->shapeOf()[1];
|
|
// const int kT = weights->shapeOf()[2];
|
|
// const int kH = weights->shapeOf()[3];
|
|
// const int kW = weights->shapeOf()[4];
|
|
|
|
// const Nd4jLong inputWidth = input->shapeOf()[4];
|
|
// const Nd4jLong inputHeight = input->shapeOf()[3];
|
|
// const Nd4jLong inputDepth = input->shapeOf()[2];
|
|
// const Nd4jLong outputDepth = (inputDepth - 1) * dT - 2*pT + (dilationT * (kT - 1) + 1) + aT;
|
|
// const Nd4jLong outputHeight = (inputHeight - 1) * dH - 2*pH + (dilationH * (kH - 1) + 1) + aH;
|
|
// const Nd4jLong outputWidth = (inputWidth - 1) * dW - 2*pW + (dilationW * (kW - 1) + 1) + aW;
|
|
|
|
// const Nd4jLong batchSize = input->shapeOf()[0];
|
|
|
|
// REQUIRE_TRUE(output->isSameShape({ (int) batchSize, (int)nOutputPlane, (int)outputDepth, (int)outputHeight, (int)outputWidth}), 0, "Output should have shape of [%i, %i, %i, %i, %i], but got [%i, %i, %i, %i, %i] instead", (int) batchSize, (int)nOutputPlane, (int)outputDepth, (int)outputHeight, (int)outputWidth, output->sizeAt(0), output->sizeAt(1), output->sizeAt(2), output->sizeAt(3), output->sizeAt(4));
|
|
|
|
// std::unique_ptr<ResultSet> inputs(input->allExamples());
|
|
// std::unique_ptr<ResultSet> outputs(output->allExamples());
|
|
// for (int e = 0; e < batchSize; e++) {
|
|
// auto tadIn = inputs->at(e);
|
|
// auto tadOut = outputs->at(e);
|
|
|
|
// const int m = weights->shapeOf()[1] * weights->shapeOf()[2] * weights->shapeOf()[3] * weights->shapeOf()[4];
|
|
// const int n = columns->shapeOf()[1];
|
|
// const int k = weights->shapeOf()[0];
|
|
|
|
// // FIXME: mmul helper should be used here
|
|
// /*
|
|
// nd4j::blas::GEMM<T>::op('c', 'n', 't', m, n, k,
|
|
// 1.0,
|
|
// tadIn->getBuffer(), n,
|
|
// weights->getBuffer(), m,
|
|
// 0.0,
|
|
// columns->getBuffer(), n);
|
|
// */
|
|
|
|
// // ConvolutionUtils<T>::_col2vol(columns->getBuffer(),
|
|
// // nOutputPlane, outputDepth, outputHeight, outputWidth,
|
|
// // inputDepth, inputHeight, inputWidth,
|
|
// // kT, kH, kW,
|
|
// // pT, pH, pW,
|
|
// // dT, dH, dW,
|
|
// // dilationT, dilationH, dilationW,
|
|
// // tadOut->getBuffer());
|
|
// ConvolutionUtils::col2vol(*columns, *tadOut, dT, dH, dW, pT, pH, pW, dilationT, dilationH, dilationW);
|
|
|
|
|
|
// const int m_ = nOutputPlane;
|
|
// const int n_ = outputDepth * outputHeight * outputWidth;
|
|
// const int k_ = 1;
|
|
|
|
// if (biasUsed) {
|
|
// // FIXME: mmul helper should be used here
|
|
// /*
|
|
// nd4j::blas::GEMM<T>::op('c', 't', 'n', n_, m_, k_,
|
|
// 1.0,
|
|
// ones->getBuffer(), k_,
|
|
// bias->getBuffer(), k_,
|
|
// 1.0,
|
|
// tadOut->getBuffer(), n_);
|
|
// */
|
|
// }
|
|
// }
|
|
|
|
// STORE_RESULT(*output);
|
|
|
|
return Status::OK();
|
|
}
|
|
|
|
DECLARE_TYPES(fullconv3d) {
|
|
getOpDescriptor()
|
|
->setAllowedInputTypes(nd4j::DataType::ANY)
|
|
->setAllowedOutputTypes({ALL_FLOATS});
|
|
}
|
|
|
|
DECLARE_SHAPE_FN(fullconv3d) {
|
|
// auto input = inputShape->at(0);
|
|
// auto weights = inputShape->at(1);
|
|
|
|
// // strides
|
|
// int dT = INT_ARG(0);
|
|
// int dW = INT_ARG(1);
|
|
// int dH = INT_ARG(2);
|
|
|
|
// // padding
|
|
// int pT = INT_ARG(3);
|
|
// int pW = INT_ARG(4);
|
|
// int pH = INT_ARG(5);
|
|
|
|
// // dilation
|
|
// int dilationT = INT_ARG(6);
|
|
// int dilationW = INT_ARG(7);
|
|
// int dilationH = INT_ARG(8);
|
|
|
|
// // output padding
|
|
// int aT = INT_ARG(9);
|
|
// int aW = INT_ARG(10);
|
|
// int aH = INT_ARG(11);
|
|
|
|
// // bias
|
|
// bool biasUsed = INT_ARG(12) != 0;
|
|
|
|
// Nd4jLong *shapeOf;
|
|
// Nd4jLong *newShape;
|
|
// ALLOCATE(shapeOf, block.getWorkspace(), 5, Nd4jLong);
|
|
// ALLOCATE(newShape, block.getWorkspace(), shape::shapeInfoLength(5), Nd4jLong);
|
|
|
|
// const int nInputPlane = weights[1];
|
|
// const int nOutputPlane = weights[2];
|
|
// const int kT = weights[3];
|
|
// const int kH = weights[4];
|
|
// const int kW = weights[5];
|
|
|
|
// const int batchSize = input[1];
|
|
// const Nd4jLong inputWidth = input[5];
|
|
// const Nd4jLong inputHeight = input[4];
|
|
// const Nd4jLong inputDepth = input[3];
|
|
// const Nd4jLong outputDepth = (inputDepth - 1) * dT - 2*pT + (dilationT * (kT - 1) + 1) + aT;
|
|
// const Nd4jLong outputHeight = (inputHeight - 1) * dH - 2*pH + (dilationH * (kH - 1) + 1) + aH;
|
|
// const Nd4jLong outputWidth = (inputWidth - 1) * dW - 2*pW + (dilationW * (kW - 1) + 1) + aW;
|
|
|
|
// nd4j::ArrayUtils::toLongPtr({(Nd4jLong) batchSize, (Nd4jLong)nOutputPlane, (Nd4jLong)outputDepth, (Nd4jLong)outputHeight, (Nd4jLong)outputWidth}, shapeOf);
|
|
|
|
// shape::shapeBuffer(5, shapeOf, newShape);
|
|
|
|
// RELEASE(shapeOf, block.getWorkspace());
|
|
|
|
// return SHAPELIST(newShape);
|
|
return SHAPELIST();
|
|
}
|
|
|
|
DECLARE_TYPES(fullconv3d_bp) {
|
|
getOpDescriptor()
|
|
->setAllowedInputTypes(nd4j::DataType::ANY)
|
|
->setAllowedOutputTypes({ALL_FLOATS});
|
|
}
|
|
//////////////////////////////////////////////////////////////////////////
|
|
CUSTOM_OP_IMPL(fullconv3d_bp, 5, 1, false, 0, 13) {
|
|
// auto input = INPUT_VARIABLE(0);
|
|
// auto gradNext = INPUT_VARIABLE(1);
|
|
// auto weights = INPUT_VARIABLE(2);
|
|
// auto finput = INPUT_VARIABLE(3);
|
|
|
|
// // not used
|
|
// auto fgradInput = INPUT_VARIABLE(4);
|
|
|
|
|
|
// REQUIRE_TRUE(weights->rankOf() == 5, 0, "Weights should be 5D, got %i instead", weights->rankOf());
|
|
// REQUIRE_TRUE(input->rankOf() == 5, 0, "Input should be 5D, got %i instead", input->rankOf());
|
|
|
|
// auto output = OUTPUT_VARIABLE(0);
|
|
|
|
// int dT = INT_ARG(0);
|
|
// int dW = INT_ARG(1);
|
|
// int dH = INT_ARG(2);
|
|
// int pT = INT_ARG(3);
|
|
// int pW = INT_ARG(4);
|
|
// int pH = INT_ARG(5);
|
|
// int dilationT = INT_ARG(6);
|
|
// int dilationW = INT_ARG(7);
|
|
// int dilationH = INT_ARG(8);
|
|
// int aT = INT_ARG(9);
|
|
// int aW = INT_ARG(10);
|
|
// int aH = INT_ARG(11);
|
|
// bool biasUsed = INT_ARG(12) != 0;
|
|
|
|
// const int nInputPlane = (int)weights->shapeOf()[0];
|
|
// const int nOutputPlane = (int)weights->shapeOf()[1];
|
|
// const int kT = (int)weights->shapeOf()[2];
|
|
// const int kH = (int)weights->shapeOf()[3];
|
|
// const int kW = (int)weights->shapeOf()[4];
|
|
|
|
// const Nd4jLong inputWidth = input->shapeOf()[4];
|
|
// const Nd4jLong inputHeight = input->shapeOf()[3];
|
|
// const Nd4jLong inputDepth = input->shapeOf()[2];
|
|
// const Nd4jLong outputDepth = (inputDepth - 1) * dT - 2*pT + (dilationT * (kT - 1) + 1) + aT;
|
|
// const Nd4jLong outputHeight = (inputHeight - 1) * dH - 2*pH + (dilationH * (kH - 1) + 1) + aH;
|
|
// const Nd4jLong outputWidth = (inputWidth - 1) * dW - 2*pW + (dilationW * (kW - 1) + 1) + aW;
|
|
|
|
// const Nd4jLong batchSize = input->shapeOf()[0];
|
|
|
|
|
|
// REQUIRE_TRUE(output->isSameShape({(int) batchSize, (int) nInputPlane, (int) inputDepth, (int) inputHeight, (int) inputWidth}) ,0, "Output should have shape of [%i, %i, %i, %i, %i], but got [%i, %i, %i, %i, %i] instead", (int) batchSize, (int) nInputPlane, (int) inputDepth, (int) inputHeight, (int) inputWidth, output->sizeAt(0), output->sizeAt(1), output->sizeAt(2), output->sizeAt(3), output->sizeAt(4));
|
|
|
|
// output->assign(0.0);
|
|
|
|
// // FIXME: non-inplace reshape!!!!
|
|
// NDArray *gradColumns;
|
|
// //auto gradColumns = finput->reshape('c', {nOutputPlane*kW*kH*kT, inputDepth*inputHeight*inputWidth });
|
|
|
|
// std::unique_ptr<ResultSet> tadsNext(gradNext->allExamples());
|
|
// std::unique_ptr<ResultSet> tadsOutput(output->allExamples());
|
|
// for (int e = 0; e < tadsNext->size(); e++) {
|
|
// auto tadNext = tadsNext->at(e);
|
|
// auto tadOutput = tadsOutput->at(e);
|
|
|
|
// // ConvolutionUtils<T>::_vol2col(
|
|
// // tadNext->getBuffer(),
|
|
// // nOutputPlane, outputDepth, outputHeight, outputWidth,
|
|
// // kT, kH, kW,
|
|
// // pT, pH, pW,
|
|
// // dT, dH, dW,
|
|
// // dilationT, dilationH, dilationW,
|
|
// // gradColumns->getBuffer());
|
|
// ConvolutionUtils::vol2col(*tadNext, *gradColumns, dT, dH, dW, pT, pH, pW, dilationT, dilationH, dilationW);
|
|
|
|
// const auto m = weights->shapeOf()[0];
|
|
// const auto n = gradColumns->shapeOf()[1];
|
|
// const auto k = weights->shapeOf()[1] * weights->shapeOf()[2] * weights->shapeOf()[3] * weights->shapeOf()[4];
|
|
|
|
// // FIXME: mmul helper should be used here
|
|
// /*
|
|
// nd4j::blas::GEMM<T>::op('f', 'n', 'n',
|
|
// n, m, k,
|
|
// 1.0f,
|
|
// gradColumns->getBuffer(), n,
|
|
// weights->getBuffer(), k,
|
|
// 0,
|
|
// tadOutput->getBuffer(), n
|
|
|
|
// );
|
|
// */
|
|
// }
|
|
|
|
|
|
// STORE_RESULT(*output);
|
|
|
|
// delete gradColumns;
|
|
return ND4J_STATUS_OK;
|
|
}
|
|
DECLARE_SHAPE_FN(fullconv3d_bp) {
|
|
// output shape equals to input shape, all out of sudden
|
|
// Nd4jLong* newShape;
|
|
// COPY_SHAPE(inputShape->at(0), newShape);
|
|
|
|
// return SHAPELIST(newShape);
|
|
return SHAPELIST();
|
|
}
|
|
|
|
DECLARE_TYPES(fullconv3d_grad) {
|
|
getOpDescriptor()
|
|
->setAllowedInputTypes(nd4j::DataType::ANY)
|
|
->setAllowedOutputTypes({ALL_FLOATS});
|
|
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
CUSTOM_OP_IMPL(fullconv3d_grad, 4, 2, false, 1, 13) {
|
|
// auto input = INPUT_VARIABLE(0);
|
|
// auto epsilon = INPUT_VARIABLE(1);
|
|
// auto columns = INPUT_VARIABLE(2);
|
|
// auto ones = INPUT_VARIABLE(3);
|
|
|
|
// REQUIRE_TRUE(input->rankOf() == epsilon->rankOf(), 0, "Rank of input (%i) & epsilon (%i) should be equal", input->rankOf(), epsilon->rankOf());
|
|
// REQUIRE_TRUE(input->sizeAt(0) == epsilon->sizeAt(0), 1, "Batch size should be equal for input and epsilon");
|
|
|
|
// auto gradWeight = OUTPUT_VARIABLE(0);
|
|
// auto gradBias = OUTPUT_VARIABLE(1);
|
|
|
|
// REQUIRE_TRUE(gradBias->sizeAt(0) == gradWeight->sizeAt(1), 0, "Bias shape mismatch");
|
|
|
|
// int dT = INT_ARG(0);
|
|
// int dW = INT_ARG(1);
|
|
// int dH = INT_ARG(2);
|
|
// int pT = INT_ARG(3);
|
|
// int pW = INT_ARG(4);
|
|
// int pH = INT_ARG(5);
|
|
// int dilationT = INT_ARG(6);
|
|
// int dilationW = INT_ARG(7);
|
|
// int dilationH = INT_ARG(8);
|
|
// int aT = INT_ARG(9);
|
|
// int aW = INT_ARG(10);
|
|
// int aH = INT_ARG(11);
|
|
// bool biasUsed = INT_ARG(12) != 0;
|
|
|
|
// double scale = block.getTArguments()->at(0);
|
|
|
|
// int nInputPlane = (int)gradWeight->shapeOf()[0];
|
|
// int nOutputPlane = (int)gradWeight->shapeOf()[1];
|
|
// int kT = (int)gradWeight->shapeOf()[2];
|
|
// int kH = (int)gradWeight->shapeOf()[3];
|
|
// int kW = (int)gradWeight->shapeOf()[4];
|
|
|
|
|
|
// const Nd4jLong inputWidth = input->shapeOf()[4];
|
|
// const Nd4jLong inputHeight = input->shapeOf()[3];
|
|
// const Nd4jLong inputDepth = input->shapeOf()[2];
|
|
// const Nd4jLong outputDepth = (inputDepth - 1) * dT - 2*pT + (dilationT * (kT - 1) + 1) + aT;
|
|
// const Nd4jLong outputHeight = (inputHeight - 1) * dH - 2*pH + (dilationH * (kH - 1) + 1) + aH;
|
|
// const Nd4jLong outputWidth = (inputWidth - 1) * dW - 2*pW + (dilationW * (kW - 1) + 1) + aW;
|
|
|
|
|
|
// REQUIRE_TRUE(gradWeight->isContiguous(), 0, "gradWight should be continuous");
|
|
// REQUIRE_TRUE(gradBias->isContiguous(), 0, "gradBias should be continuous");
|
|
// REQUIRE_TRUE(ones->rankOf() == 3, 0, "Ones should have rank 3, got %i instead", ones->rankOf());
|
|
|
|
// REQUIRE_TRUE(ones->isSameShape({outputDepth, outputHeight, outputWidth}), 0, "");
|
|
|
|
// ones->assign(1.0);
|
|
|
|
// std::unique_ptr<ResultSet> tadsInput(input->allExamples());
|
|
// std::unique_ptr<ResultSet> tadsEpsilon(epsilon->allExamples());
|
|
|
|
// for (int e = 0; e < tadsInput->size(); e++) {
|
|
// auto tadInput = tadsInput->at(e);
|
|
// auto tadEpsilon = tadsEpsilon->at(e);
|
|
|
|
// // ConvolutionUtils<T>::_vol2col(
|
|
// // tadEpsilon->getBuffer(), nOutputPlane,
|
|
// // outputDepth, outputHeight, outputWidth,
|
|
// // kT, kH, kW,
|
|
// // pT, pH, pW,
|
|
// // dT, dH, dW,
|
|
// // dilationT, dilationH, dilationW,
|
|
// // columns->getBuffer()
|
|
// // );
|
|
// ConvolutionUtils::vol2col(*tadEpsilon, *columns, dT, dH, dW, pT, pH, pW, dilationT, dilationH, dilationW);
|
|
// const Nd4jLong n = columns->shapeOf()[0]; // nOutputPlane * kt * kh * kw
|
|
// const Nd4jLong m = tadInput->shapeOf()[0]; // nInputPlane
|
|
// const Nd4jLong k = columns->shapeOf()[1];
|
|
|
|
// // FIXME: mmul helper should be used here
|
|
// /**
|
|
// nd4j::blas::GEMM<T>::op('f', 't', 'n',
|
|
// n, m, k,
|
|
// scale,
|
|
// columns->getBuffer(), k,
|
|
// tadInput->getBuffer(), k,
|
|
// 1,
|
|
// gradWeight->getBuffer(), n);
|
|
// */
|
|
|
|
// const Nd4jLong m_ = nOutputPlane;
|
|
// const Nd4jLong k_ = outputDepth * outputHeight * outputWidth;
|
|
|
|
|
|
// if (gradBias) {
|
|
// // FIXME: mmul helper should be used here
|
|
// /*
|
|
// nd4j::blas::GEMV<T>::op('t',
|
|
// k_, m_,
|
|
// scale,
|
|
// tadEpsilon->getBuffer(), k_,
|
|
// ones->getBuffer(), 1, (T)1.0f,
|
|
// gradBias->getBuffer(), 1);
|
|
// */
|
|
// }
|
|
// }
|
|
|
|
|
|
// STORE_2_RESULTS(*gradWeight, *gradBias);
|
|
|
|
return Status::OK();
|
|
}
|
|
DECLARE_SHAPE_FN(fullconv3d_grad) {
|
|
// auto list = SHAPELIST();
|
|
|
|
// _grad ops MUST have output arrays provided
|
|
|
|
// return list;
|
|
return SHAPELIST();
|
|
}
|
|
}
|
|
}
|
|
|
|
#endif |