// cavis/libnd4j/include/ops/declarable/generic/convo/fullconv3d.cpp

/*******************************************************************************
* Copyright (c) 2015-2018 Skymind, Inc.
*
* This program and the accompanying materials are made available under the
* terms of the Apache License, Version 2.0 which is available at
* https://www.apache.org/licenses/LICENSE-2.0.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
* SPDX-License-Identifier: Apache-2.0
******************************************************************************/
//
// Created by raver119 on 08.10.2017.
//
#include <op_boilerplate.h>
#if NOT_EXCLUDED(OP_fullconv3d)
#include <ops/declarable/CustomOperations.h>
#include <ops/declarable/helpers/convolutions.h>
namespace nd4j {
namespace ops {
//////////////////////////////////////////////////////////////////////////
CUSTOM_OP_IMPL(fullconv3d, 5, 1, false, 0, 13) {
// auto input = INPUT_VARIABLE(0);
// auto weights = INPUT_VARIABLE(1);
// auto bias = INPUT_VARIABLE(2);
// auto columns = INPUT_VARIABLE(3);
// auto ones = INPUT_VARIABLE(4);
// REQUIRE_TRUE(weights->rankOf() == 5, 0, "Weights should be 5D, got %i instead", weights->rankOf());
// REQUIRE_TRUE(input->rankOf() == 5, 0, "Input should be 5D, got %i instead", input->rankOf());
// // strides
// int dT = INT_ARG(0);
// int dW = INT_ARG(1);
// int dH = INT_ARG(2);
// // padding
// int pT = INT_ARG(3);
// int pW = INT_ARG(4);
// int pH = INT_ARG(5);
// // dilation
// int dilationT = INT_ARG(6);
// int dilationW = INT_ARG(7);
// int dilationH = INT_ARG(8);
// // output padding
// int aT = INT_ARG(9);
// int aW = INT_ARG(10);
// int aH = INT_ARG(11);
// // bias
// bool biasUsed = INT_ARG(12) != 0;
// REQUIRE_TRUE(dT > 0 && dW > 0 && dH > 0, 11,
// "stride should be greater than zero, but got dT: %d dH: %d dW: %d", dT, dH, dW);
// REQUIRE_TRUE(dilationT > 0 && dilationW > 0 && dilationH > 0, 15,
// "dilation should be greater than zero, but got dilationT: %d, dilationH: %d, dilationW: %d",
// dilationT, dilationH, dilationW);
// REQUIRE_TRUE((aT < dT || aT < dilationT)
// && (aW < dW || aW < dilationW)
// && (aH < dH || aH < dilationH), 15,
// "output padding must be smaller than either stride or dilation,"
// " but got aT: %d aH: %d aW: %d dT: %d dH: %d dW: %d "
// "dilationT: %d dilationH: %d dilationW: %d",
// aT, aH, aW, dT, dH, dW, dilationT, dilationH, dilationW);
// auto output = this->getZ(block);
// const int nInputPlane = weights->shapeOf()[0];
// const int nOutputPlane = weights->shapeOf()[1];
// const int kT = weights->shapeOf()[2];
// const int kH = weights->shapeOf()[3];
// const int kW = weights->shapeOf()[4];
// const Nd4jLong inputWidth = input->shapeOf()[4];
// const Nd4jLong inputHeight = input->shapeOf()[3];
// const Nd4jLong inputDepth = input->shapeOf()[2];
// const Nd4jLong outputDepth = (inputDepth - 1) * dT - 2*pT + (dilationT * (kT - 1) + 1) + aT;
// const Nd4jLong outputHeight = (inputHeight - 1) * dH - 2*pH + (dilationH * (kH - 1) + 1) + aH;
// const Nd4jLong outputWidth = (inputWidth - 1) * dW - 2*pW + (dilationW * (kW - 1) + 1) + aW;
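// The three formulas above follow the usual transposed-convolution (deconvolution)
// output-size rule: out = (in - 1) * stride - 2 * pad + dilation * (kernel - 1) + 1 + outputPad.
// Purely illustrative check (values chosen arbitrarily, not taken from any test):
// inputDepth = 8, dT = 2, pT = 1, kT = 3, dilationT = 1, aT = 1 gives
// outputDepth = (8 - 1) * 2 - 2 * 1 + (1 * (3 - 1) + 1) + 1 = 16.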
// const Nd4jLong batchSize = input->shapeOf()[0];
// REQUIRE_TRUE(output->isSameShape({ (int) batchSize, (int)nOutputPlane, (int)outputDepth, (int)outputHeight, (int)outputWidth}), 0, "Output should have shape of [%i, %i, %i, %i, %i], but got [%i, %i, %i, %i, %i] instead", (int) batchSize, (int)nOutputPlane, (int)outputDepth, (int)outputHeight, (int)outputWidth, output->sizeAt(0), output->sizeAt(1), output->sizeAt(2), output->sizeAt(3), output->sizeAt(4));
// std::unique_ptr<ResultSet> inputs(input->allExamples());
// std::unique_ptr<ResultSet> outputs(output->allExamples());
// for (int e = 0; e < batchSize; e++) {
// auto tadIn = inputs->at(e);
// auto tadOut = outputs->at(e);
// const int m = weights->shapeOf()[1] * weights->shapeOf()[2] * weights->shapeOf()[3] * weights->shapeOf()[4];
// const int n = columns->shapeOf()[1];
// const int k = weights->shapeOf()[0];
// // FIXME: mmul helper should be used here
// /*
// nd4j::blas::GEMM<T>::op('c', 'n', 't', m, n, k,
// 1.0,
// tadIn->getBuffer(), n,
// weights->getBuffer(), m,
// 0.0,
// columns->getBuffer(), n);
// */
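// One way the "mmul helper" FIXME above could be addressed is nd4j::MmulHelper::mmul
// over 2-D views. This is only a sketch; the reshape/transpose temporaries below are
// assumptions for illustration (and, depending on the NDArray API version, may be
// returned by pointer or by value), assuming helpers/MmulHelper.h is included:
// auto w2d  = weights->reshape('c', {k, m});      // [nInputPlane, nOutputPlane*kT*kH*kW]
// auto in2d = tadIn->reshape('c', {k, n});        // [nInputPlane, inputDepth*inputHeight*inputWidth]
// auto wT   = w2d->transpose();                   // [m, k]
// MmulHelper::mmul(wT, in2d, columns, 1.0, 0.0);  // columns = wT x in2d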
// // ConvolutionUtils<T>::_col2vol(columns->getBuffer(),
// // nOutputPlane, outputDepth, outputHeight, outputWidth,
// // inputDepth, inputHeight, inputWidth,
// // kT, kH, kW,
// // pT, pH, pW,
// // dT, dH, dW,
// // dilationT, dilationH, dilationW,
// // tadOut->getBuffer());
// ConvolutionUtils::col2vol(*columns, *tadOut, dT, dH, dW, pT, pH, pW, dilationT, dilationH, dilationW);
// const int m_ = nOutputPlane;
// const int n_ = outputDepth * outputHeight * outputWidth;
// const int k_ = 1;
// if (biasUsed) {
// // FIXME: mmul helper should be used here
// /*
// nd4j::blas::GEMM<T>::op('c', 't', 'n', n_, m_, k_,
// 1.0,
// ones->getBuffer(), k_,
// bias->getBuffer(), k_,
// 1.0,
// tadOut->getBuffer(), n_);
// */
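// The commented GEMM above is just a per-channel bias broadcast:
// tadOut += bias x ones^T (an outer product). A hedged MmulHelper sketch,
// assuming the reshapes below yield views over the same buffers:
// auto b2d = bias->reshape('c', {m_, k_});      // [nOutputPlane, 1]
// auto o2d = ones->reshape('c', {k_, n_});      // [1, outputDepth*outputHeight*outputWidth]
// auto t2d = tadOut->reshape('c', {m_, n_});
// MmulHelper::mmul(b2d, o2d, t2d, 1.0, 1.0);    // beta = 1.0 accumulates into tadOut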
// }
// }
// STORE_RESULT(*output);
return Status::OK();
}
DECLARE_TYPES(fullconv3d) {
getOpDescriptor()
->setAllowedInputTypes(nd4j::DataType::ANY)
->setAllowedOutputTypes({ALL_FLOATS});
}
DECLARE_SHAPE_FN(fullconv3d) {
// auto input = inputShape->at(0);
// auto weights = inputShape->at(1);
// // strides
// int dT = INT_ARG(0);
// int dW = INT_ARG(1);
// int dH = INT_ARG(2);
// // padding
// int pT = INT_ARG(3);
// int pW = INT_ARG(4);
// int pH = INT_ARG(5);
// // dilation
// int dilationT = INT_ARG(6);
// int dilationW = INT_ARG(7);
// int dilationH = INT_ARG(8);
// // output padding
// int aT = INT_ARG(9);
// int aW = INT_ARG(10);
// int aH = INT_ARG(11);
// // bias
// bool biasUsed = INT_ARG(12) != 0;
// Nd4jLong *shapeOf;
// Nd4jLong *newShape;
// ALLOCATE(shapeOf, block.getWorkspace(), 5, Nd4jLong);
// ALLOCATE(newShape, block.getWorkspace(), shape::shapeInfoLength(5), Nd4jLong);
// const int nInputPlane = weights[1];
// const int nOutputPlane = weights[2];
// const int kT = weights[3];
// const int kH = weights[4];
// const int kW = weights[5];
// const int batchSize = input[1];
// const Nd4jLong inputWidth = input[5];
// const Nd4jLong inputHeight = input[4];
// const Nd4jLong inputDepth = input[3];
// const Nd4jLong outputDepth = (inputDepth - 1) * dT - 2*pT + (dilationT * (kT - 1) + 1) + aT;
// const Nd4jLong outputHeight = (inputHeight - 1) * dH - 2*pH + (dilationH * (kH - 1) + 1) + aH;
// const Nd4jLong outputWidth = (inputWidth - 1) * dW - 2*pW + (dilationW * (kW - 1) + 1) + aW;
// nd4j::ArrayUtils::toLongPtr({(Nd4jLong) batchSize, (Nd4jLong)nOutputPlane, (Nd4jLong)outputDepth, (Nd4jLong)outputHeight, (Nd4jLong)outputWidth}, shapeOf);
// shape::shapeBuffer(5, shapeOf, newShape);
// RELEASE(shapeOf, block.getWorkspace());
// return SHAPELIST(newShape);
return SHAPELIST();
}
DECLARE_TYPES(fullconv3d_bp) {
getOpDescriptor()
->setAllowedInputTypes(nd4j::DataType::ANY)
->setAllowedOutputTypes({ALL_FLOATS});
}
//////////////////////////////////////////////////////////////////////////
CUSTOM_OP_IMPL(fullconv3d_bp, 5, 1, false, 0, 13) {
// auto input = INPUT_VARIABLE(0);
// auto gradNext = INPUT_VARIABLE(1);
// auto weights = INPUT_VARIABLE(2);
// auto finput = INPUT_VARIABLE(3);
// // not used
// auto fgradInput = INPUT_VARIABLE(4);
// REQUIRE_TRUE(weights->rankOf() == 5, 0, "Weights should be 5D, got %i instead", weights->rankOf());
// REQUIRE_TRUE(input->rankOf() == 5, 0, "Input should be 5D, got %i instead", input->rankOf());
// auto output = OUTPUT_VARIABLE(0);
// int dT = INT_ARG(0);
// int dW = INT_ARG(1);
// int dH = INT_ARG(2);
// int pT = INT_ARG(3);
// int pW = INT_ARG(4);
// int pH = INT_ARG(5);
// int dilationT = INT_ARG(6);
// int dilationW = INT_ARG(7);
// int dilationH = INT_ARG(8);
// int aT = INT_ARG(9);
// int aW = INT_ARG(10);
// int aH = INT_ARG(11);
// bool biasUsed = INT_ARG(12) != 0;
// const int nInputPlane = (int)weights->shapeOf()[0];
// const int nOutputPlane = (int)weights->shapeOf()[1];
// const int kT = (int)weights->shapeOf()[2];
// const int kH = (int)weights->shapeOf()[3];
// const int kW = (int)weights->shapeOf()[4];
// const Nd4jLong inputWidth = input->shapeOf()[4];
// const Nd4jLong inputHeight = input->shapeOf()[3];
// const Nd4jLong inputDepth = input->shapeOf()[2];
// const Nd4jLong outputDepth = (inputDepth - 1) * dT - 2*pT + (dilationT * (kT - 1) + 1) + aT;
// const Nd4jLong outputHeight = (inputHeight - 1) * dH - 2*pH + (dilationH * (kH - 1) + 1) + aH;
// const Nd4jLong outputWidth = (inputWidth - 1) * dW - 2*pW + (dilationW * (kW - 1) + 1) + aW;
// const Nd4jLong batchSize = input->shapeOf()[0];
// REQUIRE_TRUE(output->isSameShape({(int) batchSize, (int) nInputPlane, (int) inputDepth, (int) inputHeight, (int) inputWidth}) ,0, "Output should have shape of [%i, %i, %i, %i, %i], but got [%i, %i, %i, %i, %i] instead", (int) batchSize, (int) nInputPlane, (int) inputDepth, (int) inputHeight, (int) inputWidth, output->sizeAt(0), output->sizeAt(1), output->sizeAt(2), output->sizeAt(3), output->sizeAt(4));
// output->assign(0.0);
// // FIXME: non-inplace reshape!!!!
// NDArray *gradColumns;
// //auto gradColumns = finput->reshape('c', {nOutputPlane*kW*kH*kT, inputDepth*inputHeight*inputWidth });
// std::unique_ptr<ResultSet> tadsNext(gradNext->allExamples());
// std::unique_ptr<ResultSet> tadsOutput(output->allExamples());
// for (int e = 0; e < tadsNext->size(); e++) {
// auto tadNext = tadsNext->at(e);
// auto tadOutput = tadsOutput->at(e);
// // ConvolutionUtils<T>::_vol2col(
// // tadNext->getBuffer(),
// // nOutputPlane, outputDepth, outputHeight, outputWidth,
// // kT, kH, kW,
// // pT, pH, pW,
// // dT, dH, dW,
// // dilationT, dilationH, dilationW,
// // gradColumns->getBuffer());
// ConvolutionUtils::vol2col(*tadNext, *gradColumns, dT, dH, dW, pT, pH, pW, dilationT, dilationH, dilationW);
// const auto m = weights->shapeOf()[0];
// const auto n = gradColumns->shapeOf()[1];
// const auto k = weights->shapeOf()[1] * weights->shapeOf()[2] * weights->shapeOf()[3] * weights->shapeOf()[4];
// // FIXME: mmul helper should be used here
// /*
// nd4j::blas::GEMM<T>::op('f', 'n', 'n',
// n, m, k,
// 1.0f,
// gradColumns->getBuffer(), n,
// weights->getBuffer(), k,
// 0,
// tadOutput->getBuffer(), n
// );
// */
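// Sketch of the MmulHelper-based variant hinted at by the FIXME above
// (the 2-D views are illustrative assumptions, not code from this file):
// auto w2d   = weights->reshape('c', {m, k});          // [nInputPlane, nOutputPlane*kT*kH*kW]
// auto out2d = tadOutput->reshape('c', {m, n});        // view over this example's gradient slice
// MmulHelper::mmul(w2d, gradColumns, out2d, 1.0, 0.0); // out2d = w2d x gradColumns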
// }
// STORE_RESULT(*output);
// delete gradColumns;
return Status::OK();
}
DECLARE_SHAPE_FN(fullconv3d_bp) {
// the output shape simply equals the input shape
// Nd4jLong* newShape;
// COPY_SHAPE(inputShape->at(0), newShape);
// return SHAPELIST(newShape);
return SHAPELIST();
}
DECLARE_TYPES(fullconv3d_grad) {
getOpDescriptor()
->setAllowedInputTypes(nd4j::DataType::ANY)
->setAllowedOutputTypes({ALL_FLOATS});
}
//////////////////////////////////////////////////////////////////////////
CUSTOM_OP_IMPL(fullconv3d_grad, 4, 2, false, 1, 13) {
// auto input = INPUT_VARIABLE(0);
// auto epsilon = INPUT_VARIABLE(1);
// auto columns = INPUT_VARIABLE(2);
// auto ones = INPUT_VARIABLE(3);
// REQUIRE_TRUE(input->rankOf() == epsilon->rankOf(), 0, "Rank of input (%i) & epsilon (%i) should be equal", input->rankOf(), epsilon->rankOf());
// REQUIRE_TRUE(input->sizeAt(0) == epsilon->sizeAt(0), 1, "Batch size should be equal for input and epsilon");
// auto gradWeight = OUTPUT_VARIABLE(0);
// auto gradBias = OUTPUT_VARIABLE(1);
// REQUIRE_TRUE(gradBias->sizeAt(0) == gradWeight->sizeAt(1), 0, "Bias shape mismatch");
// int dT = INT_ARG(0);
// int dW = INT_ARG(1);
// int dH = INT_ARG(2);
// int pT = INT_ARG(3);
// int pW = INT_ARG(4);
// int pH = INT_ARG(5);
// int dilationT = INT_ARG(6);
// int dilationW = INT_ARG(7);
// int dilationH = INT_ARG(8);
// int aT = INT_ARG(9);
// int aW = INT_ARG(10);
// int aH = INT_ARG(11);
// bool biasUsed = INT_ARG(12) != 0;
// double scale = block.getTArguments()->at(0);
// int nInputPlane = (int)gradWeight->shapeOf()[0];
// int nOutputPlane = (int)gradWeight->shapeOf()[1];
// int kT = (int)gradWeight->shapeOf()[2];
// int kH = (int)gradWeight->shapeOf()[3];
// int kW = (int)gradWeight->shapeOf()[4];
// const Nd4jLong inputWidth = input->shapeOf()[4];
// const Nd4jLong inputHeight = input->shapeOf()[3];
// const Nd4jLong inputDepth = input->shapeOf()[2];
// const Nd4jLong outputDepth = (inputDepth - 1) * dT - 2*pT + (dilationT * (kT - 1) + 1) + aT;
// const Nd4jLong outputHeight = (inputHeight - 1) * dH - 2*pH + (dilationH * (kH - 1) + 1) + aH;
// const Nd4jLong outputWidth = (inputWidth - 1) * dW - 2*pW + (dilationW * (kW - 1) + 1) + aW;
// REQUIRE_TRUE(gradWeight->isContiguous(), 0, "gradWeight should be contiguous");
// REQUIRE_TRUE(gradBias->isContiguous(), 0, "gradBias should be contiguous");
// REQUIRE_TRUE(ones->rankOf() == 3, 0, "Ones should have rank 3, got %i instead", ones->rankOf());
// REQUIRE_TRUE(ones->isSameShape({outputDepth, outputHeight, outputWidth}), 0, "Ones should have shape [outputDepth, outputHeight, outputWidth]");
// ones->assign(1.0);
// std::unique_ptr<ResultSet> tadsInput(input->allExamples());
// std::unique_ptr<ResultSet> tadsEpsilon(epsilon->allExamples());
// for (int e = 0; e < tadsInput->size(); e++) {
// auto tadInput = tadsInput->at(e);
// auto tadEpsilon = tadsEpsilon->at(e);
// // ConvolutionUtils<T>::_vol2col(
// // tadEpsilon->getBuffer(), nOutputPlane,
// // outputDepth, outputHeight, outputWidth,
// // kT, kH, kW,
// // pT, pH, pW,
// // dT, dH, dW,
// // dilationT, dilationH, dilationW,
// // columns->getBuffer()
// // );
// ConvolutionUtils::vol2col(*tadEpsilon, *columns, dT, dH, dW, pT, pH, pW, dilationT, dilationH, dilationW);
// const Nd4jLong n = columns->shapeOf()[0]; // nOutputPlane * kt * kh * kw
// const Nd4jLong m = tadInput->shapeOf()[0]; // nInputPlane
// const Nd4jLong k = columns->shapeOf()[1];
// // FIXME: mmul helper should be used here
// /**
// nd4j::blas::GEMM<T>::op('f', 't', 'n',
// n, m, k,
// scale,
// columns->getBuffer(), k,
// tadInput->getBuffer(), k,
// 1,
// gradWeight->getBuffer(), n);
// */
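// The FIXME above amounts to gradWeight += scale * tadInput x columns^T; a hedged
// MmulHelper sketch (the 2-D views and transpose are assumptions for illustration):
// auto in2d = tadInput->reshape('c', {m, k});      // [nInputPlane, inputDepth*inputHeight*inputWidth]
// auto colT = columns->transpose();                // [k, n]
// auto gw2d = gradWeight->reshape('c', {m, n});    // [nInputPlane, nOutputPlane*kT*kH*kW]
// MmulHelper::mmul(in2d, colT, gw2d, scale, 1.0);  // beta = 1.0 accumulates into gradWeight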
// const Nd4jLong m_ = nOutputPlane;
// const Nd4jLong k_ = outputDepth * outputHeight * outputWidth;
// if (gradBias) {
// // FIXME: mmul helper should be used here
// /*
// nd4j::blas::GEMV<T>::op('t',
// k_, m_,
// scale,
// tadEpsilon->getBuffer(), k_,
// ones->getBuffer(), 1, (T)1.0f,
// gradBias->getBuffer(), 1);
// */
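// The commented GEMV above reduces the epsilon slice over its spatial axis:
// gradBias += scale * tadEpsilon_2d x ones. A hedged sketch treating it as a
// matrix product via MmulHelper (the views below are assumptions):
// auto e2d  = tadEpsilon->reshape('c', {m_, k_}); // [nOutputPlane, outputDepth*outputHeight*outputWidth]
// auto o2d  = ones->reshape('c', {k_, 1});
// auto gb2d = gradBias->reshape('c', {m_, 1});
// MmulHelper::mmul(e2d, o2d, gb2d, scale, 1.0);   // beta = 1.0 accumulates into gradBias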
// }
// }
// STORE_2_RESULTS(*gradWeight, *gradBias);
return Status::OK();
}
DECLARE_SHAPE_FN(fullconv3d_grad) {
// auto list = SHAPELIST();
// _grad ops MUST have output arrays provided
// return list;
return SHAPELIST();
}
}
}
#endif