raver119 7783012f39
cuDNN integration (#150)
* initial commit

Signed-off-by: raver119 <raver119@gmail.com>

* one file

Signed-off-by: raver119 <raver119@gmail.com>

* few more includes

Signed-off-by: raver119 <raver119@gmail.com>

* m?

Signed-off-by: raver119 <raver119@gmail.com>

* const

Signed-off-by: raver119 <raver119@gmail.com>

* cudnn linkage in tests

Signed-off-by: raver119 <raver119@gmail.com>

* culibos

Signed-off-by: raver119 <raver119@gmail.com>

* static reminder

Signed-off-by: raver119 <raver119@gmail.com>

* platform engine tag

Signed-off-by: raver119 <raver119@gmail.com>

* HAVE_CUDNN moved to config.h.in

Signed-off-by: raver119 <raver119@gmail.com>

* include

Signed-off-by: raver119 <raver119@gmail.com>

* include

Signed-off-by: raver119 <raver119@gmail.com>

* skip cudnn handle creation if there's not cudnn

Signed-off-by: raver119 <raver119@gmail.com>

* meh

Signed-off-by: raver119 <raver119@gmail.com>

* target device in context

Signed-off-by: raver119 <raver119@gmail.com>

* platform engines

Signed-off-by: raver119 <raver119@gmail.com>

* platform engines

Signed-off-by: raver119 <raver119@gmail.com>

* allow multiple -h args

Signed-off-by: raver119 <raver119@gmail.com>

* allow multiple -h args

Signed-off-by: raver119 <raver119@gmail.com>

* move mkldnn out of CPU block

Signed-off-by: raver119 <raver119@gmail.com>

* link to mkldnn on cuda

Signed-off-by: raver119 <raver119@gmail.com>

* less prints

Signed-off-by: raver119 <raver119@gmail.com>

* minor tweaks

Signed-off-by: raver119 <raver119@gmail.com>

* next step

Signed-off-by: raver119 <raver119@gmail.com>

* conv2d NCHW draft

Signed-off-by: raver119 <raver119@gmail.com>

* conv2d biasAdd

Signed-off-by: raver119 <raver119@gmail.com>

* test for MKL/CUDNN combined use

Signed-off-by: raver119 <raver119@gmail.com>

* - provide additional code for conv2d ff based on cudnn api, not tested yet

Signed-off-by: Yurii <iuriish@yahoo.com>

* - further work on conv2d helper based on using cudnn api

Signed-off-by: Yurii <iuriish@yahoo.com>

* - fixing several cuda bugs which appeared after cudnn lib had been started to use

Signed-off-by: Yurii <iuriish@yahoo.com>

* - implementation of conv2d backprop op based on cudnn api

Signed-off-by: Yurii <iuriish@yahoo.com>

* - implementaion of conv3d and conv3d_bp ops based on cudnn api

Signed-off-by: Yurii <iuriish@yahoo.com>

* - bugs fixing in conv3d/conv3d_bp ops (cudnn in use)

Signed-off-by: Yurii <iuriish@yahoo.com>

* - implementation of depthwiseConv2d (ff/bp) op based on cudnn api

Signed-off-by: Yurii <iuriish@yahoo.com>

* - implementation of batchnorm ff op based on cudnn api

Signed-off-by: Yurii <iuriish@yahoo.com>

* - disable cudnn batchnorm temporary

Signed-off-by: Yurii <iuriish@yahoo.com>

* - add minor change in cmake

Signed-off-by: Yurii <iuriish@yahoo.com>

* engine for depthwise mkldnn

Signed-off-by: raver119 <raver119@gmail.com>

* couple of includes

Signed-off-by: raver119 <raver119@gmail.com>

* - provide permutation to cudnn batchnorm ff when format is NHWC

Signed-off-by: Yurii <iuriish@yahoo.com>

* lgamma fix

Signed-off-by: raver119 <raver119@gmail.com>

* - eliminate memory leak in two tests

Signed-off-by: Yurii <iuriish@yahoo.com>

Co-authored-by: Yurii Shyrma <iuriish@yahoo.com>
2020-01-20 21:32:46 +03:00

159 lines
5.6 KiB
C++

/*******************************************************************************
* Copyright (c) 2019 Konduit K.K.
*
* This program and the accompanying materials are made available under the
* terms of the Apache License, Version 2.0 which is available at
* https://www.apache.org/licenses/LICENSE-2.0.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
* SPDX-License-Identifier: Apache-2.0
******************************************************************************/
//
// @author raver119@gmail.com
//
#ifndef SD_CUDNNUTILS_H
#define SD_CUDNNUTILS_H
#include <ops/declarable/PlatformHelper.h>
#include <ops/declarable/OpRegistrator.h>
#include <platform_boilerplate.h>
#include <exceptions/cuda_exception.h>
#include <exceptions/datatype_exception.h>
#include <dll.h>
#include <cudnn.h>
namespace nd4j {
namespace ops {
namespace platforms {
DECLARE_PLATFORM(conv2d, ENGINE_CUDA);
DECLARE_PLATFORM(conv2d_bp, ENGINE_CUDA);
DECLARE_PLATFORM(conv3dnew, ENGINE_CUDA);
DECLARE_PLATFORM(conv3dnew_bp, ENGINE_CUDA);
DECLARE_PLATFORM(depthwise_conv2d, ENGINE_CUDA);
DECLARE_PLATFORM(depthwise_conv2d_bp, ENGINE_CUDA);
DECLARE_PLATFORM(batchnorm, ENGINE_CUDA);
DECLARE_PLATFORM(batchnorm_bp, ENGINE_CUDA);
//////////////////////////////////////////////////////////////////////////
FORCEINLINE cudnnDataType_t cudnnDataType(nd4j::DataType dataType) {
switch (dataType) {
case nd4j::DataType::FLOAT32:
return CUDNN_DATA_FLOAT;
case nd4j::DataType::DOUBLE:
return CUDNN_DATA_DOUBLE;
case nd4j::DataType::HALF:
return CUDNN_DATA_HALF;
case nd4j::DataType::INT32:
return CUDNN_DATA_INT32;
case nd4j::DataType::INT8:
return CUDNN_DATA_INT8;
default:
throw datatype_exception::build("Unsupported data type", dataType);
}
}
//////////////////////////////////////////////////////////////////////////
FORCEINLINE void checkConv2dCUDNNPadAsymmetric(NDArray* &input, NDArray* &gradI,
const int iH, const int iW,
const int oH, const int oW,
const int kH, const int kW,
const int sH, const int sW,
const int pH, const int pW,
const int dH, const int dW,
const bool isNCHW) {
const auto pHsum = ((oH - 1) * sH + ((kH - 1) * dH + 1) - iH);
const auto pWsum = ((oW - 1) * sW + ((kW - 1) * dW + 1) - iW);
const bool isPHasymm = pH != (pHsum - pH);
const bool isPWasymm = pW != (pWsum - pW);
if(!isPHasymm && !isPWasymm)
return;
std::vector<Nd4jLong> newShape = input->getShapeAsVector();
const int iHposition = isNCHW ? 2 : 1;
if(isPHasymm)
newShape[iHposition] += 1;
if(isPWasymm)
newShape[iHposition + 1] += 1;
NDArray* newInput = new NDArray(input->ordering(), newShape, input->dataType(), input->getContext());
if(isNCHW)
(*newInput)({0,0, 0,0, 0,input->sizeAt(2), 0,input->sizeAt(3)}).assign(input);
else
(*newInput)({0,0, 0,input->sizeAt(1), 0,input->sizeAt(2), 0,0}).assign(input);
input = newInput;
if(gradI != nullptr)
gradI = new NDArray(gradI->ordering(), newShape, gradI->dataType(), gradI->getContext());
}
//////////////////////////////////////////////////////////////////////////
FORCEINLINE void checkConv3dCUDNNPadAsymmetric(NDArray* &input, NDArray* &gradI,
const int iD, const int iH, const int iW,
const int oD, const int oH, const int oW,
const int kD, const int kH, const int kW,
const int sD, const int sH, const int sW,
const int pD, const int pH, const int pW,
const int dD, const int dH, const int dW,
const bool isNCDHW) {
const auto pDsum = ((oD - 1) * sD + ((kD - 1) * dD + 1) - iD);
const auto pHsum = ((oH - 1) * sH + ((kH - 1) * dH + 1) - iH);
const auto pWsum = ((oW - 1) * sW + ((kW - 1) * dW + 1) - iW);
const bool isPDasymm = pD != (pDsum - pD);
const bool isPHasymm = pH != (pHsum - pH);
const bool isPWasymm = pW != (pWsum - pW);
if(!isPDasymm && !isPHasymm && !isPWasymm)
return;
std::vector<Nd4jLong> newShape = input->getShapeAsVector();
const int iDposition = isNCDHW ? 2 : 1;
if(isPDasymm)
newShape[iDposition] += 1;
if(isPHasymm)
newShape[iDposition + 1] += 1;
if(isPWasymm)
newShape[iDposition + 2] += 1;
NDArray* newInput = new NDArray(input->ordering(), newShape, input->dataType(), input->getContext());
if(isNCDHW)
(*newInput)({0,0, 0,0, 0,input->sizeAt(2), 0,input->sizeAt(3), 0,input->sizeAt(4)}).assign(input);
else
(*newInput)({0,0, 0,input->sizeAt(1), 0,input->sizeAt(2), 0,input->sizeAt(3), 0,0}).assign(input);
input = newInput;
if(gradI != nullptr)
gradI = new NDArray(gradI->ordering(), newShape, gradI->dataType(), gradI->getContext());
}
}
}
}
#endif //SD_CUDNNUTILS_H