[WIP] stb/bts nd (#144)
* start working on space_to_batch_nd
* provide cpu helper for space_to_batch_nd op
* few typos fixed
* add tests for space_to_batch and correct bugs
* write cuda kernel for space_to_batch op
* add order argument to shape::index2coords method in convolution cuda ops
* restore some previous code
* old col2im kernel activated
* change coords calculation in col2im kernel
* restore old col2im kernel
* add custom op for batch_to_space
* provide cpu version for batch_to_space_nd op
* provide cuda kernel for batch_to_space_nd op

Signed-off-by: Yurii <yurii@skymind.io>
Signed-off-by: raver119 <raver119@gmail.com>

parent e604ffe0d2
commit eea3062ccf
@@ -599,6 +599,7 @@ namespace nd4j {

        /**
        * apply reduce operation to array
        * extraParams - extra parameters for operation
        * returns scalar array
        */
        NDArray reduceNumber(nd4j::reduce::FloatOps ops, void *extraParams = nullptr) const;
        NDArray reduceNumber(nd4j::reduce::SameOps ops, void *extraParams = nullptr) const;
@@ -30,7 +30,6 @@ limitations under the License.

 ==============================================================================*/

//
// @author raver119@gmail.com, created on 19.01.18.
// @author Yurii Shyrma (iuriish@yahoo.com)
//
@@ -63,9 +62,8 @@ CUSTOM_OP_IMPL(batch_to_space, 2, 1, false, 0, 1) {

     REQUIRE_TRUE(rank == 4, 0, "BatchToSpace: rank of input array must be equal 4, but got %i instead", rank);
     REQUIRE_TRUE(dim0 % (blockSize * blockSize) == 0, 0, "BatchToSpace: first dimension of input array must be divisible by blockSize * blockSize (that is by %i), but got first dimension equal to %i", blockSize * blockSize, dim0);

-    const std::string expectedCropShape = "[2, 2]";
-    const std::string actualCropShape = ShapeUtils::shapeAsString(crop);
-    REQUIRE_TRUE(actualCropShape == expectedCropShape, 0, "BatchToSpace: operation expects crop shape to be {2, 2}, but got %s instead", actualCropShape.c_str());
+    if(crop->sizeAt(0) != 2 || crop->sizeAt(1) != 2)
+        REQUIRE_TRUE(false, 0, "BatchToSpace: operation expects crop shape to be {2, 2}, but got %s instead", ShapeUtils::shapeAsString(crop).c_str());

     const uint cropBottom = crop->e<uint>(0,0);
     const uint cropTop    = crop->e<uint>(0,1);
@@ -104,9 +102,8 @@ DECLARE_SHAPE_FN(batch_to_space) {

     REQUIRE_TRUE(rank == 4, 0, "BatchToSpace: rank of input array must be equal 4, but got %i instead", rank);
     REQUIRE_TRUE(dim0 % (blockSize * blockSize) == 0, 0, "BatchToSpace: first dimension of input array must be divisible by blockSize * blockSize (that is by %i), but got first dimension equal to %i", blockSize * blockSize, dim0);

-    const std::string expectedCropShape = "[2, 2]";
-    const std::string actualCropShape = ShapeUtils::shapeAsString(cropShapeInfo);
-    REQUIRE_TRUE(actualCropShape == expectedCropShape, 0, "BatchToSpace: operation expects crop shape to be {2, 2}, but got %s instead", actualCropShape.c_str());
+    if(cropShapeInfo[1] != 2 || cropShapeInfo[2] != 2)
+        REQUIRE_TRUE(false, 0, "BatchToSpace: operation expects crop shape to be {2, 2}, but got %s instead", ShapeUtils::shapeAsString(cropShapeInfo).c_str());

     const uint cropBottom = INPUT_VARIABLE(1)->e<Nd4jLong>(0,0);
     const uint cropTop    = INPUT_VARIABLE(1)->e<Nd4jLong>(0,1);
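Note (illustration, not part of this commit): a minimal batch_to_space invocation consistent with the checks above, assuming the same execute() overload used by the tests at the bottom of this page. The crop array is the [2, 2] second input; block_size is the single integer argument.

    NDArray x('c', {4, 1, 1, 1}, nd4j::DataType::FLOAT32);            // bS = 4 = 1 * 2 * 2
    NDArray crop('c', {2, 2}, {0, 0, 0, 0}, nd4j::DataType::INT32);   // no cropping
    x.linspace(1);

    nd4j::ops::batch_to_space op;
    auto result = op.execute({&x, &crop}, {}, {2});                   // block_size = 2 -> output shape [1, 2, 2, 1]
    delete result;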
@@ -0,0 +1,131 @@
/*******************************************************************************
 * Copyright (c) 2015-2018 Skymind, Inc.
 *
 * This program and the accompanying materials are made available under the
 * terms of the Apache License, Version 2.0 which is available at
 * https://www.apache.org/licenses/LICENSE-2.0.
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 *
 * SPDX-License-Identifier: Apache-2.0
 ******************************************************************************/

/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

//
// @author Yurii Shyrma (iuriish@yahoo.com)
//

#include <op_boilerplate.h>
#if NOT_EXCLUDED(OP_batch_to_space_nd)

#include <ops/declarable/headers/parity_ops.h>
#include <ops/declarable/helpers/s_t_b.h>

namespace nd4j {
namespace ops {

CUSTOM_OP_IMPL(batch_to_space_nd, 3, 1, false, 0, 0) {

    // 4D example, numOfSpatialDims = 2 - two spatial dimensions
    // [bS*blockShape[0]*blockShape[1], iH, iW, iC] is rearranged/permuted to [bS, iH*blockShape[0] - cropTop - cropBottom, iW*blockShape[1] - cropLeft - cropRight, iC]

    auto input      = INPUT_VARIABLE(0);
    auto blockShape = INPUT_VARIABLE(1);
    auto crop       = INPUT_VARIABLE(2);

    auto output = OUTPUT_VARIABLE(0);

    REQUIRE_TRUE(blockShape->rankOf() == 1, 0, "BatchToSpaceND: rank of blockShape array must be equal to one, but got %i instead !", blockShape->rankOf());

    const uint numOfSpatialDims = blockShape->sizeAt(0);

    const auto product = blockShape->reduceNumber(nd4j::reduce::Prod).e<Nd4jLong>(0);
    REQUIRE_TRUE(input->sizeAt(0) % product == 0, 0, "BatchToSpaceND: first dimension of input array must be divisible by product of blockShape array elements (= %lld), but got first dimension equal to %i", product, input->sizeAt(0));

    // FIXME - should we use this time-consuming validation ?
    for (uint i = 0; i < numOfSpatialDims; ++i) {
        const Nd4jLong blockSize = blockShape->e<Nd4jLong>(i);
        REQUIRE_TRUE(blockSize >= 2, 0, "BatchToSpaceND: all elements of blockShape array must be >= 2, but got value of %i for element number %i !", blockSize, i);
    }

    if(crop->sizeAt(0) != numOfSpatialDims || crop->sizeAt(1) != 2) {
        const std::string expectedCropShape = "[" + std::to_string(numOfSpatialDims) + ", 2]"; // [numOfSpatialDims, 2]
        REQUIRE_TRUE(false, 0, "BatchToSpaceND: operation expects crop shape to be %s, but got %s instead", expectedCropShape.c_str(), ShapeUtils::shapeAsString(crop).c_str());
    }

    // FIXME - should we use this time-consuming validation ?
    for (uint i = 0; i < numOfSpatialDims; ++i) {
        const auto cropLeft      = crop->e<uint>(i,0);
        const auto cropRight     = crop->e<uint>(i,1);
        const auto outSpatialDim = input->sizeAt(i + 1) * blockShape->e<Nd4jLong>(i) - cropLeft - cropRight;
        REQUIRE_TRUE(outSpatialDim >= 0, 0, "BatchToSpaceND: crop left/right values are too big and cause negative output spatial dimension/dimensions !");
    }

    helpers::batchToSpaceND(block.launchContext(), *input, *blockShape, *crop, *output);

    return Status::OK();
}

////////////////////////////////////////////////////////////////////////////////
DECLARE_TYPES(batch_to_space_nd) {

    getOpDescriptor()->setAllowedInputTypes(0, nd4j::DataType::ANY)
                     ->setAllowedInputTypes(1, {ALL_INTS})
                     ->setAllowedInputTypes(2, {ALL_INTS})
                     ->setSameMode(true);
}

////////////////////////////////////////////////////////////////////////////////
DECLARE_SHAPE_FN(batch_to_space_nd) {

    auto inputShapeInfo = inputShape->at(0);
    auto blockShapeInfo = inputShape->at(1);
    auto cropShapeInfo  = inputShape->at(2);

    REQUIRE_TRUE(blockShapeInfo[0] == 1, 0, "BatchToSpaceND: rank of blockShape array must be equal to one, but got %i instead !", blockShapeInfo[0]);

    const auto product = INPUT_VARIABLE(1)->reduceNumber(nd4j::reduce::Prod).e<Nd4jLong>(0);
    REQUIRE_TRUE(inputShapeInfo[1] % product == 0, 0, "BatchToSpaceND: first dimension of input array must be divisible by product of blockShape array elements (= %lld), but got first dimension equal to %i", product, inputShapeInfo[1]);

    const auto numOfSpatialDims = blockShapeInfo[1];

    if(cropShapeInfo[1] != numOfSpatialDims || cropShapeInfo[2] != 2) {
        const std::string expectedCropShape = "[" + std::to_string(numOfSpatialDims) + ", 2]"; // [numOfSpatialDims, 2]
        REQUIRE_TRUE(false, 0, "BatchToSpaceND: operation expects crop shape to be %s, but got %s instead", expectedCropShape.c_str(), ShapeUtils::shapeAsString(cropShapeInfo).c_str());
    }

    std::vector<Nd4jLong> outShape(inputShapeInfo + 1, inputShapeInfo + 1 + inputShapeInfo[0]);

    outShape[0] /= product;

    for (uint i = 0; i < numOfSpatialDims; ++i)
        outShape[i + 1] = outShape[i + 1] * INPUT_VARIABLE(1)->e<Nd4jLong>(i) - INPUT_VARIABLE(2)->e<uint>(i,0) - INPUT_VARIABLE(2)->e<uint>(i,1);

    return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inputShapeInfo), 'c', outShape));
}

}
}

#endif
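Note (illustration, not part of this commit): the output-shape arithmetic of the shape function above, worked by hand against test batch_to_space_nd_2 from the test file at the bottom of this page:

    // input [24, 1,3,2, 1], blockShape {2, 2, 3}, crop {0,0, 0,2, 2,1}
    // outShape[0] = 24 / (2 * 2 * 3)  = 2
    // outShape[1] = 1 * 2 - 0 - 0     = 2
    // outShape[2] = 3 * 2 - 0 - 2     = 4
    // outShape[3] = 2 * 3 - 2 - 1     = 3
    // outShape[4] = 1                   (non-spatial tail dimension is untouched)
    // => output shape [2, 2,4,3, 1], matching NDArray exp in that test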
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and

 limitations under the License.
 ==============================================================================*/
 //
-// @author raver119@gmail.com, created on 19.01.18.
+// @author Yurii Shyrma (iuriish@yahoo.com)
+// @author raver119@gmail.com
 //

 #include <op_boilerplate.h>
@@ -39,12 +39,11 @@ CUSTOM_OP_IMPL(space_to_batch, 2, 1, false, 0, 1) {

     const uint blockSize = INT_ARG(0);
     REQUIRE_TRUE(blockSize >= 2, 0, "SpaceToBatch: integer parameter block_size must be >= 2, but got %i instead", blockSize);

-    const int rank = input->rankOf();
-    REQUIRE_TRUE(rank == 4, 0, "SpaceToBatch: rank of input array must be equal 4, but got %i instead", rank);
+    REQUIRE_TRUE(input->rankOf() == 4, 0, "SpaceToBatch: rank of input array must be equal 4, but got %i instead", input->rankOf());
+    REQUIRE_TRUE(output->rankOf() == 4, 0, "SpaceToBatch: rank of output array must be equal 4, but got %i instead", output->rankOf());

-    const std::string expectedpaddingShape = "[2, 2]";
-    const std::string actualpaddingShape = ShapeUtils::shapeAsString(padding);
-    REQUIRE_TRUE(actualpaddingShape == expectedpaddingShape, 0, "SpaceToBatch: operation expects padding shape to be {2, 2}, but got %s instead", actualpaddingShape.c_str());
+    if(padding->sizeAt(0) != 2 || padding->sizeAt(1) != 2)
+        REQUIRE_TRUE(false, 0, "SpaceToBatch: operation expects padding shape to be {2, 2}, but got %s instead", ShapeUtils::shapeAsString(padding).c_str());

     const uint padBottom = padding->e<uint>(0,0);
     const uint padTop    = padding->e<uint>(0,1);

@@ -78,9 +77,8 @@ DECLARE_SHAPE_FN(space_to_batch) {

     const int rank = inputShapeInfo[0];
     REQUIRE_TRUE(rank == 4, 0, "SpaceToBatch: rank of input array must be equal 4, but got %i instead", rank);

-    const std::string expectedpaddingShape = "[2, 2]";
-    const std::string actualpaddingShape = ShapeUtils::shapeAsString(paddingShapeInfo);
-    REQUIRE_TRUE(actualpaddingShape == expectedpaddingShape, 0, "SpaceToBatch: operation expects padding shape to be {2, 2}, but got %s instead", actualpaddingShape.c_str());
+    if(paddingShapeInfo[1] != 2 || paddingShapeInfo[2] != 2)
+        REQUIRE_TRUE(false, 0, "SpaceToBatch: operation expects padding shape to be {2, 2}, but got %s instead", ShapeUtils::shapeAsString(paddingShapeInfo).c_str());

     const uint padBottom = INPUT_VARIABLE(1)->e<Nd4jLong>(0,0);
     const uint padTop    = INPUT_VARIABLE(1)->e<Nd4jLong>(0,1);
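Note (sketch, not part of this commit): a minimal space_to_batch invocation consistent with the validation above, assuming the execute() overload used by the tests at the end of this page; block_size is again the single integer argument.

    NDArray x('c', {1, 2, 2, 1}, nd4j::DataType::FLOAT32);
    NDArray padding('c', {2, 2}, {0, 0, 0, 0}, nd4j::DataType::INT32);
    x.linspace(1);

    nd4j::ops::space_to_batch op;
    auto result = op.execute({&x, &padding}, {}, {2});   // block_size = 2 -> output shape [4, 1, 1, 1]
    delete result;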
@@ -0,0 +1,108 @@
/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
//
// @author Yurii Shyrma (iuriish@yahoo.com)
//

#include <op_boilerplate.h>
#if NOT_EXCLUDED(OP_space_to_batch_nd)

#include <ops/declarable/headers/parity_ops.h>
#include <ops/declarable/helpers/s_t_b.h>

namespace nd4j {
namespace ops {

CUSTOM_OP_IMPL(space_to_batch_nd, 3, 1, false, 0, 0) {

    // 4D example, numOfSpatialDims = 2 - two spatial dimensions
    // [bS, iH, iW, iC] is rearranged/permuted to [bS*blockShape[0]*blockShape[1], (iH + padBottom + padTop)/blockShape[0], (iW + padLeft + padRight)/blockShape[1], iC]

    auto input      = INPUT_VARIABLE(0);
    auto blockShape = INPUT_VARIABLE(1);
    auto padding    = INPUT_VARIABLE(2);

    auto output = OUTPUT_VARIABLE(0);

    REQUIRE_TRUE(blockShape->rankOf() == 1, 0, "SpaceToBatchND: rank of blockShape array must be equal to one, but got %i instead !", blockShape->rankOf());

    const uint numOfSpatialDims = blockShape->sizeAt(0);

    REQUIRE_TRUE(input->rankOf() == output->rankOf(), 0, "SpaceToBatchND: rank of input and output array must be the same, but got %i and %i correspondingly !", input->rankOf(), output->rankOf());

    // FIXME - should we use this time-consuming validation ?
    for (uint i = 0; i < numOfSpatialDims; ++i) {
        const Nd4jLong blockSize = blockShape->e<Nd4jLong>(i);
        REQUIRE_TRUE(blockSize >= 2, 0, "SpaceToBatchND: all elements of blockShape array must be >= 2, but got value of %i for element number %i !", blockSize, i);
    }

    if(padding->sizeAt(0) != numOfSpatialDims || padding->sizeAt(1) != 2) {
        const std::string expectedpaddingShape = "[" + std::to_string(numOfSpatialDims) + ", 2]"; // [numOfSpatialDims, 2]
        REQUIRE_TRUE(false, 0, "SpaceToBatchND: operation expects padding shape to be %s, but got %s instead", expectedpaddingShape.c_str(), ShapeUtils::shapeAsString(padding).c_str());
    }

    // FIXME - should we use this time-consuming validation ?
    for (uint i = 0; i < numOfSpatialDims; ++i) {
        const uint padLeft  = padding->e<uint>(i,0);
        const uint padRight = padding->e<uint>(i,1);
        const Nd4jLong blockSize = blockShape->e<Nd4jLong>(i);
        REQUIRE_TRUE((input->sizeAt(i + 1) + padLeft + padRight) % blockSize == 0, 0, "SpaceToBatchND: after padding, spatial dimensions of input array must be divisible by blockSize !");
    }

    helpers::spaceToBatchND(block.launchContext(), *input, *blockShape, *padding, *output);

    return Status::OK();
}

////////////////////////////////////////////////////////////////////////////////
DECLARE_TYPES(space_to_batch_nd) {

    getOpDescriptor()->setAllowedInputTypes(0, nd4j::DataType::ANY)
                     ->setAllowedInputTypes(1, {ALL_INTS})
                     ->setAllowedInputTypes(2, {ALL_INTS})
                     ->setSameMode(true);
}

////////////////////////////////////////////////////////////////////////////////
DECLARE_SHAPE_FN(space_to_batch_nd) {

    auto inputShapeInfo   = inputShape->at(0);
    auto blockShapeInfo   = inputShape->at(1);
    auto paddingShapeInfo = inputShape->at(2);

    REQUIRE_TRUE(blockShapeInfo[0] == 1, 0, "SpaceToBatchND: rank of blockShape array must be equal to one, but got %i instead !", blockShapeInfo[0]);

    const uint numOfSpatialDims = blockShapeInfo[1];

    if(paddingShapeInfo[1] != numOfSpatialDims || paddingShapeInfo[2] != 2) {
        const std::string expectedpaddingShape = "[" + std::to_string(numOfSpatialDims) + ", 2]"; // [numOfSpatialDims, 2]
        REQUIRE_TRUE(false, 0, "SpaceToBatchND: operation expects padding shape to be %s, but got %s instead", expectedpaddingShape.c_str(), ShapeUtils::shapeAsString(paddingShapeInfo).c_str());
    }

    std::vector<Nd4jLong> outShape(inputShapeInfo + 1, inputShapeInfo + 1 + inputShapeInfo[0]);

    outShape[0] *= INPUT_VARIABLE(1)->reduceNumber(nd4j::reduce::Prod).e<Nd4jLong>(0);

    for (uint i = 0; i < numOfSpatialDims; ++i)
        outShape[i + 1] = (outShape[i + 1] + INPUT_VARIABLE(2)->e<uint>(i,0) + INPUT_VARIABLE(2)->e<uint>(i,1)) / INPUT_VARIABLE(1)->e<Nd4jLong>(i);

    return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inputShapeInfo), 'c', outShape));
}

}
}

#endif
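Note (illustration, not part of this commit): the op above follows TensorFlow's space_to_batch_nd (hence the TensorFlow copyright header). Its shape arithmetic worked by hand against test space_to_batch_nd_2 below, plus a minimal invocation taken from test space_to_batch_nd_1:

    // input [2, 2,4,3, 1], blockShape {2, 2, 3}, paddings {0,0, 0,2, 2,1}
    // outShape[0] = 2 * (2 * 2 * 3)  = 24
    // outShape[1] = (2 + 0 + 0) / 2  = 1
    // outShape[2] = (4 + 0 + 2) / 2  = 3
    // outShape[3] = (3 + 2 + 1) / 3  = 2
    // outShape[4] = 1
    // => output shape [24, 1,3,2, 1], matching NDArray exp in that test

    NDArray x('c', {1, 2, 2, 2, 3}, nd4j::DataType::FLOAT32);
    NDArray blockShape('c', {3}, {2, 2, 2}, nd4j::DataType::INT32);
    NDArray paddings('c', {3, 2}, {0, 0, 0, 0, 0, 0}, nd4j::DataType::INT32);
    x.linspace(1);

    nd4j::ops::space_to_batch_nd op;
    auto result = op.execute({&x, &blockShape, &paddings}, {}, {});   // -> output shape [8, 1, 1, 1, 3]
    delete result;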
@@ -617,6 +617,10 @@ namespace nd4j {

         DECLARE_CUSTOM_OP(space_to_batch, 2, 1, false, 0, 1);
         #endif

+        #if NOT_EXCLUDED(OP_space_to_batch_nd)
+        DECLARE_CUSTOM_OP(space_to_batch_nd, 3, 1, false, 0, 0);
+        #endif

         /**
          *
          *

@@ -624,6 +628,9 @@ namespace nd4j {

         #if NOT_EXCLUDED(OP_batch_to_space)
         DECLARE_CUSTOM_OP(batch_to_space, 2, 1, false, 0, 1);
         #endif
+        #if NOT_EXCLUDED(OP_batch_to_space_nd)
+        DECLARE_CUSTOM_OP(batch_to_space_nd, 3, 1, false, 0, 0);
+        #endif

         /**
          * top_k operation returns a vector of k top values for
@@ -15,8 +15,8 @@
 ******************************************************************************/

 //
-// @author raver119@gmail.com, created on 19.01.18.
+// @author Yurii Shyrma (iuriish@yahoo.com)
+// @author raver119@gmail.com
 //

 #include <ops/declarable/helpers/s_t_b.h>
@@ -90,13 +90,107 @@ void batchToSpace(nd4j::LaunchContext* context, const NDArray& input, NDArray& o
    }
}

//////////////////////////////////////////////////////////////////////////
template <typename T>
static void batchToSpaceND_(const NDArray& input, const NDArray& crop, NDArray& output, const uint numOfSpatialDims) {

    // input  [bS, H * blockShape[0], W * blockShape[1], iC]
    // output [bS, H * blockShape[0] - cropBottom - cropTop, W * blockShape[1] - cropLeft - cropRight, iC]

    // if (cropTop = cropBottom = cropRight = cropLeft = 0) shapes are the same
    // else:
    // oH -> [cropBottom, iH - cropTop]
    // oW -> [cropLeft, iW - cropRight]
    // xLen >= zLen

    const T* x = input.bufferAsT<T>();
          T* z = output.bufferAsT<T>();

    const int rank = input.rankOf();
    const Nd4jLong zLen = output.lengthOf();

    std::vector<Nd4jLong> coords(rank);

    // loop through output array
    PRAGMA_OMP_PARALLEL_FOR_ARGS(schedule(guided) firstprivate(coords))
    for (Nd4jLong i = 0; i < zLen; ++i) {

        shape::index2coords(rank, output.shapeOf(), i, zLen, coords.data());

        const auto zOffset = shape::getOffset(0, output.shapeOf(), output.stridesOf(), coords.data(), rank);

        // evaluate spatial coordinates for x
        for(uint j = 1; j <= numOfSpatialDims; ++j)
            coords[j] += crop.e<uint>(j - 1, 0);       // add crop left

        z[zOffset] = x[shape::getOffset(0, input.shapeOf(), input.stridesOf(), coords.data(), rank)];
    }
}

BUILD_SINGLE_TEMPLATE(template void batchToSpaceND_, (const NDArray& input, const NDArray& crop, NDArray& output, const uint numOfSpatialDims), LIBND4J_TYPES);

//////////////////////////////////////////////////////////////////////////
void batchToSpaceND(nd4j::LaunchContext* context, const NDArray& input, const NDArray& blockShape, const NDArray& crop, NDArray& output) {

    // 4D example, numOfSpatialDims = 2 - two spatial dimensions
    // [bS*blockShape[0]*blockShape[1], iH, iW, iC] is rearranged/permuted to [bS, iH*blockShape[0] - cropTop - cropBottom, iW*blockShape[1] - cropLeft - cropRight, iC]

    const uint rank = input.rankOf();
    const uint numOfSpatialDims = blockShape.sizeAt(0);

    //*** construct reshaping std::vector for first reshape of input array ***//

    std::vector<Nd4jLong> temp(numOfSpatialDims + rank);

    int i;
    for(i = 0; i < numOfSpatialDims; ++i)
        temp[i] = blockShape.e<Nd4jLong>(i);
    temp[i++] = output.sizeAt(0);
    for(int j = 1; j < rank; ++i, ++j)
        temp[i] = input.sizeAt(j);

    NDArray inputRearranged0 = input.reshape(input.ordering(), temp);

    //*** construct permuting std::vector for permutation of input array ***//

    temp[0] = numOfSpatialDims;

    for(i = 1; i <= numOfSpatialDims; ++i) {
        temp[2*i - 1] = numOfSpatialDims + i;
        temp[2*i]     = i - 1;
    }
    for(i = 2 * numOfSpatialDims + 1; i < temp.size(); ++i)
        temp[i] = i;

    inputRearranged0.permutei(temp);


    if(input.lengthOf() == output.lengthOf()) {
        output.assign(inputRearranged0);
    }
    else {
        //*** construct reshaping std::vector for second reshape of input array ***//

        temp.resize(rank);

        temp[0] = output.sizeAt(0);

        for(i = 1; i < rank; ++i)
            temp[i] = (i <= numOfSpatialDims) ? input.sizeAt(i) * blockShape.e<Nd4jLong>(i - 1) : input.sizeAt(i);

        NDArray inputRearranged1 = inputRearranged0.reshape(input.ordering(), temp);

        BUILD_SINGLE_SELECTOR(input.dataType(), batchToSpaceND_, (inputRearranged1, crop, output, numOfSpatialDims), LIBND4J_TYPES);
    }
}
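Note (illustration, not part of this commit): the permute-vector construction above, worked by hand for the rank-4, two-spatial-dims case:

    // after the first reshape the array is [blockShape[0], blockShape[1], bS, d1, d2, d3]
    //
    // temp[0] = numOfSpatialDims  -> 2     (moves bS to the front)
    // i = 1:  temp[1] = 2 + 1 = 3          (first spatial dim)
    //         temp[2] = 0                  (blockShape[0])
    // i = 2:  temp[3] = 2 + 2 = 4          (second spatial dim)
    //         temp[4] = 1                  (blockShape[1])
    // i = 5:  temp[5] = 5                  (remaining dims keep their positions)
    //
    // => permutation {2, 3, 0, 4, 1, 5} - the same {2, 3,0, 4,1, 5} that the
    //    fixed-rank spaceToBatch path below hard-codes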
//////////////////////////////////////////////////////////////////////////
template <typename T>
static void spaceToBatch_(const NDArray& input, NDArray& output, const uint padBottom, const uint padTop, const uint padLeft, const uint padRight) {

    // input  [bS, H * blockSize - padBottom - padTop, W * blockSize - padLeft - padRight, iC]
-   // output [bs, H * blockSize, W * blockSize, iC]
+   // output [bS, H * blockSize, W * blockSize, iC]

    // if (padTop = padBottom = padRight = padLeft = 0) shapes are the same
    // else:

@@ -145,26 +239,153 @@ void spaceToBatch(nd4j::LaunchContext* context, const NDArray& input, NDArray& o

    // [bS, iH, iW, iC] is rearranged/permuted to [bS*blockSize*blockSize, (iH + padBottom + padTop)/blockSize, (iW + padLeft + padRight)/blockSize, iC]

-   NDArray outputRearranged0 = output.reshape(output.ordering(), {blockSize, blockSize, input.sizeAt(0), output.sizeAt(1), output.sizeAt(2), input.sizeAt(3)});
+   NDArray outputRearranged0 = output.reshape(output.ordering(), {blockSize, blockSize, input.sizeAt(0), output.sizeAt(1), output.sizeAt(2), output.sizeAt(3)});
    outputRearranged0.permutei({2, 3,0, 4,1, 5});

    if(input.lengthOf() == output.lengthOf()) {
        outputRearranged0.assign(input);
    }
    else {
-       NDArray outputRearranged1 = outputRearranged0.reshape(output.ordering(), {input.sizeAt(0), output.sizeAt(1) * blockSize, output.sizeAt(2) * blockSize, input.sizeAt(3)});
+       NDArray outputRearranged1 = outputRearranged0.reshape(output.ordering(), {input.sizeAt(0), output.sizeAt(1) * blockSize, output.sizeAt(2) * blockSize, output.sizeAt(3)});
        BUILD_SINGLE_SELECTOR(input.dataType(), spaceToBatch_, (input, outputRearranged1, padBottom, padTop, padLeft, padRight), LIBND4J_TYPES);

        if(output.getBuffer() != outputRearranged1.getBuffer())
            outputRearranged0.assign(outputRearranged1);
    }
}
//////////////////////////////////////////////////////////////////////////
template <typename T>
static void spaceToBatchND_(const NDArray& input, const NDArray& padding, NDArray& output, const uint numOfSpatialDims) {

    // 4D example
    // input  [bS, H * blockShape[0] - padBottom - padTop, W * blockShape[1] - padLeft - padRight, iC]
    // output [bS, H * blockShape[0], W * blockShape[1], iC]

    // if (padTop = padBottom = padRight = padLeft = 0) shapes are the same
    // else:
    // iH -> [padBottom, oH - padTop]
    // iW -> [padLeft, oW - padRight]
    // zLen > xLen

    const T* x = input.bufferAsT<T>();
          T* z = output.bufferAsT<T>();

    const int rank = input.rankOf();
    const Nd4jLong zLen = output.lengthOf();

    std::vector<Nd4jLong> coords(rank);

    // loop through output array
    PRAGMA_OMP_PARALLEL_FOR_ARGS(schedule(guided) firstprivate(coords))
    for (Nd4jLong i = 0; i < zLen; ++i) {

        shape::index2coords(rank, output.shapeOf(), i, zLen, coords.data());

        const auto zOffset = shape::getOffset(0, output.shapeOf(), output.stridesOf(), coords.data(), rank);

        bool within = true;

        for(uint j = 1; j <= numOfSpatialDims; ++j) {

            const auto padLeft  = padding.e<uint>(j - 1, 0);
            const auto padRight = padding.e<uint>(j - 1, 1);

            within &= (coords[j] >= padLeft && coords[j] < output.sizeAt(j) - padRight);

            if(!within)
                break;

            coords[j] -= padLeft;       // get coordinates for x
        }

        if(within)
            z[zOffset] = x[shape::getOffset(0, input.shapeOf(), input.stridesOf(), coords.data(), rank)];
        else
            z[zOffset] = 0.f;
    }
}

BUILD_SINGLE_TEMPLATE(template void spaceToBatchND_, (const NDArray& input, const NDArray& padding, NDArray& output, const uint numOfSpatialDims), LIBND4J_TYPES);

//////////////////////////////////////////////////////////////////////////
void spaceToBatchND(nd4j::LaunchContext* context, const NDArray& input, const NDArray& blockShape, const NDArray& padding, NDArray& output ) {

    // 4D example with two spatial dimensions
    // [bS, iH, iW, iC] is rearranged/permuted to [bS*blockShape[0]*blockShape[1], (iH + padBottom + padTop)/blockShape[0], (iW + padLeft + padRight)/blockShape[1], iC]

    const uint rank = input.rankOf();

    const uint numOfSpatialDims = blockShape.sizeAt(0);

    //*** construct reshaping std::vector for first reshape of output array ***//
    std::vector<Nd4jLong> temp(numOfSpatialDims + rank);

    int i;
    for(i = 0; i < numOfSpatialDims; ++i)
        temp[i] = blockShape.e<Nd4jLong>(i);
    temp[i++] = input.sizeAt(0);
    for(int j = 1; j < rank; ++i, ++j)
        temp[i] = output.sizeAt(j);

    NDArray outputRearranged0 = output.reshape(output.ordering(), temp);

    //*** construct permuting std::vector for permutation of output array ***//

    temp[0] = numOfSpatialDims;

    for(i = 1; i <= numOfSpatialDims; ++i) {
        temp[2*i - 1] = numOfSpatialDims + i;
        temp[2*i]     = i - 1;
    }
    for(i = 2 * numOfSpatialDims + 1; i < temp.size(); ++i)
        temp[i] = i;

    outputRearranged0.permutei(temp);

    // ****** //

    if(input.lengthOf() == output.lengthOf()) {
        outputRearranged0.assign(input);
    }
    else {

        //*** construct reshaping std::vector for second reshape of output array ***//
        temp.resize(rank);

        temp[0] = input.sizeAt(0);

        for(i = 1; i < rank; ++i)
            temp[i] = (i <= numOfSpatialDims) ? output.sizeAt(i) * blockShape.e<Nd4jLong>(i - 1) : output.sizeAt(i);

        NDArray outputRearranged1 = outputRearranged0.reshape(output.ordering(), temp);

        BUILD_SINGLE_SELECTOR(input.dataType(), spaceToBatchND_, (input, padding, outputRearranged1, numOfSpatialDims), LIBND4J_TYPES);

        if(output.getBuffer() != outputRearranged1.getBuffer())
            outputRearranged0.assign(outputRearranged1);
    }
}


/*
template <int N, bool B2S>
struct SpaceToBatchHelper {
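Note (illustration, not part of this commit): the two reshape vectors above, worked by hand for test space_to_batch_nd_2 from the test file below:

    // input [2, 2,4,3, 1], blockShape {2, 2, 3}, paddings {0,0, 0,2, 2,1}, output [24, 1,3,2, 1]
    //
    // first reshape of output: [2, 2, 3, 2, 1, 3, 2, 1]   // blockShape values, then bS, then output dims 1..4
    // permutation:             {3, 4, 0, 5, 1, 6, 2, 7}
    // second reshape:          [2, 2, 6, 6, 1]            // == padded input shape [2, 2+0+0, 4+0+2, 3+2+1, 1]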
@@ -135,10 +135,10 @@ __global__ static void col2imCuda2(const void *columns, void *image, const Nd4jL

    for (int i = (blockDim.x * blockIdx.x) + threadIdx.x; i < n; i += blockDim.x * gridDim.x) {
        T val = 0;

        int w_im = i % iW + pW;
        int h_im = (i / iW) % iH + pH;
        int c_im = i / (iW * iH);

        int b = c_im / iC;
        int c = c_im % iC;
@@ -39,8 +39,7 @@ static __global__ void vol2colCuda(const void* volume, const Nd4jLong* volShapeI

    T* col = reinterpret_cast<T*>(columns);

    __shared__ int colRank, volRank;
-   __shared__ Nd4jLong colLen, iD, iH, iW;
-   __shared__ Nd4jLong *sharedMem;
+   __shared__ Nd4jLong colLen, iD, iH, iW, *sharedMem;

    if (threadIdx.x == 0) {
        extern __shared__ unsigned char shmem[];
@@ -38,13 +38,13 @@ __global__ static void batchToSpaceCuda(const void* vx, const Nd4jLong* xShapeIn

    // else:
    // oH -> [cropBottom, iH - cropTop]
    // oW -> [cropLeft, iW - cropRight]
-   // xLen > zLen
+   // xLen >= zLen

    const auto x = reinterpret_cast<const T*>(vx);
          auto z = reinterpret_cast<T*>(vz);

    __shared__ int rank;
-   __shared__ Nd4jLong zLen, totalThreads, *sharedMem;
+   __shared__ Nd4jLong zLen, *sharedMem;

    if (threadIdx.x == 0) {
        extern __shared__ unsigned char shmem[];

@@ -52,7 +52,6 @@ __global__ static void batchToSpaceCuda(const void* vx, const Nd4jLong* xShapeIn

        rank = shape::rank(zShapeInfo);
        zLen = shape::length(zShapeInfo);
-       totalThreads = gridDim.x * blockDim.x;
    }
    __syncthreads();
@@ -116,6 +115,139 @@ void batchToSpace(nd4j::LaunchContext* context, const NDArray& input, NDArray& o
    }
}


///////////////////////////////////////////////////////////////////
template<typename X, typename Y>
__global__ static void batchToSpaceNDCuda(const void* vx, const Nd4jLong* xShapeInfo,
                                          const void* vy, const Nd4jLong* yShapeInfo,
                                                void* vz, const Nd4jLong* zShapeInfo,
                                          const uint numOfSpatialDims) {

    // 4D example, numOfSpatialDims = 2
    // input  [bS, H * blockShape[0], W * blockShape[1], iC]
    // output [bS, H * blockShape[0] - cropBottom - cropTop, W * blockShape[1] - cropLeft - cropRight, iC]

    // if (cropTop = cropBottom = cropRight = cropLeft = 0) shapes are the same
    // else:
    // oH -> [cropBottom, iH - cropTop]
    // oW -> [cropLeft, iW - cropRight]
    // xLen >= zLen

    const auto x = reinterpret_cast<const X*>(vx);
    const auto y = reinterpret_cast<const Y*>(vy);
          auto z = reinterpret_cast<X*>(vz);

    __shared__ int rank;
    __shared__ Nd4jLong zLen, *sharedMem;

    if (threadIdx.x == 0) {

        extern __shared__ unsigned char shmem[];
        sharedMem = reinterpret_cast<Nd4jLong*>(shmem);

        rank = shape::rank(zShapeInfo);
        zLen = shape::length(zShapeInfo);
    }

    __syncthreads();

    auto coords = sharedMem + threadIdx.x * rank;

    for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < zLen; i += gridDim.x * blockDim.x) {

        shape::index2coords(rank, zShapeInfo + 1, i, zLen, coords);

        const auto zOffset = shape::getOffset(0, zShapeInfo + 1, zShapeInfo + rank + 1, coords, rank);

        // evaluate spatial coordinates for x
        for(uint j = 1; j <= numOfSpatialDims; ++j) {
            const auto yOffset = (j - 1) * yShapeInfo[3];   // yRank = 2, calculate offset manually
            coords[j] += y[yOffset];                        // add crop left
        }

        const auto xOffset = shape::getOffset(0, xShapeInfo + 1, xShapeInfo + rank + 1, coords, rank);

        z[zOffset] = x[xOffset];
    }
}

///////////////////////////////////////////////////////////////////
template<typename X,typename Y>
static void batchToSpaceNDCudaLauncher(const int blocksPerGrid, const int threadsPerBlock, const int sharedMem, const cudaStream_t *stream, const void* vx, const Nd4jLong* xShapeInfo, const void* vy, const Nd4jLong* yShapeInfo, void* vz, const Nd4jLong* zShapeInfo, const uint numOfSpatialDims) {

    batchToSpaceNDCuda<X,Y><<<blocksPerGrid, threadsPerBlock, sharedMem, *stream>>>(vx, xShapeInfo, vy, yShapeInfo, vz, zShapeInfo, numOfSpatialDims);
}
BUILD_DOUBLE_TEMPLATE(template void batchToSpaceNDCudaLauncher, (const int blocksPerGrid, const int threadsPerBlock, const int sharedMem, const cudaStream_t *stream, const void* vx, const Nd4jLong* xShapeInfo, const void* vy, const Nd4jLong* yShapeInfo, void* vz, const Nd4jLong* zShapeInfo, const uint numOfSpatialDims), LIBND4J_TYPES, INTEGER_TYPES);

//////////////////////////////////////////////////////////////////////////
void batchToSpaceND(nd4j::LaunchContext* context, const NDArray& input, const NDArray& blockShape, const NDArray& crop, NDArray& output) {

    // 4D example, numOfSpatialDims = 2 - two spatial dimensions
    // [bS*blockShape[0]*blockShape[1], iH, iW, iC] is rearranged/permuted to [bS, iH*blockShape[0] - cropTop - cropBottom, iW*blockShape[1] - cropLeft - cropRight, iC]

    const uint rank = input.rankOf();
    const uint numOfSpatialDims = blockShape.sizeAt(0);

    //*** construct reshaping std::vector for first reshape of input array ***//

    std::vector<Nd4jLong> temp(numOfSpatialDims + rank);

    int i;
    for(i = 0; i < numOfSpatialDims; ++i)
        temp[i] = blockShape.e<Nd4jLong>(i);
    temp[i++] = output.sizeAt(0);
    for(int j = 1; j < rank; ++i, ++j)
        temp[i] = input.sizeAt(j);

    NDArray inputRearranged0 = input.reshape(input.ordering(), temp);

    //*** construct permuting std::vector for permutation of input array ***//

    temp[0] = numOfSpatialDims;

    for(i = 1; i <= numOfSpatialDims; ++i) {
        temp[2*i - 1] = numOfSpatialDims + i;
        temp[2*i]     = i - 1;
    }
    for(i = 2 * numOfSpatialDims + 1; i < temp.size(); ++i)
        temp[i] = i;

    inputRearranged0.permutei(temp);


    if(input.lengthOf() == output.lengthOf()) {

        output.assign(inputRearranged0);
    }
    else {
        //*** construct reshaping std::vector for second reshape of input array ***//

        temp.resize(rank);

        temp[0] = output.sizeAt(0);

        for(i = 1; i < rank; ++i)
            temp[i] = (i <= numOfSpatialDims) ? input.sizeAt(i) * blockShape.e<Nd4jLong>(i - 1) : input.sizeAt(i);

        NDArray inputRearranged1 = inputRearranged0.reshape(input.ordering(), temp);

        const int threadsPerBlock = MAX_NUM_THREADS / 4;
        const int blocksPerGrid = (output.lengthOf() + threadsPerBlock - 1) / threadsPerBlock;
        const int sharedMem = threadsPerBlock * sizeof(Nd4jLong) * output.rankOf() + 128;

        PointersManager manager(context, "batchToSpaceND");

        NDArray::prepareSpecialUse({&output}, {&inputRearranged1, &crop});
        BUILD_DOUBLE_SELECTOR(input.dataType(), crop.dataType(), batchToSpaceNDCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), inputRearranged1.getSpecialBuffer(), inputRearranged1.getSpecialShapeInfo(), crop.getSpecialBuffer(), crop.getSpecialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), numOfSpatialDims), LIBND4J_TYPES, INTEGER_TYPES);
        NDArray::registerSpecialUse({&output}, {&inputRearranged1, &crop});

        manager.synchronize();
    }
}



///////////////////////////////////////////////////////////////////
template<typename T>
__global__ static void spaceToBatchCuda(const void* vx, const Nd4jLong* xShapeInfo, void* vz, const Nd4jLong* zShapeInfo, const uint padBottom, const uint padTop, const uint padLeft, const uint padRight) {
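Note (illustration, not part of this commit): the launch-configuration arithmetic computed in batchToSpaceND above (and reused by spaceToBatchND below), assuming the usual libnd4j value MAX_NUM_THREADS = 512 and a rank-5 output like the tests on this page:

    // threadsPerBlock = 512 / 4 = 128
    // blocksPerGrid   = (output.lengthOf() + 127) / 128
    // sharedMem       = 128 * sizeof(Nd4jLong) * 5 + 128 = 128 * 8 * 5 + 128 = 5248 bytes
    //
    // i.e. one coordinate vector of `rank` Nd4jLongs per thread, plus 128 bytes of slack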
@@ -133,7 +265,7 @@ __global__ static void spaceToBatchCuda(const void* vx, const Nd4jLong* xShapeIn

    auto z = reinterpret_cast<T*>(vz);

    __shared__ int rank;
-   __shared__ Nd4jLong zLen, totalThreads, *sharedMem;
+   __shared__ Nd4jLong zLen, *sharedMem;

    if (threadIdx.x == 0) {
        extern __shared__ unsigned char shmem[];

@@ -141,7 +273,6 @@ __global__ static void spaceToBatchCuda(const void* vx, const Nd4jLong* xShapeIn

        rank = shape::rank(zShapeInfo);
        zLen = shape::length(zShapeInfo);
-       totalThreads = gridDim.x * blockDim.x;
    }
    __syncthreads();
@@ -210,6 +341,153 @@ void spaceToBatch(nd4j::LaunchContext* context, const NDArray& input, NDArray& o
    }
}

///////////////////////////////////////////////////////////////////
template<typename X, typename Y>
__global__ static void spaceToBatchNDCuda(const void* vx, const Nd4jLong* xShapeInfo,
                                          const void* vy, const Nd4jLong* yShapeInfo,
                                                void* vz, const Nd4jLong* zShapeInfo,
                                          const uint numOfSpatialDims) {

    // x - input, y - padding, z - output

    // 4D example
    // input  [bS, H * blockShape[0] - padBottom - padTop, W * blockShape[1] - padLeft - padRight, iC]
    // output [bS, H * blockShape[0], W * blockShape[1], iC]

    // if (padTop = padBottom = padRight = padLeft = 0) shapes are the same
    // else:
    // iH -> [padBottom, oH - padTop]
    // iW -> [padLeft, oW - padRight]
    // zLen > xLen

    const auto x = reinterpret_cast<const X*>(vx);
    const auto y = reinterpret_cast<const Y*>(vy);
          auto z = reinterpret_cast<X*>(vz);

    __shared__ int rank;    // xRank = zRank, yRank = 2;
    __shared__ Nd4jLong zLen, totalThreads, *sharedMem;

    if (threadIdx.x == 0) {

        extern __shared__ unsigned char shmem[];
        sharedMem = reinterpret_cast<Nd4jLong*>(shmem);

        rank = shape::rank(zShapeInfo);
        zLen = shape::length(zShapeInfo);
        totalThreads = gridDim.x * blockDim.x;
    }

    __syncthreads();

    auto coords = sharedMem + threadIdx.x * rank;

    for (int i = blockDim.x * blockIdx.x + threadIdx.x; i < zLen; i += totalThreads) {

        shape::index2coords(rank, zShapeInfo + 1, i, zLen, coords);

        const auto zOffset = shape::getOffset(0, zShapeInfo + 1, zShapeInfo + rank + 1, coords, rank);

        bool within = true;

        for(uint j = 1; j <= numOfSpatialDims; ++j) {

            // yRank = 2, calculate offset manually
            const auto yOffset  = (j - 1) * yShapeInfo[3];
            const auto padLeft  = y[yOffset];
            const auto padRight = y[yOffset + yShapeInfo[4]];

            within &= (coords[j] >= padLeft && coords[j] < shape::shapeOf(const_cast<Nd4jLong*>(zShapeInfo))[j] - padRight);

            if(!within)
                break;

            coords[j] -= padLeft;       // get coordinates for x
        }

        if(within)
            z[zOffset] = x[shape::getOffset(0, xShapeInfo + 1, xShapeInfo + rank + 1, coords, rank)];
        else
            z[zOffset] = 0.f;
    }
}

///////////////////////////////////////////////////////////////////
template<typename X, typename Y>
static void spaceToBatchNDCudaLauncher(const int blocksPerGrid, const int threadsPerBlock, const int sharedMem, const cudaStream_t *stream, const void* vx, const Nd4jLong* xShapeInfo, const void* vy, const Nd4jLong* yShapeInfo, void* vz, const Nd4jLong* zShapeInfo, const uint numOfSpatialDims) {

    spaceToBatchNDCuda<X,Y><<<blocksPerGrid, threadsPerBlock, sharedMem, *stream>>>(vx, xShapeInfo, vy, yShapeInfo, vz, zShapeInfo, numOfSpatialDims);
}
BUILD_DOUBLE_TEMPLATE(template void spaceToBatchNDCudaLauncher, (const int blocksPerGrid, const int threadsPerBlock, const int sharedMem, const cudaStream_t *stream, const void* vx, const Nd4jLong* xShapeInfo, const void* vy, const Nd4jLong* yShapeInfo, void* vz, const Nd4jLong* zShapeInfo, const uint numOfSpatialDims), LIBND4J_TYPES, INTEGER_TYPES);

//////////////////////////////////////////////////////////////////////////
void spaceToBatchND(nd4j::LaunchContext* context, const NDArray& input, const NDArray& blockShape, const NDArray& padding, NDArray& output ) {

    // 4D example with two spatial dimensions
    // [bS, iH, iW, iC] is rearranged/permuted to [bS*blockShape[0]*blockShape[1], (iH + padBottom + padTop)/blockShape[0], (iW + padLeft + padRight)/blockShape[1], iC]

    const uint rank = input.rankOf();

    const uint numOfSpatialDims = blockShape.sizeAt(0);

    //*** construct reshaping std::vector for first reshape of output array ***//
    std::vector<Nd4jLong> temp(numOfSpatialDims + rank);

    int i;
    for(i = 0; i < numOfSpatialDims; ++i)
        temp[i] = blockShape.e<Nd4jLong>(i);
    temp[i++] = input.sizeAt(0);
    for(int j = 1; j < rank; ++i, ++j)
        temp[i] = output.sizeAt(j);

    NDArray outputRearranged0 = output.reshape(output.ordering(), temp);

    //*** construct permuting std::vector for permutation of output array ***//

    temp[0] = numOfSpatialDims;

    for(i = 1; i <= numOfSpatialDims; ++i) {
        temp[2*i - 1] = numOfSpatialDims + i;
        temp[2*i]     = i - 1;
    }
    for(i = 2 * numOfSpatialDims + 1; i < temp.size(); ++i)
        temp[i] = i;

    outputRearranged0.permutei(temp);

    // ****** //

    if(input.lengthOf() == output.lengthOf()) {
        outputRearranged0.assign(input);
    }
    else {

        //*** construct reshaping std::vector for second reshape of output array ***//
        temp.resize(rank);

        temp[0] = input.sizeAt(0);

        for(i = 1; i < rank; ++i)
            temp[i] = (i <= numOfSpatialDims) ? output.sizeAt(i) * blockShape.e<Nd4jLong>(i - 1) : output.sizeAt(i);

        NDArray outputRearranged1 = outputRearranged0.reshape(output.ordering(), temp);

        const int threadsPerBlock = MAX_NUM_THREADS / 4;
        const int blocksPerGrid = (output.lengthOf() + threadsPerBlock - 1) / threadsPerBlock;
        const int sharedMem = threadsPerBlock * sizeof(Nd4jLong) * output.rankOf() + 128;

        PointersManager manager(context, "spaceToBatchND");

        NDArray::prepareSpecialUse({&outputRearranged1}, {&input, &padding});
        BUILD_DOUBLE_SELECTOR(input.dataType(), padding.dataType(), spaceToBatchNDCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), padding.getSpecialBuffer(), padding.getSpecialShapeInfo(), outputRearranged1.specialBuffer(), outputRearranged1.specialShapeInfo(), numOfSpatialDims), LIBND4J_TYPES, INTEGER_TYPES);
        NDArray::registerSpecialUse({&outputRearranged1}, {&input, &padding});

        manager.synchronize();

        if(output.getSpecialBuffer() != outputRearranged1.getSpecialBuffer())
            outputRearranged0.assign(outputRearranged1);
    }
}


/*
template <int N, bool B2S>
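Note (illustration, not part of this commit): how the "calculate offset manually" lines in the kernels above index the rank-2 padding/crop array y. A libnd4j shapeInfo for rank 2 is laid out as

    // yShapeInfo = { 2, d0, d1, s0, s1, ... }   // rank, shape, strides, ...

so yShapeInfo[3] is the stride of dimension 0 and yShapeInfo[4] the stride of dimension 1 (the same layout the shape::getOffset calls rely on via zShapeInfo + 1 and zShapeInfo + rank + 1). Hence y[(j - 1) * yShapeInfo[3]] reads pad-left for spatial dimension j-1, and y[(j - 1) * yShapeInfo[3] + yShapeInfo[4]] reads pad-right.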
@@ -31,6 +31,10 @@ namespace helpers {

    void spaceToBatch(nd4j::LaunchContext* context, const NDArray& input, NDArray& output, const uint padBottom, const uint padTop, const uint padLeft, const uint padRight, const uint blockSize);

+   void spaceToBatchND(nd4j::LaunchContext* context, const NDArray& input, const NDArray& blockShape, const NDArray& padding, NDArray& output);

+   void batchToSpaceND(nd4j::LaunchContext* context, const NDArray& input, const NDArray& blockShape, const NDArray& crop, NDArray& output);

    /*
    // this method MUST be platform-specific
@@ -689,3 +689,161 @@ TEST_F(DeclarableOpsTests13, cyclic_rshift_bits_1) {

    delete result;
}

////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests13, space_to_batch_nd_1) {

    NDArray x('c', {1, 2, 2, 2, 3}, nd4j::DataType::FLOAT32);
    NDArray blockShape('c', {3}, {2, 2, 2} , nd4j::DataType::INT32);    // three spatial dimensions
    NDArray paddings('c', {3, 2}, {0, 0, 0, 0, 0, 0} , nd4j::DataType::INT32);

    NDArray exp('c', {8, 1, 1, 1, 3}, nd4j::DataType::FLOAT32);

    x.linspace(1);
    exp.linspace(1);

    nd4j::ops::space_to_batch_nd op;
    auto result = op.execute({&x, &blockShape, &paddings}, {}, {});
    ASSERT_EQ(Status::OK(), result->status());

    auto z = result->at(0);

    ASSERT_TRUE(exp.isSameShape(z));
    ASSERT_TRUE(exp.equalsTo(z));

    delete result;
}

////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests13, space_to_batch_nd_2) {

    NDArray x('c', {2, 2,4,3, 1}, nd4j::DataType::FLOAT32);
    NDArray blockShape('c', {3}, {2, 2, 3} , nd4j::DataType::INT32);    // three spatial dimensions
    NDArray paddings('c', {3, 2}, {0,0, 0,2, 2,1} , nd4j::DataType::INT32);

    NDArray exp('c', {24, 1,3,2, 1}, { 0, 2, 0, 8, 0, 0, 0, 26, 0, 32, 0, 0, 0, 3, 0, 9, 0, 0, 0, 27, 0, 33, 0, 0, 1,
                                       0, 7, 0, 0, 0, 25, 0, 31, 0, 0, 0, 0, 5, 0, 11, 0, 0, 0, 29, 0, 35, 0, 0, 0, 6,
                                       0, 12, 0, 0, 0, 30, 0, 36, 0, 0, 4, 0, 10, 0, 0, 0, 28, 0, 34, 0, 0, 0, 0, 14,
                                       0, 20, 0, 0, 0, 38, 0, 44, 0, 0, 0, 15, 0, 21, 0, 0, 0, 39, 0, 45, 0, 0, 13, 0,
                                       19, 0, 0, 0, 37, 0, 43, 0, 0, 0, 0, 17, 0, 23, 0, 0, 0, 41, 0, 47, 0, 0, 0, 18,
                                       0, 24, 0, 0, 0, 42, 0, 48, 0, 0, 16, 0, 22, 0, 0, 0, 40, 0, 46, 0, 0, 0}, nd4j::DataType::FLOAT32);
    x.linspace(1);

    nd4j::ops::space_to_batch_nd op;
    auto result = op.execute({&x, &blockShape, &paddings}, {}, {});
    ASSERT_EQ(Status::OK(), result->status());

    auto z = result->at(0);
    // z->printBuffer();

    ASSERT_TRUE(exp.isSameShape(z));
    ASSERT_TRUE(exp.equalsTo(z));

    delete result;
}

////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests13, space_to_batch_nd_3) {

    NDArray x('c', {2, 2,4,3, 1}, nd4j::DataType::FLOAT32);
    NDArray blockShape('c', {3}, {2, 2, 3} , nd4j::DataType::INT32);    // three spatial dimensions
    NDArray paddings('c', {3, 2}, {1,1, 0,2, 2,1} , nd4j::DataType::INT32);

    NDArray exp('c', {24, 2,3,2, 1}, { 0, 0, 0, 0, 0, 0, 0, 14, 0, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 38, 0, 44, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15,
                                       0, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, 39, 0, 45, 0, 0, 0, 0, 0, 0, 0, 0, 13, 0, 19, 0, 0, 0, 0, 0, 0, 0,
                                       0, 0, 37, 0, 43, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 17, 0, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 41, 0, 47, 0, 0,
                                       0, 0, 0, 0, 0, 0, 0, 18, 0, 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 0, 48, 0, 0, 0, 0, 0, 0, 0, 0, 16, 0,
                                       22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 40, 0, 46, 0, 0, 0, 0, 2, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 26, 0, 32,
                                       0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27, 0, 33, 0, 0, 0, 0, 0, 0, 0, 0, 1,
                                       0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 0, 31, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 11, 0, 0, 0, 0, 0, 0,
                                       0, 0, 0, 29, 0, 35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30, 0, 36, 0, 0,
                                       0, 0, 0, 0, 0, 0, 4, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 28, 0, 34, 0, 0, 0, 0, 0, 0, 0, 0, 0}, nd4j::DataType::FLOAT32);
    x.linspace(1);

    nd4j::ops::space_to_batch_nd op;
    auto result = op.execute({&x, &blockShape, &paddings}, {}, {});
    ASSERT_EQ(Status::OK(), result->status());

    auto z = result->at(0);
    // z->printBuffer();

    ASSERT_TRUE(exp.isSameShape(z));
    ASSERT_TRUE(exp.equalsTo(z));

    delete result;
}

////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests13, batch_to_space_nd_1) {

    NDArray x('c', {8, 1, 1, 1, 3}, nd4j::DataType::FLOAT32);

    NDArray blockShape('c', {3}, {2, 2, 2} , nd4j::DataType::INT32);    // three spatial dimensions
    NDArray crop('c', {3, 2}, {0, 0, 0, 0, 0, 0} , nd4j::DataType::INT32);

    NDArray exp('c', {1, 2, 2, 2, 3}, nd4j::DataType::FLOAT32);

    x.linspace(1);
    exp.linspace(1);

    nd4j::ops::batch_to_space_nd op;
    auto result = op.execute({&x, &blockShape, &crop}, {}, {});
    ASSERT_EQ(Status::OK(), result->status());

    auto z = result->at(0);

    ASSERT_TRUE(exp.isSameShape(z));
    ASSERT_TRUE(exp.equalsTo(z));

    delete result;
}

////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests13, batch_to_space_nd_2) {

    NDArray x('c', {24, 1,3,2, 1}, nd4j::DataType::FLOAT32);
    NDArray blockShape('c', {3}, {2, 2, 3} , nd4j::DataType::INT32);    // three spatial dimensions
    NDArray crop('c', {3, 2}, {0,0, 0,2, 2,1} , nd4j::DataType::INT32);

    NDArray exp('c', {2, 2,4,3, 1}, {25, 2, 14, 61, 38, 50, 27, 4, 16, 63, 40, 52, 97, 74, 86, 133, 110, 122, 99, 76, 88, 135, 112, 124,
                                     31, 8, 20, 67, 44, 56, 33, 10, 22, 69, 46, 58, 103, 80, 92, 139, 116, 128, 105, 82, 94, 141, 118, 130}, nd4j::DataType::FLOAT32);
    x.linspace(1);

    nd4j::ops::batch_to_space_nd op;
    auto result = op.execute({&x, &blockShape, &crop}, {}, {});
    ASSERT_EQ(Status::OK(), result->status());

    auto z = result->at(0);
    // z->printBuffer();

    ASSERT_TRUE(exp.isSameShape(z));
    ASSERT_TRUE(exp.equalsTo(z));

    delete result;
}

////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests13, batch_to_space_nd_3) {

    NDArray x('c', {24, 2,3,2, 1}, nd4j::DataType::FLOAT32);
    NDArray blockShape('c', {3}, {2, 2, 3} , nd4j::DataType::INT32);    // three spatial dimensions
    NDArray crop('c', {3, 2}, {1,1, 0,2, 2,1} , nd4j::DataType::INT32);

    NDArray exp('c', {2, 2,4,3, 1}, {193, 146, 170, 265, 218, 242, 195, 148, 172, 267, 220, 244, 55, 8, 32, 127, 80, 104, 57, 10, 34, 129, 82,
                                     106, 205, 158, 182, 277, 230, 254, 207, 160, 184, 279, 232, 256, 67, 20, 44, 139, 92, 116, 69, 22, 46, 141, 94, 118}, nd4j::DataType::FLOAT32);
    x.linspace(1);

    nd4j::ops::batch_to_space_nd op;
    auto result = op.execute({&x, &blockShape, &crop}, {}, {});
    ASSERT_EQ(Status::OK(), result->status());

    auto z = result->at(0);
    // z->printBuffer();

    ASSERT_TRUE(exp.isSameShape(z));
    ASSERT_TRUE(exp.equalsTo(z));

    delete result;
}
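Note (illustration, not part of this commit): the expected buffers in these tests are built with linspace, which fills an NDArray with consecutive values starting at the given number; in the padded tests, the nonzero entries of exp are exactly those linspace values routed to their batch positions.

    NDArray a('c', {2, 3}, nd4j::DataType::FLOAT32);
    a.linspace(1);     // buffer: 1, 2, 3, 4, 5, 6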