[WIP] stb/bts nd (#144)

* - start working on space_to_batch_nd

Signed-off-by: Yurii <yurii@skymind.io>

* - provide cpu helper for space_to_batch_nd op

Signed-off-by: Yurii <yurii@skymind.io>

* few typos fixed

Signed-off-by: raver119 <raver119@gmail.com>

* - add tests for space_to_batch and correct bugs

Signed-off-by: Yurii <yurii@skymind.io>

* - write cuda kernel for space_to_batch op

Signed-off-by: Yurii <yurii@skymind.io>

* - add order argument to shape::index2coords method in convolution cuda ops

Signed-off-by: Yurii <yurii@skymind.io>

* - restore some previous code

Signed-off-by: Yurii <yurii@skymind.io>

* old col2im kernel activated

Signed-off-by: raver119 <raver119@gmail.com>

* - change coords calculation in col2im kernel

Signed-off-by: Yurii <yurii@skymind.io>

* - restore old col2im kernel

Signed-off-by: Yurii <yurii@skymind.io>

* - add custom op for batch_to_space

Signed-off-by: Yurii <yurii@skymind.io>

* - provide cpu version for batch_to_space_nd op

Signed-off-by: Yurii <yurii@skymind.io>

* - provide cuda kernel for batch_to_space_nd op

Signed-off-by: Yurii <yurii@skymind.io>
master
raver119 2019-08-21 21:11:46 +03:00 committed by GitHub
parent e604ffe0d2
commit eea3062ccf
12 changed files with 943 additions and 41 deletions


@@ -599,6 +599,7 @@ namespace nd4j {
    /**
    *  apply scalar operation to array
    *  extraParams - extra parameters for operation
+   *  returns scalar array
    */
    NDArray reduceNumber(nd4j::reduce::FloatOps ops, void *extraParams = nullptr) const;
    NDArray reduceNumber(nd4j::reduce::SameOps ops, void *extraParams = nullptr) const;
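
The added doc line records that reduceNumber returns a rank-0 (scalar) NDArray rather than a raw number. A minimal usage sketch, with hypothetical values and assuming only the NDArray API used elsewhere in this commit:

    NDArray arr('c', {2, 2}, {1.f, 2.f, 3.f, 4.f}, nd4j::DataType::FLOAT32);
    NDArray sum = arr.reduceNumber(nd4j::reduce::Sum);   // rank-0 array holding 10
    double value = sum.e<double>(0);                     // pull the value out of the scalar array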


@@ -30,7 +30,6 @@ limitations under the License.
 ==============================================================================*/
 //
-// @author raver119@gmail.com, created on 19.01.18.
 // @author Yurii Shyrma (iuriish@yahoo.com)
 //
@@ -63,9 +62,8 @@ CUSTOM_OP_IMPL(batch_to_space, 2, 1, false, 0, 1) {
     REQUIRE_TRUE(rank == 4, 0, "BatchToSpace: rank of input array must be equal 4, but got %i instead", rank);
     REQUIRE_TRUE(dim0 % (blockSize * blockSize) == 0, 0, "BatchToSpace: first dimension of input array must be divisible by blockSize * blockSize (that is by %i), but got first dimension equal to %i", blockSize * blockSize, dim0);

-    const std::string expectedCropShape = "[2, 2]";
-    const std::string actualCropShape = ShapeUtils::shapeAsString(crop);
-    REQUIRE_TRUE(actualCropShape == expectedCropShape, 0, "BatchToSpace: operation expects crop shape to be {2, 2}, but got %s instead", actualCropShape.c_str());
+    if(crop->sizeAt(0) != 2 || crop->sizeAt(1) != 2)
+        REQUIRE_TRUE(false, 0, "BatchToSpace: operation expects crop shape to be {2, 2}, but got %s instead", ShapeUtils::shapeAsString(crop).c_str());

     const uint cropBottom = crop->e<uint>(0,0);
     const uint cropTop    = crop->e<uint>(0,1);
@@ -104,9 +102,8 @@ DECLARE_SHAPE_FN(batch_to_space) {
     REQUIRE_TRUE(rank == 4, 0, "BatchToSpace: rank of input array must be equal 4, but got %i instead", rank);
     REQUIRE_TRUE(dim0 % (blockSize * blockSize) == 0, 0, "BatchToSpace: first dimension of input array must be divisible by blockSize * blockSize (that is by %i), but got first dimension equal to %i", blockSize * blockSize, dim0);

-    const std::string expectedCropShape = "[2, 2]";
-    const std::string actualCropShape = ShapeUtils::shapeAsString(cropShapeInfo);
-    REQUIRE_TRUE(actualCropShape == expectedCropShape, 0, "BatchToSpace: operation expects crop shape to be {2, 2}, but got %s instead", actualCropShape.c_str());
+    if(cropShapeInfo[1] != 2 || cropShapeInfo[2] != 2)
+        REQUIRE_TRUE(false, 0, "BatchToSpace: operation expects crop shape to be {2, 2}, but got %s instead", ShapeUtils::shapeAsString(cropShapeInfo).c_str());

     const uint cropBottom = INPUT_VARIABLE(1)->e<Nd4jLong>(0,0);
     const uint cropTop    = INPUT_VARIABLE(1)->e<Nd4jLong>(0,1);


@@ -0,0 +1,131 @@
/*******************************************************************************
* Copyright (c) 2015-2018 Skymind, Inc.
*
* This program and the accompanying materials are made available under the
* terms of the Apache License, Version 2.0 which is available at
* https://www.apache.org/licenses/LICENSE-2.0.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
* SPDX-License-Identifier: Apache-2.0
******************************************************************************/
/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
//
// @author Yurii Shyrma (iuriish@yahoo.com)
//
#include <op_boilerplate.h>
#if NOT_EXCLUDED(OP_batch_to_space_nd)
#include <ops/declarable/headers/parity_ops.h>
#include <ops/declarable/helpers/s_t_b.h>
namespace nd4j {
namespace ops {
CUSTOM_OP_IMPL(batch_to_space_nd, 3, 1, false, 0, 0) {
// 4D example, numOfSpatialDims = 2 - two spatial dimensions
// [bS*blockShape[0]*blockShape[1], iH, iW, iC] is rearranged/permuted to [bS, iH*blockShape[0] - cropTop - cropBottom, iW*blockShape[1] - cropLeft - cropRight, iC]
auto input = INPUT_VARIABLE(0);
auto blockShape = INPUT_VARIABLE(1);
auto crop = INPUT_VARIABLE(2);
auto output = OUTPUT_VARIABLE(0);
REQUIRE_TRUE(blockShape->rankOf() == 1, 0, "BatchToSpaceND: rank of blockShape array must be equal to one, but got %i instead !", blockShape->rankOf());
const uint numOfSpatialDims = blockShape->sizeAt(0);
const auto product = blockShape->reduceNumber(nd4j::reduce::Prod).e<Nd4jLong>(0);
REQUIRE_TRUE(input->sizeAt(0) % product == 0, 0, "BatchToSpaceND: first dimension of input array must be divisible by product of blockShape array elements (= %lld), but got first dimension equal to %i", product, input->sizeAt(0));
// FIXME - should we use this time-consuming validation ?
for (uint i = 0; i < numOfSpatialDims; ++i) {
const Nd4jLong blockSize = blockShape->e<Nd4jLong>(i);
REQUIRE_TRUE(blockSize >= 2, 0, "BatchToSpaceND: all elements of blockShape array must be >= 2, but got value of %i for element number %i !", blockSize, i);
}
if(crop->sizeAt(0) != numOfSpatialDims || crop->sizeAt(1) != 2) {
const std::string expectedCropShape = "[" + std::to_string(numOfSpatialDims) + ", 2]"; // [numOfSpatialDims, 2]
REQUIRE_TRUE(false, 0, "BatchToSpaceND: operation expects crop shape to be %s, but got %s instead", expectedCropShape.c_str(), ShapeUtils::shapeAsString(crop).c_str());
}
// FIXME - should we use this time-consuming validation ?
for (uint i = 0; i < numOfSpatialDims; ++i) {
const auto cropLeft = crop->e<uint>(i,0);
const auto cropRight = crop->e<uint>(i,1);
const auto outSpatialDim = input->sizeAt(i + 1) * blockShape->e<Nd4jLong>(i) - cropLeft - cropRight;
REQUIRE_TRUE(outSpatialDim >= 0, 0, "BatchToSpaceND: crop left/right values are too big and cause negative output spatial dimension/dimensions !");
}
helpers::batchToSpaceND(block.launchContext(), *input, *blockShape, *crop, *output);
return Status::OK();
}
////////////////////////////////////////////////////////////////////////////////
DECLARE_TYPES(batch_to_space_nd) {
getOpDescriptor()->setAllowedInputTypes(0, nd4j::DataType::ANY)
->setAllowedInputTypes(1, {ALL_INTS})
->setAllowedInputTypes(2, {ALL_INTS})
->setSameMode(true);
}
////////////////////////////////////////////////////////////////////////////////
DECLARE_SHAPE_FN(batch_to_space_nd) {
auto inputShapeInfo = inputShape->at(0);
auto blockShapeInfo = inputShape->at(1);
auto cropShapeInfo = inputShape->at(2);
REQUIRE_TRUE(blockShapeInfo[0] == 1, 0, "BatchToSpaceND: rank of blockShape array must be equal to one, but got %i instead !", blockShapeInfo[0]);
const auto product = INPUT_VARIABLE(1)->reduceNumber(nd4j::reduce::Prod).e<Nd4jLong>(0);
REQUIRE_TRUE(inputShapeInfo[1] % product == 0, 0, "BatchToSpaceND: first dimension of input array must be divisible by product of blockShape array elements (= %lld), but got first dimension equal to %i", product, inputShapeInfo[1]);
const auto numOfSpatialDims = blockShapeInfo[1];
if(cropShapeInfo[1] != numOfSpatialDims || cropShapeInfo[2] != 2) {
const std::string expectedCropShape = "[" + std::to_string(numOfSpatialDims) + ", 2]"; // [numOfSpatialDims, 2]
REQUIRE_TRUE(false, 0, "BatchToSpaceND: operation expects crop shape to be %s, but got %s instead", expectedCropShape.c_str(), ShapeUtils::shapeAsString(cropShapeInfo).c_str());
}
std::vector<Nd4jLong> outShape(inputShapeInfo + 1, inputShapeInfo + 1 + inputShapeInfo[0]);
outShape[0] /= product;
for (uint i = 0; i < numOfSpatialDims; ++i)
outShape[i + 1] = outShape[i + 1] * INPUT_VARIABLE(1)->e<Nd4jLong>(i) - INPUT_VARIABLE(2)->e<uint>(i,0) - INPUT_VARIABLE(2)->e<uint>(i,1);
return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inputShapeInfo), 'c', outShape));
}
}
}
#endif
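
Unlike the fixed-rank batch_to_space above, batch_to_space_nd takes three array inputs (data, blockShape, crop) and no integer arguments. A minimal invocation sketch, mirroring the tests added later in this commit:

    NDArray x('c', {8, 1, 1, 1, 3}, nd4j::DataType::FLOAT32);
    NDArray blockShape('c', {3}, {2, 2, 2}, nd4j::DataType::INT32);
    NDArray crop('c', {3, 2}, {0, 0, 0, 0, 0, 0}, nd4j::DataType::INT32);

    nd4j::ops::batch_to_space_nd op;
    auto result = op.execute({&x, &blockShape, &crop}, {}, {});   // output shape [1, 2, 2, 2, 3]
    // ... use result->at(0) ...
    delete result;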


@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 //
-// @author raver119@gmail.com, created on 19.01.18.
 // @author Yurii Shyrma (iuriish@yahoo.com)
+// @author raver119@gmail.com
 //

 #include <op_boilerplate.h>
@@ -39,12 +39,11 @@ CUSTOM_OP_IMPL(space_to_batch, 2, 1, false, 0, 1) {
     const uint blockSize = INT_ARG(0);
     REQUIRE_TRUE(blockSize >= 2, 0, "SpaceToBatch: integer parameter block_size must be >= 2, but got %i instead", blockSize);

-    const int rank = input->rankOf();
-    REQUIRE_TRUE(rank == 4, 0, "SpaceToBatch: rank of input array must be equal 4, but got %i instead", rank);
+    REQUIRE_TRUE(input->rankOf() == 4, 0, "SpaceToBatch: rank of input array must be equal 4, but got %i instead", input->rankOf());
+    REQUIRE_TRUE(output->rankOf() == 4, 0, "SpaceToBatch: rank of output array must be equal 4, but got %i instead", output->rankOf());

-    const std::string expectedpaddingShape = "[2, 2]";
-    const std::string actualpaddingShape = ShapeUtils::shapeAsString(padding);
-    REQUIRE_TRUE(actualpaddingShape == expectedpaddingShape, 0, "SpaceToBatch: operation expects padding shape to be {2, 2}, but got %s instead", actualpaddingShape.c_str());
+    if(padding->sizeAt(0) != 2 || padding->sizeAt(1) != 2)
+        REQUIRE_TRUE(false, 0, "SpaceToBatch: operation expects padding shape to be {2, 2}, but got %s instead", ShapeUtils::shapeAsString(padding).c_str());

     const uint padBottom = padding->e<uint>(0,0);
     const uint padTop    = padding->e<uint>(0,1);
@@ -78,9 +77,8 @@ DECLARE_SHAPE_FN(space_to_batch) {
     const int rank = inputShapeInfo[0];
     REQUIRE_TRUE(rank == 4, 0, "SpaceToBatch: rank of input array must be equal 4, but got %i instead", rank);

-    const std::string expectedpaddingShape = "[2, 2]";
-    const std::string actualpaddingShape = ShapeUtils::shapeAsString(paddingShapeInfo);
-    REQUIRE_TRUE(actualpaddingShape == expectedpaddingShape, 0, "SpaceToBatch: operation expects padding shape to be {2, 2}, but got %s instead", actualpaddingShape.c_str());
+    if(paddingShapeInfo[1] != 2 || paddingShapeInfo[2] != 2)
+        REQUIRE_TRUE(false, 0, "SpaceToBatch: operation expects padding shape to be {2, 2}, but got %s instead", ShapeUtils::shapeAsString(paddingShapeInfo).c_str());

     const uint padBottom = INPUT_VARIABLE(1)->e<Nd4jLong>(0,0);
     const uint padTop    = INPUT_VARIABLE(1)->e<Nd4jLong>(0,1);


@@ -0,0 +1,108 @@
/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
//
// @author Yurii Shyrma (iuriish@yahoo.com)
//
#include <op_boilerplate.h>
#if NOT_EXCLUDED(OP_space_to_batch_nd)
#include <ops/declarable/headers/parity_ops.h>
#include <ops/declarable/helpers/s_t_b.h>
namespace nd4j {
namespace ops {
CUSTOM_OP_IMPL(space_to_batch_nd, 3, 1, false, 0, 0) {
// 4D example, numOfSpatialDims = 2 - two spatial dimensions
// [bS, iH, iW, iC] is rearranged/permuted to [bS*blockShape[0]*blockShape[1], (iH + padBottom + padTop)/blockShape[0], (iW + padLeft + padRight)/blockShape[1], iC]
auto input = INPUT_VARIABLE(0);
auto blockShape = INPUT_VARIABLE(1);
auto padding = INPUT_VARIABLE(2);
auto output = OUTPUT_VARIABLE(0);
REQUIRE_TRUE(blockShape->rankOf() == 1, 0, "SpaceToBatchND: rank of blockShape array must be equal to one, but got %i instead !", blockShape->rankOf());
const uint numOfSpatialDims = blockShape->sizeAt(0);
REQUIRE_TRUE(input->rankOf() == output->rankOf(), 0, "SpaceToBatchND: rank of input and output array must be the same, but got %i and %i correspondingly !", input->rankOf(), output->rankOf());
// FIXME - should we use this time-consuming validation ?
for (uint i = 0; i < numOfSpatialDims; ++i) {
const Nd4jLong blockSize = blockShape->e<Nd4jLong>(i);
REQUIRE_TRUE(blockSize >= 2, 0, "SpaceToBatchND: all elements of blockShape array must be >= 2, but got value of %i for element number %i !", blockSize, i);
}
if(padding->sizeAt(0) != numOfSpatialDims || padding->sizeAt(1) != 2) {
const std::string expectedpaddingShape = "[" + std::to_string(numOfSpatialDims) + ", 2]"; // [numOfSpatialDims, 2]
REQUIRE_TRUE(false, 0, "SpaceToBatchND: operation expects padding shape to be %s, but got %s instead", expectedpaddingShape.c_str(), ShapeUtils::shapeAsString(padding).c_str());
}
// FIXME - should we use this time-consuming validation ?
for (uint i = 0; i < numOfSpatialDims; ++i) {
const uint padLeft = padding->e<uint>(i,0);
const uint padRight = padding->e<uint>(i,1);
const Nd4jLong blockSize = blockShape->e<Nd4jLong>(i);
REQUIRE_TRUE((input->sizeAt(i + 1) + padLeft + padRight) % blockSize == 0, 0, "SpaceToBatchND: after padding, spatial dimensions of input array must be divisible by blockSize !");
}
helpers::spaceToBatchND(block.launchContext(), *input, *blockShape, *padding, *output);
return Status::OK();
}
////////////////////////////////////////////////////////////////////////////////
DECLARE_TYPES(space_to_batch_nd) {
getOpDescriptor()->setAllowedInputTypes(0, nd4j::DataType::ANY)
->setAllowedInputTypes(1, {ALL_INTS})
->setAllowedInputTypes(2, {ALL_INTS})
->setSameMode(true);
}
////////////////////////////////////////////////////////////////////////////////
DECLARE_SHAPE_FN(space_to_batch_nd) {
auto inputShapeInfo = inputShape->at(0);
auto blockShapeInfo = inputShape->at(1);
auto paddingShapeInfo = inputShape->at(2);
REQUIRE_TRUE(blockShapeInfo[0] == 1, 0, "SpaceToBatchND: rank of blockShape array must be equal to one, but got %i instead !", blockShapeInfo[0]);
const uint numOfSpatialDims = blockShapeInfo[1];
if(paddingShapeInfo[1] != numOfSpatialDims || paddingShapeInfo[2] != 2) {
const std::string expectedpaddingShape = "[" + std::to_string(numOfSpatialDims) + ", 2]"; // [numOfSpatialDims, 2]
REQUIRE_TRUE(false, 0, "SpaceToBatchND: operation expects padding shape to be %s, but got %s instead", expectedpaddingShape.c_str(), ShapeUtils::shapeAsString(paddingShapeInfo).c_str());
}
std::vector<Nd4jLong> outShape(inputShapeInfo + 1, inputShapeInfo + 1 + inputShapeInfo[0]);
outShape[0] *= INPUT_VARIABLE(1)->reduceNumber(nd4j::reduce::Prod).e<Nd4jLong>(0);
for (uint i = 0; i < numOfSpatialDims; ++i)
outShape[i + 1] = (outShape[i + 1] + INPUT_VARIABLE(2)->e<uint>(i,0) + INPUT_VARIABLE(2)->e<uint>(i,1)) / INPUT_VARIABLE(1)->e<Nd4jLong>(i);
return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inputShapeInfo), 'c', outShape));
}
}
}
#endif
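
The shape function's arithmetic is the inverse of batch_to_space_nd: the batch dimension is multiplied by prod(blockShape) and each padded spatial dimension is divided by its block size. A standalone sketch of that computation (the function and variable names here are illustrative, not part of the library):

    #include <cstdint>
    #include <utility>
    #include <vector>

    // out[0] = in[0] * prod(block);  out[i+1] = (in[i+1] + padLeft + padRight) / block[i]
    std::vector<int64_t> stbOutShape(const std::vector<int64_t>& in,
                                     const std::vector<int64_t>& block,
                                     const std::vector<std::pair<int64_t, int64_t>>& pad) {
        std::vector<int64_t> out(in);
        for (auto b : block)
            out[0] *= b;                 // batch grows by the product of blockShape
        for (size_t i = 0; i < block.size(); ++i)
            out[i + 1] = (in[i + 1] + pad[i].first + pad[i].second) / block[i];
        return out;                      // trailing (channel) dims are untouched
    }

For example, in = {2, 2,4,3, 1} with block = {2, 2, 3} and pad = {{0,0}, {0,2}, {2,1}} yields {24, 1,3,2, 1}, matching the space_to_batch_nd_2 test added below.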


@@ -617,6 +617,10 @@ namespace nd4j {
         DECLARE_CUSTOM_OP(space_to_batch, 2, 1, false, 0, 1);
         #endif

+        #if NOT_EXCLUDED(OP_space_to_batch_nd)
+        DECLARE_CUSTOM_OP(space_to_batch_nd, 3, 1, false, 0, 0);
+        #endif
+
         /**
          *
          *
@@ -624,6 +628,9 @@ namespace nd4j {
         #if NOT_EXCLUDED(OP_batch_to_space)
         DECLARE_CUSTOM_OP(batch_to_space, 2, 1, false, 0, 1);
         #endif
+        #if NOT_EXCLUDED(OP_batch_to_space_nd)
+        DECLARE_CUSTOM_OP(batch_to_space_nd, 3, 1, false, 0, 0);
+        #endif

         /**
          * top_k operation returns a vector of k top values for


@@ -15,8 +15,8 @@
 ******************************************************************************/
 //
-// @author raver119@gmail.com, created on 19.01.18.
 // @author Yurii Shyrma (iuriish@yahoo.com)
+// @author raver119@gmail.com
 //

 #include <ops/declarable/helpers/s_t_b.h>
@@ -90,13 +90,107 @@ void batchToSpace(nd4j::LaunchContext* context, const NDArray& input, NDArray& o
     }
 }
//////////////////////////////////////////////////////////////////////////
template <typename T>
static void batchToSpaceND_(const NDArray& input, const NDArray& crop, NDArray& output, const uint numOfSpatialDims) {
// input [bS, H * blockShape[0], W * blockShape[1], iC]
// output [bS, H * blockShape[0] - cropBottom - cropTop, W * blockShape[1] - cropLeft - cropRight, iC]
// if (cropTop = cropBottom = cropRight = cropLeft = 0) shapes are the same
// else:
// oH -> [cropBottom, iH - cropTop]
// oW -> [cropLeft, iW - cropRight]
// xLen >= zLen
const T* x = input.bufferAsT<T>();
T* z = output.bufferAsT<T>();
const int rank = input.rankOf();
const Nd4jLong zLen = output.lengthOf();
std::vector<Nd4jLong> coords(rank);
// loop through input array
PRAGMA_OMP_PARALLEL_FOR_ARGS(schedule(guided) firstprivate(coords))
for (Nd4jLong i = 0; i < zLen; ++i) {
shape::index2coords(rank, output.shapeOf(), i, zLen, coords.data());
const auto zOffset = shape::getOffset(0, output.shapeOf(), output.stridesOf(), coords.data(), rank);
// evaluate spatial coordinates for x
for(uint j = 1; j <= numOfSpatialDims; ++j)
coords[j] += crop.e<uint>(j - 1, 0); // add crop left
z[zOffset] = x[shape::getOffset(0, input.shapeOf(), input.stridesOf(), coords.data(), rank)];
}
}
BUILD_SINGLE_TEMPLATE(template void batchToSpaceND_, (const NDArray& input, const NDArray& crop, NDArray& output, const uint numOfSpatialDims), LIBND4J_TYPES);
//////////////////////////////////////////////////////////////////////////
void batchToSpaceND(nd4j::LaunchContext* context, const NDArray& input, const NDArray& blockShape, const NDArray& crop, NDArray& output) {
// 4D example, numOfSpatialDims = 2 - two spatial dimensions
// [bS*blockShape[0]*blockShape[1], iH, iW, iC] is rearranged/permuted to [bS, iH*blockShape[0] - cropTop - cropBottom, iW*blockShape[1] - cropLeft - cropRight, iC]
const uint rank = input.rankOf();
const uint numOfSpatialDims = blockShape.sizeAt(0);
//*** construct reshaping std::vector for first reshape of input array ***//
std::vector<Nd4jLong> temp(numOfSpatialDims + rank);
int i;
for(i = 0; i < numOfSpatialDims; ++i)
temp[i] = blockShape.e<Nd4jLong>(i);
temp[i++] = output.sizeAt(0);
for(int j = 1; j < rank; ++i, ++j)
temp[i] = input.sizeAt(j);
NDArray inputRearranged0 = input.reshape(input.ordering(), temp);
//*** construct permuting std::vector for permutation of input array ***//
temp[0] = numOfSpatialDims;
for(i = 1; i <= numOfSpatialDims; ++i) {
temp[2*i - 1] = numOfSpatialDims + i;
temp[2*i] = i - 1;
}
for(i = 2 * numOfSpatialDims + 1; i < temp.size(); ++i)
temp[i] = i;
inputRearranged0.permutei(temp);
if(input.lengthOf() == output.lengthOf()) {
output.assign(inputRearranged0);
}
else {
//*** construct reshaping std::vector for second reshape of input array ***//
temp.resize(rank);
temp[0] = output.sizeAt(0);
for(i = 1; i < rank; ++i)
temp[i] = (i <= numOfSpatialDims) ? input.sizeAt(i) * blockShape.e<Nd4jLong>(i - 1) : input.sizeAt(i);
NDArray inputRearranged1 = inputRearranged0.reshape(input.ordering(), temp);
BUILD_SINGLE_SELECTOR(input.dataType(), batchToSpaceND_, (inputRearranged1, crop, output, numOfSpatialDims), LIBND4J_TYPES);
}
}
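
The two loops above build the block-first reshape and the interleaving permutation generically; for the common 4D case they reproduce exactly the hard-coded vectors used by the fixed-rank ops. A small worked sketch with the values inlined for rank = 4 and numOfSpatialDims = 2:

    // reshape:  [b0, b1, bS, iH, iW, iC]   (block dims first, then batch, then the rest)
    // permute:  {2, 3,0, 4,1, 5}       ->  [bS, iH, b0, iW, b1, iC]
    std::vector<Nd4jLong> perm(2 + 4);      // numOfSpatialDims + rank elements
    perm[0] = 2;                            // numOfSpatialDims
    for (int i = 1; i <= 2; ++i) {
        perm[2*i - 1] = 2 + i;              // spatial dimension
        perm[2*i]     = i - 1;              // its blockShape dimension
    }
    for (size_t i = 2*2 + 1; i < perm.size(); ++i)
        perm[i] = i;                        // trailing dims stay in place
    // perm == {2, 3, 0, 4, 1, 5}, the same permutation spaceToBatch hard-codes below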
 //////////////////////////////////////////////////////////////////////////
 template <typename T>
 static void spaceToBatch_(const NDArray& input, NDArray& output, const uint padBottom, const uint padTop, const uint padLeft, const uint padRight) {

     // input [bS, H * blockSize - padBottom - padTop, W * blockSize - padLeft - padRight, iC]
-    // output [bs, H * blockSize, W * blockSize, iC]
+    // output [bS, H * blockSize, W * blockSize, iC]

     // if (padTop = padBottom = padRight = padLeft = 0) shapes are the same
     // else:
@@ -145,26 +239,153 @@ void spaceToBatch(nd4j::LaunchContext* context, const NDArray& input, NDArray& o
     // [bS, iH, iW, iC] is rearranged/permuted to [bS*blockSize*blockSize, (iH + padBottom + padTop)/blockSize, (iW + padLeft + padRight)/blockSize, iC]

-    NDArray outputRearranged0 = output.reshape(output.ordering(), {blockSize, blockSize, input.sizeAt(0), output.sizeAt(1), output.sizeAt(2), input.sizeAt(3)});
+    NDArray outputRearranged0 = output.reshape(output.ordering(), {blockSize, blockSize, input.sizeAt(0), output.sizeAt(1), output.sizeAt(2), output.sizeAt(3)});
     outputRearranged0.permutei({2, 3,0, 4,1, 5});

     if(input.lengthOf() == output.lengthOf()) {
         outputRearranged0.assign(input);
     }
     else {
-        NDArray outputRearranged1 = outputRearranged0.reshape(output.ordering(), {input.sizeAt(0), output.sizeAt(1) * blockSize, output.sizeAt(2) * blockSize, input.sizeAt(3)});
+        NDArray outputRearranged1 = outputRearranged0.reshape(output.ordering(), {input.sizeAt(0), output.sizeAt(1) * blockSize, output.sizeAt(2) * blockSize, output.sizeAt(3)});
         BUILD_SINGLE_SELECTOR(input.dataType(), spaceToBatch_, (input, outputRearranged1, padBottom, padTop, padLeft, padRight), LIBND4J_TYPES);

         if(output.getBuffer() != outputRearranged1.getBuffer())
             outputRearranged0.assign(outputRearranged1);
     }
 }
//////////////////////////////////////////////////////////////////////////
template <typename T>
static void spaceToBatchND_(const NDArray& input, const NDArray& padding, NDArray& output, const uint numOfSpatialDims) {
// 4D example
// input [bS, H * blockShape[0] - padBottom - padTop, W * blockShape[1] - padLeft - padRight, iC]
// output [bS, H * blockShape[0], W * blockShape[1], iC]
// if (padTop = padBottom = padRight = padLeft = 0) shapes are the same
// else:
// iH -> [padBottom, oH - padTop]
// iW -> [padLeft, oW - padRight]
// zLen > xLen
const T* x = input.bufferAsT<T>();
T* z = output.bufferAsT<T>();
const int rank = input.rankOf();
const Nd4jLong zLen = output.lengthOf();
std::vector<Nd4jLong> coords(rank);
// loop through output array
PRAGMA_OMP_PARALLEL_FOR_ARGS(schedule(guided) firstprivate(coords))
for (Nd4jLong i = 0; i < zLen; ++i) {
shape::index2coords(rank, output.shapeOf(), i, zLen, coords.data());
const auto zOffset = shape::getOffset(0, output.shapeOf(), output.stridesOf(), coords.data(), rank);
bool within = true;
for(uint j = 1; j <= numOfSpatialDims; ++j) {
const auto padLeft = padding.e<uint>(j - 1, 0);
const auto padRight = padding.e<uint>(j - 1, 1);
within &= (coords[j] >= padLeft && coords[j] < output.sizeAt(j) - padRight);
if(!within)
break;
coords[j] -= padLeft; // get coordinates for x
}
if(within)
z[zOffset] = x[shape::getOffset(0, input.shapeOf(), input.stridesOf(), coords.data(), rank)];
else
z[zOffset] = 0.f;
}
}
BUILD_SINGLE_TEMPLATE(template void spaceToBatchND_, (const NDArray& input, const NDArray& padding, NDArray& output, const uint numOfSpatialDims), LIBND4J_TYPES);
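
Per output element, the loop above tests whether each spatial coordinate falls inside the padded border and either maps it back into the input or zero-fills. A minimal one-dimensional sketch of that test (the name mapToInput is illustrative):

    // outSize = inSize + padLeft + padRight; coords in [padLeft, outSize - padRight)
    // map into the input, everything else is zero-filled
    inline bool mapToInput(Nd4jLong& coord, Nd4jLong outSize, Nd4jLong padLeft, Nd4jLong padRight) {
        if (coord < padLeft || coord >= outSize - padRight)
            return false;     // inside the pad border -> caller writes 0
        coord -= padLeft;     // shift into input coordinates
        return true;
    }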
//////////////////////////////////////////////////////////////////////////
void spaceToBatchND(nd4j::LaunchContext* context, const NDArray& input, const NDArray& blockShape, const NDArray& padding, NDArray& output ) {
// 4D example with two spatial dimensions
// [bS, iH, iW, iC] is rearranged/permuted to [bS*blockShape[0]*blockShape[1], (iH + padBottom + padTop)/blockShape[0], (iW + padLeft + padRight)/blockShape[1], iC]
const uint rank = input.rankOf();
const uint numOfSpatialDims = blockShape.sizeAt(0);
//*** construct reshaping std::vector for first reshape of output array ***//
std::vector<Nd4jLong> temp(numOfSpatialDims + rank);
int i;
for(i = 0; i < numOfSpatialDims; ++i)
temp[i] = blockShape.e<Nd4jLong>(i);
temp[i++] = input.sizeAt(0);
for(int j = 1; j < rank; ++i, ++j)
temp[i] = output.sizeAt(j);
NDArray outputRearranged0 = output.reshape(output.ordering(), temp);
//*** construct permuting std::vector for permutation of output array ***//
temp[0] = numOfSpatialDims;
for(i = 1; i <= numOfSpatialDims; ++i) {
temp[2*i - 1] = numOfSpatialDims + i;
temp[2*i] = i - 1;
}
for(i = 2 * numOfSpatialDims + 1; i < temp.size(); ++i)
temp[i] = i;
outputRearranged0.permutei(temp);
// ****** //
if(input.lengthOf() == output.lengthOf()) {
outputRearranged0.assign(input);
}
else {
//*** construct reshaping std::vector for second reshape of output array ***//
temp.resize(rank);
temp[0] = input.sizeAt(0);
for(i = 1; i < rank; ++i)
temp[i] = (i <= numOfSpatialDims) ? output.sizeAt(i) * blockShape.e<Nd4jLong>(i - 1) : output.sizeAt(i);
NDArray outputRearranged1 = outputRearranged0.reshape(output.ordering(), temp);
BUILD_SINGLE_SELECTOR(input.dataType(), spaceToBatchND_, (input, padding, outputRearranged1, numOfSpatialDims), LIBND4J_TYPES);
if(output.getBuffer() != outputRearranged1.getBuffer())
outputRearranged0.assign(outputRearranged1);
}
}
 /*
     template <int N, bool B2S>
     struct SpaceToBatchHelper {


@@ -135,10 +135,10 @@ __global__ static void col2imCuda2(const void *columns, void *image, const Nd4jL
     for (int i = (blockDim.x * blockIdx.x) + threadIdx.x; i < n; i += blockDim.x * gridDim.x) {
         T val = 0;
         int w_im = i % iW + pW;
         int h_im = (i / iW) % iH + pH;
         int c_im = i / (iW * iH);
         int b = c_im / iC;
         int c = c_im % iC;


@@ -39,8 +39,7 @@ static __global__ void vol2colCuda(const void* volume, const Nd4jLong* volShapeI
     T* col = reinterpret_cast<T*>(columns);

     __shared__ int colRank, volRank;
-    __shared__ Nd4jLong colLen, iD, iH, iW;
-    __shared__ Nd4jLong *sharedMem;
+    __shared__ Nd4jLong colLen, iD, iH, iW, *sharedMem;

     if (threadIdx.x == 0) {
         extern __shared__ unsigned char shmem[];


@@ -38,13 +38,13 @@ __global__ static void batchToSpaceCuda(const void* vx, const Nd4jLong* xShapeIn
     // else:
     //     oH -> [cropBottom, iH - cropTop]
     //     oW -> [cropLeft, iW - cropRight]
-    // xLen > zLen
+    // xLen >= zLen

     const auto x = reinterpret_cast<const T*>(vx);
     auto z = reinterpret_cast<T*>(vz);

     __shared__ int rank;
-    __shared__ Nd4jLong zLen, totalThreads, *sharedMem;
+    __shared__ Nd4jLong zLen, *sharedMem;

     if (threadIdx.x == 0) {
         extern __shared__ unsigned char shmem[];
@@ -52,7 +52,6 @@ __global__ static void batchToSpaceCuda(const void* vx, const Nd4jLong* xShapeIn
         rank = shape::rank(zShapeInfo);
         zLen = shape::length(zShapeInfo);
-        totalThreads = gridDim.x * blockDim.x;
     }

     __syncthreads();
@@ -116,6 +115,139 @@ void batchToSpace(nd4j::LaunchContext* context, const NDArray& input, NDArray& o
     }
 }
///////////////////////////////////////////////////////////////////
template<typename X, typename Y>
__global__ static void batchToSpaceNDCuda(const void* vx, const Nd4jLong* xShapeInfo,
const void* vy, const Nd4jLong* yShapeInfo,
void* vz, const Nd4jLong* zShapeInfo,
const uint numOfSpatialDims) {
// 4D example, numOfSpatialDims = 2
// input [bS, H * blockShape[0], W * blockShape[1], iC]
// output [bS, H * blockShape[0] - cropBottom - cropTop, W * blockShape[1] - cropLeft - cropRight, iC]
// if (cropTop = cropBottom = cropRight = cropLeft = 0) shapes are the same
// else:
// oH -> [cropBottom, iH - cropTop]
// oW -> [cropLeft, iW - cropRight]
// xLen >= zLen
const auto x = reinterpret_cast<const X*>(vx);
const auto y = reinterpret_cast<const Y*>(vy);
auto z = reinterpret_cast<X*>(vz);
__shared__ int rank;
__shared__ Nd4jLong zLen, *sharedMem;
if (threadIdx.x == 0) {
extern __shared__ unsigned char shmem[];
sharedMem = reinterpret_cast<Nd4jLong*>(shmem);
rank = shape::rank(zShapeInfo);
zLen = shape::length(zShapeInfo);
}
__syncthreads();
auto coords = sharedMem + threadIdx.x * rank;
for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < zLen; i += gridDim.x * blockDim.x) {
shape::index2coords(rank, zShapeInfo + 1, i, zLen, coords);
const auto zOffset = shape::getOffset(0, zShapeInfo + 1, zShapeInfo + rank + 1, coords, rank);
// evaluate spatial coordinates for x
for(uint j = 1; j <= numOfSpatialDims; ++j) {
const auto yOffset = (j - 1) * yShapeInfo[3]; // yRank = 2, calculate offset manually
coords[j] += y[yOffset]; // add crop left
}
const auto xOffset = shape::getOffset(0, xShapeInfo + 1, xShapeInfo + rank + 1, coords, rank);
z[zOffset] = x[xOffset];
}
}
///////////////////////////////////////////////////////////////////
template<typename X,typename Y>
static void batchToSpaceNDCudaLauncher(const int blocksPerGrid, const int threadsPerBlock, const int sharedMem, const cudaStream_t *stream, const void* vx, const Nd4jLong* xShapeInfo, const void* vy, const Nd4jLong* yShapeInfo, void* vz, const Nd4jLong* zShapeInfo, const uint numOfSpatialDims) {
batchToSpaceNDCuda<X,Y><<<blocksPerGrid, threadsPerBlock, sharedMem, *stream>>>(vx, xShapeInfo, vy, yShapeInfo, vz, zShapeInfo, numOfSpatialDims);
}
BUILD_DOUBLE_TEMPLATE(template void batchToSpaceNDCudaLauncher, (const int blocksPerGrid, const int threadsPerBlock, const int sharedMem, const cudaStream_t *stream, const void* vx, const Nd4jLong* xShapeInfo, const void* vy, const Nd4jLong* yShapeInfo, void* vz, const Nd4jLong* zShapeInfo, const uint numOfSpatialDims), LIBND4J_TYPES, INTEGER_TYPES);
//////////////////////////////////////////////////////////////////////////
void batchToSpaceND(nd4j::LaunchContext* context, const NDArray& input, const NDArray& blockShape, const NDArray& crop, NDArray& output) {
// 4D example, numOfSpatialDims = 2 - two spatial dimensions
// [bS*blockShape[0]*blockShape[1], iH, iW, iC] is rearranged/permuted to [bS, iH*blockShape[0] - cropTop - cropBottom, iW*blockShape[1] - cropLeft - cropRight, iC]
const uint rank = input.rankOf();
const uint numOfSpatialDims = blockShape.sizeAt(0);
//*** construct reshaping std::vector for first reshape of input array ***//
std::vector<Nd4jLong> temp(numOfSpatialDims + rank);
int i;
for(i = 0; i < numOfSpatialDims; ++i)
temp[i] = blockShape.e<Nd4jLong>(i);
temp[i++] = output.sizeAt(0);
for(int j = 1; j < rank; ++i, ++j)
temp[i] = input.sizeAt(j);
NDArray inputRearranged0 = input.reshape(input.ordering(), temp);
//*** construct permuting std::vector for permutation of input array ***//
temp[0] = numOfSpatialDims;
for(i = 1; i <= numOfSpatialDims; ++i) {
temp[2*i - 1] = numOfSpatialDims + i;
temp[2*i] = i - 1;
}
for(i = 2 * numOfSpatialDims + 1; i < temp.size(); ++i)
temp[i] = i;
inputRearranged0.permutei(temp);
if(input.lengthOf() == output.lengthOf()) {
output.assign(inputRearranged0);
}
else {
//*** construct reshaping std::vector for second reshape of input array ***//
temp.resize(rank);
temp[0] = output.sizeAt(0);
for(i = 1; i < rank; ++i)
temp[i] = (i <= numOfSpatialDims) ? input.sizeAt(i) * blockShape.e<Nd4jLong>(i - 1) : input.sizeAt(i);
NDArray inputRearranged1 = inputRearranged0.reshape(input.ordering(), temp);
const int threadsPerBlock = MAX_NUM_THREADS / 4;
const int blocksPerGrid = (output.lengthOf() + threadsPerBlock - 1) / threadsPerBlock;
const int sharedMem = threadsPerBlock * sizeof(Nd4jLong) * output.rankOf() + 128;
PointersManager manager(context, "batchToSpaceND");
NDArray::prepareSpecialUse({&output}, {&inputRearranged1, &crop});
BUILD_DOUBLE_SELECTOR(input.dataType(), crop.dataType(), batchToSpaceNDCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), inputRearranged1.getSpecialBuffer(), inputRearranged1.getSpecialShapeInfo(), crop.getSpecialBuffer(), crop.getSpecialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), numOfSpatialDims), LIBND4J_TYPES, INTEGER_TYPES);
NDArray::registerSpecialUse({&output}, {&inputRearranged1, &crop});
manager.synchronize();
}
}
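
The launch configuration gives every thread its own rank-length coordinate scratch buffer in dynamic shared memory, which is why sharedMem scales with both threadsPerBlock and output.rankOf(). Restated with the variables used above, as a reading aid rather than new code:

    // one Nd4jLong coordinate per dimension, per thread, plus 128 bytes of slack
    const int threadsPerBlock = MAX_NUM_THREADS / 4;
    const int blocksPerGrid   = (output.lengthOf() + threadsPerBlock - 1) / threadsPerBlock;  // ceil division
    const int sharedMem       = threadsPerBlock * sizeof(Nd4jLong) * output.rankOf() + 128;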
 ///////////////////////////////////////////////////////////////////
 template<typename T>
 __global__ static void spaceToBatchCuda(const void* vx, const Nd4jLong* xShapeInfo, void* vz, const Nd4jLong* zShapeInfo, const uint padBottom, const uint padTop, const uint padLeft, const uint padRight) {
@@ -133,7 +265,7 @@ __global__ static void spaceToBatchCuda(const void* vx, const Nd4jLong* xShapeIn
     auto z = reinterpret_cast<T*>(vz);

     __shared__ int rank;
-    __shared__ Nd4jLong zLen, totalThreads, *sharedMem;
+    __shared__ Nd4jLong zLen, *sharedMem;

     if (threadIdx.x == 0) {
         extern __shared__ unsigned char shmem[];
@@ -141,7 +273,6 @@ __global__ static void spaceToBatchCuda(const void* vx, const Nd4jLong* xShapeIn
         rank = shape::rank(zShapeInfo);
         zLen = shape::length(zShapeInfo);
-        totalThreads = gridDim.x * blockDim.x;
     }

     __syncthreads();
@@ -210,6 +341,153 @@ void spaceToBatch(nd4j::LaunchContext* context, const NDArray& input, NDArray& o
     }
 }
///////////////////////////////////////////////////////////////////
template<typename X, typename Y>
__global__ static void spaceToBatchNDCuda(const void* vx, const Nd4jLong* xShapeInfo,
const void* vy, const Nd4jLong* yShapeInfo,
void* vz, const Nd4jLong* zShapeInfo,
const uint numOfSpatialDims) {
// x - input, y - padding, z - output
// 4D example
// input [bS, H * blockShape[0] - padBottom - padTop, W * blockShape[1] - padLeft - padRight, iC]
// output [bS, H * blockShape[0], W * blockShape[1], iC]
// if (padTop = padBottom = padRight = padLeft = 0) shapes are the same
// else:
// iH -> [padBottom, oH - padTop]
// iW -> [padLeft, oW - padRight]
// zLen > xLen
const auto x = reinterpret_cast<const X*>(vx);
const auto y = reinterpret_cast<const Y*>(vy);
auto z = reinterpret_cast<X*>(vz);
__shared__ int rank; // xRank = zRank, yRank = 2;
__shared__ Nd4jLong zLen, totalThreads, *sharedMem;
if (threadIdx.x == 0) {
extern __shared__ unsigned char shmem[];
sharedMem = reinterpret_cast<Nd4jLong*>(shmem);
rank = shape::rank(zShapeInfo);
zLen = shape::length(zShapeInfo);
totalThreads = gridDim.x * blockDim.x;
}
__syncthreads();
auto coords = sharedMem + threadIdx.x * rank;
for (int i = blockDim.x * blockIdx.x + threadIdx.x; i < zLen; i += totalThreads) {
shape::index2coords(rank, zShapeInfo + 1, i, zLen, coords);
const auto zOffset = shape::getOffset(0, zShapeInfo + 1, zShapeInfo + rank + 1, coords, rank);
bool within = true;
for(uint j = 1; j <= numOfSpatialDims; ++j) {
// yRank = 2, calculate offset manually
const auto yOffset = (j - 1) * yShapeInfo[3];
const auto padLeft = y[yOffset];
const auto padRight = y[yOffset + yShapeInfo[4]];
within &= (coords[j] >= padLeft && coords[j] < shape::shapeOf(const_cast<Nd4jLong*>(zShapeInfo))[j] - padRight);
if(!within)
break;
coords[j] -= padLeft; // get coordinates for x
}
if(within)
z[zOffset] = x[shape::getOffset(0, xShapeInfo + 1, xShapeInfo + rank + 1, coords, rank)];
else
z[zOffset] = 0.f;
}
}
///////////////////////////////////////////////////////////////////
template<typename X, typename Y>
static void spaceToBatchNDCudaLauncher(const int blocksPerGrid, const int threadsPerBlock, const int sharedMem, const cudaStream_t *stream, const void* vx, const Nd4jLong* xShapeInfo, const void* vy, const Nd4jLong* yShapeInfo, void* vz, const Nd4jLong* zShapeInfo, const uint numOfSpatialDims) {
spaceToBatchNDCuda<X,Y><<<blocksPerGrid, threadsPerBlock, sharedMem, *stream>>>(vx, xShapeInfo, vy, yShapeInfo, vz, zShapeInfo, numOfSpatialDims);
}
BUILD_DOUBLE_TEMPLATE(template void spaceToBatchNDCudaLauncher, (const int blocksPerGrid, const int threadsPerBlock, const int sharedMem, const cudaStream_t *stream, const void* vx, const Nd4jLong* xShapeInfo, const void* vy, const Nd4jLong* yShapeInfo, void* vz, const Nd4jLong* zShapeInfo, const uint numOfSpatialDims), LIBND4J_TYPES, INTEGER_TYPES);
//////////////////////////////////////////////////////////////////////////
void spaceToBatchND(nd4j::LaunchContext* context, const NDArray& input, const NDArray& blockShape, const NDArray& padding, NDArray& output ) {
// 4D example with two spatial dimensions
// [bS, iH, iW, iC] is rearranged/permuted to [bS*blockShape[0]*blockShape[1], (iH + padBottom + padTop)/blockShape[0], (iW + padLeft + padRight)/blockShape[1], iC]
const uint rank = input.rankOf();
const uint numOfSpatialDims = blockShape.sizeAt(0);
//*** construct reshaping std::vector for first reshape of output array ***//
std::vector<Nd4jLong> temp(numOfSpatialDims + rank);
int i;
for(i = 0; i < numOfSpatialDims; ++i)
temp[i] = blockShape.e<Nd4jLong>(i);
temp[i++] = input.sizeAt(0);
for(int j = 1; j < rank; ++i, ++j)
temp[i] = output.sizeAt(j);
NDArray outputRearranged0 = output.reshape(output.ordering(), temp);
//*** construct permuting std::vector for permutation of output array ***//
temp[0] = numOfSpatialDims;
for(i = 1; i <= numOfSpatialDims; ++i) {
temp[2*i - 1] = numOfSpatialDims + i;
temp[2*i] = i - 1;
}
for(i = 2 * numOfSpatialDims + 1; i < temp.size(); ++i)
temp[i] = i;
outputRearranged0.permutei(temp);
// ****** //
if(input.lengthOf() == output.lengthOf()) {
outputRearranged0.assign(input);
}
else {
//*** construct reshaping std::vector for second reshape of output array ***//
temp.resize(rank);
temp[0] = input.sizeAt(0);
for(i = 1; i < rank; ++i)
temp[i] = (i <= numOfSpatialDims) ? output.sizeAt(i) * blockShape.e<Nd4jLong>(i - 1) : output.sizeAt(i);
NDArray outputRearranged1 = outputRearranged0.reshape(output.ordering(), temp);
const int threadsPerBlock = MAX_NUM_THREADS / 4;
const int blocksPerGrid = (output.lengthOf() + threadsPerBlock - 1) / threadsPerBlock;
const int sharedMem = threadsPerBlock * sizeof(Nd4jLong) * output.rankOf() + 128;
PointersManager manager(context, "spaceToBatchND");
NDArray::prepareSpecialUse({&outputRearranged1}, {&input, &padding});
BUILD_DOUBLE_SELECTOR(input.dataType(), padding.dataType(), spaceToBatchNDCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), padding.getSpecialBuffer(), padding.getSpecialShapeInfo(), outputRearranged1.specialBuffer(), outputRearranged1.specialShapeInfo(), numOfSpatialDims), LIBND4J_TYPES, INTEGER_TYPES);
NDArray::registerSpecialUse({&outputRearranged1}, {&input, &padding});
manager.synchronize();
if(output.getSpecialBuffer() != outputRearranged1.getSpecialBuffer())
outputRearranged0.assign(outputRearranged1);
}
}
 /*
     template <int N, bool B2S>


@@ -31,6 +31,10 @@ namespace helpers {
     void spaceToBatch(nd4j::LaunchContext* context, const NDArray& input, NDArray& output, const uint padBottom, const uint padTop, const uint padLeft, const uint padRight, const uint blockSize);

+    void spaceToBatchND(nd4j::LaunchContext* context, const NDArray& input, const NDArray& blockShape, const NDArray& padding, NDArray& output);
+
+    void batchToSpaceND(nd4j::LaunchContext* context, const NDArray& input, const NDArray& blockShape, const NDArray& crop, NDArray& output);
+
 /*
     // this method MUST be platform-specific


@@ -689,3 +689,161 @@ TEST_F(DeclarableOpsTests13, cyclic_rshift_bits_1) {
     delete result;
 }
////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests13, space_to_batch_nd_1) {
NDArray x('c', {1, 2, 2, 2, 3}, nd4j::DataType::FLOAT32);
NDArray blockShape('c', {3}, {2, 2, 2} , nd4j::DataType::INT32); // three spatial dimensions
NDArray paddings('c', {3, 2}, {0, 0, 0, 0, 0, 0} , nd4j::DataType::INT32);
NDArray exp('c', {8, 1, 1, 1, 3}, nd4j::DataType::FLOAT32);
x.linspace(1);
exp.linspace(1);
nd4j::ops::space_to_batch_nd op;
auto result = op.execute({&x, &blockShape, &paddings}, {}, {});
ASSERT_EQ(Status::OK(), result->status());
auto z = result->at(0);
ASSERT_TRUE(exp.isSameShape(z));
ASSERT_TRUE(exp.equalsTo(z));
delete result;
}
////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests13, space_to_batch_nd_2) {
NDArray x('c', {2, 2,4,3, 1}, nd4j::DataType::FLOAT32);
NDArray blockShape('c', {3}, {2, 2, 3} , nd4j::DataType::INT32); // three spatial dimensions
NDArray paddings('c', {3, 2}, {0,0, 0,2, 2,1} , nd4j::DataType::INT32);
NDArray exp('c', {24, 1,3,2, 1}, { 0, 2, 0, 8, 0, 0, 0, 26, 0, 32, 0, 0, 0, 3, 0, 9, 0, 0, 0, 27, 0, 33, 0, 0, 1,
0, 7, 0, 0, 0, 25, 0, 31, 0, 0, 0, 0, 5, 0, 11, 0, 0, 0, 29, 0, 35, 0, 0, 0, 6,
0, 12, 0, 0, 0, 30, 0, 36, 0, 0, 4, 0, 10, 0, 0, 0, 28, 0, 34, 0, 0, 0, 0, 14,
0, 20, 0, 0, 0, 38, 0, 44, 0, 0, 0, 15, 0, 21, 0, 0, 0, 39, 0, 45, 0, 0, 13, 0,
19, 0, 0, 0, 37, 0, 43, 0, 0, 0, 0, 17, 0, 23, 0, 0, 0, 41, 0, 47, 0, 0, 0, 18,
0, 24, 0, 0, 0, 42, 0, 48, 0, 0, 16, 0, 22, 0, 0, 0, 40, 0, 46, 0, 0, 0}, nd4j::DataType::FLOAT32);
x.linspace(1);
nd4j::ops::space_to_batch_nd op;
auto result = op.execute({&x, &blockShape, &paddings}, {}, {});
ASSERT_EQ(Status::OK(), result->status());
auto z = result->at(0);
// z->printBuffer();
ASSERT_TRUE(exp.isSameShape(z));
ASSERT_TRUE(exp.equalsTo(z));
delete result;
}
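
A quick sanity check of the expected shape in this test against the shape-function arithmetic:

    // space_to_batch_nd_2 expected shape:
    //   batch:   2 * (2 * 2 * 3)  = 24
    //   spatial: (2 + 0 + 0) / 2  =  1
    //            (4 + 0 + 2) / 2  =  3
    //            (3 + 2 + 1) / 3  =  2
    //   channel: 1 (untouched)    ->  {24, 1,3,2, 1}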
////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests13, space_to_batch_nd_3) {
NDArray x('c', {2, 2,4,3, 1}, nd4j::DataType::FLOAT32);
NDArray blockShape('c', {3}, {2, 2, 3} , nd4j::DataType::INT32); // three spatial dimensions
NDArray paddings('c', {3, 2}, {1,1, 0,2, 2,1} , nd4j::DataType::INT32);
NDArray exp('c', {24, 2,3,2, 1}, { 0, 0, 0, 0, 0, 0, 0, 14, 0, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 38, 0, 44, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15,
0, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, 39, 0, 45, 0, 0, 0, 0, 0, 0, 0, 0, 13, 0, 19, 0, 0, 0, 0, 0, 0, 0,
0, 0, 37, 0, 43, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 17, 0, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 41, 0, 47, 0, 0,
0, 0, 0, 0, 0, 0, 0, 18, 0, 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 0, 48, 0, 0, 0, 0, 0, 0, 0, 0, 16, 0,
22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 40, 0, 46, 0, 0, 0, 0, 2, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 26, 0, 32,
0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27, 0, 33, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 0, 31, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 11, 0, 0, 0, 0, 0, 0,
0, 0, 0, 29, 0, 35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30, 0, 36, 0, 0,
0, 0, 0, 0, 0, 0, 4, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 28, 0, 34, 0, 0, 0, 0, 0, 0, 0, 0, 0}, nd4j::DataType::FLOAT32);
x.linspace(1);
nd4j::ops::space_to_batch_nd op;
auto result = op.execute({&x, &blockShape, &paddings}, {}, {});
ASSERT_EQ(Status::OK(), result->status());
auto z = result->at(0);
// z->printBuffer();
ASSERT_TRUE(exp.isSameShape(z));
ASSERT_TRUE(exp.equalsTo(z));
delete result;
}
////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests13, batch_to_space_nd_1) {
NDArray x('c', {8, 1, 1, 1, 3}, nd4j::DataType::FLOAT32);
NDArray blockShape('c', {3}, {2, 2, 2} , nd4j::DataType::INT32); // three spatial dimensions
NDArray crop('c', {3, 2}, {0, 0, 0, 0, 0, 0} , nd4j::DataType::INT32);
NDArray exp('c', {1, 2, 2, 2, 3}, nd4j::DataType::FLOAT32);
x.linspace(1);
exp.linspace(1);
nd4j::ops::batch_to_space_nd op;
auto result = op.execute({&x, &blockShape, &crop}, {}, {});
ASSERT_EQ(Status::OK(), result->status());
auto z = result->at(0);
ASSERT_TRUE(exp.isSameShape(z));
ASSERT_TRUE(exp.equalsTo(z));
delete result;
}
////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests13, batch_to_space_nd_2) {
NDArray x('c', {24, 1,3,2, 1}, nd4j::DataType::FLOAT32);
NDArray blockShape('c', {3}, {2, 2, 3} , nd4j::DataType::INT32); // three spatial dimensions
NDArray crop('c', {3, 2}, {0,0, 0,2, 2,1} , nd4j::DataType::INT32);
NDArray exp('c', {2, 2,4,3, 1}, {25, 2, 14, 61, 38, 50, 27, 4, 16, 63, 40, 52, 97, 74, 86, 133, 110, 122, 99, 76, 88, 135, 112, 124,
31, 8, 20, 67, 44, 56, 33, 10, 22, 69, 46, 58, 103, 80, 92, 139, 116, 128, 105, 82, 94, 141, 118, 130}, nd4j::DataType::FLOAT32);
x.linspace(1);
nd4j::ops::batch_to_space_nd op;
auto result = op.execute({&x, &blockShape, &crop}, {}, {});
ASSERT_EQ(Status::OK(), result->status());
auto z = result->at(0);
// z->printBuffer();
ASSERT_TRUE(exp.isSameShape(z));
ASSERT_TRUE(exp.equalsTo(z));
delete result;
}
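
And the inverse arithmetic for this batch_to_space_nd test, per its shape function:

    // batch_to_space_nd_2 expected shape:
    //   batch:   24 / (2 * 2 * 3)  = 2
    //   spatial: 1 * 2 - 0 - 0     = 2
    //            3 * 2 - 0 - 2     = 4
    //            2 * 3 - 2 - 1     = 3
    //   channel: 1 (untouched)     ->  {2, 2,4,3, 1}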
////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests13, batch_to_space_nd_3) {
NDArray x('c', {24, 2,3,2, 1}, nd4j::DataType::FLOAT32);
NDArray blockShape('c', {3}, {2, 2, 3} , nd4j::DataType::INT32); // three spatial dimensions
NDArray crop('c', {3, 2}, {1,1, 0,2, 2,1} , nd4j::DataType::INT32);
NDArray exp('c', {2, 2,4,3, 1}, {193, 146, 170, 265, 218, 242, 195, 148, 172, 267, 220, 244, 55, 8, 32, 127, 80, 104, 57, 10, 34, 129, 82,
106, 205, 158, 182, 277, 230, 254, 207, 160, 184, 279, 232, 256, 67, 20, 44, 139, 92, 116, 69, 22, 46, 141, 94, 118}, nd4j::DataType::FLOAT32);
x.linspace(1);
nd4j::ops::batch_to_space_nd op;
auto result = op.execute({&x, &blockShape, &crop}, {}, {});
ASSERT_EQ(Status::OK(), result->status());
auto z = result->at(0);
// z->printBuffer();
ASSERT_TRUE(exp.isSameShape(z));
ASSERT_TRUE(exp.equalsTo(z));
delete result;
}