From e50b285c2c572a2e361da15ce9a45ccaa75dede5 Mon Sep 17 00:00:00 2001 From: shugeo Date: Wed, 22 Jan 2020 09:46:33 +0200 Subject: [PATCH] Shugeo resize area (#162) * Added implementation for resize_area op. Initial commit. * Added implementation of resize_area op. Initial revision. Signed-off-by: shugeo * Corrected resizeArea functor call. Signed-off-by: shugeo * Implementation of resize_area. Cpu platform helpers. Signed-off-by: shugeo * Implementation for resize_area helpers. The first part revision. Signed-off-by: shugeo * Added a set of tests for resize_area op. Signed-off-by: shugeo * Cuda implementation for resize_area. Initial approach. Signed-off-by: shugeo * Adding multithreading for resize_area algorithm. Signed-off-by: shugeo * Cuda implementation of resize_area helpers. Shared memory approach. Signed-off-by: shugeo * Refactored resizeAreaKernel with cuda implementation. * Eliminated compilation errors. * ResizeArea helpers for cuda platform. The first working revision. Signed-off-by: shugeo * Added test for batched resize_area op testing. Signed-off-by: shugeo * Implementation of resize_area for cuda platform and tests. Signed-off-by: shugeo * Fixed multithreading with resize_area op helper. Signed-off-by: shugeo * Corrected copyright marks with sources. Signed-off-by: shugeo * Corrected copyright mark for resize_area op implementation. Signed-off-by: shugeo * Corrected copyright mark for parity ops header. Signed-off-by: shugeo * Corrected typo in strings and so on with image resize ops. Signed-off-by: shugeo * Refactored resize_area helpers and multithreading. Signed-off-by: shugeo * Added ResizeArea wrapper * Added test with align_corners and fixed shape processing with only int args given for output size. Signed-off-by: shugeo * Added test * TF mapping for ResizeArea * Fixed implementation issues with resize_area op for both platforms. Signed-off-by: shugeo * Refactored image resizer struct to use flexible types for ints and floats. 
Signed-off-by: shugeo * Improved multithreading with resizeAreaKernel launch. Signed-off-by: shugeo * Use asynchronous memory copying with cuda platform image resize allocations. Signed-off-by: shugeo Co-authored-by: Alexander Stoyakin --- .../generic/parity_ops/resize_area.cpp | 122 +++++++ .../generic/parity_ops/resize_bicubic.cpp | 6 +- .../generic/parity_ops/resize_linear.cpp | 1 + .../generic/parity_ops/resize_neighbor.cpp | 7 +- .../ops/declarable/headers/parity_ops.h | 23 +- .../declarable/helpers/cpu/image_resize.cpp | 234 +++++++++++++- .../declarable/helpers/cuda/image_resize.cu | 234 +++++++++++++- .../ops/declarable/helpers/image_resize.h | 3 + .../layers_tests/DeclarableOpsTests11.cpp | 298 ++++++++++++++++++ .../layers_tests/DeclarableOpsTests15.cpp | 14 + .../converters/ImportClassMapping.java | 1 + .../linalg/api/ops/impl/image/ResizeArea.java | 114 +++++++ .../nd4j/linalg/custom/CustomOpsTests.java | 28 ++ 13 files changed, 1061 insertions(+), 24 deletions(-) create mode 100644 libnd4j/include/ops/declarable/generic/parity_ops/resize_area.cpp create mode 100644 nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/image/ResizeArea.java diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/resize_area.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/resize_area.cpp new file mode 100644 index 000000000..b0f637c45 --- /dev/null +++ b/libnd4j/include/ops/declarable/generic/parity_ops/resize_area.cpp @@ -0,0 +1,122 @@ +/******************************************************************************* + * Copyright (c) 2019-2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. 
+ * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author sgazeos@gmail.com +// + +#include +#if NOT_EXCLUDED(OP_resize_area) + +#include +#include + +namespace nd4j { + namespace ops { + CUSTOM_OP_IMPL(resize_area, 1, 1, false, 0, -2) { + + auto image = INPUT_VARIABLE(0); + int width; + int height; + + if (block.width() == 2) { + auto size = INPUT_VARIABLE(1); // integer vector with shape {2} and content (new_height, new_width) + REQUIRE_TRUE(size->rankOf() == 1, size->lengthOf() == 2, 0, "resize_area: Resize params is a pair of values, not %i.", size->lengthOf()); + size->syncToHost(); + width = size->e(1); + height = size->e(0); + } + else { + REQUIRE_TRUE(block.numI() == 2, 0, "resize_area: Resize params already given by the second param. 
Int params are expensive."); + width = INT_ARG(1); + height = INT_ARG(0); + } + + auto output = OUTPUT_VARIABLE(0); + if (output->isEmpty()) return Status::OK(); + auto inRank = image->rankOf(); + + REQUIRE_TRUE(inRank == 3 || inRank == 4, 0, "resize_area: Source tensor should have rank 4, but %i given.", inRank); + REQUIRE_TRUE(output->rankOf() == inRank, 0, "resize_area: Source tensor and output should have the same rank, but %i and %i given.", inRank, output->rankOf()); + REQUIRE_TRUE(width > 0 , 0, "resize_area: picture width should be positive 32 bit integer, but %i given", width); + REQUIRE_TRUE(height > 0 , 0, "resize_area: picture height should be positive 32 bit integer, but %i given", height); + REQUIRE_TRUE(image->lengthOf() > 0, 0, "resize_area: Only non-zero images allowed to processing."); + + auto alignCorners = false; + if (block.numB() > 0) { + alignCorners = B_ARG(0); + } + + auto source = inRank == 4?image->reshape(image->ordering(), {image->sizeAt(0), image->sizeAt(1), image->sizeAt(2), image->sizeAt(3)}):image->reshape(image->ordering(), {1, image->sizeAt(0), image->sizeAt(1), image->sizeAt(2)}); + auto target = inRank == 4?output->reshape(output->ordering(), {output->sizeAt(0), output->sizeAt(1), output->sizeAt(2), output->sizeAt(3)}):output->reshape(output->ordering(), {1, output->sizeAt(0), output->sizeAt(1), output->sizeAt(2)}); + + return helpers::resizeAreaFunctor(block.launchContext(), &source, width, height, alignCorners, &target); + } + + DECLARE_SHAPE_FN(resize_area) { + auto shapeList = SHAPELIST(); + auto in = inputShape->at(0); + + Nd4jLong* outputShape; + auto inRank = shape::rank(in); + int width; + int height; + if (block.width() == 2) { + auto newImageSize = INPUT_VARIABLE(1); + REQUIRE_TRUE(newImageSize->lengthOf() == 2, 0, + "resize_area: Resize params is a pair of values, not %i.", newImageSize->lengthOf()); + REQUIRE_TRUE(block.numI() <= 1, 0, + "resize_area: Resize params already given by the second param. 
Int params are expensive."); + width = newImageSize->e(0); + height = newImageSize->e(1); + } + else { + REQUIRE_TRUE(block.numI() == 2, 0, "resize_area: Resize params ommited as pair ints nor int tensor."); + width = INT_ARG(1); + height = INT_ARG(0); + } + + REQUIRE_TRUE(inRank == 4 || inRank == 3, 0, "resize_area: Source tensor should have rank 4, but %i given.", inRank); + + ALLOCATE(outputShape, block.getWorkspace(), shape::shapeInfoLength(inRank), Nd4jLong); + outputShape[0] = inRank; + if (inRank == 4) { + outputShape[1] = in[1]; + outputShape[2] = width; + outputShape[3] = height; + outputShape[4] = in[4]; + } + else { + outputShape[1] = width; + outputShape[2] = height; + outputShape[3] = in[3]; + } + ShapeUtils::updateStridesAndType(outputShape, DataType::FLOAT32, shape::order(in)); + + shapeList->push_back(CONSTANT(outputShape)); + return shapeList; + } + DECLARE_TYPES(resize_area) { + getOpDescriptor() + ->setAllowedInputTypes(0, {ALL_FLOATS, ALL_INTS}) + ->setAllowedInputTypes(1, DataType::INT32) + ->setAllowedOutputTypes({DataType::FLOAT32}); + } + + } +} + +#endif \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/resize_bicubic.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/resize_bicubic.cpp index da98c1702..26ca7eec9 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/resize_bicubic.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/resize_bicubic.cpp @@ -1,5 +1,5 @@ /******************************************************************************* - * Copyright (c) 2019 Konduit K.K. + * Copyright (c) 2019-2020 Konduit K.K. 
* * This program and the accompanying materials are made available under the * terms of the Apache License, Version 2.0 which is available at @@ -76,8 +76,8 @@ namespace nd4j { int width; int height; auto newImageSize = INPUT_VARIABLE(1); - REQUIRE_TRUE(newImageSize->lengthOf() == 2, 0, "resize_bilinear: Resize params is a pair of values, not %i.", newImageSize->lengthOf()); - REQUIRE_TRUE(block.numI() <= 1, 0, "resize_bilinear: Resize params already given by the second param. Int params are expensive."); + REQUIRE_TRUE(newImageSize->lengthOf() == 2, 0, "resize_bicubic: Resize params is a pair of values, not %i.", newImageSize->lengthOf()); + REQUIRE_TRUE(block.numI() <= 1, 0, "resize_bicubic: Resize params already given by the second param. Int params are expensive."); width = newImageSize->e(0); height = newImageSize->e(1); diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/resize_linear.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/resize_linear.cpp index f1f79b08f..652b78cf1 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/resize_linear.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/resize_linear.cpp @@ -1,5 +1,6 @@ /******************************************************************************* * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019-2020 Konduit K.K. * * This program and the accompanying materials are made available under the * terms of the Apache License, Version 2.0 which is available at diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/resize_neighbor.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/resize_neighbor.cpp index 6c18e61e1..db477f569 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/resize_neighbor.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/resize_neighbor.cpp @@ -1,5 +1,6 @@ /******************************************************************************* * Copyright (c) 2015-2018 Skymind, Inc. 
+ * Copyright (c) 2019-2020 Konduit K.K. * * This program and the accompanying materials are made available under the * terms of the Apache License, Version 2.0 which is available at @@ -19,7 +20,7 @@ // #include -#if NOT_EXCLUDED(OP_resize_bilinear) +#if NOT_EXCLUDED(OP_resize_nearest_neighbor) //#include #include @@ -54,7 +55,7 @@ namespace nd4j { if (block.numB() > 1) halfPixelCenter = B_ARG(1); - REQUIRE_TRUE(width <= (1 << 24) || height <= (1 << 24), 0, "resize_nearest_neighbour: the image resize should be limited to 2^24 pixels both for height and width, but %d and %d were given.", height, width); + REQUIRE_TRUE(width <= (1 << 24) || height <= (1 << 24), 0, "resize_nearest_neighbor: the image resize should be limited to 2^24 pixels both for height and width, but %d and %d were given.", height, width); REQUIRE_TRUE(inRank == 4 || inRank == 3, 0, "resize_nearest_neighbor: Input should be 4D tensor, but rank %i occured"); REQUIRE_TRUE(inRank == output->rankOf(), 0, "resize_nearest_neighbor: Input and output ranks should be equals, but %i and %i occured.", inRank, output->rankOf()); REQUIRE_TRUE(image->dataType() == output->dataType(), 0, "resize_nearest_neighbor: Input and output types should be the same, but `%s' occured instead.", DataTypeUtils::asString(output->dataType()).c_str()); @@ -73,7 +74,7 @@ namespace nd4j { auto inRank = shape::rank(in); Nd4jLong* outputShape; - REQUIRE_TRUE(inRank == 4 || inRank == 3, 0, "resize_bilinear: input image should be 4D " + REQUIRE_TRUE(inRank == 4 || inRank == 3, 0, "resize_nearest_neighbor: input image should be 4D " "tensor, but input has rank %i", inRank); diff --git a/libnd4j/include/ops/declarable/headers/parity_ops.h b/libnd4j/include/ops/declarable/headers/parity_ops.h index c0bf2ab60..4a1f85130 100644 --- a/libnd4j/include/ops/declarable/headers/parity_ops.h +++ b/libnd4j/include/ops/declarable/headers/parity_ops.h @@ -1,6 +1,6 @@ /******************************************************************************* * 
Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. + * Copyright (c) 2019-2020 Konduit K.K. * * This program and the accompanying materials are made available under the * terms of the Apache License, Version 2.0 which is available at @@ -1722,6 +1722,27 @@ namespace nd4j { DECLARE_CUSTOM_OP(resize_bicubic, 1, 1, false, 0, -2); #endif + /** + * This op makes an area-interpolated resize (as OpenCV INTER_AREA algorithm) for given tensor + * + * input array: + * 0 - images - 4D-Tensor with shape (batch, sizeX, sizeY, channels) + * 1 - size - 1D-Tensor with 2 values (newWidth, newHeight) (if missing a pair of integer args should be provided). + * + * int args: - provided only when size tensor is missing + * 0 - new height + * 1 - new width + * boolean args: + * 0 - align_corners - optional (default is false) + * + * output array: + * the 4D-Tensor with resized image (shape is {batch, newWidth, newHeight, channels}) + * + */ + #if NOT_EXCLUDED(OP_resize_area) + DECLARE_CUSTOM_OP(resize_area, 1, 1, false, 0, -2); + #endif + /** * This op make interpolated resize for given tensor with given algorithm. * Supported algorithms are bilinear, bicubic, nearest_neighbor. diff --git a/libnd4j/include/ops/declarable/helpers/cpu/image_resize.cpp b/libnd4j/include/ops/declarable/helpers/cpu/image_resize.cpp index ced05ceaa..d4089359f 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/image_resize.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/image_resize.cpp @@ -1,6 +1,6 @@ /******************************************************************************* * Copyright (c) 2015-2018 Skymind, Inc. - * Copyright (c) 2019 Konduit K.K. + * Copyright (c) 2019-2020 Konduit K.K. * * This program and the accompanying materials are made available under the * terms of the Apache License, Version 2.0 which is available at @@ -35,6 +35,8 @@ limitations under the License. 
#include #include +#include +#include "../cross.h" namespace nd4j { namespace ops { @@ -55,8 +57,9 @@ namespace helpers { : inSize / static_cast(outSize); } - struct ImageResizerState { - explicit ImageResizerState(bool alignCorners, bool halfPixelCenters) + template + struct ImageResizerStateCommon { + explicit ImageResizerStateCommon(bool alignCorners, bool halfPixelCenters) : _alignCorners(alignCorners), _halfPixelCenters(halfPixelCenters) {} @@ -94,14 +97,14 @@ namespace helpers { return validateAndCalculateOutputSize(input, width, height); } - Nd4jLong batchSize; - Nd4jLong outHeight; - Nd4jLong outWidth; - Nd4jLong inHeight; - Nd4jLong inWidth; - Nd4jLong channels; - float heightScale; - float widthScale; + I batchSize; + I outHeight; + I outWidth; + I inHeight; + I inWidth; + I channels; + F heightScale; + F widthScale; NDArray* output = nullptr; private: @@ -109,6 +112,8 @@ namespace helpers { bool _halfPixelCenters; }; + typedef ImageResizerStateCommon ImageResizerState; + // Half pixel scaler scales assuming that the pixel centers are at 0.5, i.e. the // floating point coordinates of the top,left pixel is 0.5,0.5. struct HalfPixelScaler { @@ -255,7 +260,7 @@ namespace helpers { // Handle no-op resizes efficiently. 
if (outHeight == inHeight && outWidth == inWidth) { output->assign(images); - return ND4J_STATUS_OK; + return Status::OK(); } std::vector ys(outHeight + 1); @@ -283,7 +288,7 @@ namespace helpers { samediff::Threads::parallel_for(func, 0, xsSize); resizeImage_(images->getDataBuffer()->primaryAsT(), batchSize, inHeight, inWidth, outHeight, outWidth, channels, xs, ys, output->dataBuffer()->primaryAsT()); - return ND4J_STATUS_OK; + return Status::OK(); } template @@ -353,6 +358,7 @@ namespace helpers { int resizeBilinearFunctor(nd4j::LaunchContext * context, NDArray const *images, int const width, int const height, bool const alignCorners, bool const halfPixelCenter, NDArray *output) { BUILD_DOUBLE_SELECTOR(images->dataType(), output->dataType(), return resizeBilinearFunctor_, (images, width, height, alignCorners, halfPixelCenter, output), NUMERIC_TYPES, FLOAT_TYPES); + return Status::OK(); } int resizeNeighborFunctor(nd4j::LaunchContext * context, NDArray const *images, int const width, int const height, @@ -883,6 +889,206 @@ namespace helpers { bool const alignCorners, bool const halfPixelAlign, NDArray* output) { BUILD_SINGLE_SELECTOR(image->dataType(), return resizeBicubicFunctorA_, (context, image, width, height, alignCorners, halfPixelAlign, output), NUMERIC_TYPES); } +// ------------------------------------------------------------------------------------------------------------------ // + struct CachedInterpolation { + Nd4jLong start; + Nd4jLong end; + float startScale; + float endMinusOneScale; + bool needsBounding; + }; + + template + struct ScaleCache { + float yScale; + T const* yPtr; + }; + // Computes the sum of all x values defined by taken across + // the y offsets and scales defined by y_ptrs and y_scales, for channel c. + // + // Note that is a template parameter to avoid a performance + // penalty from dynamically checking it. 
+ template + static void computePatchSumOf3Channels(float scale, + ImageResizerState const& st, + std::vector> const& yPtrs, + CachedInterpolation const& xCache, + float* outputPtr) { + + bool const needsXBounding = xCache.needsBounding; + + auto boundIfNeeded = [needsXBounding](Nd4jLong x, Nd4jLong y) -> Nd4jLong { + return (needsXBounding ? bound(x, y) : (x)); + }; + + float sum_0 = 0; + float sum_1 = 0; + float sum_2 = 0; + for (int i = 0; i < yPtrs.size(); ++i) { + const T* ptr = yPtrs[i].yPtr; + float scaleX = xCache.startScale; + Nd4jLong offset = 3 * boundIfNeeded(xCache.start, st.inWidth); + float sum_y_0 = static_cast(ptr[offset + 0]) * scaleX; + float sum_y_1 = static_cast(ptr[offset + 1]) * scaleX; + float sum_y_2 = static_cast(ptr[offset + 2]) * scaleX; + + if (xCache.start + 1 != xCache.end) { + for (Nd4jLong x = xCache.start + 1; x < xCache.end - 1; ++x) { + Nd4jLong offset = 3 * boundIfNeeded(x, st.inWidth); + sum_y_0 += static_cast(ptr[offset + 0]); + sum_y_1 += static_cast(ptr[offset + 1]); + sum_y_2 += static_cast(ptr[offset + 2]); + } + scaleX = xCache.endMinusOneScale; + offset = st.channels * boundIfNeeded(xCache.end - 1, st.inWidth); + sum_y_0 += static_cast(ptr[offset + 0]) * scaleX; + sum_y_1 += static_cast(ptr[offset + 1]) * scaleX; + sum_y_2 += static_cast(ptr[offset + 2]) * scaleX; + } + sum_0 += sum_y_0 * yPtrs[i].yScale; + sum_1 += sum_y_1 * yPtrs[i].yScale; + sum_2 += sum_y_2 * yPtrs[i].yScale; + } + + outputPtr[0] = sum_0 * scale; + outputPtr[1] = sum_1 * scale; + outputPtr[2] = sum_2 * scale; + } + + // Computes the sum of all x values defined by taken across + // the y offsets and scales defined by y_ptrs and y_scales, for channel c. + // + // Note that is a template parameter to avoid a performance + // penalty from dynamically checking it. 
+ template + static void computePatchSum(float scale, const ImageResizerState& st, + const std::vector>& yPtrs, + const CachedInterpolation& xCache, + float* outputPtr) { + + bool const needsXBounding = xCache.needsBounding; + + auto boundIfNeeded = [needsXBounding](Nd4jLong x, Nd4jLong y) -> Nd4jLong { + return (needsXBounding ? bound(x, y) : (x)); + }; + + const auto numChannels = st.channels; + for (Nd4jLong c = 0; c < numChannels; ++c) { + float sum = 0; + for (int i = 0; i < yPtrs.size(); ++i) { + T const* ptr = yPtrs[i].yPtr; + float scaleX = xCache.startScale; + float sumY = static_cast(ptr[numChannels * boundIfNeeded(xCache.start, st.inWidth) + c]) * scaleX; + if (xCache.start + 1 != xCache.end) { + for (Nd4jLong x = xCache.start + 1; x < xCache.end - 1; ++x) { + sumY += static_cast( + ptr[numChannels * boundIfNeeded(x, st.inWidth) + c]); + } + scaleX = xCache.endMinusOneScale; + sumY += static_cast(ptr[numChannels * boundIfNeeded(xCache.end - 1, st.inWidth) + c]) * scaleX; + } + sum += sumY * yPtrs[i].yScale; + } + outputPtr[c] = sum * scale; + } + } + + + + template + static void resizeArea(ImageResizerState const& st, std::vector const& caches, NDArray const* input, NDArray* output) { + T const* inputPtr = input->bufferAsT(); + float scale = 1.f / (st.heightScale * st.widthScale); + auto outputPtr = output->bufferAsT(); // output is always float. TO DO: provide another float types also with template declaration + + auto batchProcess = PRAGMA_THREADS_FOR { + for (auto batch = start; batch < stop; batch += increment) { + for (auto y = 0; y < st.outHeight; ++y) { + const float inY = y * st.heightScale; + const float inY1 = (y + 1) * st.heightScale; + // The start and end height indices of all the cells that could + // contribute to the target cell. 
+ const Nd4jLong yStart = math::nd4j_floor(inY); + const Nd4jLong yEnd = math::nd4j_ceil(inY1); + + std::vector> yCaches; + auto cacheLen = yEnd - yStart; + if (cacheLen) { + yCaches.resize(cacheLen); + }; + + for (auto i = yStart, k = 0LL; i < yEnd; ++i, ++k) { + ScaleCache scaleCache; + if (i < inY) { + scaleCache.yScale = (i + 1 > inY1 ? st.heightScale : i + 1 - inY); + } else { + scaleCache.yScale = (i + 1 > inY1 ? inY1 - i : 1.0); + } + scaleCache.yPtr = inputPtr + (batch * st.inHeight * st.inWidth * st.channels + + bound(i, st.inHeight) * st.inWidth * st.channels); + yCaches[k] = scaleCache; + } + float* output = outputPtr + (batch * st.outHeight + y) * st.channels * st.outWidth; + + if (st.channels == 3) { + for (Nd4jLong x = 0; x < st.outWidth; ++x) { + const CachedInterpolation &xCache = caches[x]; + computePatchSumOf3Channels(scale, st, yCaches, xCache, output); + output += st.channels; + } + } else { + for (Nd4jLong x = 0; x < st.outWidth; ++x) { + const CachedInterpolation &xCache = caches[x]; + computePatchSum(scale, st, yCaches, xCache, output); + output += st.channels; + } + } + } + } + }; + samediff::Threads::parallel_tad(batchProcess, 0, st.batchSize, 1); + } + + template + int resizeAreaFunctor_(nd4j::LaunchContext* context, NDArray const* image, int const width, int const height, + bool const alignCorners, NDArray* output) { + ImageResizerState st(alignCorners, false); // Create resize info + auto res = st.validateAndCalculateOutputSize(image, width, height); + if (Status::OK() == res) { + std::vector xCached(st.outWidth); + auto cachingProcedure = PRAGMA_THREADS_FOR { + for (auto x = start; x < stop; x += increment) { + auto &xCache = xCached[x]; + const float inX = x * st.widthScale; + const float inX1 = (x + 1) * st.widthScale; + + Nd4jLong v = math::nd4j_floor(inX); + xCache.start = v; + xCache.startScale = + v < inX ? (v + 1 > inX1 ? st.widthScale : v + 1 - inX) : (v + 1 > inX1 ? 
inX1 - v + : 1.f); + v = math::nd4j_ceil(inX1); + xCache.end = v--; + xCache.endMinusOneScale = + v < inX ? (v + 1 > inX1 ? st.widthScale : v + 1 - inX) : (v + 1 > inX1 ? inX1 - v + : 1.f); + xCache.needsBounding = bound(xCache.start, st.inWidth) != xCache.start || + bound(xCache.end - 1, st.inWidth) != (xCache.end - 1); + + } + }; + samediff::Threads::parallel_for(cachingProcedure, 0, xCached.size(), 1); + + resizeArea(st, xCached, image, output); + } + return res; + } + + int resizeAreaFunctor(nd4j::LaunchContext * context, NDArray const* image, int const width, int const height, + bool const alignCorners, NDArray* output) { + BUILD_SINGLE_SELECTOR(image->dataType(), return resizeAreaFunctor_, (context, image, width, height, alignCorners, output), NUMERIC_TYPES); + } + // ------------------------------------------------------------------------------------------------------------------ // int resizeFunctor(nd4j::LaunchContext * context, NDArray const* image, int const width, int const height, ImageResizeMethods method, bool preserveAspectRatio, bool antialias, NDArray* output) { @@ -890,9 +1096,9 @@ namespace helpers { case kResizeBilinear: return resizeBilinearFunctor(context, image, width, height, false, false, output); break; case kResizeNearest: return resizeNeighborFunctor(context, image, width, height, false, false, output); break; case kResizeBicubic: return resizeBicubicFunctor(context, image, width, height, preserveAspectRatio, antialias, output); break; + case kResizeArea: return resizeAreaFunctor(context, image, width, height, preserveAspectRatio, output); case kResizeLanczos5: case kResizeGaussian: - case kResizeArea: case kResizeMitchelcubic: throw std::runtime_error("helper::resizeFunctor: Non implemented yet."); } diff --git a/libnd4j/include/ops/declarable/helpers/cuda/image_resize.cu b/libnd4j/include/ops/declarable/helpers/cuda/image_resize.cu index ffa741509..1dcc6fd00 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/image_resize.cu +++ 
b/libnd4j/include/ops/declarable/helpers/cuda/image_resize.cu @@ -1,5 +1,6 @@ /******************************************************************************* * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019-2020 Konduit K.K. * * This program and the accompanying materials are made available under the * terms of the Apache License, Version 2.0 which is available at @@ -29,7 +30,7 @@ limitations under the License. ==============================================================================*/ // -// @author sgazeos@gmail.com +// @author George A. Shulinok // #include @@ -639,7 +640,7 @@ namespace helpers { if (err != 0) { cuda_exception::build("helpers::computeXWeightsAndIndices: Cannot allocated device memory for interpolate calculator", err); } - err = cudaMemcpy(pCalcD, &calc, sizeof(CachedInterpolationCalculator), cudaMemcpyHostToDevice); + err = cudaMemcpyAsync(pCalcD, &calc, sizeof(CachedInterpolationCalculator), cudaMemcpyHostToDevice, *stream); if (err != 0) { cuda_exception::build("helpers::computeXWeightsAndIndices: Cannot set up device memory for interpolate calculator", err); } @@ -847,7 +848,7 @@ namespace helpers { if (err != 0) { throw cuda_exception::build("helpers::bicubicInterpolateWithCaching: Cannot allocate memory for resizerState", err); } - err = cudaMemcpy(resizerStateD, &resizerState, sizeof(ImageResizerState), cudaMemcpyHostToDevice); + err = cudaMemcpyAsync(resizerStateD, &resizerState, sizeof(ImageResizerState), cudaMemcpyHostToDevice, *stream); if (err != 0) { throw cuda_exception::build("helpers::bicubicInterpolateWithCaching: Cannot set up memory for resizerState", err); } @@ -927,6 +928,233 @@ namespace helpers { BUILD_SINGLE_TEMPLATE(template int resizeBicubicFunctor_, (nd4j::LaunchContext * context, NDArray const* image, int width, int height, bool preserveAspectRatio, bool antialias, NDArray* output), NUMERIC_TYPES); // 
------------------------------------------------------------------------------------------------------------------ // + struct CachedInterpolation { + Nd4jLong start; + Nd4jLong end; + float startScale; + float endMinusOneScale; + bool needsBounding; + }; + + static __global__ void fillInterpolationCache(CachedInterpolation* xCached, Nd4jLong cacheLen, Nd4jLong inWidth, float widthScale) { + auto start = blockIdx.x * blockDim.x + threadIdx.x; + auto increment = blockDim.x * gridDim.x; + + for (auto x = start; x < cacheLen; x += increment) { + auto& xCache = xCached[x]; + const float inX = x * widthScale; + const float inX1 = (x + 1) * widthScale; + + Nd4jLong v = math::nd4j_floor(inX); + xCache.start = v; + xCache.startScale = v < inX ? (v + 1 > inX1 ? widthScale : v + 1 - inX) : (v + 1 > inX1 ? inX1 - v : 1.f); + v = math::nd4j_ceil(inX1); + xCache.end = v--; + xCache.endMinusOneScale = v < inX ? (v + 1 > inX1 ? widthScale : v + 1 - inX) : (v + 1 > inX1 ? inX1 - v : 1.f); + xCache.needsBounding = bound(xCache.start, inWidth) != xCache.start || bound(xCache.end - 1, inWidth) != (xCache.end - 1); + } + } + +// ------------------------------------------------------------------------------------------------------------------ // + template + struct ScaleCache { + float yScale; + T const* yPtr; + }; + + // Computes the sum of all x values defined by taken across + // the y offsets and scales defined by y_ptrs and y_scales, for channel c. + // + // Note that is a template parameter to avoid a performance + // penalty from dynamically checking it. + template + static __device__ void computePatchSumOf3Channels(float scale, + const ImageResizerState& st, + ScaleCache const* yScaleCache, + Nd4jLong ptrsLen, + const CachedInterpolation& xCache, + float* outputPtr) { + + bool const needsXBounding = xCache.needsBounding; + + auto boundIfNeeded = [needsXBounding](Nd4jLong x, Nd4jLong y) -> Nd4jLong { + return (needsXBounding ? 
bound(x, y) : (x)); + }; + + float sum_0 = 0; + float sum_1 = 0; + float sum_2 = 0; + for (int i = 0; i < ptrsLen; ++i) { + const T* ptr = yScaleCache[i].yPtr; + float scaleX = xCache.startScale; + Nd4jLong offset = 3 * boundIfNeeded(xCache.start, st.inWidth); + float sum_y_0 = static_cast(ptr[offset + 0]) * scaleX; + float sum_y_1 = static_cast(ptr[offset + 1]) * scaleX; + float sum_y_2 = static_cast(ptr[offset + 2]) * scaleX; + + if (xCache.start + 1 != xCache.end) { + for (Nd4jLong x = xCache.start + 1; x < xCache.end - 1; ++x) { + Nd4jLong offset = 3 * boundIfNeeded(x, st.inWidth); + sum_y_0 += static_cast(ptr[offset + 0]); + sum_y_1 += static_cast(ptr[offset + 1]); + sum_y_2 += static_cast(ptr[offset + 2]); + } + scaleX = xCache.endMinusOneScale; + offset = st.channels * boundIfNeeded(xCache.end - 1, st.inWidth); + sum_y_0 += static_cast(ptr[offset + 0]) * scaleX; + sum_y_1 += static_cast(ptr[offset + 1]) * scaleX; + sum_y_2 += static_cast(ptr[offset + 2]) * scaleX; + } + sum_0 += sum_y_0 * yScaleCache[i].yScale; + sum_1 += sum_y_1 * yScaleCache[i].yScale; + sum_2 += sum_y_2 * yScaleCache[i].yScale; + } + + outputPtr[0] = sum_0 * scale; + outputPtr[1] = sum_1 * scale; + outputPtr[2] = sum_2 * scale; + } + + // Computes the sum of all x values defined by taken across + // the y offsets and scales defined by y_ptrs and y_scales, for channel c. + // + // Note that is a template parameter to avoid a performance + // penalty from dynamically checking it. + template + static __device__ void computePatchSum(float scale, const ImageResizerState& st, + ScaleCache const* yScaleCache, Nd4jLong ptrsLen, + const CachedInterpolation& xCache, + float* outputPtr) { + + bool const needsXBounding = xCache.needsBounding; + + auto boundIfNeeded = [needsXBounding](Nd4jLong x, Nd4jLong y) -> Nd4jLong { + return (needsXBounding ? 
bound(x, y) : (x)); + }; + + const auto numChannels = st.channels; + for (Nd4jLong c = 0; c < numChannels; ++c) { + float sum = 0; + for (int i = 0; i < ptrsLen; ++i) { + T const* ptr = yScaleCache[i].yPtr; + float scaleX = xCache.startScale; + float sumY = static_cast(ptr[numChannels * boundIfNeeded(xCache.start, st.inWidth) + c]) * scaleX; + if (xCache.start + 1 != xCache.end) { + for (Nd4jLong x = xCache.start + 1; x < xCache.end - 1; ++x) { + sumY += static_cast( + ptr[numChannels * boundIfNeeded(x, st.inWidth) + c]); + } + scaleX = xCache.endMinusOneScale; + sumY += static_cast(ptr[numChannels * boundIfNeeded(xCache.end - 1, st.inWidth) + c]) * scaleX; + } + sum += sumY * yScaleCache[i].yScale; + } + outputPtr[c] = sum * scale; + } + } + + template + static __global__ void resizeAreaKernel(ImageResizerState const* pSt, CachedInterpolation const* caches, float scale, + T const* inputPtr, Nd4jLong* inputShape, float* outputPtr, Nd4jLong* outputShape) { + + __shared__ ScaleCache* sharedPtr; + + if (threadIdx.x == 0) { + extern __shared__ char shared[]; + sharedPtr = reinterpret_cast*>(shared); + } + __syncthreads(); + + for (auto batch = blockIdx.x; batch < pSt->batchSize; batch += gridDim.x) { + for (auto y = threadIdx.x; y < pSt->outHeight; y += blockDim.x) { + const float inY = y * pSt->heightScale; + const float inY1 = (y + 1) * pSt->heightScale; + // The start and end height indices of all the cells that could + // contribute to the target cell. 
+ const Nd4jLong yStart = math::nd4j_floor(inY); + const Nd4jLong yEnd = math::nd4j_ceil(inY1); + auto scalesDim = yEnd - yStart; + auto yScaleCache = sharedPtr + scalesDim * y * sizeof(ScaleCache); + + //auto startPtr = sharedPtr + y * scalesDim * sizeof(float); + //float* yScales = yScalesShare + y * sizeof(float) * scalesDim;//reinterpret_cast(startPtr); //shared + y * scalesDim * y + scalesDim * sizeof(T const *) [scalesDim]; + //T const** yPtrs = yPtrsShare + y * sizeof(T const*) * scalesDim; //[scalesDim]; + //yPtrs = reinterpret_cast(sharedBuf); + float* output = outputPtr + (batch * pSt->outHeight + y) * pSt->channels * pSt->outWidth; + //int k = 0; + for (Nd4jLong i = yStart, k = 0; i < yEnd; ++i, ++k) { + float scaleY; + if (i < inY) { + scaleY = (i + 1 > inY1 ? pSt->heightScale : i + 1 - inY); + } else { + scaleY = (i + 1 > inY1 ? inY1 - i : 1.0); + } + yScaleCache[k].yScale = scaleY; + yScaleCache[k].yPtr = inputPtr + (batch * pSt->inHeight * pSt->inWidth * pSt->channels + bound(i, pSt->inHeight) * pSt->inWidth * pSt->channels); + } + + if (pSt->channels == 3) { + for (Nd4jLong x = 0; x < pSt->outWidth; ++x) { + const CachedInterpolation& xCache = caches[x]; + computePatchSumOf3Channels(scale, *pSt, yScaleCache, scalesDim, xCache, output); + output += pSt->channels; + } + } else { + for (Nd4jLong x = 0; x < pSt->outWidth; ++x) { + const CachedInterpolation &xCache = caches[x]; + computePatchSum(scale, *pSt, yScaleCache, scalesDim, xCache, output); + output += pSt->channels; + } + } + } + } + } + + template + static void resizeArea(cudaStream_t* stream, ImageResizerState const& st, CachedInterpolation* cache, + NDArray const* input, NDArray* output) { + + T const* inputPtr = reinterpret_cast(input->getSpecialBuffer()); +// float* yScales; +// T const** yPtrs; + float scale = 1.f / (st.heightScale * st.widthScale); + auto outputPtr = reinterpret_cast(output->specialBuffer()); // output is always float. 
TO DO: provide another float types also with template declaration + ImageResizerState* pSt; + auto err = cudaMalloc(&pSt, sizeof(ImageResizerState)); + err = cudaMemcpyAsync(pSt, &st, sizeof(ImageResizerState), cudaMemcpyHostToDevice, *stream); + + resizeAreaKernel<<<128, 4, 2048, *stream>>>(pSt, cache, scale, inputPtr, input->getSpecialShapeInfo(), outputPtr, + output->specialShapeInfo()); + err = cudaStreamSynchronize(*stream); + err = cudaFree(pSt); + } +// ------------------------------------------------------------------------------------------------------------------ // + template + int resizeAreaFunctor_(nd4j::LaunchContext* context, NDArray const* image, int const width, int const height, + bool const alignCorners, NDArray* output) { + + ImageResizerState st(alignCorners, false); // Create resize info + auto res = st.validateAndCalculateOutputSize(image, width, height); + auto stream = context->getCudaStream(); + if (Status::OK() == res) { + CachedInterpolation* xCached; + (st.outWidth); + auto err = cudaMalloc(&xCached, sizeof(CachedInterpolation) * st.outWidth); + NDArray::prepareSpecialUse({output}, {image}); + fillInterpolationCache<<<128, 128, 256, *stream>>>(xCached, st.outWidth, st.inWidth, st.widthScale); + resizeArea(stream, st, xCached, image, output); + err = cudaStreamSynchronize(*stream); + err = cudaFree(xCached); + NDArray::registerSpecialUse({output}, {image}); + } + + return res; + } + int resizeAreaFunctor(nd4j::LaunchContext * context, NDArray const* image, int const width, int const height, + bool const alignCorners, NDArray* output) { + BUILD_SINGLE_SELECTOR(image->dataType(), return resizeAreaFunctor_, (context, image, width, height, alignCorners, output), NUMERIC_TYPES); + } + // ------------------------------------------------------------------------------------------------------------------ // // simplified bicubic resize without antialiasing // diff --git a/libnd4j/include/ops/declarable/helpers/image_resize.h 
b/libnd4j/include/ops/declarable/helpers/image_resize.h index d52fd74f7..047b2cf70 100644 --- a/libnd4j/include/ops/declarable/helpers/image_resize.h +++ b/libnd4j/include/ops/declarable/helpers/image_resize.h @@ -45,6 +45,9 @@ namespace helpers { bool preserveAspectRatio, bool antialias, NDArray* output); int resizeBicubicFunctorA(nd4j::LaunchContext * context, NDArray const* image, int const width, int const height, bool const alignCorners, bool const halfPixelAlign, NDArray* output); + int resizeAreaFunctor(nd4j::LaunchContext * context, NDArray const* image, int const width, int const height, + bool const alignCorners, NDArray* output); + int resizeFunctor(nd4j::LaunchContext * context, NDArray const* image, int const width, int const height, ImageResizeMethods method, bool preserveAspectRatio, bool antialias, NDArray* output); diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests11.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests11.cpp index 2b01eca79..e97e543c7 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests11.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests11.cpp @@ -1087,6 +1087,304 @@ TEST_F(DeclarableOpsTests11, ImageResizeBicubic_Test8) { delete results; } +TEST_F(DeclarableOpsTests11, ImageResizeArea_Test1) { + + NDArray input = NDArrayFactory::create('c', {1, 3, 3, 4}); + NDArray expected = NDArrayFactory::create('c', {1, 6, 6, 4}, { + 1.f, 2.f, 3.f, 4.f, + 1.f, 2.f, 3.f, 4.f, + 5.f, 6.f, 7.f, 8.f, + 5.f, 6.f, 7.f, 8.f, + 9.f, 10.f, 11.f, 12.f, + 9.f, 10.f, 11.f, 12.f, + + 1.f, 2.f, 3.f, 4.f, + 1.f, 2.f, 3.f, 4.f, + 5.f, 6.f, 7.f, 8.f, + 5.f, 6.f, 7.f, 8.f, + 9.f, 10.f, 11.f, 12.f, + 9.f, 10.f, 11.f, 12.f, + + 13.f, 14.f, 15.f, 16.f, + 13.f, 14.f, 15.f, 16.f, + 17.f, 18.f, 19.f, 20.f, + 17.f, 18.f, 19.f, 20.f, + 21.f, 22.f, 23.f, 24.f, + 21.f, 22.f, 23.f, 24.f, + + 13.f, 14.f, 15.f, 16.f, + 13.f, 14.f, 15.f, 16.f, + 17.f, 18.f, 19.f, 20.f, + 17.f, 18.f, 19.f, 20.f, + 21.f, 22.f, 23.f, 24.f, + 21.f, 22.f, 23.f, 
24.f, + + 25.f, 26.f, 27.f, 28.f, + 25.f, 26.f, 27.f, 28.f, + 29.f, 30.f, 31.f, 32.f, + 29.f, 30.f, 31.f, 32.f, + 33.f, 34.f, 35.f, 36.f, + 33.f, 34.f, 35.f, 36.f, + + 25.f, 26.f, 27.f, 28.f, + 25.f, 26.f, 27.f, 28.f, + 29.f, 30.f, 31.f, 32.f, + 29.f, 30.f, 31.f, 32.f, + 33.f, 34.f, 35.f, 36.f, + 33.f, 34.f, 35.f, 36.f }); + input.linspace(1); + auto size = NDArrayFactory::create({6, 6}); + nd4j::ops::resize_area op; + auto results = op.execute({&input, &size}, {}, {}); + + ASSERT_EQ(ND4J_STATUS_OK, results->status()); + + NDArray* result = results->at(0); + +// result->printBuffer("Area Resized to 6x6"); +// expected.printBuffer("Area Expect for 6x6"); + ASSERT_TRUE(expected.isSameShape(result)); + ASSERT_TRUE(expected.equalsTo(result)); + delete results; +} + +TEST_F(DeclarableOpsTests11, ImageResizeArea_Test2) { + + NDArray input = NDArrayFactory::create('c', {1, 3, 3, 1}); + NDArray expected = NDArrayFactory::create('c', {1, 6, 6, 1}, { + 1.f, 1.f, 2.f, 2.f, 3.f, 3.f, + 1.f, 1.f, 2.f, 2.f, 3.f, 3.f, + 4.f, 4.f, 5.f, 5.f, 6.f, 6.f, + 4.f, 4.f, 5.f, 5.f, 6.f, 6.f, + 7.f, 7.f, 8.f, 8.f, 9.f, 9.f, + 7.f, 7.f, 8.f, 8.f, 9.f, 9.f + }); + input.linspace(1); + auto size = NDArrayFactory::create({6, 6}); + nd4j::ops::resize_area op; + auto results = op.execute({&input, &size}, {}, {}); + + ASSERT_EQ(ND4J_STATUS_OK, results->status()); + + NDArray* result = results->at(0); + +// result->printBuffer("Area Resized to 6x6"); +// expected.printBuffer("Area Expect for 6x6"); + ASSERT_TRUE(expected.isSameShape(result)); + ASSERT_TRUE(expected.equalsTo(result)); + delete results; +} + + +TEST_F(DeclarableOpsTests11, ImageResizeArea_Test3) { + + NDArray input = NDArrayFactory::create('c', {1, 3, 3, 3}); + NDArray expected = NDArrayFactory::create('c', {1, 6, 6, 3}, { + 1.f, 2.f, 3.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 7.f, 8.f, 9.f, + 1.f, 2.f, 3.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 7.f, 8.f, 9.f, + 10.f, 11.f, 12.f, 10.f, 11.f, 
12.f, 13.f, 14.f, 15.f, 13.f, 14.f, 15.f, 16.f, 17.f, 18.f, 16.f, 17.f, 18.f, + 10.f, 11.f, 12.f, 10.f, 11.f, 12.f, 13.f, 14.f, 15.f, 13.f, 14.f, 15.f, 16.f, 17.f, 18.f, 16.f, 17.f, 18.f, + 19.f, 20.f, 21.f, 19.f, 20.f, 21.f, 22.f, 23.f, 24.f, 22.f, 23.f, 24.f, 25.f, 26.f, 27.f, 25.f, 26.f, 27.f, + 19.f, 20.f, 21.f, 19.f, 20.f, 21.f, 22.f, 23.f, 24.f, 22.f, 23.f, 24.f, 25.f, 26.f, 27.f, 25.f, 26.f, 27.f + }); + input.linspace(1); + auto size = NDArrayFactory::create({6, 6}); + nd4j::ops::resize_area op; + auto results = op.execute({&input, &size}, {}, {}); + + ASSERT_EQ(ND4J_STATUS_OK, results->status()); + + NDArray* result = results->at(0); + +// result->printBuffer("Area Resized to 6x6"); +// expected.printBuffer("Area Expect for 6x6"); + ASSERT_TRUE(expected.isSameShape(result)); + ASSERT_TRUE(expected.equalsTo(result)); + delete results; +} + +TEST_F(DeclarableOpsTests11, ImageResizeArea_Test4) { + + NDArray input = NDArrayFactory::create('c', {2, 3, 3, 3}, { + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27 + }); + + NDArray expected = NDArrayFactory::create('c', {2, 6, 6, 3}, { + 1.f, 2.f, 3.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 7.f, 8.f, 9.f, + 1.f, 2.f, 3.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 7.f, 8.f, 9.f, + 10.f, 11.f, 12.f, 10.f, 11.f, 12.f, 13.f, 14.f, 15.f, 13.f, 14.f, 15.f, 16.f, 17.f, 18.f, 16.f, 17.f, 18.f, + 10.f, 11.f, 12.f, 10.f, 11.f, 12.f, 13.f, 14.f, 15.f, 13.f, 14.f, 15.f, 16.f, 17.f, 18.f, 16.f, 17.f, 18.f, + 19.f, 20.f, 21.f, 19.f, 20.f, 21.f, 22.f, 23.f, 24.f, 22.f, 23.f, 24.f, 25.f, 26.f, 27.f, 25.f, 26.f, 27.f, + 19.f, 20.f, 21.f, 19.f, 20.f, 21.f, 22.f, 23.f, 24.f, 22.f, 23.f, 24.f, 25.f, 26.f, 27.f, 25.f, 26.f, 27.f, + + 1.f, 2.f, 3.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 7.f, 8.f, 9.f, + 1.f, 2.f, 3.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 4.f, 5.f, 
6.f, 7.f, 8.f, 9.f, 7.f, 8.f, 9.f, + 10.f, 11.f, 12.f, 10.f, 11.f, 12.f, 13.f, 14.f, 15.f, 13.f, 14.f, 15.f, 16.f, 17.f, 18.f, 16.f, 17.f, 18.f, + 10.f, 11.f, 12.f, 10.f, 11.f, 12.f, 13.f, 14.f, 15.f, 13.f, 14.f, 15.f, 16.f, 17.f, 18.f, 16.f, 17.f, 18.f, + 19.f, 20.f, 21.f, 19.f, 20.f, 21.f, 22.f, 23.f, 24.f, 22.f, 23.f, 24.f, 25.f, 26.f, 27.f, 25.f, 26.f, 27.f, + 19.f, 20.f, 21.f, 19.f, 20.f, 21.f, 22.f, 23.f, 24.f, 22.f, 23.f, 24.f, 25.f, 26.f, 27.f, 25.f, 26.f, 27.f + }); + //input.linspace(1); + auto size = NDArrayFactory::create({6, 6}); + nd4j::ops::resize_area op; + auto results = op.execute({&input, &size}, {}, {}); + + ASSERT_EQ(ND4J_STATUS_OK, results->status()); + + NDArray* result = results->at(0); + +// result->printBuffer("Area Resized to 6x6"); +// expected.printBuffer("Area Expect for 6x6"); + ASSERT_TRUE(expected.isSameShape(result)); + ASSERT_TRUE(expected.equalsTo(result)); + delete results; +} + +TEST_F(DeclarableOpsTests11, ImageResizeArea_Test5) { + + NDArray input = NDArrayFactory::create('c', {2, 3, 3, 3}, { + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27 + }); + + NDArray expected = NDArrayFactory::create('c', {2, 6, 6, 3}, { + 1.f, 2.f, 3.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 7.f, 8.f, 9.f, + 1.f, 2.f, 3.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 7.f, 8.f, 9.f, + 10.f, 11.f, 12.f, 10.f, 11.f, 12.f, 13.f, 14.f, 15.f, 13.f, 14.f, 15.f, 16.f, 17.f, 18.f, 16.f, 17.f, 18.f, + 10.f, 11.f, 12.f, 10.f, 11.f, 12.f, 13.f, 14.f, 15.f, 13.f, 14.f, 15.f, 16.f, 17.f, 18.f, 16.f, 17.f, 18.f, + 19.f, 20.f, 21.f, 19.f, 20.f, 21.f, 22.f, 23.f, 24.f, 22.f, 23.f, 24.f, 25.f, 26.f, 27.f, 25.f, 26.f, 27.f, + 19.f, 20.f, 21.f, 19.f, 20.f, 21.f, 22.f, 23.f, 24.f, 22.f, 23.f, 24.f, 25.f, 26.f, 27.f, 25.f, 26.f, 27.f, + + 1.f, 2.f, 3.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 7.f, 
8.f, 9.f, + 1.f, 2.f, 3.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 7.f, 8.f, 9.f, + 10.f, 11.f, 12.f, 10.f, 11.f, 12.f, 13.f, 14.f, 15.f, 13.f, 14.f, 15.f, 16.f, 17.f, 18.f, 16.f, 17.f, 18.f, + 10.f, 11.f, 12.f, 10.f, 11.f, 12.f, 13.f, 14.f, 15.f, 13.f, 14.f, 15.f, 16.f, 17.f, 18.f, 16.f, 17.f, 18.f, + 19.f, 20.f, 21.f, 19.f, 20.f, 21.f, 22.f, 23.f, 24.f, 22.f, 23.f, 24.f, 25.f, 26.f, 27.f, 25.f, 26.f, 27.f, + 19.f, 20.f, 21.f, 19.f, 20.f, 21.f, 22.f, 23.f, 24.f, 22.f, 23.f, 24.f, 25.f, 26.f, 27.f, 25.f, 26.f, 27.f + }); + //input.linspace(1); + auto size = NDArrayFactory::create({6, 6}); + nd4j::ops::resize_area op; + auto results = op.execute({&input, &size}, {}, {}); + + ASSERT_EQ(ND4J_STATUS_OK, results->status()); + + NDArray* result = results->at(0); + +// result->printBuffer("Area Resized to 6x6"); +// expected.printBuffer("Area Expect for 6x6"); + ASSERT_TRUE(expected.isSameShape(result)); + ASSERT_TRUE(expected.equalsTo(result)); + delete results; +} + +TEST_F(DeclarableOpsTests11, ImageResizeArea_Test6) { + + NDArray input = NDArrayFactory::create('c', {2, 3, 3, 1}, { + 1, 2, 3, 4, 5, 6, 7, 8, 9, + 1, 2, 3, 4, 5, 6, 7, 8, 9 + }); + + NDArray expected = NDArrayFactory::create('c', {2, 6, 6, 1}, { + 1.f, 1.f, 1.5f, 2.f, 2.f, 3.f, + 1.f, 1.f, 1.5f, 2.f, 2.f, 3.f, + 2.5f, 2.5f, 3.f, 3.5f, 3.5f, 4.5f, + 4.f, 4.f, 4.5f, 5.f, 5.f, 6.f, + 4.f, 4.f, 4.5f, 5.f, 5.f, 6.f, + 7.f, 7.f, 7.5f, 8.f, 8.f, 9.f, + + 1.f, 1.f, 1.5f, 2.f, 2.f, 3.f, + 1.f, 1.f, 1.5f, 2.f, 2.f, 3.f, + 2.5f, 2.5f, 3.f, 3.5f, 3.5f, 4.5f, + 4.f, 4.f, 4.5f, 5.f, 5.f, 6.f, + 4.f, 4.f, 4.5f, 5.f, 5.f, 6.f, + 7.f, 7.f, 7.5f, 8.f, 8.f, 9.f + }); + //input.linspace(1); + auto size = NDArrayFactory::create({6, 6}); + nd4j::ops::resize_area op; + auto results = op.execute({&input, &size}, {}, {}, {true}); + + ASSERT_EQ(ND4J_STATUS_OK, results->status()); + + NDArray* result = results->at(0); + + result->printBuffer("Area Resized to 6x6"); + expected.printBuffer("Area Expect for 6x6"); 
+ ASSERT_TRUE(expected.isSameShape(result)); + ASSERT_TRUE(expected.equalsTo(result)); + delete results; +} + +TEST_F(DeclarableOpsTests11, ImageResizeArea_Test7) { + + NDArray input = NDArrayFactory::create('c', {2, 3, 3, 1}, { + 1, 2, 3, 4, 5, 6, 7, 8, 9, + 1, 2, 3, 4, 5, 6, 7, 8, 9 + }); + + NDArray expected = NDArrayFactory::create('c', {2, 6, 6, 1}, { + 1.f, 1.f, 1.5f, 2.f, 2.f, 3.f, + 1.f, 1.f, 1.5f, 2.f, 2.f, 3.f, + 2.5f, 2.5f, 3.f, 3.5f, 3.5f, 4.5f, + 4.f, 4.f, 4.5f, 5.f, 5.f, 6.f, + 4.f, 4.f, 4.5f, 5.f, 5.f, 6.f, + 7.f, 7.f, 7.5f, 8.f, 8.f, 9.f, + + 1.f, 1.f, 1.5f, 2.f, 2.f, 3.f, + 1.f, 1.f, 1.5f, 2.f, 2.f, 3.f, + 2.5f, 2.5f, 3.f, 3.5f, 3.5f, 4.5f, + 4.f, 4.f, 4.5f, 5.f, 5.f, 6.f, + 4.f, 4.f, 4.5f, 5.f, 5.f, 6.f, + 7.f, 7.f, 7.5f, 8.f, 8.f, 9.f + }); + //input.linspace(1); +// auto size = NDArrayFactory::create({6, 6}); + nd4j::ops::resize_area op; + auto results = op.execute({&input}, {}, {6, 6}, {true}); + + ASSERT_EQ(ND4J_STATUS_OK, results->status()); + + NDArray* result = results->at(0); + + result->printBuffer("Area Resized to 6x6"); + expected.printBuffer("Area Expect for 6x6"); + ASSERT_TRUE(expected.isSameShape(result)); + ASSERT_TRUE(expected.equalsTo(result)); + delete results; +} + +TEST_F(DeclarableOpsTests11, ImageResizeArea_Test8) { + + NDArray input = NDArrayFactory::create('c', {1, 3, 3, 1}, { + 1, 2, 3, 4, 5, 6, 7, 8, 9 + }); + + NDArray expected = NDArrayFactory::create('c', {1, 6, 6, 1}, { + 1.f, 1.f, 1.5f, 2.f, 2.f, 3.f, + 1.f, 1.f, 1.5f, 2.f, 2.f, 3.f, + 2.5f, 2.5f, 3.f, 3.5f, 3.5f, 4.5f, + 4.f, 4.f, 4.5f, 5.f, 5.f, 6.f, + 4.f, 4.f, 4.5f, 5.f, 5.f, 6.f, + 7.f, 7.f, 7.5f, 8.f, 8.f, 9.f + }); + //input.linspace(1); +// auto size = NDArrayFactory::create({6, 6}); + nd4j::ops::resize_area op; + auto results = op.execute({&input}, {}, {6, 6}, {true}); + + ASSERT_EQ(ND4J_STATUS_OK, results->status()); + + NDArray* result = results->at(0); + + result->printBuffer("Area Resized to 6x6"); + expected.printBuffer("Area Expect for 6x6"); + 
ASSERT_TRUE(expected.isSameShape(result)); + ASSERT_TRUE(expected.equalsTo(result)); + delete results; +} + /////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests11, summaryStatsData_test1) { diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests15.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests15.cpp index 96234f41d..75db5989c 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests15.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests15.cpp @@ -537,6 +537,20 @@ TEST_F(DeclarableOpsTests15, Test_BitCast_4) { } +TEST_F(DeclarableOpsTests15, Test_BitCast_4_1) { + auto x = NDArrayFactory::create('c', {1, 2}); + auto e = NDArrayFactory::create('c', {1, 2}, {4607182418800017408LL, 4611686018427387904LL}); // as TF 4607182418800017408, 4611686018427387904 + x.linspace(1.); + nd4j::ops::bitcast op; + + auto result = op.execute({&x}, {}, {nd4j::DataType::INT64}, {}); + ASSERT_EQ(Status::OK(), result->status()); + // e.printIndexedBuffer("Double to int64"); + auto res = result->at(0); + ASSERT_EQ(*res, e); + delete result; +} + TEST_F(DeclarableOpsTests15, Test_BitCast_5) { auto x = NDArrayFactory::create('c', {4, 4}, { diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/imports/converters/ImportClassMapping.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/imports/converters/ImportClassMapping.java index ad23d1266..7a4b00d3f 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/imports/converters/ImportClassMapping.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/imports/converters/ImportClassMapping.java @@ -89,6 +89,7 @@ public class ImportClassMapping { org.nd4j.linalg.api.ops.impl.image.ResizeBilinear.class, org.nd4j.linalg.api.ops.impl.image.ResizeBicubic.class, org.nd4j.linalg.api.ops.impl.image.ResizeNearestNeighbor.class, + org.nd4j.linalg.api.ops.impl.image.ResizeArea.class, 
org.nd4j.linalg.api.ops.impl.indexaccum.FirstIndex.class, org.nd4j.linalg.api.ops.impl.indexaccum.IAMax.class, org.nd4j.linalg.api.ops.impl.indexaccum.IAMin.class, diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/image/ResizeArea.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/image/ResizeArea.java new file mode 100644 index 000000000..8acf558e9 --- /dev/null +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/image/ResizeArea.java @@ -0,0 +1,114 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit, K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ +package org.nd4j.linalg.api.ops.impl.image; + +import lombok.NoArgsConstructor; +import lombok.NonNull; +import lombok.val; +import org.nd4j.autodiff.samediff.SDVariable; +import org.nd4j.autodiff.samediff.SameDiff; +import org.nd4j.base.Preconditions; +import org.nd4j.imports.graphmapper.tf.TFGraphMapper; +import org.nd4j.linalg.api.buffer.DataType; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.api.ops.DynamicCustomOp; +import org.nd4j.linalg.factory.Nd4j; +import org.tensorflow.framework.AttrValue; +import org.tensorflow.framework.GraphDef; +import org.tensorflow.framework.NodeDef; + +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +@NoArgsConstructor +public class ResizeArea extends DynamicCustomOp { + protected boolean alignCorners = false; + protected Integer height = null; + protected Integer width = null; + + public ResizeArea(@NonNull SameDiff sd, @NonNull SDVariable image, int height, int width, + boolean alignCorners) { + super(sd, image); + this.alignCorners = alignCorners; + this.height = height; + this.width = width; + addArgs(); + } + + public ResizeArea(@NonNull INDArray x, INDArray z, int height, int width, + boolean alignCorners) { + super(new INDArray[]{x}, new INDArray[]{z}); + this.alignCorners = alignCorners; + this.height = height; + this.width = width; + addArgs(); + } + + @Override + public String opName() { + return "resize_area"; + } + + @Override + public String tensorflowName() { + return "ResizeArea"; + } + + @Override + public void initFromTensorFlow(NodeDef nodeDef, SameDiff initWith, Map attributesForNode, GraphDef graph) { + TFGraphMapper.initFunctionFromProperties(nodeDef.getOp(), this, attributesForNode, nodeDef, graph); + + val attrC = attributesForNode.get("align_corners"); + this.alignCorners = attrC != null ? 
attrC.getB() : false; + + addArgs(); + } + + protected void addArgs() { + iArguments.clear(); + if(height != null && width != null){ + INDArray size = Nd4j.createFromArray(new int[]{height,width}); + addInputArgument(size); + //iArguments.add(Long.valueOf(height)); + //iArguments.add(Long.valueOf(width)); + } + addBArgument(alignCorners); + } + + @Override + public Map propertiesForFunction() { + Map ret = new LinkedHashMap<>(); + ret.put("alignCorners", alignCorners); + ret.put("height", height); + ret.put("width", width); + return ret; + } + + @Override + public List doDiff(List f1) { + throw new UnsupportedOperationException(); + } + + @Override + public List calculateOutputDataTypes(List inputDataTypes){ + Preconditions.checkState(inputDataTypes != null && (inputDataTypes.size() == 1 || inputDataTypes.size() == 2), + "Expected 1 or 2 input datatypes for %s, got %s", getClass(), inputDataTypes); + return Collections.singletonList(DataType.FLOAT); + } +} + diff --git a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/custom/CustomOpsTests.java b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/custom/CustomOpsTests.java index 15b9454da..944966654 100644 --- a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/custom/CustomOpsTests.java +++ b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/custom/CustomOpsTests.java @@ -34,6 +34,7 @@ import org.nd4j.linalg.api.ops.executioner.OpStatus; import org.nd4j.linalg.api.ops.impl.controlflow.Where; import org.nd4j.linalg.api.ops.impl.image.CropAndResize; import org.nd4j.linalg.api.ops.impl.image.NonMaxSuppression; +import org.nd4j.linalg.api.ops.impl.image.ResizeArea; import org.nd4j.linalg.api.ops.impl.image.ResizeBilinear; import org.nd4j.linalg.api.ops.impl.reduce.MmulBp; import org.nd4j.linalg.api.ops.impl.shape.Create; @@ -968,6 +969,33 @@ public class CustomOpsTests extends BaseNd4jTest { Nd4j.exec(op); } + @Test + public void testResizeArea1() { + + INDArray x = 
Nd4j.rand(DataType.FLOAT, 1, 2,3,4); + INDArray z = Nd4j.createUninitialized(DataType.FLOAT, 1, 10, 10, 4); + ResizeArea op = new ResizeArea(x, z, 10, 10, false); + Nd4j.exec(op); + } + + @Test + public void testResizeArea2() { + + INDArray image = Nd4j.linspace(DataType.FLOAT, 1.0f, 1.0f, 9 ).reshape(1,3,3,1); + INDArray output = Nd4j.createUninitialized(DataType.FLOAT, 1, 6, 6, 1); + INDArray expected = Nd4j.createFromArray(new float[]{ + 1.f, 1.f, 2.f, 2.f, 3.f, 3.f, + 1.f, 1.f, 2.f, 2.f, 3.f, 3.f, + 4.f, 4.f, 5.f, 5.f, 6.f, 6.f, + 4.f, 4.f, 5.f, 5.f, 6.f, 6.f, + 7.f, 7.f, 8.f, 8.f, 9.f, 9.f, + 7.f, 7.f, 8.f, 8.f, 9.f, 9.f + }).reshape(1,6,6,1); + ResizeArea op = new ResizeArea(image, output, 6, 6, false); + Nd4j.exec(op); + assertEquals(expected, output); + } + @Test public void testCompareAndBitpack() { INDArray in = Nd4j.createFromArray(new double[]{-12.f, -11.f, -10.f, -9.f, -8.f, -7.f, -6.f, -5.f, -4.f, -3.f,