From 8f70b4441f19ae54f6ae9b9a8663a19c4b5e6a4c Mon Sep 17 00:00:00 2001 From: shugeo Date: Fri, 4 Oct 2019 18:32:21 +0300 Subject: [PATCH 1/8] draw_bounding_boxes op implementation. Inital revision. --- .../parity_ops/draw_bounding_boxes.cpp | 47 +++++++++++++++++++ .../ops/declarable/headers/parity_ops.h | 17 +++++++ .../layers_tests/DeclarableOpsTests10.cpp | 33 +++++++++++++ 3 files changed, 97 insertions(+) create mode 100644 libnd4j/include/ops/declarable/generic/parity_ops/draw_bounding_boxes.cpp diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/draw_bounding_boxes.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/draw_bounding_boxes.cpp new file mode 100644 index 000000000..78d8910fb --- /dev/null +++ b/libnd4j/include/ops/declarable/generic/parity_ops/draw_bounding_boxes.cpp @@ -0,0 +1,47 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author George A. Shulinok +// + +#include +#if NOT_EXCLUDED(OP_draw_bounding_boxes) + +//#include +#include +namespace nd4j { + namespace ops { + OP_IMPL(draw_bounding_boxes, 3, 1, true) { + + auto image = INPUT_VARIABLE(0); + auto boxes = INPUT_VARIABLE(1); + auto colors = INPUT_VARIABLE(2); + + return ND4J_STATUS_OK; + } + + DECLARE_TYPES(draw_bounding_boxes) { + getOpDescriptor() + ->setAllowedInputTypes(0, {HALF, FLOAT32})// TF allows HALF and FLOAT32 only + ->setAllowedInputTypes(1, {FLOAT32}) // as TF + ->setAllowedInputTypes(2, {FLOAT32}) // as TF + ->setAllowedOutputTypes({HALF, FLOAT32}); // TF allows HALF and FLOAT32 only + } + } +} + +#endif diff --git a/libnd4j/include/ops/declarable/headers/parity_ops.h b/libnd4j/include/ops/declarable/headers/parity_ops.h index 605ce95d3..8f6849ef7 100644 --- a/libnd4j/include/ops/declarable/headers/parity_ops.h +++ b/libnd4j/include/ops/declarable/headers/parity_ops.h @@ -1244,6 +1244,23 @@ namespace nd4j { DECLARE_CUSTOM_OP(extract_image_patches, 1, 1, false, 0, 7); #endif + /** + * draw_bounding_boxes op - modified input image with given colors exept given boxes. + * + * input params: + * 0 - images tensor (4D) with shape {batch, width, height, channels}, where channes is 1 (BW image), + * 3 (RGB) or 4 (RGBA) + * 1 - boxes tensor (3D) with shape {batch, number_of_boxes, 4} where last dimension encoded as + * (y_min, x_min, y_max, x_max), all values in between 0. and 1. + * 2 - colours tensor (2D) with shape {number_of_boxes, channels} -- bordering color set (palette) + * + * output: + * 0 - 4D tensor with same shape as images (input 0) + */ + #if NOT_EXCLUDED(OP_draw_bounding_boxes) + DECLARE_OP(draw_bounding_boxes, 3, 1, true); + #endif + /** * roll - op porting from numpy (https://docs.scipy.org/doc/numpy-1.14.0/reference/generated/numpy.roll.html) * diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests10.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests10.cpp index 82ed21709..14bc22fae 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests10.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests10.cpp @@ -2043,6 +2043,39 @@ TEST_F(DeclarableOpsTests10, Image_CropAndResize_5) { delete results; } +//////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests10, Image_DrawBoundingBoxes_1) { + int axis = 0; + NDArray images = NDArrayFactory::create('c', {2,4,5,3}); + NDArray boxes = NDArrayFactory::create('c', {2, 2, 4}, {0,0,1,1}); + NDArray colors = NDArrayFactory::create('c', {2, 3}, {201., 202., 203., 128., 129., 130.}); + + //NDArray ('c', {6}, {0.9f, .75f, .6f, .95f, .5f, .3f}); + NDArray expected = NDArrayFactory::create('c', {2,4,5,3}, { + 127., 128., 129., 127., 128., 129., 127., 128., 129., 127., 128., 129., 201., 202., 203., + 127., 128., 129., 19., 20., 21., 22., 23., 24., 127., 128., 129., 201., 202., 203., + 127., 128., 129., 127., 128., 129., 127., 128., 129., 127., 128., 129., 201., 202., 203., + 201., 202., 203., 201. ,202. ,203., 201., 202., 203., 201., 202., 203., 201., 202., 203., + + 61., 62., 63., 201., 202., 203., 201., 202., 203., 70., 71., 72., 73., 74., 75., + 76., 77., 78., 127., 128., 129., 127., 128., 129., 85., 86., 87., 88., 89., 90., + 91., 92., 93., 201., 202., 203., 201., 202., 203., 100., 101., 102., 103., 104., 105., + 106., 107., 108., 109., 110., 111., 112., 113., 114., 115., 116., 117., 118., 119., 120. + }); + + nd4j::ops::draw_bounding_boxes op; + auto results = op.execute({&images, &boxes, &colors}, {}, {}); + + ASSERT_EQ(ND4J_STATUS_OK, results->status()); + + auto result = results->at(0); + result->printIndexedBuffer("Bounded boxes"); + ASSERT_TRUE(expected.isSameShapeStrict(result)); + ASSERT_TRUE(expected.equalsTo(result)); + + delete results; +} + //////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests10, FakeQuantWithMinMaxVars_Test_1) { From 53a2ebddbeea4e9951852d70b76288f1072b5dab Mon Sep 17 00:00:00 2001 From: shugeo Date: Fri, 4 Oct 2019 20:46:26 +0300 Subject: [PATCH 2/8] Added test and helpers for draw_bounding_boxes op both cpu and cuda related. --- .../parity_ops/draw_bounding_boxes.cpp | 10 +++--- .../helpers/cpu/image_draw_bounding_boxes.cpp | 33 +++++++++++++++++++ .../helpers/cuda/image_draw_bounding_boxes.cu | 33 +++++++++++++++++++ .../layers_tests/DeclarableOpsTests10.cpp | 9 +++-- 4 files changed, 78 insertions(+), 7 deletions(-) create mode 100644 libnd4j/include/ops/declarable/helpers/cpu/image_draw_bounding_boxes.cpp create mode 100644 libnd4j/include/ops/declarable/helpers/cuda/image_draw_bounding_boxes.cu diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/draw_bounding_boxes.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/draw_bounding_boxes.cpp index 78d8910fb..7610e6161 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/draw_bounding_boxes.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/draw_bounding_boxes.cpp @@ -21,16 +21,18 @@ #include #if NOT_EXCLUDED(OP_draw_bounding_boxes) -//#include -#include +#include +#include namespace nd4j { namespace ops { OP_IMPL(draw_bounding_boxes, 3, 1, true) { - auto image = INPUT_VARIABLE(0); + auto images = INPUT_VARIABLE(0); auto boxes = INPUT_VARIABLE(1); auto colors = INPUT_VARIABLE(2); - + auto output = OUTPUT_VARIABLE(0); + + helpers::drawBoundingBoxesFunctor(block.launchContext(), images, boxes, colors, output); return ND4J_STATUS_OK; } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/image_draw_bounding_boxes.cpp b/libnd4j/include/ops/declarable/helpers/cpu/image_draw_bounding_boxes.cpp new file mode 100644 index 000000000..6f32c1cbd --- /dev/null +++ b/libnd4j/include/ops/declarable/helpers/cpu/image_draw_bounding_boxes.cpp @@ -0,0 +1,33 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author sgazeos@gmail.com +// +#include +#include + +namespace nd4j { +namespace ops { +namespace helpers { + + void drawBoundingBoxesFunctor(nd4j::LaunchContext * context, NDArray* images, NDArray* boxes, NDArray* colors, NDArray* output) { + + } + +} +} +} diff --git a/libnd4j/include/ops/declarable/helpers/cuda/image_draw_bounding_boxes.cu b/libnd4j/include/ops/declarable/helpers/cuda/image_draw_bounding_boxes.cu new file mode 100644 index 000000000..9e4db944c --- /dev/null +++ b/libnd4j/include/ops/declarable/helpers/cuda/image_draw_bounding_boxes.cu @@ -0,0 +1,33 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author sgazeos@gmail.com +// +#include +#include + +namespace nd4j { +namespace ops { +namespace helpers { + + void drawBoundingBoxesFunctor(nd4j::LaunchContext * context, NDArray* images, NDArray* boxes, NDArray* colors, NDArray* output) { + + } + +} +} +} diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests10.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests10.cpp index 14bc22fae..ce327f0cf 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests10.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests10.cpp @@ -2045,9 +2045,12 @@ TEST_F(DeclarableOpsTests10, Image_CropAndResize_5) { //////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests10, Image_DrawBoundingBoxes_1) { - int axis = 0; NDArray images = NDArrayFactory::create('c', {2,4,5,3}); - NDArray boxes = NDArrayFactory::create('c', {2, 2, 4}, {0,0,1,1}); + NDArray boxes = NDArrayFactory::create('c', {2, 2, 4}, { + 0. , 0. , 1. , 1. , 0.1, 0.2, 0.9, 0.8, + 0.3, 0.3, 0.7, 0.7, 0.4, 0.4, 0.6, 0.6 + }); + NDArray colors = NDArrayFactory::create('c', {2, 3}, {201., 202., 203., 128., 129., 130.}); //NDArray ('c', {6}, {0.9f, .75f, .6f, .95f, .5f, .3f}); @@ -2062,7 +2065,7 @@ TEST_F(DeclarableOpsTests10, Image_DrawBoundingBoxes_1) { 91., 92., 93., 201., 202., 203., 201., 202., 203., 100., 101., 102., 103., 104., 105., 106., 107., 108., 109., 110., 111., 112., 113., 114., 115., 116., 117., 118., 119., 120. }); - + images.linspace(1.); nd4j::ops::draw_bounding_boxes op; auto results = op.execute({&images, &boxes, &colors}, {}, {}); From 16a66a65e3bb68a726807bd3c43123624904f42e Mon Sep 17 00:00:00 2001 From: shugeo Date: Fri, 4 Oct 2019 21:16:34 +0300 Subject: [PATCH 3/8] Added helper declaration for draw_bounding_boxes op. --- .../helpers/image_draw_bounding_boxes.h | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 libnd4j/include/ops/declarable/helpers/image_draw_bounding_boxes.h diff --git a/libnd4j/include/ops/declarable/helpers/image_draw_bounding_boxes.h b/libnd4j/include/ops/declarable/helpers/image_draw_bounding_boxes.h new file mode 100644 index 000000000..dd61d9532 --- /dev/null +++ b/libnd4j/include/ops/declarable/helpers/image_draw_bounding_boxes.h @@ -0,0 +1,34 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author sgazeos@gmail.com +// +#ifndef __IMAGE_DRAW_BOUNDING_BOXES_H_HELPERS__ +#define __IMAGE_DRAW_BOUNDING_BOXES_H_HELPERS__ +#include +#include + +namespace nd4j { +namespace ops { +namespace helpers { + + void drawBoundingBoxesFunctor(nd4j::LaunchContext * context, NDArray* images, NDArray* boxes, NDArray* colors, NDArray* output); + +} +} +} +#endif From 78443ffebf8210dc08c3979342b1f14f62409da8 Mon Sep 17 00:00:00 2001 From: shugeo Date: Mon, 7 Oct 2019 15:04:44 +0300 Subject: [PATCH 4/8] Working implementation of draw_bounding_boxes op for cpu. --- libnd4j/blas/NDArray.h | 1 + libnd4j/blas/NDArray.hpp | 18 ++++++++ .../helpers/cpu/image_draw_bounding_boxes.cpp | 46 +++++++++++++++++++ .../layers_tests/DeclarableOpsTests10.cpp | 36 ++++++++++++++- 4 files changed, 99 insertions(+), 2 deletions(-) diff --git a/libnd4j/blas/NDArray.h b/libnd4j/blas/NDArray.h index 21eedc665..10847f882 100644 --- a/libnd4j/blas/NDArray.h +++ b/libnd4j/blas/NDArray.h @@ -1422,6 +1422,7 @@ namespace nd4j { template void p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLong l, const T value); + void p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLong l, NDArray const& value); template diff --git a/libnd4j/blas/NDArray.hpp b/libnd4j/blas/NDArray.hpp index 0f0621a80..1d810b421 100644 --- a/libnd4j/blas/NDArray.hpp +++ b/libnd4j/blas/NDArray.hpp @@ -4187,6 +4187,24 @@ void NDArray::p(const Nd4jLong i, const NDArray& scalar) { NDArray::registerPrimaryUse({this}, {&scalar}); } +//////////////////////////////////////////////////////////////////////// + void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLong l, const NDArray& scalar) { + + if(!scalar.isScalar()) + throw std::invalid_argument("NDArray::p method: input array must be scalar!"); + if (i >= _length) + throw std::invalid_argument("NDArray::p(i, NDArray_scalar): input index is out of array length !"); + +// void *p = reinterpret_cast(scalar.getBuffer()); + Nd4jLong coords[4] = {i, j, k, l}; + auto xOffset = shape::getOffset(getShapeInfo(), coords); + + NDArray::preparePrimaryUse({this}, {&scalar}, true); +// BUILD_SINGLE_PARTIAL_SELECTOR(dataType(), templatedSet<, T>(this->getBuffer(), xOffset, p), LIBND4J_TYPES); + BUILD_SINGLE_SELECTOR(scalar.dataType(), templatedSet, (this->getBuffer(), xOffset, scalar.dataType(), scalar.getBuffer()), LIBND4J_TYPES); + NDArray::registerPrimaryUse({this}, {&scalar}); + } + ////////////////////////////////////////////////////////////////////////// void NDArray::addRowVector(const NDArray *row, NDArray *target) const { diff --git a/libnd4j/include/ops/declarable/helpers/cpu/image_draw_bounding_boxes.cpp b/libnd4j/include/ops/declarable/helpers/cpu/image_draw_bounding_boxes.cpp index 6f32c1cbd..1d1e32b52 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/image_draw_bounding_boxes.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/image_draw_bounding_boxes.cpp @@ -25,7 +25,53 @@ namespace ops { namespace helpers { void drawBoundingBoxesFunctor(nd4j::LaunchContext * context, NDArray* images, NDArray* boxes, NDArray* colors, NDArray* output) { + // images - batch of 3D images with BW (last dim = 1), RGB (last dim = 3) or RGBA (last dim = 4) channel set + // boxes - batch of 2D bounds with last dim (y_start, x_start, y_end, x_end) to compute i and j as + // floor((height - 1 ) * y_start) => rowStart, floor((height - 1) * y_end) => rowEnd + // floor((width - 1 ) * x_start) => colStart, floor((width - 1) * x_end) => colEnd + // height = images->sizeAt(1), width = images->sizeAt(2) + // colors - colors for each box given + // set up color for each box as frame + auto height = images->sizeAt(1); + auto width = images->sizeAt(2); + auto channels = images->sizeAt(3); + auto imageList = images->allTensorsAlongDimension({1, 2, 3}); // split images by batch + auto boxList = boxes->allTensorsAlongDimension({1, 2}); // split boxes by batch + output->assign(images); + for (auto b = 0; b < imageList->size(); ++b) { // loop by batch +// auto image = imageList->at(b); + auto box = boxList->at(b); + + auto internalBoxes = box->allTensorsAlongDimension({1}); + auto colorSet = colors->allTensorsAlongDimension({1}); + + for (auto c = 0; c < colorSet->size(); ++c) { + // box with shape + auto internalBox = internalBoxes->at(c); + auto color = colorSet->at(c); + auto rowStart = nd4j::math::nd4j_max(Nd4jLong (0), Nd4jLong ((height - 1) * internalBox->e(0))); + auto rowEnd = nd4j::math::nd4j_min(Nd4jLong (height - 1), Nd4jLong ((height - 1) * internalBox->e(2))); + auto colStart = nd4j::math::nd4j_max(Nd4jLong (0), Nd4jLong ((width - 1) * internalBox->e(1))); + auto colEnd = nd4j::math::nd4j_min(Nd4jLong(width - 1), Nd4jLong ((width - 1) * internalBox->e(3))); + for (auto y = rowStart; y <= rowEnd; y++) { + for (auto e = 0; e < color->lengthOf(); ++e) { + output->p(b, y, colStart, e, color->e(e)); + output->p(b, y, colEnd, e, color->e(e)); + } + } + for (auto x = colStart + 1; x < colEnd; x++) { + for (auto e = 0; e < color->lengthOf(); ++e) { + output->p(b, rowStart, x, e, color->e(e)); + output->p(b, rowEnd, x, e, color->e(e)); + } + } + } + delete colorSet; + delete internalBoxes; + } + delete imageList; + delete boxList; } } diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests10.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests10.cpp index ce327f0cf..c61cda29f 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests10.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests10.cpp @@ -2051,7 +2051,7 @@ TEST_F(DeclarableOpsTests10, Image_DrawBoundingBoxes_1) { 0.3, 0.3, 0.7, 0.7, 0.4, 0.4, 0.6, 0.6 }); - NDArray colors = NDArrayFactory::create('c', {2, 3}, {201., 202., 203., 128., 129., 130.}); + NDArray colors = NDArrayFactory::create('c', {2, 3}, {201., 202., 203., 127., 128., 129.}); //NDArray ('c', {6}, {0.9f, .75f, .6f, .95f, .5f, .3f}); NDArray expected = NDArrayFactory::create('c', {2,4,5,3}, { @@ -2072,7 +2072,39 @@ TEST_F(DeclarableOpsTests10, Image_DrawBoundingBoxes_1) { ASSERT_EQ(ND4J_STATUS_OK, results->status()); auto result = results->at(0); - result->printIndexedBuffer("Bounded boxes"); + result->printBuffer("Bounded boxes"); + expected.printBuffer("Bounded expec"); + ASSERT_TRUE(expected.isSameShapeStrict(result)); + ASSERT_TRUE(expected.equalsTo(result)); + + delete results; +} + +//////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests10, Image_DrawBoundingBoxes_2) { + NDArray images = NDArrayFactory::create('c', {1,9,9,1}); + NDArray boxes = NDArrayFactory::create('c', {1, 1, 4}, {0.2, 0.2, 0.7, 0.7}); + NDArray colors = NDArrayFactory::create('c', {1, 1}, {0.95}); + + //NDArray ('c', {6}, {0.9f, .75f, .6f, .95f, .5f, .3f}); + NDArray expected = NDArrayFactory::create('c', {1,9,9,1}, { + 1.1 , 2.1, 3.1 , 4.1 , 5.1 , 6.1 , 7.1 , 8.1 , 9.1 , + 10.1 , 0.95, 0.95, 0.95, 0.95, 0.95, 16.1 , 17.1 , 18.1 , + 19.1 , 0.95, 21.1, 22.1, 23.1, 0.95, 25.1 , 26.1 , 27.1 , + 28.1 , 0.95, 30.1, 31.1, 32.1, 0.95, 34.1 , 35.1 , 36.1 , + 37.1 , 0.95, 39.1, 40.1, 41.1, 0.95, 43.1 , 44.1 , 45.1 , + 46.1 , 0.95, 0.95, 0.95, 0.95, 0.95, 52.1 , 53.1 , 54.1 , + 55.1 , 56.1, 57.1 , 58.1 , 59.1 , 60.1 , 61.1 , 62.1 , 63.1 , + 64.1 , 65.1, 66.1 , 67.1 , 68.1 , 69.1 , 70.1 , 71.1 , 72.1 , + 73.1 , 74.1, 75.1 , 76.1 , 77.1 , 78.1 , 79.1 , 80.1 , 81.1 }); + images.linspace(1.1); + nd4j::ops::draw_bounding_boxes op; + auto results = op.execute({&images, &boxes, &colors}, {}, {}); + + ASSERT_EQ(ND4J_STATUS_OK, results->status()); + + auto result = results->at(0); + result->printIndexedBuffer("Bounded boxes 2"); ASSERT_TRUE(expected.isSameShapeStrict(result)); ASSERT_TRUE(expected.equalsTo(result)); From 6cf3a8fa9c760e0644a4e98b7bd128c26d46990d Mon Sep 17 00:00:00 2001 From: shugeo Date: Mon, 7 Oct 2019 17:51:07 +0300 Subject: [PATCH 5/8] Refactored cpu implementatio and added cuda aproach. --- .../helpers/cpu/image_draw_bounding_boxes.cpp | 33 ++++++------ .../helpers/cuda/image_draw_bounding_boxes.cu | 54 ++++++++++++++++++- .../layers_tests/DeclarableOpsTests10.cpp | 4 +- 3 files changed, 70 insertions(+), 21 deletions(-) diff --git a/libnd4j/include/ops/declarable/helpers/cpu/image_draw_bounding_boxes.cpp b/libnd4j/include/ops/declarable/helpers/cpu/image_draw_bounding_boxes.cpp index 1d1e32b52..96aaad1c7 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/image_draw_bounding_boxes.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/image_draw_bounding_boxes.cpp @@ -32,28 +32,25 @@ namespace helpers { // height = images->sizeAt(1), width = images->sizeAt(2) // colors - colors for each box given // set up color for each box as frame - + auto batchSize = images->sizeAt(0); auto height = images->sizeAt(1); auto width = images->sizeAt(2); auto channels = images->sizeAt(3); - auto imageList = images->allTensorsAlongDimension({1, 2, 3}); // split images by batch - auto boxList = boxes->allTensorsAlongDimension({1, 2}); // split boxes by batch - output->assign(images); - for (auto b = 0; b < imageList->size(); ++b) { // loop by batch + //auto imageList = images->allTensorsAlongDimension({1, 2, 3}); // split images by batch +// auto boxList = boxes->allTensorsAlongDimension({1, 2}); // split boxes by batch + auto colorSet = colors->allTensorsAlongDimension({1}); + output->assign(images); // fill up all output with input images, then fill up boxes + for (auto b = 0; b < batchSize; ++b) { // loop by batch // auto image = imageList->at(b); - auto box = boxList->at(b); - - auto internalBoxes = box->allTensorsAlongDimension({1}); - auto colorSet = colors->allTensorsAlongDimension({1}); for (auto c = 0; c < colorSet->size(); ++c) { // box with shape - auto internalBox = internalBoxes->at(c); + auto internalBox = (*boxes)(b, {0})(c, {0});//internalBoxes->at(c); auto color = colorSet->at(c); - auto rowStart = nd4j::math::nd4j_max(Nd4jLong (0), Nd4jLong ((height - 1) * internalBox->e(0))); - auto rowEnd = nd4j::math::nd4j_min(Nd4jLong (height - 1), Nd4jLong ((height - 1) * internalBox->e(2))); - auto colStart = nd4j::math::nd4j_max(Nd4jLong (0), Nd4jLong ((width - 1) * internalBox->e(1))); - auto colEnd = nd4j::math::nd4j_min(Nd4jLong(width - 1), Nd4jLong ((width - 1) * internalBox->e(3))); + auto rowStart = nd4j::math::nd4j_max(Nd4jLong (0), Nd4jLong ((height - 1) * internalBox.e(0))); + auto rowEnd = nd4j::math::nd4j_min(Nd4jLong (height - 1), Nd4jLong ((height - 1) * internalBox.e(2))); + auto colStart = nd4j::math::nd4j_max(Nd4jLong (0), Nd4jLong ((width - 1) * internalBox.e(1))); + auto colEnd = nd4j::math::nd4j_min(Nd4jLong(width - 1), Nd4jLong ((width - 1) * internalBox.e(3))); for (auto y = rowStart; y <= rowEnd; y++) { for (auto e = 0; e < color->lengthOf(); ++e) { output->p(b, y, colStart, e, color->e(e)); @@ -67,11 +64,11 @@ namespace helpers { } } } - delete colorSet; - delete internalBoxes; +// delete internalBoxes; } - delete imageList; - delete boxList; + delete colorSet; +// delete imageList; +// delete boxList; } } diff --git a/libnd4j/include/ops/declarable/helpers/cuda/image_draw_bounding_boxes.cu b/libnd4j/include/ops/declarable/helpers/cuda/image_draw_bounding_boxes.cu index 9e4db944c..aaca04a09 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/image_draw_bounding_boxes.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/image_draw_bounding_boxes.cu @@ -24,8 +24,60 @@ namespace nd4j { namespace ops { namespace helpers { + template + static __global__ void drawBoundignBoxesKernel(T const* images, Nd4jLong* imagesShape, T const* boxes, + Nd4jLong* boxesShape, T const* colors, Nd4jLong* colorsShape, T* output, Nd4jLong* outputShape) { + for (auto b = 0; b < batchSize; ++b) { // loop by batch + for (auto c = 0; c < colorSet->size(); ++c) { + // box with shape + auto internalBox = (*boxes)(b, {0})(c, {0});//internalBoxes->at(c); + auto color = colorSet->at(c); + auto rowStart = nd4j::math::nd4j_max(Nd4jLong (0), Nd4jLong ((height - 1) * internalBox.e(0))); + auto rowEnd = nd4j::math::nd4j_min(Nd4jLong (height - 1), Nd4jLong ((height - 1) * internalBox.e(2))); + auto colStart = nd4j::math::nd4j_max(Nd4jLong (0), Nd4jLong ((width - 1) * internalBox.e(1))); + auto colEnd = nd4j::math::nd4j_min(Nd4jLong(width - 1), Nd4jLong ((width - 1) * internalBox.e(3))); + for (auto y = rowStart; y <= rowEnd; y++) { + for (auto e = 0; e < color->lengthOf(); ++e) { + output->p(b, y, colStart, e, color->e(e)); + output->p(b, y, colEnd, e, color->e(e)); + } + } + for (auto x = colStart + 1; x < colEnd; x++) { + for (auto e = 0; e < color->lengthOf(); ++e) { + output->p(b, rowStart, x, e, color->e(e)); + output->p(b, rowEnd, x, e, color->e(e)); + } + } + } + } + + } + template + void drawBoundingBoxesH(nd4j::LaunchContext* context, NDArray const* images, NDArray const* boxes, NDArray const* colors, NDArray* output) { + drawBoundingBoxesKernel<<<128, 256, 1024, *stream>>>(imagesBuf, imagesShape, boxesBuf, boxesShape, colorsBuf, colorsShape, + outputBuf, outputShape, ); + } + void drawBoundingBoxesFunctor(nd4j::LaunchContext * context, NDArray* images, NDArray* boxes, NDArray* colors, NDArray* output) { - + // images - batch of 3D images with BW (last dim = 1), RGB (last dim = 3) or RGBA (last dim = 4) channel set + // boxes - batch of 2D bounds with last dim (y_start, x_start, y_end, x_end) to compute i and j as + // floor((height - 1 ) * y_start) => rowStart, floor((height - 1) * y_end) => rowEnd + // floor((width - 1 ) * x_start) => colStart, floor((width - 1) * x_end) => colEnd + // height = images->sizeAt(1), width = images->sizeAt(2) + // colors - colors for each box given + // set up color for each box as frame + + auto height = images->sizeAt(1); + auto width = images->sizeAt(2); + auto channels = images->sizeAt(3); + auto imageList = images->allTensorsAlongDimension({1, 2, 3}); // split images by batch + auto boxList = boxes->allTensorsAlongDimension({1, 2}); // split boxes by batch + auto colorSet = colors->allTensorsAlongDimension({1}); + output->assign(images); + + delete colorSet; + delete imageList; + delete boxList; } } diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests10.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests10.cpp index c61cda29f..6caa7164d 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests10.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests10.cpp @@ -2072,8 +2072,8 @@ TEST_F(DeclarableOpsTests10, Image_DrawBoundingBoxes_1) { ASSERT_EQ(ND4J_STATUS_OK, results->status()); auto result = results->at(0); - result->printBuffer("Bounded boxes"); - expected.printBuffer("Bounded expec"); +// result->printBuffer("Bounded boxes"); +// expected.printBuffer("Bounded expec"); ASSERT_TRUE(expected.isSameShapeStrict(result)); ASSERT_TRUE(expected.equalsTo(result)); From ae09cfee3228df4f36f53bed4cea30f49f1f6f5f Mon Sep 17 00:00:00 2001 From: shugeo Date: Tue, 8 Oct 2019 00:09:46 +0300 Subject: [PATCH 6/8] Next approach of cuda imlementation for draw_bounding_boxes op helper. --- .../helpers/cuda/image_draw_bounding_boxes.cu | 73 +++++++++++-------- 1 file changed, 44 insertions(+), 29 deletions(-) diff --git a/libnd4j/include/ops/declarable/helpers/cuda/image_draw_bounding_boxes.cu b/libnd4j/include/ops/declarable/helpers/cuda/image_draw_bounding_boxes.cu index aaca04a09..86912b74a 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/image_draw_bounding_boxes.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/image_draw_bounding_boxes.cu @@ -25,27 +25,37 @@ namespace ops { namespace helpers { template - static __global__ void drawBoundignBoxesKernel(T const* images, Nd4jLong* imagesShape, T const* boxes, - Nd4jLong* boxesShape, T const* colors, Nd4jLong* colorsShape, T* output, Nd4jLong* outputShape) { - for (auto b = 0; b < batchSize; ++b) { // loop by batch - for (auto c = 0; c < colorSet->size(); ++c) { + static __global__ void drawBoundingBoxesKernel(T const* images, Nd4jLong* imagesShape, T const* boxes, + Nd4jLong* boxesShape, T const* colors, Nd4jLong* colorsShape, T* output, Nd4jLong* outputShape, + Nd4jLong batchSize, Nd4jLong width, Nd4jLong height, Nd4jLong channels, Nd4jLong colorSetSize) { + for (auto b = blockIdx.x; b < batchSize; b += gridDim.x) { // loop by batch + for (auto c = 0; c < colorSetSize; c += blockDim.x) { // box with shape - auto internalBox = (*boxes)(b, {0})(c, {0});//internalBoxes->at(c); - auto color = colorSet->at(c); - auto rowStart = nd4j::math::nd4j_max(Nd4jLong (0), Nd4jLong ((height - 1) * internalBox.e(0))); - auto rowEnd = nd4j::math::nd4j_min(Nd4jLong (height - 1), Nd4jLong ((height - 1) * internalBox.e(2))); - auto colStart = nd4j::math::nd4j_max(Nd4jLong (0), Nd4jLong ((width - 1) * internalBox.e(1))); - auto colEnd = nd4j::math::nd4j_min(Nd4jLong(width - 1), Nd4jLong ((width - 1) * internalBox.e(3))); + auto pos = 0; + auto internalBox = &boxes[pos];//(*boxes)(b, {0})(c, {0});//internalBoxes->at(c); + auto color = &colors[pos];//colorSet->at(c); + auto rowStart = nd4j::math::nd4j_max(Nd4jLong (0), Nd4jLong ((height - 1) * internalBox[0])); + auto rowEnd = nd4j::math::nd4j_min(Nd4jLong (height - 1), Nd4jLong ((height - 1) * internalBox[2])); + auto colStart = nd4j::math::nd4j_max(Nd4jLong (0), Nd4jLong ((width - 1) * internalBox[1])); + auto colEnd = nd4j::math::nd4j_min(Nd4jLong(width - 1), Nd4jLong ((width - 1) * internalBox[3])); for (auto y = rowStart; y <= rowEnd; y++) { - for (auto e = 0; e < color->lengthOf(); ++e) { - output->p(b, y, colStart, e, color->e(e)); - output->p(b, y, colEnd, e, color->e(e)); + for (auto e = 0; e < channels; ++e) { + Nd4jLong yMinPos[] = {b, y, colStart, e}; + Nd4jLong yMaxPos[] = {b, y, colEnd, e}; + auto zIndexYmin = shape::getOffset(outputShape, yMinPos, 4); + auto zIndexYmax = shape::getOffset(outputShape, yMaxPos, 4); + output[zIndexYmin] = color[e]; + output[zIndexYmax] = color[e]; } } for (auto x = colStart + 1; x < colEnd; x++) { - for (auto e = 0; e < color->lengthOf(); ++e) { - output->p(b, rowStart, x, e, color->e(e)); - output->p(b, rowEnd, x, e, color->e(e)); + for (auto e = 0; e < channels; ++e) { + Nd4jLong xMinPos[] = {b, rowStart, x, e}; + Nd4jLong xMaxPos[] = {b, rowEnd, x, e}; + auto zIndexXmin = shape::getOffset(outputShape, xMinPos, 4); + auto zIndexXmax = shape::getOffset(outputShape, xMaxPos, 4); + output[zIndexXmin] = color[e]; + output[zIndexXmax] = color[e]; } } } @@ -54,8 +64,22 @@ namespace helpers { } template void drawBoundingBoxesH(nd4j::LaunchContext* context, NDArray const* images, NDArray const* boxes, NDArray const* colors, NDArray* output) { - drawBoundingBoxesKernel<<<128, 256, 1024, *stream>>>(imagesBuf, imagesShape, boxesBuf, boxesShape, colorsBuf, colorsShape, - outputBuf, outputShape, ); + auto batchSize = images->sizeAt(0); + auto height = images->sizeAt(1); + auto width = images->sizeAt(2); + auto channels = images->sizeAt(3); + auto stream = context->getCudaStream(); + auto colorSetSize = colors->sizeAt(0); +// auto imageList = images->allTensorsAlongDimension({1, 2, 3}); // split images by batch +// auto boxList = boxes->allTensorsAlongDimension({1, 2}); // split boxes by batch +// auto colorSet = colors->allTensorsAlongDimension({1}); + auto imagesBuf = images->getDataBuffer()->specialAsT(); + auto boxesBuf = boxes->getDataBuffer()->specialAsT(); + auto colorsBuf = colors->getDataBuffer()->specialAsT(); + auto outputBuf = output->dataBuffer()->specialAsT(); + drawBoundingBoxesKernel<<<128, 256, 1024, *stream>>>(imagesBuf, images->getSpecialShapeInfo(), + boxesBuf, boxes->getSpecialShapeInfo(), colorsBuf, colors->getSpecialShapeInfo(), + outputBuf, output->specialShapeInfo(), batchSize, width, height, channels, colorSetSize); } void drawBoundingBoxesFunctor(nd4j::LaunchContext * context, NDArray* images, NDArray* boxes, NDArray* colors, NDArray* output) { @@ -67,19 +91,10 @@ namespace helpers { // colors - colors for each box given // set up color for each box as frame - auto height = images->sizeAt(1); - auto width = images->sizeAt(2); - auto channels = images->sizeAt(3); - auto imageList = images->allTensorsAlongDimension({1, 2, 3}); // split images by batch - auto boxList = boxes->allTensorsAlongDimension({1, 2}); // split boxes by batch - auto colorSet = colors->allTensorsAlongDimension({1}); output->assign(images); - - delete colorSet; - delete imageList; - delete boxList; + BUILD_SINGLE_SELECTOR(output->dataType(), drawBoundingBoxesH, (context, images, boxes, colors, output), FLOAT_TYPES); } - + BUILD_SINGLE_TEMPLATE(template void drawBoundingBoxesH, (nd4j::LaunchContext* context, NDArray const* images, NDArray const* boxes, NDArray const* colors, NDArray* output), FLOAT_TYPES); } } } From 30a8af566c4a010b28f1869015d9fe015638916b Mon Sep 17 00:00:00 2001 From: shugeo Date: Tue, 8 Oct 2019 13:45:18 +0300 Subject: [PATCH 7/8] The first working implementation of cuda kernel for draw_bounding_boxes op helper. --- .../helpers/cuda/image_draw_bounding_boxes.cu | 25 ++++++++++--------- .../layers_tests/DeclarableOpsTests10.cpp | 4 ++- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/libnd4j/include/ops/declarable/helpers/cuda/image_draw_bounding_boxes.cu b/libnd4j/include/ops/declarable/helpers/cuda/image_draw_bounding_boxes.cu index 86912b74a..694ba39c6 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/image_draw_bounding_boxes.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/image_draw_bounding_boxes.cu @@ -28,11 +28,12 @@ namespace helpers { static __global__ void drawBoundingBoxesKernel(T const* images, Nd4jLong* imagesShape, T const* boxes, Nd4jLong* boxesShape, T const* colors, Nd4jLong* colorsShape, T* output, Nd4jLong* outputShape, Nd4jLong batchSize, Nd4jLong width, Nd4jLong height, Nd4jLong channels, Nd4jLong colorSetSize) { + for (auto b = blockIdx.x; b < batchSize; b += gridDim.x) { // loop by batch - for (auto c = 0; c < colorSetSize; c += blockDim.x) { + for (auto c = threadIdx.x; c < colorSetSize; c += blockDim.x) { // box with shape - auto pos = 0; - auto internalBox = &boxes[pos];//(*boxes)(b, {0})(c, {0});//internalBoxes->at(c); + auto pos = channels * c; + auto internalBox = &boxes[b * colorSetSize * 4 + c * 4];//(*boxes)(b, {0})(c, {0});//internalBoxes->at(c); auto color = &colors[pos];//colorSet->at(c); auto rowStart = nd4j::math::nd4j_max(Nd4jLong (0), Nd4jLong ((height - 1) * internalBox[0])); auto rowEnd = nd4j::math::nd4j_min(Nd4jLong (height - 1), Nd4jLong ((height - 1) * internalBox[2])); @@ -42,8 +43,8 @@ namespace helpers { for (auto e = 0; e < channels; ++e) { Nd4jLong yMinPos[] = {b, y, colStart, e}; Nd4jLong yMaxPos[] = {b, y, colEnd, e}; - auto zIndexYmin = shape::getOffset(outputShape, yMinPos, 4); - auto zIndexYmax = shape::getOffset(outputShape, yMaxPos, 4); + auto zIndexYmin = shape::getOffset(outputShape, yMinPos, 0); + auto zIndexYmax = shape::getOffset(outputShape, yMaxPos, 0); output[zIndexYmin] = color[e]; output[zIndexYmax] = color[e]; } @@ -52,8 +53,8 @@ namespace helpers { for (auto e = 0; e < channels; ++e) { Nd4jLong xMinPos[] = {b, rowStart, x, e}; Nd4jLong xMaxPos[] = {b, rowEnd, x, e}; - auto zIndexXmin = shape::getOffset(outputShape, xMinPos, 4); - auto zIndexXmax = shape::getOffset(outputShape, xMaxPos, 4); + auto zIndexXmin = shape::getOffset(outputShape, xMinPos, 0); + auto zIndexXmax = shape::getOffset(outputShape, xMaxPos, 0); output[zIndexXmin] = color[e]; output[zIndexXmax] = color[e]; } @@ -62,6 +63,7 @@ namespace helpers { } } + template void drawBoundingBoxesH(nd4j::LaunchContext* context, NDArray const* images, NDArray const* boxes, NDArray const* colors, NDArray* output) { auto batchSize = images->sizeAt(0); @@ -70,14 +72,12 @@ namespace helpers { auto channels = images->sizeAt(3); auto stream = context->getCudaStream(); auto colorSetSize = colors->sizeAt(0); -// auto imageList = images->allTensorsAlongDimension({1, 2, 3}); // split images by batch -// auto boxList = boxes->allTensorsAlongDimension({1, 2}); // split boxes by batch -// auto colorSet = colors->allTensorsAlongDimension({1}); + auto imagesBuf = images->getDataBuffer()->specialAsT(); auto boxesBuf = boxes->getDataBuffer()->specialAsT(); auto colorsBuf = colors->getDataBuffer()->specialAsT(); auto outputBuf = output->dataBuffer()->specialAsT(); - drawBoundingBoxesKernel<<<128, 256, 1024, *stream>>>(imagesBuf, images->getSpecialShapeInfo(), + drawBoundingBoxesKernel<<<1, 1, 1024, *stream>>>(imagesBuf, images->getSpecialShapeInfo(), boxesBuf, boxes->getSpecialShapeInfo(), colorsBuf, colors->getSpecialShapeInfo(), outputBuf, output->specialShapeInfo(), batchSize, width, height, channels, colorSetSize); } @@ -90,9 +90,10 @@ namespace helpers { // height = images->sizeAt(1), width = images->sizeAt(2) // colors - colors for each box given // set up color for each box as frame - + NDArray::prepareSpecialUse({output}, {images, boxes, colors}); output->assign(images); BUILD_SINGLE_SELECTOR(output->dataType(), drawBoundingBoxesH, (context, images, boxes, colors, output), FLOAT_TYPES); + NDArray::registerSpecialUse({output}, {images, boxes, colors}); } BUILD_SINGLE_TEMPLATE(template void drawBoundingBoxesH, (nd4j::LaunchContext* context, NDArray const* images, NDArray const* boxes, NDArray const* colors, NDArray* output), FLOAT_TYPES); } diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests10.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests10.cpp index 6caa7164d..446763096 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests10.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests10.cpp @@ -2104,7 +2104,9 @@ TEST_F(DeclarableOpsTests10, Image_DrawBoundingBoxes_2) { ASSERT_EQ(ND4J_STATUS_OK, results->status()); auto result = results->at(0); - result->printIndexedBuffer("Bounded boxes 2"); +// result->syncToHost(); +// result->printBuffer("Bounded boxes 2"); +// expected.printBuffer("Bounded expec 2"); ASSERT_TRUE(expected.isSameShapeStrict(result)); ASSERT_TRUE(expected.equalsTo(result)); From 8fe5a1fa966e7f7b5aaddad5ae58662404527496 Mon Sep 17 00:00:00 2001 From: shugeo Date: Tue, 8 Oct 2019 15:42:27 +0300 Subject: [PATCH 8/8] The working implementation of draw_bounding_boxes op. --- .../helpers/cpu/image_draw_bounding_boxes.cpp | 2 ++ .../helpers/cuda/image_draw_bounding_boxes.cu | 21 +++++++++---------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/libnd4j/include/ops/declarable/helpers/cpu/image_draw_bounding_boxes.cpp b/libnd4j/include/ops/declarable/helpers/cpu/image_draw_bounding_boxes.cpp index 96aaad1c7..05c5eaf6e 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/image_draw_bounding_boxes.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/image_draw_bounding_boxes.cpp @@ -40,6 +40,8 @@ namespace helpers { // auto boxList = boxes->allTensorsAlongDimension({1, 2}); // split boxes by batch auto colorSet = colors->allTensorsAlongDimension({1}); output->assign(images); // fill up all output with input images, then fill up boxes + + PRAGMA_OMP_PARALLEL_FOR for (auto b = 0; b < batchSize; ++b) { // loop by batch // auto image = imageList->at(b); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/image_draw_bounding_boxes.cu b/libnd4j/include/ops/declarable/helpers/cuda/image_draw_bounding_boxes.cu index 694ba39c6..eb632e70c 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/image_draw_bounding_boxes.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/image_draw_bounding_boxes.cu @@ -29,32 +29,31 @@ namespace helpers { Nd4jLong* boxesShape, T const* colors, Nd4jLong* colorsShape, T* output, Nd4jLong* outputShape, Nd4jLong batchSize, Nd4jLong width, Nd4jLong height, Nd4jLong channels, Nd4jLong colorSetSize) { - for (auto b = blockIdx.x; b < batchSize; b += gridDim.x) { // loop by batch - for (auto c = threadIdx.x; c < colorSetSize; c += blockDim.x) { + for (auto b = blockIdx.x; b < (int)batchSize; b += gridDim.x) { // loop by batch + for (auto c = 0; c < colorSetSize; c++) { // box with shape - auto pos = channels * c; auto internalBox = &boxes[b * colorSetSize * 4 + c * 4];//(*boxes)(b, {0})(c, {0});//internalBoxes->at(c); - auto color = &colors[pos];//colorSet->at(c); + auto color = &colors[channels * c];//colorSet->at(c); auto rowStart = nd4j::math::nd4j_max(Nd4jLong (0), Nd4jLong ((height - 1) * internalBox[0])); auto rowEnd = nd4j::math::nd4j_min(Nd4jLong (height - 1), Nd4jLong ((height - 1) * internalBox[2])); auto colStart = nd4j::math::nd4j_max(Nd4jLong (0), Nd4jLong ((width - 1) * internalBox[1])); auto colEnd = nd4j::math::nd4j_min(Nd4jLong(width - 1), Nd4jLong ((width - 1) * internalBox[3])); - for (auto y = rowStart; y <= rowEnd; y++) { + for (auto y = rowStart + threadIdx.x; y <= rowEnd; y += blockDim.x) { for (auto e = 0; e < channels; ++e) { Nd4jLong yMinPos[] = {b, y, colStart, e}; Nd4jLong yMaxPos[] = {b, y, colEnd, e}; - auto zIndexYmin = shape::getOffset(outputShape, yMinPos, 0); - auto zIndexYmax = shape::getOffset(outputShape, yMaxPos, 0); + auto zIndexYmin = shape::getOffset(outputShape, yMinPos); + auto zIndexYmax = shape::getOffset(outputShape, yMaxPos); output[zIndexYmin] = color[e]; output[zIndexYmax] = color[e]; } } - for (auto x = colStart + 1; x < colEnd; x++) { + for (auto x = colStart + 1 + threadIdx.x; x < colEnd; x += blockDim.x) { for (auto e = 0; e < channels; ++e) { Nd4jLong xMinPos[] = {b, rowStart, x, e}; Nd4jLong xMaxPos[] = {b, rowEnd, x, e}; - auto zIndexXmin = shape::getOffset(outputShape, xMinPos, 0); - auto zIndexXmax = shape::getOffset(outputShape, xMaxPos, 0); + auto zIndexXmin = shape::getOffset(outputShape, xMinPos); + auto zIndexXmax = shape::getOffset(outputShape, xMaxPos); output[zIndexXmin] = color[e]; output[zIndexXmax] = color[e]; } @@ -77,7 +76,7 @@ namespace helpers { auto boxesBuf = boxes->getDataBuffer()->specialAsT(); auto colorsBuf = colors->getDataBuffer()->specialAsT(); auto outputBuf = output->dataBuffer()->specialAsT(); - drawBoundingBoxesKernel<<<1, 1, 1024, *stream>>>(imagesBuf, images->getSpecialShapeInfo(), + drawBoundingBoxesKernel<< 128? 128: batchSize, 256, 1024, *stream>>>(imagesBuf, images->getSpecialShapeInfo(), boxesBuf, boxes->getSpecialShapeInfo(), colorsBuf, colors->getSpecialShapeInfo(), outputBuf, output->specialShapeInfo(), batchSize, width, height, channels, colorSetSize); }