From 30a8af566c4a010b28f1869015d9fe015638916b Mon Sep 17 00:00:00 2001 From: shugeo Date: Tue, 8 Oct 2019 13:45:18 +0300 Subject: [PATCH] The first working implementation of cuda kernel for draw_bounding_boxes op helper. --- .../helpers/cuda/image_draw_bounding_boxes.cu | 25 ++++++++++--------- .../layers_tests/DeclarableOpsTests10.cpp | 4 ++- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/libnd4j/include/ops/declarable/helpers/cuda/image_draw_bounding_boxes.cu b/libnd4j/include/ops/declarable/helpers/cuda/image_draw_bounding_boxes.cu index 86912b74a..694ba39c6 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/image_draw_bounding_boxes.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/image_draw_bounding_boxes.cu @@ -28,11 +28,12 @@ namespace helpers { static __global__ void drawBoundingBoxesKernel(T const* images, Nd4jLong* imagesShape, T const* boxes, Nd4jLong* boxesShape, T const* colors, Nd4jLong* colorsShape, T* output, Nd4jLong* outputShape, Nd4jLong batchSize, Nd4jLong width, Nd4jLong height, Nd4jLong channels, Nd4jLong colorSetSize) { + for (auto b = blockIdx.x; b < batchSize; b += gridDim.x) { // loop by batch - for (auto c = 0; c < colorSetSize; c += blockDim.x) { + for (auto c = threadIdx.x; c < colorSetSize; c += blockDim.x) { // box with shape - auto pos = 0; - auto internalBox = &boxes[pos];//(*boxes)(b, {0})(c, {0});//internalBoxes->at(c); + auto pos = channels * c; + auto internalBox = &boxes[b * colorSetSize * 4 + c * 4];//(*boxes)(b, {0})(c, {0});//internalBoxes->at(c); auto color = &colors[pos];//colorSet->at(c); auto rowStart = nd4j::math::nd4j_max(Nd4jLong (0), Nd4jLong ((height - 1) * internalBox[0])); auto rowEnd = nd4j::math::nd4j_min(Nd4jLong (height - 1), Nd4jLong ((height - 1) * internalBox[2])); @@ -42,8 +43,8 @@ namespace helpers { for (auto e = 0; e < channels; ++e) { Nd4jLong yMinPos[] = {b, y, colStart, e}; Nd4jLong yMaxPos[] = {b, y, colEnd, e}; - auto zIndexYmin = shape::getOffset(outputShape, yMinPos, 4); - auto zIndexYmax = shape::getOffset(outputShape, yMaxPos, 4); + auto zIndexYmin = shape::getOffset(outputShape, yMinPos, 0); + auto zIndexYmax = shape::getOffset(outputShape, yMaxPos, 0); output[zIndexYmin] = color[e]; output[zIndexYmax] = color[e]; } @@ -52,8 +53,8 @@ namespace helpers { for (auto e = 0; e < channels; ++e) { Nd4jLong xMinPos[] = {b, rowStart, x, e}; Nd4jLong xMaxPos[] = {b, rowEnd, x, e}; - auto zIndexXmin = shape::getOffset(outputShape, xMinPos, 4); - auto zIndexXmax = shape::getOffset(outputShape, xMaxPos, 4); + auto zIndexXmin = shape::getOffset(outputShape, xMinPos, 0); + auto zIndexXmax = shape::getOffset(outputShape, xMaxPos, 0); output[zIndexXmin] = color[e]; output[zIndexXmax] = color[e]; } @@ -62,6 +63,7 @@ namespace helpers { } } + template void drawBoundingBoxesH(nd4j::LaunchContext* context, NDArray const* images, NDArray const* boxes, NDArray const* colors, NDArray* output) { auto batchSize = images->sizeAt(0); @@ -70,14 +72,12 @@ namespace helpers { auto channels = images->sizeAt(3); auto stream = context->getCudaStream(); auto colorSetSize = colors->sizeAt(0); -// auto imageList = images->allTensorsAlongDimension({1, 2, 3}); // split images by batch -// auto boxList = boxes->allTensorsAlongDimension({1, 2}); // split boxes by batch -// auto colorSet = colors->allTensorsAlongDimension({1}); + auto imagesBuf = images->getDataBuffer()->specialAsT(); auto boxesBuf = boxes->getDataBuffer()->specialAsT(); auto colorsBuf = colors->getDataBuffer()->specialAsT(); auto outputBuf = output->dataBuffer()->specialAsT(); - drawBoundingBoxesKernel<<<128, 256, 1024, *stream>>>(imagesBuf, images->getSpecialShapeInfo(), + drawBoundingBoxesKernel<<<1, 1, 1024, *stream>>>(imagesBuf, images->getSpecialShapeInfo(), boxesBuf, boxes->getSpecialShapeInfo(), colorsBuf, colors->getSpecialShapeInfo(), outputBuf, output->specialShapeInfo(), batchSize, width, height, channels, colorSetSize); } @@ -90,9 +90,10 @@ namespace helpers { // height = images->sizeAt(1), width = images->sizeAt(2) // colors - colors for each box given // set up color for each box as frame - + NDArray::prepareSpecialUse({output}, {images, boxes, colors}); output->assign(images); BUILD_SINGLE_SELECTOR(output->dataType(), drawBoundingBoxesH, (context, images, boxes, colors, output), FLOAT_TYPES); + NDArray::registerSpecialUse({output}, {images, boxes, colors}); } BUILD_SINGLE_TEMPLATE(template void drawBoundingBoxesH, (nd4j::LaunchContext* context, NDArray const* images, NDArray const* boxes, NDArray const* colors, NDArray* output), FLOAT_TYPES); } diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests10.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests10.cpp index 6caa7164d..446763096 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests10.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests10.cpp @@ -2104,7 +2104,9 @@ TEST_F(DeclarableOpsTests10, Image_DrawBoundingBoxes_2) { ASSERT_EQ(ND4J_STATUS_OK, results->status()); auto result = results->at(0); - result->printIndexedBuffer("Bounded boxes 2"); +// result->syncToHost(); +// result->printBuffer("Bounded boxes 2"); +// expected.printBuffer("Bounded expec 2"); ASSERT_TRUE(expected.isSameShapeStrict(result)); ASSERT_TRUE(expected.equalsTo(result));