From ae09cfee3228df4f36f53bed4cea30f49f1f6f5f Mon Sep 17 00:00:00 2001 From: shugeo Date: Tue, 8 Oct 2019 00:09:46 +0300 Subject: [PATCH] Next approach of cuda imlementation for draw_bounding_boxes op helper. --- .../helpers/cuda/image_draw_bounding_boxes.cu | 73 +++++++++++-------- 1 file changed, 44 insertions(+), 29 deletions(-) diff --git a/libnd4j/include/ops/declarable/helpers/cuda/image_draw_bounding_boxes.cu b/libnd4j/include/ops/declarable/helpers/cuda/image_draw_bounding_boxes.cu index aaca04a09..86912b74a 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/image_draw_bounding_boxes.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/image_draw_bounding_boxes.cu @@ -25,27 +25,37 @@ namespace ops { namespace helpers { template - static __global__ void drawBoundignBoxesKernel(T const* images, Nd4jLong* imagesShape, T const* boxes, - Nd4jLong* boxesShape, T const* colors, Nd4jLong* colorsShape, T* output, Nd4jLong* outputShape) { - for (auto b = 0; b < batchSize; ++b) { // loop by batch - for (auto c = 0; c < colorSet->size(); ++c) { + static __global__ void drawBoundingBoxesKernel(T const* images, Nd4jLong* imagesShape, T const* boxes, + Nd4jLong* boxesShape, T const* colors, Nd4jLong* colorsShape, T* output, Nd4jLong* outputShape, + Nd4jLong batchSize, Nd4jLong width, Nd4jLong height, Nd4jLong channels, Nd4jLong colorSetSize) { + for (auto b = blockIdx.x; b < batchSize; b += gridDim.x) { // loop by batch + for (auto c = 0; c < colorSetSize; c += blockDim.x) { // box with shape - auto internalBox = (*boxes)(b, {0})(c, {0});//internalBoxes->at(c); - auto color = colorSet->at(c); - auto rowStart = nd4j::math::nd4j_max(Nd4jLong (0), Nd4jLong ((height - 1) * internalBox.e(0))); - auto rowEnd = nd4j::math::nd4j_min(Nd4jLong (height - 1), Nd4jLong ((height - 1) * internalBox.e(2))); - auto colStart = nd4j::math::nd4j_max(Nd4jLong (0), Nd4jLong ((width - 1) * internalBox.e(1))); - auto colEnd = nd4j::math::nd4j_min(Nd4jLong(width - 1), Nd4jLong ((width - 1) * internalBox.e(3))); + auto pos = 0; + auto internalBox = &boxes[pos];//(*boxes)(b, {0})(c, {0});//internalBoxes->at(c); + auto color = &colors[pos];//colorSet->at(c); + auto rowStart = nd4j::math::nd4j_max(Nd4jLong (0), Nd4jLong ((height - 1) * internalBox[0])); + auto rowEnd = nd4j::math::nd4j_min(Nd4jLong (height - 1), Nd4jLong ((height - 1) * internalBox[2])); + auto colStart = nd4j::math::nd4j_max(Nd4jLong (0), Nd4jLong ((width - 1) * internalBox[1])); + auto colEnd = nd4j::math::nd4j_min(Nd4jLong(width - 1), Nd4jLong ((width - 1) * internalBox[3])); for (auto y = rowStart; y <= rowEnd; y++) { - for (auto e = 0; e < color->lengthOf(); ++e) { - output->p(b, y, colStart, e, color->e(e)); - output->p(b, y, colEnd, e, color->e(e)); + for (auto e = 0; e < channels; ++e) { + Nd4jLong yMinPos[] = {b, y, colStart, e}; + Nd4jLong yMaxPos[] = {b, y, colEnd, e}; + auto zIndexYmin = shape::getOffset(outputShape, yMinPos, 4); + auto zIndexYmax = shape::getOffset(outputShape, yMaxPos, 4); + output[zIndexYmin] = color[e]; + output[zIndexYmax] = color[e]; } } for (auto x = colStart + 1; x < colEnd; x++) { - for (auto e = 0; e < color->lengthOf(); ++e) { - output->p(b, rowStart, x, e, color->e(e)); - output->p(b, rowEnd, x, e, color->e(e)); + for (auto e = 0; e < channels; ++e) { + Nd4jLong xMinPos[] = {b, rowStart, x, e}; + Nd4jLong xMaxPos[] = {b, rowEnd, x, e}; + auto zIndexXmin = shape::getOffset(outputShape, xMinPos, 4); + auto zIndexXmax = shape::getOffset(outputShape, xMaxPos, 4); + output[zIndexXmin] = color[e]; + output[zIndexXmax] = color[e]; } } } @@ -54,8 +64,22 @@ namespace helpers { } template void drawBoundingBoxesH(nd4j::LaunchContext* context, NDArray const* images, NDArray const* boxes, NDArray const* colors, NDArray* output) { - drawBoundingBoxesKernel<<<128, 256, 1024, *stream>>>(imagesBuf, imagesShape, boxesBuf, boxesShape, colorsBuf, colorsShape, - outputBuf, outputShape, ); + auto batchSize = images->sizeAt(0); + auto height = images->sizeAt(1); + auto width = images->sizeAt(2); + auto channels = images->sizeAt(3); + auto stream = context->getCudaStream(); + auto colorSetSize = colors->sizeAt(0); +// auto imageList = images->allTensorsAlongDimension({1, 2, 3}); // split images by batch +// auto boxList = boxes->allTensorsAlongDimension({1, 2}); // split boxes by batch +// auto colorSet = colors->allTensorsAlongDimension({1}); + auto imagesBuf = images->getDataBuffer()->specialAsT(); + auto boxesBuf = boxes->getDataBuffer()->specialAsT(); + auto colorsBuf = colors->getDataBuffer()->specialAsT(); + auto outputBuf = output->dataBuffer()->specialAsT(); + drawBoundingBoxesKernel<<<128, 256, 1024, *stream>>>(imagesBuf, images->getSpecialShapeInfo(), + boxesBuf, boxes->getSpecialShapeInfo(), colorsBuf, colors->getSpecialShapeInfo(), + outputBuf, output->specialShapeInfo(), batchSize, width, height, channels, colorSetSize); } void drawBoundingBoxesFunctor(nd4j::LaunchContext * context, NDArray* images, NDArray* boxes, NDArray* colors, NDArray* output) { @@ -67,19 +91,10 @@ namespace helpers { // colors - colors for each box given // set up color for each box as frame - auto height = images->sizeAt(1); - auto width = images->sizeAt(2); - auto channels = images->sizeAt(3); - auto imageList = images->allTensorsAlongDimension({1, 2, 3}); // split images by batch - auto boxList = boxes->allTensorsAlongDimension({1, 2}); // split boxes by batch - auto colorSet = colors->allTensorsAlongDimension({1}); output->assign(images); - - delete colorSet; - delete imageList; - delete boxList; + BUILD_SINGLE_SELECTOR(output->dataType(), drawBoundingBoxesH, (context, images, boxes, colors, output), FLOAT_TYPES); } - + BUILD_SINGLE_TEMPLATE(template void drawBoundingBoxesH, (nd4j::LaunchContext* context, NDArray const* images, NDArray const* boxes, NDArray const* colors, NDArray* output), FLOAT_TYPES); } } }